git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netdev.c
dpif-netdev: Use packet key to parse TCP flags.
72865317 1/*
ff073a71 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
2c0ea78f 34#include "classifier.h"
72865317 35#include "csum.h"
614c4892 36#include "dpif.h"
72865317 37#include "dpif-provider.h"
614c4892 38#include "dummy.h"
36956a7d 39#include "dynamic-string.h"
72865317
BP
40#include "flow.h"
41#include "hmap.h"
6c3eee82 42#include "latch.h"
72865317 43#include "list.h"
8c301900 44#include "meta-flow.h"
72865317 45#include "netdev.h"
de281153 46#include "netdev-vport.h"
cdee00fd 47#include "netlink.h"
f094af7b 48#include "odp-execute.h"
72865317
BP
49#include "odp-util.h"
50#include "ofp-print.h"
51#include "ofpbuf.h"
61e7deb1 52#include "ovs-rcu.h"
72865317
BP
53#include "packets.h"
54#include "poll-loop.h"
26c6b6cd 55#include "random.h"
d33ed218 56#include "seq.h"
462278db 57#include "shash.h"
0cbfe35d 58#include "sset.h"
72865317 59#include "timeval.h"
74cc3969 60#include "unixctl.h"
72865317 61#include "util.h"
72865317 62#include "vlog.h"
5136ce49 63
d98e6007 64VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317 65
2c0ea78f
GS
66/* By default, choose a priority in the middle. */
67#define NETDEV_RULE_PRIORITY 0x8000
68
72865317 69/* Configuration parameters. */
72865317
BP
70enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
71
72/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
73 * headers to be aligned on a 4-byte boundary. */
74enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
75
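/* (For reference: VLAN_HEADER_LEN is 4 bytes, so DP_NETDEV_HEADROOM works out
 * to 6 bytes.  dp_forwarder_main() reserves exactly this much headroom before
 * each receive so that a VLAN tag can later be pushed without reallocating.) */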
856081f6
BP
76/* Queues. */
77enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
78enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
79enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
80BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
81
8a4e3a85
BP
82/* Protects against changes to 'dp_netdevs'. */
83static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
84
85/* Contains all 'struct dp_netdev's. */
86static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
87 = SHASH_INITIALIZER(&dp_netdevs);
88
d88b629b
BP
89struct dp_netdev_upcall {
90 struct dpif_upcall upcall; /* Queued upcall information. */
91 struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */
92};
93
8a4e3a85
BP
94/* A queue passing packets from a struct dp_netdev to its clients.
95 *
96 *
97 * Thread-safety
98 * =============
99 *
100 * Any access at all requires the owning 'dp_netdev''s queue_mutex. */
856081f6 101struct dp_netdev_queue {
f5126b57
BP
102 struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
103 unsigned int head OVS_GUARDED;
104 unsigned int tail OVS_GUARDED;
856081f6
BP
105};
106
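/* Illustrative sketch (not a separate API): MAX_QUEUE_LEN is a power of two,
 * so 'head' and 'tail' are free-running counters and a slot is selected by
 * masking with QUEUE_MASK.  Enqueueing, under queue_mutex, looks roughly like:
 *
 *     if (q->head - q->tail < MAX_QUEUE_LEN) {
 *         struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
 *         ...fill in u->upcall and u->buf...
 *     }
 *
 * The queue is empty when head == tail; dp_netdev_output_userspace() and
 * dpif_netdev_recv() below follow this pattern. */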
8a4e3a85
BP
107/* Datapath based on the network device interface from netdev.h.
108 *
109 *
110 * Thread-safety
111 * =============
112 *
113 * Some members, marked 'const', are immutable. Accessing other members
114 * requires synchronization, as noted in more detail below.
115 *
116 * Acquisition order is, from outermost to innermost:
117 *
118 * dp_netdev_mutex (global)
119 * port_rwlock
120 * flow_mutex
121 * cls.rwlock
122 * queue_mutex
123 */
72865317 124struct dp_netdev {
8a4e3a85
BP
125 const struct dpif_class *const class;
126 const char *const name;
6a8267c5
BP
127 struct ovs_refcount ref_cnt;
128 atomic_flag destroyed;
72865317 129
8a4e3a85
BP
130 /* Flows.
131 *
132 * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
133 *
134 * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
135 * the 'cls->rwlock' write lock. (The outer 'flow_mutex' allows writers to
136 * atomically perform multiple operations on 'cls' and 'flow_table'.)
137 */
138 struct ovs_mutex flow_mutex;
139 struct classifier cls; /* Classifier. Protected by cls.rwlock. */
140 struct hmap flow_table OVS_GUARDED; /* Flow table. */
141
142 /* Queues.
143 *
144 * Everything in 'queues' is protected by 'queue_mutex'. */
f5126b57
BP
145 struct ovs_mutex queue_mutex;
146 struct dp_netdev_queue queues[N_QUEUES];
d33ed218 147 struct seq *queue_seq; /* Incremented whenever a packet is queued. */
72865317 148
8a4e3a85
BP
149 /* Statistics.
150 *
51852a57
BP
151 * ovsthread_stats is internally synchronized. */
152 struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
72865317 153
8a4e3a85
BP
154 /* Ports.
155 *
156 * Any lookup into 'ports' or any access to the dp_netdev_ports found
157 * through 'ports' requires taking 'port_rwlock'. */
158 struct ovs_rwlock port_rwlock;
159 struct hmap ports OVS_GUARDED;
d33ed218 160 struct seq *port_seq; /* Incremented whenever a port changes. */
6c3eee82
BP
161
162 /* Forwarding threads. */
163 struct latch exit_latch;
164 struct dp_forwarder *forwarders;
165 size_t n_forwarders;
72865317
BP
166};
167
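/* Illustrative sketch (a composite path, not copied from any single function):
 * honoring the acquisition order documented above, code that needed the port
 * list, the flow table, and the classifier together would nest the locks as:
 *
 *     ovs_rwlock_rdlock(&dp->port_rwlock);
 *     ovs_mutex_lock(&dp->flow_mutex);
 *     fat_rwlock_wrlock(&dp->cls.rwlock);
 *     ...modify dp->cls and dp->flow_table...
 *     fat_rwlock_unlock(&dp->cls.rwlock);
 *     ovs_mutex_unlock(&dp->flow_mutex);
 *     ovs_rwlock_unlock(&dp->port_rwlock);
 *
 * Compare dp_netdev_flow_flush(), which takes only the inner
 * flow_mutex/cls.rwlock pair in this same order. */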
8a4e3a85
BP
168static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
169 odp_port_t)
170 OVS_REQ_RDLOCK(dp->port_rwlock);
ff073a71 171
51852a57
BP
172enum dp_stat_type {
173 DP_STAT_HIT, /* Packets that matched in the flow table. */
174 DP_STAT_MISS, /* Packets that did not match. */
175 DP_STAT_LOST, /* Packets not passed up to the client. */
176 DP_N_STATS
177};
178
179/* Contained by struct dp_netdev's 'stats' member. */
180struct dp_netdev_stats {
181 struct ovs_mutex mutex; /* Protects 'n'. */
182
183 /* Indexed by DP_STAT_*, protected by 'mutex'. */
184 unsigned long long int n[DP_N_STATS] OVS_GUARDED;
185};
186
187
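/* Illustrative sketch (mirrors dp_netdev_count_packet() further below): a
 * datapath thread bumps a counter by locking only its own per-thread bucket,
 *
 *     bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
 *     ovs_mutex_lock(&bucket->mutex);
 *     bucket->n[DP_STAT_HIT]++;
 *     ovs_mutex_unlock(&bucket->mutex);
 *
 * while dpif_netdev_get_stats() sums every bucket with
 * OVSTHREAD_STATS_FOR_EACH_BUCKET, so counting never contends globally. */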
72865317
BP
188/* A port in a netdev-based datapath. */
189struct dp_netdev_port {
ff073a71
BP
190 struct hmap_node node; /* Node in dp_netdev's 'ports'. */
191 odp_port_t port_no;
72865317 192 struct netdev *netdev;
4b609110 193 struct netdev_saved_flags *sf;
796223f5 194 struct netdev_rx *rx;
0cbfe35d 195 char *type; /* Port type as requested by user. */
72865317
BP
196};
197
8a4e3a85
BP
198/* A flow in dp_netdev's 'flow_table'.
199 *
200 *
201 * Thread-safety
202 * =============
203 *
204 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
205 * its dp_netdev's classifier. The text below calls this classifier 'cls'.
206 *
207 * Motivation
208 * ----------
209 *
210 * The thread safety rules described here for "struct dp_netdev_flow" are
211 * motivated by two goals:
212 *
213 * - Prevent threads that read members of "struct dp_netdev_flow" from
214 * reading bad data due to changes by some thread concurrently modifying
215 * those members.
216 *
217 * - Prevent two threads making changes to members of a given "struct
218 * dp_netdev_flow" from interfering with each other.
219 *
220 *
221 * Rules
222 * -----
223 *
 224 * A flow 'flow' may be accessed without a risk of being freed by code that
 225 * holds a read-lock or write-lock on 'cls->rwlock' or that does not quiesce
 226 * while it uses the flow.  Code that needs to hold onto a flow for a while
 227 * should take 'cls->rwlock', find the flow it needs, and avoid quiescing until
 228 * it is done: dp_netdev_remove_flow() frees flows only via ovsrcu_postpone().
 229 *
 230 * The RCU grace period protects 'flow' from being freed.  It doesn't protect
 231 * the flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
 232 * protect members of 'flow' from modification (that's 'flow->mutex').
 233 *
 234 * 'flow->mutex' protects the members of 'flow' from modification.  It doesn't
 235 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
 236 * doesn't prevent the flow from being freed (that's the RCU grace period).
237 *
238 * Some members, marked 'const', are immutable. Accessing other members
239 * requires synchronization, as noted in more detail below.
240 */
72865317 241struct dp_netdev_flow {
2c0ea78f 242 /* Packet classification. */
8a4e3a85 243 const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */
2c0ea78f 244
8a4e3a85
BP
245 /* Hash table index by unmasked flow. */
246 const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
247 const struct flow flow; /* The flow that created this entry. */
72865317 248
8a4e3a85
BP
249 /* Protects members marked OVS_GUARDED.
250 *
251 * Acquire after datapath's flow_mutex. */
252 struct ovs_mutex mutex OVS_ACQ_AFTER(dp_netdev_mutex);
253
254 /* Statistics.
255 *
256 * Reading or writing these members requires 'mutex'. */
679ba04c 257 struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */
8a4e3a85
BP
258
259 /* Actions.
260 *
261 * Reading 'actions' requires 'mutex'.
262 * Writing 'actions' requires 'mutex' and (to allow for transactions) the
263 * datapath's flow_mutex. */
61e7deb1 264 OVSRCU_TYPE(struct dp_netdev_actions *) actions;
72865317
BP
265};
266
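/* Illustrative sketch (this is what dp_netdev_lookup_flow() below does): a
 * typical reader holds 'cls->rwlock' only for the lookup itself and then relies
 * on RCU, because dp_netdev_remove_flow() frees flows only via ovsrcu_postpone():
 *
 *     fat_rwlock_rdlock(&dp->cls.rwlock);
 *     netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
 *     fat_rwlock_unlock(&dp->cls.rwlock);
 *     if (netdev_flow) {
 *         ...safe to use 'netdev_flow' until this thread quiesces...
 *     }
 */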
61e7deb1 267static void dp_netdev_flow_free(struct dp_netdev_flow *);
8a4e3a85 268
679ba04c
BP
269/* Contained by struct dp_netdev_flow's 'stats' member. */
270struct dp_netdev_flow_stats {
271 struct ovs_mutex mutex; /* Guards all the other members. */
272
273 long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
274 long long int packet_count OVS_GUARDED; /* Number of packets matched. */
275 long long int byte_count OVS_GUARDED; /* Number of bytes matched. */
276 uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
277};
278
a84cb64a
BP
279/* A set of datapath actions within a "struct dp_netdev_flow".
280 *
281 *
282 * Thread-safety
283 * =============
284 *
 285 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
 286 * freed by code that holds 'flow->mutex' (where 'flow' is the dp_netdev_flow
 287 * for which 'flow->actions == actions') or by code that does not quiesce while
 288 * it uses 'actions': old actions are freed only via ovsrcu_postpone(). */
289struct dp_netdev_actions {
a84cb64a
BP
290 /* These members are immutable: they do not change during the struct's
291 * lifetime. */
292 struct nlattr *actions; /* Sequence of OVS_ACTION_ATTR_* attributes. */
293 unsigned int size; /* Size of 'actions', in bytes. */
294};
295
296struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
297 size_t);
61e7deb1
BP
298struct dp_netdev_actions *dp_netdev_flow_get_actions(
299 const struct dp_netdev_flow *);
300static void dp_netdev_actions_free(struct dp_netdev_actions *);
a84cb64a 301
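/* Illustrative sketch (taken from dpif_netdev_flow_put() further below):
 * replacing a flow's actions publishes the new copy first and defers freeing
 * the old one until all readers have quiesced:
 *
 *     new_actions = dp_netdev_actions_create(put->actions, put->actions_len);
 *     old_actions = dp_netdev_flow_get_actions(netdev_flow);
 *     ovsrcu_set(&netdev_flow->actions, new_actions);
 *     ...
 *     ovsrcu_postpone(dp_netdev_actions_free, old_actions);
 *
 * A pointer obtained through dp_netdev_flow_get_actions() must therefore not
 * be used past a quiescent point. */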
6c3eee82
BP
302/* A thread that receives packets from some ports, looks them up in the flow
303 * table, and executes the actions it finds. */
304struct dp_forwarder {
305 struct dp_netdev *dp;
306 pthread_t thread;
307 char *name;
308 uint32_t min_hash, max_hash;
309};
310
72865317
BP
311/* Interface to netdev-based datapath. */
312struct dpif_netdev {
313 struct dpif dpif;
314 struct dp_netdev *dp;
d33ed218 315 uint64_t last_port_seq;
72865317
BP
316};
317
8a4e3a85
BP
318static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
319 struct dp_netdev_port **portp)
320 OVS_REQ_RDLOCK(dp->port_rwlock);
321static int get_port_by_name(struct dp_netdev *dp, const char *devname,
322 struct dp_netdev_port **portp)
323 OVS_REQ_RDLOCK(dp->port_rwlock);
324static void dp_netdev_free(struct dp_netdev *)
325 OVS_REQUIRES(dp_netdev_mutex);
72865317 326static void dp_netdev_flow_flush(struct dp_netdev *);
8a4e3a85
BP
327static int do_add_port(struct dp_netdev *dp, const char *devname,
328 const char *type, odp_port_t port_no)
329 OVS_REQ_WRLOCK(dp->port_rwlock);
330static int do_del_port(struct dp_netdev *dp, odp_port_t port_no)
331 OVS_REQ_WRLOCK(dp->port_rwlock);
614c4892
BP
332static int dpif_netdev_open(const struct dpif_class *, const char *name,
333 bool create, struct dpif **);
f5126b57 334static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
856081f6 335 int queue_no, const struct flow *,
f5126b57
BP
336 const struct nlattr *userdata)
337 OVS_EXCLUDED(dp->queue_mutex);
8a4e3a85
BP
338static void dp_netdev_execute_actions(struct dp_netdev *dp,
339 const struct flow *, struct ofpbuf *,
340 struct pkt_metadata *,
4edb9ae9 341 const struct nlattr *actions,
8a4e3a85
BP
342 size_t actions_len)
343 OVS_REQ_RDLOCK(dp->port_rwlock);
758c456d 344static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
8a4e3a85
BP
345 struct pkt_metadata *)
346 OVS_REQ_RDLOCK(dp->port_rwlock);
6c3eee82 347static void dp_netdev_set_threads(struct dp_netdev *, int n);
72865317
BP
348
349static struct dpif_netdev *
350dpif_netdev_cast(const struct dpif *dpif)
351{
cb22974d 352 ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
72865317
BP
353 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
354}
355
356static struct dp_netdev *
357get_dp_netdev(const struct dpif *dpif)
358{
359 return dpif_netdev_cast(dpif)->dp;
360}
361
2197d7ab
GL
362static int
363dpif_netdev_enumerate(struct sset *all_dps)
364{
365 struct shash_node *node;
366
97be1538 367 ovs_mutex_lock(&dp_netdev_mutex);
2197d7ab
GL
368 SHASH_FOR_EACH(node, &dp_netdevs) {
369 sset_add(all_dps, node->name);
370 }
97be1538 371 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 372
2197d7ab
GL
373 return 0;
374}
375
add90f6f
EJ
376static bool
377dpif_netdev_class_is_dummy(const struct dpif_class *class)
378{
379 return class != &dpif_netdev_class;
380}
381
0aeaabc8
JP
382static const char *
383dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
384{
385 return strcmp(type, "internal") ? type
add90f6f 386 : dpif_netdev_class_is_dummy(class) ? "dummy"
0aeaabc8
JP
387 : "tap";
388}
389
72865317
BP
390static struct dpif *
391create_dpif_netdev(struct dp_netdev *dp)
392{
462278db 393 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 394 struct dpif_netdev *dpif;
72865317 395
6a8267c5 396 ovs_refcount_ref(&dp->ref_cnt);
72865317 397
72865317 398 dpif = xmalloc(sizeof *dpif);
614c4892 399 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 400 dpif->dp = dp;
d33ed218 401 dpif->last_port_seq = seq_read(dp->port_seq);
72865317
BP
402
403 return &dpif->dpif;
404}
405
4e022ec0
AW
406/* Choose an unused, non-zero port number and return it on success.
407 * Return ODPP_NONE on failure. */
408static odp_port_t
e44768b7 409choose_port(struct dp_netdev *dp, const char *name)
8a4e3a85 410 OVS_REQ_RDLOCK(dp->port_rwlock)
e44768b7 411{
4e022ec0 412 uint32_t port_no;
e44768b7
JP
413
414 if (dp->class != &dpif_netdev_class) {
415 const char *p;
416 int start_no = 0;
417
418 /* If the port name begins with "br", start the number search at
419 * 100 to make writing tests easier. */
420 if (!strncmp(name, "br", 2)) {
421 start_no = 100;
422 }
423
424 /* If the port name contains a number, try to assign that port number.
425 * This can make writing unit tests easier because port numbers are
426 * predictable. */
427 for (p = name; *p != '\0'; p++) {
428 if (isdigit((unsigned char) *p)) {
429 port_no = start_no + strtol(p, NULL, 10);
ff073a71
BP
430 if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
431 && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 432 return u32_to_odp(port_no);
e44768b7
JP
433 }
434 break;
435 }
436 }
437 }
438
ff073a71
BP
439 for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
440 if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 441 return u32_to_odp(port_no);
e44768b7
JP
442 }
443 }
444
4e022ec0 445 return ODPP_NONE;
e44768b7
JP
446}
447
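/* Worked example (illustrative): with the heuristics above in a dummy
 * datapath, adding a port named "br0" requests port number 100 (start_no 100
 * plus the trailing 0), "p3" requests 3, and a name without any digits simply
 * falls through to the first free number counting up from 1. */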
72865317 448static int
614c4892
BP
449create_dp_netdev(const char *name, const struct dpif_class *class,
450 struct dp_netdev **dpp)
8a4e3a85 451 OVS_REQUIRES(dp_netdev_mutex)
72865317
BP
452{
453 struct dp_netdev *dp;
454 int error;
455 int i;
456
462278db 457 dp = xzalloc(sizeof *dp);
8a4e3a85
BP
458 shash_add(&dp_netdevs, name, dp);
459
460 *CONST_CAST(const struct dpif_class **, &dp->class) = class;
461 *CONST_CAST(const char **, &dp->name) = xstrdup(name);
6a8267c5 462 ovs_refcount_init(&dp->ref_cnt);
1a65ba85 463 atomic_flag_clear(&dp->destroyed);
8a4e3a85
BP
464
465 ovs_mutex_init(&dp->flow_mutex);
466 classifier_init(&dp->cls, NULL);
467 hmap_init(&dp->flow_table);
468
f5126b57
BP
469 ovs_mutex_init(&dp->queue_mutex);
470 ovs_mutex_lock(&dp->queue_mutex);
72865317 471 for (i = 0; i < N_QUEUES; i++) {
856081f6 472 dp->queues[i].head = dp->queues[i].tail = 0;
72865317 473 }
f5126b57 474 ovs_mutex_unlock(&dp->queue_mutex);
d33ed218 475 dp->queue_seq = seq_create();
ed27e010 476
51852a57 477 ovsthread_stats_init(&dp->stats);
ed27e010 478
8a4e3a85 479 ovs_rwlock_init(&dp->port_rwlock);
ff073a71 480 hmap_init(&dp->ports);
d33ed218 481 dp->port_seq = seq_create();
6c3eee82 482 latch_init(&dp->exit_latch);
e44768b7 483
8a4e3a85 484 ovs_rwlock_wrlock(&dp->port_rwlock);
4e022ec0 485 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
8a4e3a85 486 ovs_rwlock_unlock(&dp->port_rwlock);
72865317
BP
487 if (error) {
488 dp_netdev_free(dp);
462278db 489 return error;
72865317 490 }
6c3eee82 491 dp_netdev_set_threads(dp, 2);
72865317 492
462278db 493 *dpp = dp;
72865317
BP
494 return 0;
495}
496
497static int
614c4892 498dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 499 bool create, struct dpif **dpifp)
72865317 500{
462278db 501 struct dp_netdev *dp;
5279f8fd 502 int error;
462278db 503
97be1538 504 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
505 dp = shash_find_data(&dp_netdevs, name);
506 if (!dp) {
5279f8fd 507 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 508 } else {
5279f8fd
BP
509 error = (dp->class != class ? EINVAL
510 : create ? EEXIST
511 : 0);
512 }
513 if (!error) {
514 *dpifp = create_dpif_netdev(dp);
72865317 515 }
97be1538 516 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 517
5279f8fd 518 return error;
72865317
BP
519}
520
521static void
1ba530f4 522dp_netdev_purge_queues(struct dp_netdev *dp)
72865317
BP
523{
524 int i;
525
f5126b57 526 ovs_mutex_lock(&dp->queue_mutex);
72865317 527 for (i = 0; i < N_QUEUES; i++) {
856081f6 528 struct dp_netdev_queue *q = &dp->queues[i];
856081f6 529
1ba530f4 530 while (q->tail != q->head) {
d88b629b 531 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
da546e07 532 ofpbuf_uninit(&u->upcall.packet);
d88b629b 533 ofpbuf_uninit(&u->buf);
856081f6 534 }
72865317 535 }
f5126b57 536 ovs_mutex_unlock(&dp->queue_mutex);
1ba530f4
BP
537}
538
8a4e3a85
BP
539/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
540 * through the 'dp_netdevs' shash while freeing 'dp'. */
1ba530f4
BP
541static void
542dp_netdev_free(struct dp_netdev *dp)
8a4e3a85 543 OVS_REQUIRES(dp_netdev_mutex)
1ba530f4 544{
4ad28026 545 struct dp_netdev_port *port, *next;
51852a57
BP
546 struct dp_netdev_stats *bucket;
547 int i;
4ad28026 548
8a4e3a85
BP
549 shash_find_and_delete(&dp_netdevs, dp->name);
550
6c3eee82
BP
551 dp_netdev_set_threads(dp, 0);
552 free(dp->forwarders);
553
1ba530f4 554 dp_netdev_flow_flush(dp);
8a4e3a85 555 ovs_rwlock_wrlock(&dp->port_rwlock);
ff073a71 556 HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) {
1ba530f4
BP
557 do_del_port(dp, port->port_no);
558 }
8a4e3a85 559 ovs_rwlock_unlock(&dp->port_rwlock);
51852a57
BP
560
561 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
562 ovs_mutex_destroy(&bucket->mutex);
563 free_cacheline(bucket);
564 }
565 ovsthread_stats_destroy(&dp->stats);
f5126b57 566
1ba530f4 567 dp_netdev_purge_queues(dp);
d33ed218 568 seq_destroy(dp->queue_seq);
f5126b57
BP
569 ovs_mutex_destroy(&dp->queue_mutex);
570
2c0ea78f 571 classifier_destroy(&dp->cls);
72865317 572 hmap_destroy(&dp->flow_table);
8a4e3a85 573 ovs_mutex_destroy(&dp->flow_mutex);
d33ed218 574 seq_destroy(dp->port_seq);
ff073a71 575 hmap_destroy(&dp->ports);
6c3eee82 576 latch_destroy(&dp->exit_latch);
8a4e3a85 577 free(CONST_CAST(char *, dp->name));
72865317
BP
578 free(dp);
579}
580
8a4e3a85
BP
581static void
582dp_netdev_unref(struct dp_netdev *dp)
583{
584 if (dp) {
585 /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
586 * get a new reference to 'dp' through the 'dp_netdevs' shash. */
587 ovs_mutex_lock(&dp_netdev_mutex);
588 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
589 dp_netdev_free(dp);
590 }
591 ovs_mutex_unlock(&dp_netdev_mutex);
592 }
593}
594
72865317
BP
595static void
596dpif_netdev_close(struct dpif *dpif)
597{
598 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 599
8a4e3a85 600 dp_netdev_unref(dp);
72865317
BP
601 free(dpif);
602}
603
604static int
7dab847a 605dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
606{
607 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 608
6a8267c5
BP
609 if (!atomic_flag_test_and_set(&dp->destroyed)) {
610 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
611 /* Can't happen: 'dpif' still owns a reference to 'dp'. */
612 OVS_NOT_REACHED();
613 }
614 }
5279f8fd 615
72865317
BP
616 return 0;
617}
618
619static int
a8d9304d 620dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
621{
622 struct dp_netdev *dp = get_dp_netdev(dpif);
51852a57
BP
623 struct dp_netdev_stats *bucket;
624 size_t i;
5279f8fd 625
06f81620 626 fat_rwlock_rdlock(&dp->cls.rwlock);
f180c2e2 627 stats->n_flows = hmap_count(&dp->flow_table);
06f81620 628 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 629
51852a57
BP
630 stats->n_hit = stats->n_missed = stats->n_lost = 0;
631 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
632 ovs_mutex_lock(&bucket->mutex);
633 stats->n_hit += bucket->n[DP_STAT_HIT];
634 stats->n_missed += bucket->n[DP_STAT_MISS];
635 stats->n_lost += bucket->n[DP_STAT_LOST];
636 ovs_mutex_unlock(&bucket->mutex);
637 }
1ce3fa06 638 stats->n_masks = UINT32_MAX;
847108dc 639 stats->n_mask_hit = UINT64_MAX;
5279f8fd 640
72865317
BP
641 return 0;
642}
643
72865317 644static int
c3827f61 645do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 646 odp_port_t port_no)
8a4e3a85 647 OVS_REQ_WRLOCK(dp->port_rwlock)
72865317 648{
4b609110 649 struct netdev_saved_flags *sf;
72865317
BP
650 struct dp_netdev_port *port;
651 struct netdev *netdev;
796223f5 652 struct netdev_rx *rx;
2499a8ce 653 enum netdev_flags flags;
0cbfe35d 654 const char *open_type;
72865317
BP
655 int error;
656
657 /* XXX reject devices already in some dp_netdev. */
658
659 /* Open and validate network device. */
0aeaabc8 660 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 661 error = netdev_open(devname, open_type, &netdev);
72865317
BP
662 if (error) {
663 return error;
664 }
72865317
BP
665 /* XXX reject non-Ethernet devices */
666
2499a8ce
AC
667 netdev_get_flags(netdev, &flags);
668 if (flags & NETDEV_LOOPBACK) {
669 VLOG_ERR("%s: cannot add a loopback device", devname);
670 netdev_close(netdev);
671 return EINVAL;
672 }
673
796223f5 674 error = netdev_rx_open(netdev, &rx);
add90f6f
EJ
675 if (error
676 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
7b6b0ef4 677 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
10a89ef0 678 devname, ovs_strerror(errno));
7b6b0ef4
BP
679 netdev_close(netdev);
680 return error;
681 }
682
4b609110 683 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 684 if (error) {
796223f5 685 netdev_rx_close(rx);
72865317
BP
686 netdev_close(netdev);
687 return error;
688 }
689
690 port = xmalloc(sizeof *port);
691 port->port_no = port_no;
692 port->netdev = netdev;
4b609110 693 port->sf = sf;
796223f5 694 port->rx = rx;
0cbfe35d 695 port->type = xstrdup(type);
72865317 696
ff073a71 697 hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
d33ed218 698 seq_change(dp->port_seq);
72865317
BP
699
700 return 0;
701}
702
247527db
BP
703static int
704dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 705 odp_port_t *port_nop)
247527db
BP
706{
707 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
708 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
709 const char *dpif_port;
4e022ec0 710 odp_port_t port_no;
5279f8fd 711 int error;
247527db 712
8a4e3a85 713 ovs_rwlock_wrlock(&dp->port_rwlock);
3aa30359 714 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0 715 if (*port_nop != ODPP_NONE) {
ff073a71
BP
716 port_no = *port_nop;
717 error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
232dfa4a 718 } else {
3aa30359 719 port_no = choose_port(dp, dpif_port);
5279f8fd 720 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 721 }
5279f8fd 722 if (!error) {
247527db 723 *port_nop = port_no;
5279f8fd 724 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 725 }
8a4e3a85 726 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
727
728 return error;
72865317
BP
729}
730
731static int
4e022ec0 732dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
733{
734 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
735 int error;
736
8a4e3a85 737 ovs_rwlock_wrlock(&dp->port_rwlock);
5279f8fd 738 error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
8a4e3a85 739 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
740
741 return error;
72865317
BP
742}
743
744static bool
4e022ec0 745is_valid_port_number(odp_port_t port_no)
72865317 746{
ff073a71
BP
747 return port_no != ODPP_NONE;
748}
749
750static struct dp_netdev_port *
751dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
8a4e3a85 752 OVS_REQ_RDLOCK(dp->port_rwlock)
ff073a71
BP
753{
754 struct dp_netdev_port *port;
755
756 HMAP_FOR_EACH_IN_BUCKET (port, node, hash_int(odp_to_u32(port_no), 0),
757 &dp->ports) {
758 if (port->port_no == port_no) {
759 return port;
760 }
761 }
762 return NULL;
72865317
BP
763}
764
765static int
766get_port_by_number(struct dp_netdev *dp,
4e022ec0 767 odp_port_t port_no, struct dp_netdev_port **portp)
8a4e3a85 768 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
769{
770 if (!is_valid_port_number(port_no)) {
771 *portp = NULL;
772 return EINVAL;
773 } else {
ff073a71 774 *portp = dp_netdev_lookup_port(dp, port_no);
72865317
BP
775 return *portp ? 0 : ENOENT;
776 }
777}
778
779static int
780get_port_by_name(struct dp_netdev *dp,
781 const char *devname, struct dp_netdev_port **portp)
8a4e3a85 782 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
783{
784 struct dp_netdev_port *port;
785
ff073a71 786 HMAP_FOR_EACH (port, node, &dp->ports) {
3efb6063 787 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
788 *portp = port;
789 return 0;
790 }
791 }
792 return ENOENT;
793}
794
795static int
4e022ec0 796do_del_port(struct dp_netdev *dp, odp_port_t port_no)
8a4e3a85 797 OVS_REQ_WRLOCK(dp->port_rwlock)
72865317
BP
798{
799 struct dp_netdev_port *port;
800 int error;
801
802 error = get_port_by_number(dp, port_no, &port);
803 if (error) {
804 return error;
805 }
806
ff073a71 807 hmap_remove(&dp->ports, &port->node);
d33ed218 808 seq_change(dp->port_seq);
72865317
BP
809
810 netdev_close(port->netdev);
4b609110 811 netdev_restore_flags(port->sf);
796223f5 812 netdev_rx_close(port->rx);
0cbfe35d 813 free(port->type);
72865317
BP
814 free(port);
815
816 return 0;
817}
818
819static void
4c738a8d
BP
820answer_port_query(const struct dp_netdev_port *port,
821 struct dpif_port *dpif_port)
72865317 822{
3efb6063 823 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 824 dpif_port->type = xstrdup(port->type);
4c738a8d 825 dpif_port->port_no = port->port_no;
72865317
BP
826}
827
828static int
4e022ec0 829dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 830 struct dpif_port *dpif_port)
72865317
BP
831{
832 struct dp_netdev *dp = get_dp_netdev(dpif);
833 struct dp_netdev_port *port;
834 int error;
835
8a4e3a85 836 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 837 error = get_port_by_number(dp, port_no, &port);
4afba28d 838 if (!error && dpif_port) {
4c738a8d 839 answer_port_query(port, dpif_port);
72865317 840 }
8a4e3a85 841 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 842
72865317
BP
843 return error;
844}
845
846static int
847dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 848 struct dpif_port *dpif_port)
72865317
BP
849{
850 struct dp_netdev *dp = get_dp_netdev(dpif);
851 struct dp_netdev_port *port;
852 int error;
853
8a4e3a85 854 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 855 error = get_port_by_name(dp, devname, &port);
4afba28d 856 if (!error && dpif_port) {
4c738a8d 857 answer_port_query(port, dpif_port);
72865317 858 }
8a4e3a85 859 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 860
72865317
BP
861 return error;
862}
863
61e7deb1
BP
864static void
865dp_netdev_flow_free(struct dp_netdev_flow *flow)
866{
867 struct dp_netdev_flow_stats *bucket;
868 size_t i;
869
870 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
871 ovs_mutex_destroy(&bucket->mutex);
872 free_cacheline(bucket);
873 }
874 ovsthread_stats_destroy(&flow->stats);
875
876 cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
877 dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
878 ovs_mutex_destroy(&flow->mutex);
879 free(flow);
880}
881
72865317 882static void
8a4e3a85
BP
883dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
884 OVS_REQ_WRLOCK(dp->cls.rwlock)
885 OVS_REQUIRES(dp->flow_mutex)
72865317 886{
8a4e3a85
BP
887 struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
888 struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);
2c0ea78f 889
8a4e3a85
BP
890 classifier_remove(&dp->cls, cr);
891 hmap_remove(&dp->flow_table, node);
61e7deb1 892 ovsrcu_postpone(dp_netdev_flow_free, flow);
72865317
BP
893}
894
895static void
896dp_netdev_flow_flush(struct dp_netdev *dp)
897{
1763b4b8 898 struct dp_netdev_flow *netdev_flow, *next;
72865317 899
8a4e3a85 900 ovs_mutex_lock(&dp->flow_mutex);
06f81620 901 fat_rwlock_wrlock(&dp->cls.rwlock);
1763b4b8 902 HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
8a4e3a85 903 dp_netdev_remove_flow(dp, netdev_flow);
72865317 904 }
06f81620 905 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 906 ovs_mutex_unlock(&dp->flow_mutex);
72865317
BP
907}
908
909static int
910dpif_netdev_flow_flush(struct dpif *dpif)
911{
912 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 913
72865317
BP
914 dp_netdev_flow_flush(dp);
915 return 0;
916}
917
b0ec0f27 918struct dp_netdev_port_state {
ff073a71
BP
919 uint32_t bucket;
920 uint32_t offset;
4c738a8d 921 char *name;
b0ec0f27
BP
922};
923
924static int
925dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
926{
927 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
928 return 0;
929}
930
72865317 931static int
b0ec0f27 932dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 933 struct dpif_port *dpif_port)
72865317 934{
b0ec0f27 935 struct dp_netdev_port_state *state = state_;
72865317 936 struct dp_netdev *dp = get_dp_netdev(dpif);
ff073a71
BP
937 struct hmap_node *node;
938 int retval;
72865317 939
8a4e3a85 940 ovs_rwlock_rdlock(&dp->port_rwlock);
ff073a71
BP
941 node = hmap_at_position(&dp->ports, &state->bucket, &state->offset);
942 if (node) {
943 struct dp_netdev_port *port;
5279f8fd 944
ff073a71
BP
945 port = CONTAINER_OF(node, struct dp_netdev_port, node);
946
947 free(state->name);
948 state->name = xstrdup(netdev_get_name(port->netdev));
949 dpif_port->name = state->name;
950 dpif_port->type = port->type;
951 dpif_port->port_no = port->port_no;
952
953 retval = 0;
954 } else {
955 retval = EOF;
72865317 956 }
8a4e3a85 957 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 958
ff073a71 959 return retval;
b0ec0f27
BP
960}
961
962static int
4c738a8d 963dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 964{
4c738a8d
BP
965 struct dp_netdev_port_state *state = state_;
966 free(state->name);
b0ec0f27
BP
967 free(state);
968 return 0;
72865317
BP
969}
970
971static int
67a4917b 972dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
973{
974 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 975 uint64_t new_port_seq;
5279f8fd
BP
976 int error;
977
d33ed218
BP
978 new_port_seq = seq_read(dpif->dp->port_seq);
979 if (dpif->last_port_seq != new_port_seq) {
980 dpif->last_port_seq = new_port_seq;
5279f8fd 981 error = ENOBUFS;
72865317 982 } else {
5279f8fd 983 error = EAGAIN;
72865317 984 }
5279f8fd
BP
985
986 return error;
72865317
BP
987}
988
989static void
990dpif_netdev_port_poll_wait(const struct dpif *dpif_)
991{
992 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd 993
d33ed218 994 seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
8a4e3a85
BP
995}
996
997static struct dp_netdev_flow *
998dp_netdev_flow_cast(const struct cls_rule *cr)
999{
1000 return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
72865317
BP
1001}
1002
72865317 1003static struct dp_netdev_flow *
2c0ea78f 1004dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 1005 OVS_EXCLUDED(dp->cls.rwlock)
2c0ea78f 1006{
8a4e3a85 1007 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1008
06f81620 1009 fat_rwlock_rdlock(&dp->cls.rwlock);
8a4e3a85 1010 netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
06f81620 1011 fat_rwlock_unlock(&dp->cls.rwlock);
2c0ea78f 1012
8a4e3a85 1013 return netdev_flow;
2c0ea78f
GS
1014}
1015
1016static struct dp_netdev_flow *
1017dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 1018 OVS_REQ_RDLOCK(dp->cls.rwlock)
72865317 1019{
1763b4b8 1020 struct dp_netdev_flow *netdev_flow;
72865317 1021
2c0ea78f 1022 HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
1763b4b8 1023 &dp->flow_table) {
2c0ea78f 1024 if (flow_equal(&netdev_flow->flow, flow)) {
61e7deb1 1025 return netdev_flow;
72865317
BP
1026 }
1027 }
8a4e3a85 1028
72865317
BP
1029 return NULL;
1030}
1031
1032static void
1763b4b8
GS
1033get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
1034 struct dpif_flow_stats *stats)
feebdea2 1035{
679ba04c
BP
1036 struct dp_netdev_flow_stats *bucket;
1037 size_t i;
1038
1039 memset(stats, 0, sizeof *stats);
1040 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
1041 ovs_mutex_lock(&bucket->mutex);
1042 stats->n_packets += bucket->packet_count;
1043 stats->n_bytes += bucket->byte_count;
1044 stats->used = MAX(stats->used, bucket->used);
1045 stats->tcp_flags |= bucket->tcp_flags;
1046 ovs_mutex_unlock(&bucket->mutex);
1047 }
72865317
BP
1048}
1049
36956a7d 1050static int
8c301900
JR
1051dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1052 const struct nlattr *mask_key,
1053 uint32_t mask_key_len, const struct flow *flow,
1054 struct flow *mask)
1055{
1056 if (mask_key_len) {
80e44883
BP
1057 enum odp_key_fitness fitness;
1058
1059 fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
1060 if (fitness) {
8c301900
JR
1061 /* This should not happen: it indicates that
1062 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1063 * disagree on the acceptable form of a mask. Log the problem
1064 * as an error, with enough details to enable debugging. */
1065 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1066
1067 if (!VLOG_DROP_ERR(&rl)) {
1068 struct ds s;
1069
1070 ds_init(&s);
1071 odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
1072 true);
80e44883
BP
1073 VLOG_ERR("internal error parsing flow mask %s (%s)",
1074 ds_cstr(&s), odp_key_fitness_to_string(fitness));
8c301900
JR
1075 ds_destroy(&s);
1076 }
1077
1078 return EINVAL;
1079 }
1080 /* Force unwildcard the in_port. */
1081 mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
1082 } else {
1083 enum mf_field_id id;
1084 /* No mask key, unwildcard everything except fields whose
 1085 * prerequisites are not met. */
1086 memset(mask, 0x0, sizeof *mask);
1087
1088 for (id = 0; id < MFF_N_IDS; ++id) {
1089 /* Skip registers and metadata. */
1090 if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
1091 && id != MFF_METADATA) {
1092 const struct mf_field *mf = mf_from_id(id);
1093 if (mf_are_prereqs_ok(mf, flow)) {
1094 mf_mask_field(mf, mask);
1095 }
1096 }
1097 }
1098 }
1099
1100 return 0;
1101}
1102
1103static int
1104dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1105 struct flow *flow)
36956a7d 1106{
586ddea5
BP
1107 odp_port_t in_port;
1108
8c301900 1109 if (odp_flow_key_to_flow(key, key_len, flow)) {
36956a7d 1110 /* This should not happen: it indicates that odp_flow_key_from_flow()
8c301900
JR
1111 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1112 * flow. Log the problem as an error, with enough details to enable
1113 * debugging. */
36956a7d
BP
1114 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1115
1116 if (!VLOG_DROP_ERR(&rl)) {
1117 struct ds s;
1118
1119 ds_init(&s);
8c301900 1120 odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
36956a7d
BP
1121 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
1122 ds_destroy(&s);
1123 }
1124
1125 return EINVAL;
1126 }
1127
586ddea5
BP
1128 in_port = flow->in_port.odp_port;
1129 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
1130 return EINVAL;
1131 }
1132
36956a7d
BP
1133 return 0;
1134}
1135
72865317 1136static int
693c4a01 1137dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 1138 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 1139 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
1140{
1141 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1142 struct dp_netdev_flow *netdev_flow;
bc4a05c6
BP
1143 struct flow key;
1144 int error;
36956a7d 1145
feebdea2 1146 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
1147 if (error) {
1148 return error;
1149 }
14608a15 1150
06f81620 1151 fat_rwlock_rdlock(&dp->cls.rwlock);
2c0ea78f 1152 netdev_flow = dp_netdev_find_flow(dp, &key);
06f81620 1153 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1154
1763b4b8 1155 if (netdev_flow) {
5279f8fd 1156 if (stats) {
1763b4b8 1157 get_dpif_flow_stats(netdev_flow, stats);
5279f8fd 1158 }
679ba04c 1159
5279f8fd 1160 if (actionsp) {
61e7deb1 1161 struct dp_netdev_actions *actions;
8a4e3a85 1162
61e7deb1 1163 actions = dp_netdev_flow_get_actions(netdev_flow);
8a4e3a85 1164 *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
5279f8fd 1165 }
61e7deb1 1166 } else {
5279f8fd 1167 error = ENOENT;
72865317 1168 }
bc4a05c6 1169
5279f8fd 1170 return error;
72865317
BP
1171}
1172
72865317 1173static int
2c0ea78f
GS
1174dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
1175 const struct flow_wildcards *wc,
1176 const struct nlattr *actions,
1177 size_t actions_len)
8a4e3a85 1178 OVS_REQUIRES(dp->flow_mutex)
72865317 1179{
1763b4b8 1180 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1181 struct match match;
72865317 1182
1763b4b8 1183 netdev_flow = xzalloc(sizeof *netdev_flow);
8a4e3a85 1184 *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
8a4e3a85
BP
1185
1186 ovs_mutex_init(&netdev_flow->mutex);
8a4e3a85 1187
679ba04c
BP
1188 ovsthread_stats_init(&netdev_flow->stats);
1189
61e7deb1
BP
1190 ovsrcu_set(&netdev_flow->actions,
1191 dp_netdev_actions_create(actions, actions_len));
2c0ea78f
GS
1192
1193 match_init(&match, flow, wc);
8a4e3a85
BP
1194 cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
1195 &match, NETDEV_RULE_PRIORITY);
06f81620 1196 fat_rwlock_wrlock(&dp->cls.rwlock);
8a4e3a85
BP
1197 classifier_insert(&dp->cls,
1198 CONST_CAST(struct cls_rule *, &netdev_flow->cr));
1199 hmap_insert(&dp->flow_table,
1200 CONST_CAST(struct hmap_node *, &netdev_flow->node),
1201 flow_hash(flow, 0));
06f81620 1202 fat_rwlock_unlock(&dp->cls.rwlock);
72865317 1203
72865317
BP
1204 return 0;
1205}
1206
1207static void
1763b4b8 1208clear_stats(struct dp_netdev_flow *netdev_flow)
72865317 1209{
679ba04c
BP
1210 struct dp_netdev_flow_stats *bucket;
1211 size_t i;
1212
1213 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
1214 ovs_mutex_lock(&bucket->mutex);
1215 bucket->used = 0;
1216 bucket->packet_count = 0;
1217 bucket->byte_count = 0;
1218 bucket->tcp_flags = 0;
1219 ovs_mutex_unlock(&bucket->mutex);
1220 }
72865317
BP
1221}
1222
1223static int
89625d1e 1224dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
1225{
1226 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1227 struct dp_netdev_flow *netdev_flow;
2c0ea78f
GS
1228 struct flow flow;
1229 struct flow_wildcards wc;
36956a7d
BP
1230 int error;
1231
8c301900
JR
1232 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
1233 if (error) {
1234 return error;
1235 }
1236 error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
1237 put->mask, put->mask_len,
1238 &flow, &wc.masks);
36956a7d
BP
1239 if (error) {
1240 return error;
1241 }
72865317 1242
8a4e3a85 1243 ovs_mutex_lock(&dp->flow_mutex);
2c0ea78f 1244 netdev_flow = dp_netdev_lookup_flow(dp, &flow);
1763b4b8 1245 if (!netdev_flow) {
89625d1e 1246 if (put->flags & DPIF_FP_CREATE) {
72865317 1247 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
1248 if (put->stats) {
1249 memset(put->stats, 0, sizeof *put->stats);
feebdea2 1250 }
2c0ea78f 1251 error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
5279f8fd 1252 put->actions_len);
72865317 1253 } else {
5279f8fd 1254 error = EFBIG;
72865317
BP
1255 }
1256 } else {
5279f8fd 1257 error = ENOENT;
72865317
BP
1258 }
1259 } else {
2c0ea78f
GS
1260 if (put->flags & DPIF_FP_MODIFY
1261 && flow_equal(&flow, &netdev_flow->flow)) {
8a4e3a85
BP
1262 struct dp_netdev_actions *new_actions;
1263 struct dp_netdev_actions *old_actions;
1264
1265 new_actions = dp_netdev_actions_create(put->actions,
1266 put->actions_len);
1267
61e7deb1
BP
1268 old_actions = dp_netdev_flow_get_actions(netdev_flow);
1269 ovsrcu_set(&netdev_flow->actions, new_actions);
679ba04c 1270
a84cb64a
BP
1271 if (put->stats) {
1272 get_dpif_flow_stats(netdev_flow, put->stats);
1273 }
1274 if (put->flags & DPIF_FP_ZERO_STATS) {
1275 clear_stats(netdev_flow);
72865317 1276 }
8a4e3a85 1277
61e7deb1 1278 ovsrcu_postpone(dp_netdev_actions_free, old_actions);
2c0ea78f 1279 } else if (put->flags & DPIF_FP_CREATE) {
5279f8fd 1280 error = EEXIST;
2c0ea78f
GS
1281 } else {
1282 /* Overlapping flow. */
1283 error = EINVAL;
72865317
BP
1284 }
1285 }
8a4e3a85 1286 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1287
1288 return error;
72865317
BP
1289}
1290
72865317 1291static int
b99d3cee 1292dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
1293{
1294 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1295 struct dp_netdev_flow *netdev_flow;
14608a15 1296 struct flow key;
36956a7d
BP
1297 int error;
1298
b99d3cee 1299 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
1300 if (error) {
1301 return error;
1302 }
72865317 1303
8a4e3a85 1304 ovs_mutex_lock(&dp->flow_mutex);
06f81620 1305 fat_rwlock_wrlock(&dp->cls.rwlock);
2c0ea78f 1306 netdev_flow = dp_netdev_find_flow(dp, &key);
1763b4b8 1307 if (netdev_flow) {
b99d3cee 1308 if (del->stats) {
1763b4b8 1309 get_dpif_flow_stats(netdev_flow, del->stats);
feebdea2 1310 }
8a4e3a85 1311 dp_netdev_remove_flow(dp, netdev_flow);
72865317 1312 } else {
5279f8fd 1313 error = ENOENT;
72865317 1314 }
06f81620 1315 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1316 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1317
1318 return error;
72865317
BP
1319}
1320
704a1e09 1321struct dp_netdev_flow_state {
a84cb64a 1322 struct dp_netdev_actions *actions;
19cf4069 1323 struct odputil_keybuf keybuf;
2c0ea78f 1324 struct odputil_keybuf maskbuf;
c97fb132 1325 struct dpif_flow_stats stats;
704a1e09
BP
1326};
1327
e723fd32
JS
1328struct dp_netdev_flow_iter {
1329 uint32_t bucket;
1330 uint32_t offset;
d2ad7ef1
JS
1331 int status;
1332 struct ovs_mutex mutex;
e723fd32
JS
1333};
1334
1335static void
1336dpif_netdev_flow_dump_state_init(void **statep)
72865317 1337{
feebdea2
BP
1338 struct dp_netdev_flow_state *state;
1339
1340 *statep = state = xmalloc(sizeof *state);
feebdea2 1341 state->actions = NULL;
e723fd32
JS
1342}
1343
1344static void
1345dpif_netdev_flow_dump_state_uninit(void *state_)
1346{
1347 struct dp_netdev_flow_state *state = state_;
1348
e723fd32
JS
1349 free(state);
1350}
1351
1352static int
1353dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp)
1354{
1355 struct dp_netdev_flow_iter *iter;
1356
1357 *iterp = iter = xmalloc(sizeof *iter);
1358 iter->bucket = 0;
1359 iter->offset = 0;
d2ad7ef1
JS
1360 iter->status = 0;
1361 ovs_mutex_init(&iter->mutex);
704a1e09
BP
1362 return 0;
1363}
1364
61e7deb1 1365/* XXX the caller must use 'actions' without quiescing */
704a1e09 1366static int
d2ad7ef1 1367dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_,
feebdea2 1368 const struct nlattr **key, size_t *key_len,
e6cc0bab 1369 const struct nlattr **mask, size_t *mask_len,
feebdea2 1370 const struct nlattr **actions, size_t *actions_len,
c97fb132 1371 const struct dpif_flow_stats **stats)
704a1e09 1372{
e723fd32 1373 struct dp_netdev_flow_iter *iter = iter_;
d2ad7ef1 1374 struct dp_netdev_flow_state *state = state_;
72865317 1375 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1376 struct dp_netdev_flow *netdev_flow;
d2ad7ef1 1377 int error;
14608a15 1378
d2ad7ef1
JS
1379 ovs_mutex_lock(&iter->mutex);
1380 error = iter->status;
1381 if (!error) {
1382 struct hmap_node *node;
1383
1384 fat_rwlock_rdlock(&dp->cls.rwlock);
1385 node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset);
1386 if (node) {
1387 netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
d2ad7ef1
JS
1388 }
1389 fat_rwlock_unlock(&dp->cls.rwlock);
1390 if (!node) {
1391 iter->status = error = EOF;
1392 }
8a4e3a85 1393 }
d2ad7ef1
JS
1394 ovs_mutex_unlock(&iter->mutex);
1395 if (error) {
1396 return error;
72865317 1397 }
704a1e09 1398
feebdea2
BP
1399 if (key) {
1400 struct ofpbuf buf;
1401
19cf4069 1402 ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
2c0ea78f
GS
1403 odp_flow_key_from_flow(&buf, &netdev_flow->flow,
1404 netdev_flow->flow.in_port.odp_port);
36956a7d 1405
feebdea2
BP
1406 *key = buf.data;
1407 *key_len = buf.size;
1408 }
1409
2c0ea78f
GS
1410 if (key && mask) {
1411 struct ofpbuf buf;
1412 struct flow_wildcards wc;
1413
1414 ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
1415 minimask_expand(&netdev_flow->cr.match.mask, &wc);
1416 odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
8bfd0fda
BP
1417 odp_to_u32(wc.masks.in_port.odp_port),
1418 SIZE_MAX);
2c0ea78f
GS
1419
1420 *mask = buf.data;
1421 *mask_len = buf.size;
e6cc0bab
AZ
1422 }
1423
8a4e3a85 1424 if (actions || stats) {
8a4e3a85 1425 state->actions = NULL;
feebdea2 1426
8a4e3a85 1427 if (actions) {
61e7deb1 1428 state->actions = dp_netdev_flow_get_actions(netdev_flow);
8a4e3a85
BP
1429 *actions = state->actions->actions;
1430 *actions_len = state->actions->size;
1431 }
679ba04c 1432
8a4e3a85
BP
1433 if (stats) {
1434 get_dpif_flow_stats(netdev_flow, &state->stats);
1435 *stats = &state->stats;
1436 }
feebdea2 1437 }
704a1e09
BP
1438
1439 return 0;
1440}
1441
1442static int
e723fd32 1443dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_)
704a1e09 1444{
e723fd32 1445 struct dp_netdev_flow_iter *iter = iter_;
feebdea2 1446
d2ad7ef1 1447 ovs_mutex_destroy(&iter->mutex);
e723fd32 1448 free(iter);
704a1e09 1449 return 0;
72865317
BP
1450}
1451
1452static int
758c456d 1453dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
72865317
BP
1454{
1455 struct dp_netdev *dp = get_dp_netdev(dpif);
758c456d
JR
1456 struct pkt_metadata *md = &execute->md;
1457 struct flow key;
72865317 1458
89625d1e
BP
1459 if (execute->packet->size < ETH_HEADER_LEN ||
1460 execute->packet->size > UINT16_MAX) {
72865317
BP
1461 return EINVAL;
1462 }
1463
758c456d 1464 /* Extract flow key. */
b5e7e61a 1465 flow_extract(execute->packet, md, &key);
8a4e3a85
BP
1466
1467 ovs_rwlock_rdlock(&dp->port_rwlock);
758c456d
JR
1468 dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions,
1469 execute->actions_len);
8a4e3a85
BP
1470 ovs_rwlock_unlock(&dp->port_rwlock);
1471
758c456d 1472 return 0;
72865317
BP
1473}
1474
1475static int
a12b3ead 1476dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
72865317 1477{
82272ede 1478 return 0;
72865317
BP
1479}
1480
5bf93d67
EJ
1481static int
1482dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1483 uint32_t queue_id, uint32_t *priority)
1484{
1485 *priority = queue_id;
1486 return 0;
1487}
1488
856081f6 1489static struct dp_netdev_queue *
f5126b57
BP
1490find_nonempty_queue(struct dp_netdev *dp)
1491 OVS_REQUIRES(dp->queue_mutex)
72865317 1492{
72865317
BP
1493 int i;
1494
1495 for (i = 0; i < N_QUEUES; i++) {
856081f6 1496 struct dp_netdev_queue *q = &dp->queues[i];
a12b3ead 1497 if (q->head != q->tail) {
856081f6 1498 return q;
72865317
BP
1499 }
1500 }
856081f6 1501 return NULL;
72865317
BP
1502}
1503
1504static int
90a7c55e
BP
1505dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
1506 struct ofpbuf *buf)
72865317 1507{
f5126b57 1508 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
1509 struct dp_netdev_queue *q;
1510 int error;
1511
f5126b57
BP
1512 ovs_mutex_lock(&dp->queue_mutex);
1513 q = find_nonempty_queue(dp);
856081f6 1514 if (q) {
d88b629b
BP
1515 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1516
1517 *upcall = u->upcall;
b3907fbc 1518
90a7c55e 1519 ofpbuf_uninit(buf);
d88b629b 1520 *buf = u->buf;
90a7c55e 1521
5279f8fd 1522 error = 0;
72865317 1523 } else {
5279f8fd 1524 error = EAGAIN;
72865317 1525 }
f5126b57 1526 ovs_mutex_unlock(&dp->queue_mutex);
5279f8fd
BP
1527
1528 return error;
72865317
BP
1529}
1530
1531static void
1532dpif_netdev_recv_wait(struct dpif *dpif)
1533{
d33ed218
BP
1534 struct dp_netdev *dp = get_dp_netdev(dpif);
1535 uint64_t seq;
5279f8fd 1536
f5126b57 1537 ovs_mutex_lock(&dp->queue_mutex);
d33ed218 1538 seq = seq_read(dp->queue_seq);
f5126b57 1539 if (find_nonempty_queue(dp)) {
72865317 1540 poll_immediate_wake();
d33ed218
BP
1541 } else {
1542 seq_wait(dp->queue_seq, seq);
72865317 1543 }
f5126b57 1544 ovs_mutex_unlock(&dp->queue_mutex);
72865317 1545}
1ba530f4
BP
1546
1547static void
1548dpif_netdev_recv_purge(struct dpif *dpif)
1549{
1550 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
f5126b57 1551
1ba530f4
BP
1552 dp_netdev_purge_queues(dpif_netdev->dp);
1553}
72865317 1554\f
a84cb64a
BP
1555/* Creates and returns a new 'struct dp_netdev_actions' whose actions are a
 1556 * copy of the 'size' bytes of 'actions'.  It is freed with
 1557 * dp_netdev_actions_free(), typically deferred via ovsrcu_postpone(). */
1558struct dp_netdev_actions *
1559dp_netdev_actions_create(const struct nlattr *actions, size_t size)
1560{
1561 struct dp_netdev_actions *netdev_actions;
1562
1563 netdev_actions = xmalloc(sizeof *netdev_actions);
a84cb64a
BP
1564 netdev_actions->actions = xmemdup(actions, size);
1565 netdev_actions->size = size;
1566
1567 return netdev_actions;
1568}
1569
a84cb64a 1570struct dp_netdev_actions *
61e7deb1 1571dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
a84cb64a 1572{
61e7deb1 1573 return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
a84cb64a
BP
1574}
1575
61e7deb1
BP
1576static void
1577dp_netdev_actions_free(struct dp_netdev_actions *actions)
a84cb64a 1578{
61e7deb1
BP
1579 free(actions->actions);
1580 free(actions);
a84cb64a
BP
1581}
1582\f
6c3eee82
BP
1583static void *
1584dp_forwarder_main(void *f_)
1585{
1586 struct dp_forwarder *f = f_;
1587 struct dp_netdev *dp = f->dp;
1588 struct ofpbuf packet;
1589
1590 f->name = xasprintf("forwarder_%u", ovsthread_id_self());
1591 set_subprogram_name("%s", f->name);
1592
1593 ofpbuf_init(&packet, 0);
1594 while (!latch_is_set(&dp->exit_latch)) {
1595 bool received_anything;
1596 int i;
1597
1598 ovs_rwlock_rdlock(&dp->port_rwlock);
1599 for (i = 0; i < 50; i++) {
1600 struct dp_netdev_port *port;
1601
1602 received_anything = false;
1603 HMAP_FOR_EACH (port, node, &f->dp->ports) {
1604 if (port->rx
1605 && port->node.hash >= f->min_hash
1606 && port->node.hash <= f->max_hash) {
1607 int buf_size;
1608 int error;
1609 int mtu;
1610
1611 if (netdev_get_mtu(port->netdev, &mtu)) {
1612 mtu = ETH_PAYLOAD_MAX;
1613 }
1614 buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu;
1615
1616 ofpbuf_clear(&packet);
1617 ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM,
1618 buf_size);
1619
1620 error = netdev_rx_recv(port->rx, &packet);
1621 if (!error) {
1622 struct pkt_metadata md
1623 = PKT_METADATA_INITIALIZER(port->port_no);
6c3eee82 1624
b5e7e61a 1625 dp_netdev_port_input(dp, &packet, &md);
6c3eee82
BP
1626 received_anything = true;
1627 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1628 static struct vlog_rate_limit rl
1629 = VLOG_RATE_LIMIT_INIT(1, 5);
1630
1631 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
1632 netdev_get_name(port->netdev),
1633 ovs_strerror(error));
1634 }
1635 }
1636 }
1637
1638 if (!received_anything) {
1639 break;
1640 }
1641 }
1642
1643 if (received_anything) {
1644 poll_immediate_wake();
1645 } else {
1646 struct dp_netdev_port *port;
1647
1648 HMAP_FOR_EACH (port, node, &f->dp->ports)
1649 if (port->rx
1650 && port->node.hash >= f->min_hash
1651 && port->node.hash <= f->max_hash) {
1652 netdev_rx_wait(port->rx);
1653 }
1654 seq_wait(dp->port_seq, seq_read(dp->port_seq));
1655 latch_wait(&dp->exit_latch);
1656 }
1657 ovs_rwlock_unlock(&dp->port_rwlock);
1658
1659 poll_block();
1660 }
1661 ofpbuf_uninit(&packet);
1662
1663 free(f->name);
1664
1665 return NULL;
1666}
1667
1668static void
1669dp_netdev_set_threads(struct dp_netdev *dp, int n)
1670{
1671 int i;
1672
1673 if (n == dp->n_forwarders) {
1674 return;
1675 }
1676
1677 /* Stop existing threads. */
1678 latch_set(&dp->exit_latch);
1679 for (i = 0; i < dp->n_forwarders; i++) {
1680 struct dp_forwarder *f = &dp->forwarders[i];
1681
1682 xpthread_join(f->thread, NULL);
1683 }
1684 latch_poll(&dp->exit_latch);
1685 free(dp->forwarders);
1686
1687 /* Start new threads. */
1688 dp->forwarders = xmalloc(n * sizeof *dp->forwarders);
1689 dp->n_forwarders = n;
1690 for (i = 0; i < n; i++) {
1691 struct dp_forwarder *f = &dp->forwarders[i];
1692
1693 f->dp = dp;
1694 f->min_hash = UINT32_MAX / n * i;
1695 f->max_hash = UINT32_MAX / n * (i + 1) - 1;
1696 if (i == n - 1) {
1697 f->max_hash = UINT32_MAX;
1698 }
1699 xpthread_create(&f->thread, NULL, dp_forwarder_main, f);
1700 }
1701}
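/* Worked example (illustrative): with n == 2 the loop above gives forwarder 0
 * the hash range [0, UINT32_MAX / 2 - 1] and forwarder 1 the range
 * [UINT32_MAX / 2, UINT32_MAX], so every port (hashed by its port number in
 * do_add_port()) is polled by exactly one forwarder thread. */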
1702\f
679ba04c
BP
1703static void *
1704dp_netdev_flow_stats_new_cb(void)
1705{
1706 struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1707 ovs_mutex_init(&bucket->mutex);
1708 return bucket;
1709}
1710
72865317 1711static void
1763b4b8 1712dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
855dd13c
JR
1713 const struct ofpbuf *packet,
1714 const struct flow *key)
72865317 1715{
855dd13c 1716 uint16_t tcp_flags = packet_get_tcp_flags(packet, key);
679ba04c
BP
1717 long long int now = time_msec();
1718 struct dp_netdev_flow_stats *bucket;
1719
1720 bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
1721 dp_netdev_flow_stats_new_cb);
1722
1723 ovs_mutex_lock(&bucket->mutex);
1724 bucket->used = MAX(now, bucket->used);
1725 bucket->packet_count++;
1726 bucket->byte_count += packet->size;
1727 bucket->tcp_flags |= tcp_flags;
1728 ovs_mutex_unlock(&bucket->mutex);
72865317
BP
1729}
1730
51852a57
BP
1731static void *
1732dp_netdev_stats_new_cb(void)
1733{
1734 struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1735 ovs_mutex_init(&bucket->mutex);
1736 return bucket;
1737}
1738
1739static void
1740dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
1741{
1742 struct dp_netdev_stats *bucket;
1743
1744 bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
1745 ovs_mutex_lock(&bucket->mutex);
1746 bucket->n[type]++;
1747 ovs_mutex_unlock(&bucket->mutex);
1748}
1749
static void
dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
                     struct pkt_metadata *md)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    struct dp_netdev_flow *netdev_flow;
    struct flow key;

    if (packet->size < ETH_HEADER_LEN) {
        return;
    }
    flow_extract(packet, md, &key);
    netdev_flow = dp_netdev_lookup_flow(dp, &key);
    if (netdev_flow) {
        struct dp_netdev_actions *actions;

        dp_netdev_flow_used(netdev_flow, packet, &key);

        actions = dp_netdev_flow_get_actions(netdev_flow);
        dp_netdev_execute_actions(dp, &key, packet, md,
                                  actions->actions, actions->size);
        dp_netdev_count_packet(dp, DP_STAT_HIT);
    } else {
        dp_netdev_count_packet(dp, DP_STAT_MISS);
        dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
    }
}

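/* Queues 'packet' to userspace on queue 'queue_no' (DPIF_UC_MISS or
 * DPIF_UC_ACTION).
 *
 * The upcall carries the ODP flow key serialized from 'flow', the optional
 * 'userdata' netlink attribute, and 'packet' itself, whose malloc'd data is
 * stolen rather than copied.  Returns 0 on success; if the queue already
 * holds MAX_QUEUE_LEN packets, counts the packet as DP_STAT_LOST and
 * returns ENOBUFS. */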
static int
dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                           int queue_no, const struct flow *flow,
                           const struct nlattr *userdata)
    OVS_EXCLUDED(dp->queue_mutex)
{
    struct dp_netdev_queue *q = &dp->queues[queue_no];
    int error;

    ovs_mutex_lock(&dp->queue_mutex);
    if (q->head - q->tail < MAX_QUEUE_LEN) {
        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
        struct dpif_upcall *upcall = &u->upcall;
        struct ofpbuf *buf = &u->buf;
        size_t buf_size;

        upcall->type = queue_no;

        /* Allocate buffer big enough for everything. */
        buf_size = ODPUTIL_FLOW_KEY_BYTES;
        if (userdata) {
            buf_size += NLA_ALIGN(userdata->nla_len);
        }
        ofpbuf_init(buf, buf_size);

        /* Put ODP flow. */
        odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
        upcall->key = buf->data;
        upcall->key_len = buf->size;

        /* Put userdata. */
        if (userdata) {
            upcall->userdata = ofpbuf_put(buf, userdata,
                                          NLA_ALIGN(userdata->nla_len));
        }

        /* Steal packet data. */
        ovs_assert(packet->source == OFPBUF_MALLOC);
        upcall->packet = *packet;
        ofpbuf_use(packet, NULL, 0);

        seq_change(dp->queue_seq);

        error = 0;
    } else {
        dp_netdev_count_packet(dp, DP_STAT_LOST);
        error = ENOBUFS;
    }
    ovs_mutex_unlock(&dp->queue_mutex);

    return error;
}

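/* Context threaded through odp_execute_actions() to dp_execute_cb(), the
 * callback that handles the actions needing datapath state:
 * OVS_ACTION_ATTR_OUTPUT transmits the packet on the target port's netdev,
 * and OVS_ACTION_ATTR_USERSPACE queues it as a DPIF_UC_ACTION upcall
 * (cloning it first when the callback may not steal the packet).  Every
 * other action type is handled inside odp_execute_actions() itself and must
 * never reach the callback. */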
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;
    const struct flow *key;
};

static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
              const struct pkt_metadata *md OVS_UNUSED,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
        if (p) {
            netdev_send(p->netdev, packet);
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE: {
        const struct nlattr *userdata;

        userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);

        /* Make a copy if we are not allowed to steal the packet's data. */
        if (!may_steal) {
            packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
        }
        dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
                                   userdata);
        if (!may_steal) {
            ofpbuf_uninit(packet);
        }
        break;
    }
    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }
}

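/* Runs the 'actions_len' bytes of ODP actions in 'actions' on 'packet' with
 * metadata 'md', using dp_execute_cb() above for the datapath-specific
 * actions. */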
static void
dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key,
                          struct ofpbuf *packet, struct pkt_metadata *md,
                          const struct nlattr *actions, size_t actions_len)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    struct dp_netdev_execute_aux aux = {dp, key};

    odp_execute_actions(&aux, packet, md, actions, actions_len, dp_execute_cb);
}

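/* The dpif-provider vtable for the userspace ("netdev") datapath.  Members
 * left NULL are optional hooks this datapath does not implement.
 * dpif_dummy_register__() below copies this structure to register the
 * "dummy" datapath types used for testing. */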
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    NULL,                       /* run */
    NULL,                       /* wait */
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_get,
    dpif_netdev_flow_put,
    dpif_netdev_flow_del,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_state_init,
    dpif_netdev_flow_dump_start,
    dpif_netdev_flow_dump_next,
    NULL,
    dpif_netdev_flow_dump_done,
    dpif_netdev_flow_dump_state_uninit,
    dpif_netdev_execute,
    NULL,                       /* operate */
    dpif_netdev_recv_set,
    dpif_netdev_queue_to_priority,
    dpif_netdev_recv,
    dpif_netdev_recv_wait,
    dpif_netdev_recv_purge,
};

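/* unixctl handler that renumbers a port of a dummy datapath: argv[1] names
 * the datapath, argv[2] the port, and argv[3] the new port number.  The
 * datapath is referenced, and its port_rwlock held, for the duration of the
 * change so it cannot disappear mid-command.
 *
 * Illustrative invocation (the datapath and port names here are made up;
 * the command itself is registered below as
 * "dpif-dummy/change-port-number DP PORT NEW-NUMBER" and is normally
 * reached through ovs-appctl):
 *
 *     ovs-appctl dpif-dummy/change-port-number br0 p1 100
 */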
static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp;
    odp_port_t port_no;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }
    ovs_refcount_ref(&dp->ref_cnt);
    ovs_mutex_unlock(&dp_netdev_mutex);

    ovs_rwlock_wrlock(&dp->port_rwlock);
    if (get_port_by_name(dp, argv[2], &port)) {
        unixctl_command_reply_error(conn, "unknown port");
        goto exit;
    }

    port_no = u32_to_odp(atoi(argv[3]));
    if (!port_no || port_no == ODPP_NONE) {
        unixctl_command_reply_error(conn, "bad port number");
        goto exit;
    }
    if (dp_netdev_lookup_port(dp, port_no)) {
        unixctl_command_reply_error(conn, "port number already in use");
        goto exit;
    }
    hmap_remove(&dp->ports, &port->node);
    port->port_no = port_no;
    hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
    seq_change(dp->port_seq);
    unixctl_command_reply(conn, NULL);

exit:
    ovs_rwlock_unlock(&dp->port_rwlock);
    dp_netdev_unref(dp);
}

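/* Registration of the "dummy" flavors of this datapath.
 * dpif_dummy_register__() clones dpif_netdev_class under a new type name;
 * dpif_dummy_register(true) additionally re-registers every existing
 * datapath type as a dummy (an override intended for testing setups).  In
 * either case a plain "dummy" type and the
 * "dpif-dummy/change-port-number" unixctl command end up registered. */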
static void
dpif_dummy_register__(const char *type)
{
    struct dpif_class *class;

    class = xmalloc(sizeof *class);
    *class = dpif_netdev_class;
    class->type = xstrdup(type);
    dp_register_provider(class);
}

void
dpif_dummy_register(bool override)
{
    if (override) {
        struct sset types;
        const char *type;

        sset_init(&types);
        dp_enumerate_types(&types);
        SSET_FOR_EACH (type, &types) {
            if (!dp_unregister_provider(type)) {
                dpif_dummy_register__(type);
            }
        }
        sset_destroy(&types);
    }

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "DP PORT NEW-NUMBER",
                             3, 3, dpif_dummy_change_port_number, NULL);
}