]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netdev.c
ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.
[mirror_ovs.git] / lib / dpif-netdev.c
CommitLineData
72865317 1/*
ff073a71 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
2c0ea78f 34#include "classifier.h"
72865317 35#include "csum.h"
614c4892 36#include "dpif.h"
72865317 37#include "dpif-provider.h"
614c4892 38#include "dummy.h"
36956a7d 39#include "dynamic-string.h"
72865317
BP
40#include "flow.h"
41#include "hmap.h"
6c3eee82 42#include "latch.h"
72865317 43#include "list.h"
8c301900 44#include "meta-flow.h"
72865317 45#include "netdev.h"
de281153 46#include "netdev-vport.h"
cdee00fd 47#include "netlink.h"
f094af7b 48#include "odp-execute.h"
72865317
BP
49#include "odp-util.h"
50#include "ofp-print.h"
51#include "ofpbuf.h"
52#include "packets.h"
53#include "poll-loop.h"
26c6b6cd 54#include "random.h"
d33ed218 55#include "seq.h"
462278db 56#include "shash.h"
0cbfe35d 57#include "sset.h"
72865317 58#include "timeval.h"
74cc3969 59#include "unixctl.h"
72865317 60#include "util.h"
72865317 61#include "vlog.h"
5136ce49 62
d98e6007 63VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317 64
2c0ea78f
GS
65/* By default, choose a priority in the middle. */
66#define NETDEV_RULE_PRIORITY 0x8000
67
72865317 68/* Configuration parameters. */
72865317
BP
69enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
70
71/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
72 * headers to be aligned on a 4-byte boundary. */
73enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
74
856081f6
BP
75/* Queues. */
76enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
77enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
78enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
79BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
80
8a4e3a85
BP
81/* Protects against changes to 'dp_netdevs'. */
82static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
83
84/* Contains all 'struct dp_netdev's. */
85static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
86 = SHASH_INITIALIZER(&dp_netdevs);
87
d88b629b
BP
88struct dp_netdev_upcall {
89 struct dpif_upcall upcall; /* Queued upcall information. */
90 struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */
91};
92
8a4e3a85
BP
93/* A queue passing packets from a struct dp_netdev to its clients.
94 *
95 *
96 * Thread-safety
97 * =============
98 *
99 * Any access at all requires the owning 'dp_netdev''s queue_mutex. */
856081f6 100struct dp_netdev_queue {
f5126b57
BP
101 struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
102 unsigned int head OVS_GUARDED;
103 unsigned int tail OVS_GUARDED;
856081f6
BP
104};
105
8a4e3a85
BP
106/* Datapath based on the network device interface from netdev.h.
107 *
108 *
109 * Thread-safety
110 * =============
111 *
112 * Some members, marked 'const', are immutable. Accessing other members
113 * requires synchronization, as noted in more detail below.
114 *
115 * Acquisition order is, from outermost to innermost:
116 *
117 * dp_netdev_mutex (global)
118 * port_rwlock
119 * flow_mutex
120 * cls.rwlock
121 * queue_mutex
122 */
72865317 123struct dp_netdev {
8a4e3a85
BP
124 const struct dpif_class *const class;
125 const char *const name;
6a8267c5
BP
126 struct ovs_refcount ref_cnt;
127 atomic_flag destroyed;
72865317 128
8a4e3a85
BP
129 /* Flows.
130 *
131 * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
132 *
133 * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
134 * the 'cls->rwlock' write lock. (The outer 'flow_mutex' allows writers to
135 * atomically perform multiple operations on 'cls' and 'flow_table'.)
136 */
137 struct ovs_mutex flow_mutex;
138 struct classifier cls; /* Classifier. Protected by cls.rwlock. */
139 struct hmap flow_table OVS_GUARDED; /* Flow table. */
140
141 /* Queues.
142 *
143 * Everything in 'queues' is protected by 'queue_mutex'. */
f5126b57
BP
144 struct ovs_mutex queue_mutex;
145 struct dp_netdev_queue queues[N_QUEUES];
d33ed218 146 struct seq *queue_seq; /* Incremented whenever a packet is queued. */
72865317 147
8a4e3a85
BP
148 /* Statistics.
149 *
51852a57
BP
150 * ovsthread_stats is internally synchronized. */
151 struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
72865317 152
8a4e3a85
BP
153 /* Ports.
154 *
155 * Any lookup into 'ports' or any access to the dp_netdev_ports found
156 * through 'ports' requires taking 'port_rwlock'. */
157 struct ovs_rwlock port_rwlock;
158 struct hmap ports OVS_GUARDED;
d33ed218 159 struct seq *port_seq; /* Incremented whenever a port changes. */
6c3eee82
BP
160
161 /* Forwarding threads. */
162 struct latch exit_latch;
163 struct dp_forwarder *forwarders;
164 size_t n_forwarders;
72865317
BP
165};
166
8a4e3a85
BP
167static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
168 odp_port_t)
169 OVS_REQ_RDLOCK(dp->port_rwlock);
ff073a71 170
51852a57
BP
/* Indexes into the 'n' array of 'struct dp_netdev_stats'. */
enum dp_stat_type {
    DP_STAT_HIT,                /* Packets that matched in the flow table. */
    DP_STAT_MISS,               /* Packets that did not match. */
    DP_STAT_LOST,               /* Packets not passed up to the client. */
    DP_N_STATS                  /* Number of counters; keep last. */
};
177
178/* Contained by struct dp_netdev's 'stats' member. */
179struct dp_netdev_stats {
180 struct ovs_mutex mutex; /* Protects 'n'. */
181
182 /* Indexed by DP_STAT_*, protected by 'mutex'. */
183 unsigned long long int n[DP_N_STATS] OVS_GUARDED;
184};
185
186
72865317
BP
187/* A port in a netdev-based datapath. */
188struct dp_netdev_port {
ff073a71
BP
189 struct hmap_node node; /* Node in dp_netdev's 'ports'. */
190 odp_port_t port_no;
72865317 191 struct netdev *netdev;
4b609110 192 struct netdev_saved_flags *sf;
796223f5 193 struct netdev_rx *rx;
0cbfe35d 194 char *type; /* Port type as requested by user. */
72865317
BP
195};
196
8a4e3a85
BP
197/* A flow in dp_netdev's 'flow_table'.
198 *
199 *
200 * Thread-safety
201 * =============
202 *
203 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
204 * its dp_netdev's classifier. The text below calls this classifier 'cls'.
205 *
206 * Motivation
207 * ----------
208 *
209 * The thread safety rules described here for "struct dp_netdev_flow" are
210 * motivated by two goals:
211 *
212 * - Prevent threads that read members of "struct dp_netdev_flow" from
213 * reading bad data due to changes by some thread concurrently modifying
214 * those members.
215 *
216 * - Prevent two threads making changes to members of a given "struct
217 * dp_netdev_flow" from interfering with each other.
218 *
219 *
220 * Rules
221 * -----
222 *
223 * A flow 'flow' may be accessed without a risk of being freed by code that
224 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
225 * 'flow->ref_cnt' (or both). Code that needs to hold onto a flow for a while
226 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
227 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
228 *
229 * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the
230 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
231 * protect members of 'flow' from modification (that's 'flow->mutex').
232 *
233 * 'flow->mutex' protects the members of 'flow' from modification. It doesn't
234 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
235 * doesn't prevent the flow from being freed (that's 'flow->ref_cnt').
236 *
237 * Some members, marked 'const', are immutable. Accessing other members
238 * requires synchronization, as noted in more detail below.
239 */
72865317 240struct dp_netdev_flow {
2c0ea78f 241 /* Packet classification. */
8a4e3a85 242 const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */
2c0ea78f 243
8a4e3a85
BP
244 /* Hash table index by unmasked flow. */
245 const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
246 const struct flow flow; /* The flow that created this entry. */
72865317 247
8a4e3a85
BP
248 /* Number of references.
249 * The classifier owns one reference.
250 * Any thread trying to keep a rule from being freed should hold its own
251 * reference. */
252 struct ovs_refcount ref_cnt;
72865317 253
8a4e3a85
BP
254 /* Protects members marked OVS_GUARDED.
255 *
256 * Acquire after datapath's flow_mutex. */
257 struct ovs_mutex mutex OVS_ACQ_AFTER(dp_netdev_mutex);
258
259 /* Statistics.
260 *
261 * Reading or writing these members requires 'mutex'. */
262 long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
263 long long int packet_count OVS_GUARDED; /* Number of packets matched. */
264 long long int byte_count OVS_GUARDED; /* Number of bytes matched. */
265 uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
266
267 /* Actions.
268 *
269 * Reading 'actions' requires 'mutex'.
270 * Writing 'actions' requires 'mutex' and (to allow for transactions) the
271 * datapath's flow_mutex. */
272 struct dp_netdev_actions *actions OVS_GUARDED;
72865317
BP
273};
274
8a4e3a85
BP
/* Reference counting for 'struct dp_netdev_flow'; both accept NULL. */
static struct dp_netdev_flow *dp_netdev_flow_ref(
    const struct dp_netdev_flow *);
static void dp_netdev_flow_unref(struct dp_netdev_flow *);
278
a84cb64a
BP
279/* A set of datapath actions within a "struct dp_netdev_flow".
280 *
281 *
282 * Thread-safety
283 * =============
284 *
285 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
286 * freed by code that holds a read-lock or write-lock on 'flow->mutex' (where
287 * 'flow' is the dp_netdev_flow for which 'flow->actions == actions') or that
288 * owns a reference to 'actions->ref_cnt' (or both). */
289struct dp_netdev_actions {
290 struct ovs_refcount ref_cnt;
291
292 /* These members are immutable: they do not change during the struct's
293 * lifetime. */
294 struct nlattr *actions; /* Sequence of OVS_ACTION_ATTR_* attributes. */
295 unsigned int size; /* Size of 'actions', in bytes. */
296};
297
/* Construction and reference counting for 'struct dp_netdev_actions'. */
struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
                                                   size_t);
struct dp_netdev_actions *dp_netdev_actions_ref(
    const struct dp_netdev_actions *);
void dp_netdev_actions_unref(struct dp_netdev_actions *);
303
6c3eee82
BP
/* A forwarding thread: it receives packets from some of the datapath's ports,
 * looks each one up in the flow table, and executes the actions it finds. */
struct dp_forwarder {
    struct dp_netdev *dp;       /* Datapath this thread serves. */
    pthread_t thread;
    char *name;
    uint32_t min_hash, max_hash;
};
312
72865317
BP
313/* Interface to netdev-based datapath. */
314struct dpif_netdev {
315 struct dpif dpif;
316 struct dp_netdev *dp;
d33ed218 317 uint64_t last_port_seq;
72865317
BP
318};
319
8a4e3a85
BP
320static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
321 struct dp_netdev_port **portp)
322 OVS_REQ_RDLOCK(dp->port_rwlock);
323static int get_port_by_name(struct dp_netdev *dp, const char *devname,
324 struct dp_netdev_port **portp)
325 OVS_REQ_RDLOCK(dp->port_rwlock);
326static void dp_netdev_free(struct dp_netdev *)
327 OVS_REQUIRES(dp_netdev_mutex);
72865317 328static void dp_netdev_flow_flush(struct dp_netdev *);
8a4e3a85
BP
329static int do_add_port(struct dp_netdev *dp, const char *devname,
330 const char *type, odp_port_t port_no)
331 OVS_REQ_WRLOCK(dp->port_rwlock);
332static int do_del_port(struct dp_netdev *dp, odp_port_t port_no)
333 OVS_REQ_WRLOCK(dp->port_rwlock);
614c4892
BP
334static int dpif_netdev_open(const struct dpif_class *, const char *name,
335 bool create, struct dpif **);
f5126b57 336static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
856081f6 337 int queue_no, const struct flow *,
f5126b57
BP
338 const struct nlattr *userdata)
339 OVS_EXCLUDED(dp->queue_mutex);
8a4e3a85
BP
340static void dp_netdev_execute_actions(struct dp_netdev *dp,
341 const struct flow *, struct ofpbuf *,
342 struct pkt_metadata *,
4edb9ae9 343 const struct nlattr *actions,
8a4e3a85
BP
344 size_t actions_len)
345 OVS_REQ_RDLOCK(dp->port_rwlock);
758c456d 346static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
8a4e3a85
BP
347 struct pkt_metadata *)
348 OVS_REQ_RDLOCK(dp->port_rwlock);
6c3eee82 349static void dp_netdev_set_threads(struct dp_netdev *, int n);
72865317
BP
350
351static struct dpif_netdev *
352dpif_netdev_cast(const struct dpif *dpif)
353{
cb22974d 354 ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
72865317
BP
355 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
356}
357
358static struct dp_netdev *
359get_dp_netdev(const struct dpif *dpif)
360{
361 return dpif_netdev_cast(dpif)->dp;
362}
363
2197d7ab
GL
364static int
365dpif_netdev_enumerate(struct sset *all_dps)
366{
367 struct shash_node *node;
368
97be1538 369 ovs_mutex_lock(&dp_netdev_mutex);
2197d7ab
GL
370 SHASH_FOR_EACH(node, &dp_netdevs) {
371 sset_add(all_dps, node->name);
372 }
97be1538 373 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 374
2197d7ab
GL
375 return 0;
376}
377
add90f6f
EJ
378static bool
379dpif_netdev_class_is_dummy(const struct dpif_class *class)
380{
381 return class != &dpif_netdev_class;
382}
383
0aeaabc8
JP
/* Maps the user-visible port 'type' to the netdev type to open for it.
 *
 * Every type except "internal" is returned unchanged.  An "internal" port is
 * opened as "dummy" on a dummy datapath class and as "tap" otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }
    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}
391
72865317
BP
392static struct dpif *
393create_dpif_netdev(struct dp_netdev *dp)
394{
462278db 395 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 396 struct dpif_netdev *dpif;
72865317 397
6a8267c5 398 ovs_refcount_ref(&dp->ref_cnt);
72865317 399
72865317 400 dpif = xmalloc(sizeof *dpif);
614c4892 401 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 402 dpif->dp = dp;
d33ed218 403 dpif->last_port_seq = seq_read(dp->port_seq);
72865317
BP
404
405 return &dpif->dpif;
406}
407
4e022ec0
AW
408/* Choose an unused, non-zero port number and return it on success.
409 * Return ODPP_NONE on failure. */
410static odp_port_t
e44768b7 411choose_port(struct dp_netdev *dp, const char *name)
8a4e3a85 412 OVS_REQ_RDLOCK(dp->port_rwlock)
e44768b7 413{
4e022ec0 414 uint32_t port_no;
e44768b7
JP
415
416 if (dp->class != &dpif_netdev_class) {
417 const char *p;
418 int start_no = 0;
419
420 /* If the port name begins with "br", start the number search at
421 * 100 to make writing tests easier. */
422 if (!strncmp(name, "br", 2)) {
423 start_no = 100;
424 }
425
426 /* If the port name contains a number, try to assign that port number.
427 * This can make writing unit tests easier because port numbers are
428 * predictable. */
429 for (p = name; *p != '\0'; p++) {
430 if (isdigit((unsigned char) *p)) {
431 port_no = start_no + strtol(p, NULL, 10);
ff073a71
BP
432 if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
433 && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 434 return u32_to_odp(port_no);
e44768b7
JP
435 }
436 break;
437 }
438 }
439 }
440
ff073a71
BP
441 for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
442 if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 443 return u32_to_odp(port_no);
e44768b7
JP
444 }
445 }
446
4e022ec0 447 return ODPP_NONE;
e44768b7
JP
448}
449
72865317 450static int
614c4892
BP
451create_dp_netdev(const char *name, const struct dpif_class *class,
452 struct dp_netdev **dpp)
8a4e3a85 453 OVS_REQUIRES(dp_netdev_mutex)
72865317
BP
454{
455 struct dp_netdev *dp;
456 int error;
457 int i;
458
462278db 459 dp = xzalloc(sizeof *dp);
8a4e3a85
BP
460 shash_add(&dp_netdevs, name, dp);
461
462 *CONST_CAST(const struct dpif_class **, &dp->class) = class;
463 *CONST_CAST(const char **, &dp->name) = xstrdup(name);
6a8267c5 464 ovs_refcount_init(&dp->ref_cnt);
1a65ba85 465 atomic_flag_clear(&dp->destroyed);
8a4e3a85
BP
466
467 ovs_mutex_init(&dp->flow_mutex);
468 classifier_init(&dp->cls, NULL);
469 hmap_init(&dp->flow_table);
470
f5126b57
BP
471 ovs_mutex_init(&dp->queue_mutex);
472 ovs_mutex_lock(&dp->queue_mutex);
72865317 473 for (i = 0; i < N_QUEUES; i++) {
856081f6 474 dp->queues[i].head = dp->queues[i].tail = 0;
72865317 475 }
f5126b57 476 ovs_mutex_unlock(&dp->queue_mutex);
d33ed218 477 dp->queue_seq = seq_create();
ed27e010 478
51852a57 479 ovsthread_stats_init(&dp->stats);
ed27e010 480
8a4e3a85 481 ovs_rwlock_init(&dp->port_rwlock);
ff073a71 482 hmap_init(&dp->ports);
d33ed218 483 dp->port_seq = seq_create();
6c3eee82 484 latch_init(&dp->exit_latch);
e44768b7 485
8a4e3a85 486 ovs_rwlock_wrlock(&dp->port_rwlock);
4e022ec0 487 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
8a4e3a85 488 ovs_rwlock_unlock(&dp->port_rwlock);
72865317
BP
489 if (error) {
490 dp_netdev_free(dp);
462278db 491 return error;
72865317 492 }
6c3eee82 493 dp_netdev_set_threads(dp, 2);
72865317 494
462278db 495 *dpp = dp;
72865317
BP
496 return 0;
497}
498
499static int
614c4892 500dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 501 bool create, struct dpif **dpifp)
72865317 502{
462278db 503 struct dp_netdev *dp;
5279f8fd 504 int error;
462278db 505
97be1538 506 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
507 dp = shash_find_data(&dp_netdevs, name);
508 if (!dp) {
5279f8fd 509 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 510 } else {
5279f8fd
BP
511 error = (dp->class != class ? EINVAL
512 : create ? EEXIST
513 : 0);
514 }
515 if (!error) {
516 *dpifp = create_dpif_netdev(dp);
72865317 517 }
97be1538 518 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 519
5279f8fd 520 return error;
72865317
BP
521}
522
523static void
1ba530f4 524dp_netdev_purge_queues(struct dp_netdev *dp)
72865317
BP
525{
526 int i;
527
f5126b57 528 ovs_mutex_lock(&dp->queue_mutex);
72865317 529 for (i = 0; i < N_QUEUES; i++) {
856081f6 530 struct dp_netdev_queue *q = &dp->queues[i];
856081f6 531
1ba530f4 532 while (q->tail != q->head) {
d88b629b 533 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
da546e07 534 ofpbuf_uninit(&u->upcall.packet);
d88b629b 535 ofpbuf_uninit(&u->buf);
856081f6 536 }
72865317 537 }
f5126b57 538 ovs_mutex_unlock(&dp->queue_mutex);
1ba530f4
BP
539}
540
8a4e3a85
BP
541/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
542 * through the 'dp_netdevs' shash while freeing 'dp'. */
1ba530f4
BP
543static void
544dp_netdev_free(struct dp_netdev *dp)
8a4e3a85 545 OVS_REQUIRES(dp_netdev_mutex)
1ba530f4 546{
4ad28026 547 struct dp_netdev_port *port, *next;
51852a57
BP
548 struct dp_netdev_stats *bucket;
549 int i;
4ad28026 550
8a4e3a85
BP
551 shash_find_and_delete(&dp_netdevs, dp->name);
552
6c3eee82
BP
553 dp_netdev_set_threads(dp, 0);
554 free(dp->forwarders);
555
1ba530f4 556 dp_netdev_flow_flush(dp);
8a4e3a85 557 ovs_rwlock_wrlock(&dp->port_rwlock);
ff073a71 558 HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) {
1ba530f4
BP
559 do_del_port(dp, port->port_no);
560 }
8a4e3a85 561 ovs_rwlock_unlock(&dp->port_rwlock);
51852a57
BP
562
563 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
564 ovs_mutex_destroy(&bucket->mutex);
565 free_cacheline(bucket);
566 }
567 ovsthread_stats_destroy(&dp->stats);
f5126b57 568
1ba530f4 569 dp_netdev_purge_queues(dp);
d33ed218 570 seq_destroy(dp->queue_seq);
f5126b57
BP
571 ovs_mutex_destroy(&dp->queue_mutex);
572
2c0ea78f 573 classifier_destroy(&dp->cls);
72865317 574 hmap_destroy(&dp->flow_table);
8a4e3a85 575 ovs_mutex_destroy(&dp->flow_mutex);
d33ed218 576 seq_destroy(dp->port_seq);
ff073a71 577 hmap_destroy(&dp->ports);
6c3eee82 578 latch_destroy(&dp->exit_latch);
8a4e3a85 579 free(CONST_CAST(char *, dp->name));
72865317
BP
580 free(dp);
581}
582
8a4e3a85
BP
583static void
584dp_netdev_unref(struct dp_netdev *dp)
585{
586 if (dp) {
587 /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
588 * get a new reference to 'dp' through the 'dp_netdevs' shash. */
589 ovs_mutex_lock(&dp_netdev_mutex);
590 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
591 dp_netdev_free(dp);
592 }
593 ovs_mutex_unlock(&dp_netdev_mutex);
594 }
595}
596
72865317
BP
/* Closes 'dpif': gives up the handle's reference to its datapath and frees
 * the handle itself. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    dp_netdev_unref(get_dp_netdev(dpif));
    free(dpif);
}
605
606static int
7dab847a 607dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
608{
609 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 610
6a8267c5
BP
611 if (!atomic_flag_test_and_set(&dp->destroyed)) {
612 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
613 /* Can't happen: 'dpif' still owns a reference to 'dp'. */
614 OVS_NOT_REACHED();
615 }
616 }
5279f8fd 617
72865317
BP
618 return 0;
619}
620
621static int
a8d9304d 622dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
623{
624 struct dp_netdev *dp = get_dp_netdev(dpif);
51852a57
BP
625 struct dp_netdev_stats *bucket;
626 size_t i;
5279f8fd 627
06f81620 628 fat_rwlock_rdlock(&dp->cls.rwlock);
f180c2e2 629 stats->n_flows = hmap_count(&dp->flow_table);
06f81620 630 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 631
51852a57
BP
632 stats->n_hit = stats->n_missed = stats->n_lost = 0;
633 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
634 ovs_mutex_lock(&bucket->mutex);
635 stats->n_hit += bucket->n[DP_STAT_HIT];
636 stats->n_missed += bucket->n[DP_STAT_MISS];
637 stats->n_lost += bucket->n[DP_STAT_LOST];
638 ovs_mutex_unlock(&bucket->mutex);
639 }
1ce3fa06 640 stats->n_masks = UINT32_MAX;
847108dc 641 stats->n_mask_hit = UINT64_MAX;
5279f8fd 642
72865317
BP
643 return 0;
644}
645
72865317 646static int
c3827f61 647do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 648 odp_port_t port_no)
8a4e3a85 649 OVS_REQ_WRLOCK(dp->port_rwlock)
72865317 650{
4b609110 651 struct netdev_saved_flags *sf;
72865317
BP
652 struct dp_netdev_port *port;
653 struct netdev *netdev;
796223f5 654 struct netdev_rx *rx;
2499a8ce 655 enum netdev_flags flags;
0cbfe35d 656 const char *open_type;
72865317
BP
657 int error;
658
659 /* XXX reject devices already in some dp_netdev. */
660
661 /* Open and validate network device. */
0aeaabc8 662 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 663 error = netdev_open(devname, open_type, &netdev);
72865317
BP
664 if (error) {
665 return error;
666 }
72865317
BP
667 /* XXX reject non-Ethernet devices */
668
2499a8ce
AC
669 netdev_get_flags(netdev, &flags);
670 if (flags & NETDEV_LOOPBACK) {
671 VLOG_ERR("%s: cannot add a loopback device", devname);
672 netdev_close(netdev);
673 return EINVAL;
674 }
675
796223f5 676 error = netdev_rx_open(netdev, &rx);
add90f6f
EJ
677 if (error
678 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
7b6b0ef4 679 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
10a89ef0 680 devname, ovs_strerror(errno));
7b6b0ef4
BP
681 netdev_close(netdev);
682 return error;
683 }
684
4b609110 685 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 686 if (error) {
796223f5 687 netdev_rx_close(rx);
72865317
BP
688 netdev_close(netdev);
689 return error;
690 }
691
692 port = xmalloc(sizeof *port);
693 port->port_no = port_no;
694 port->netdev = netdev;
4b609110 695 port->sf = sf;
796223f5 696 port->rx = rx;
0cbfe35d 697 port->type = xstrdup(type);
72865317 698
ff073a71 699 hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
d33ed218 700 seq_change(dp->port_seq);
72865317
BP
701
702 return 0;
703}
704
247527db
BP
705static int
706dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 707 odp_port_t *port_nop)
247527db
BP
708{
709 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
710 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
711 const char *dpif_port;
4e022ec0 712 odp_port_t port_no;
5279f8fd 713 int error;
247527db 714
8a4e3a85 715 ovs_rwlock_wrlock(&dp->port_rwlock);
3aa30359 716 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0 717 if (*port_nop != ODPP_NONE) {
ff073a71
BP
718 port_no = *port_nop;
719 error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
232dfa4a 720 } else {
3aa30359 721 port_no = choose_port(dp, dpif_port);
5279f8fd 722 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 723 }
5279f8fd 724 if (!error) {
247527db 725 *port_nop = port_no;
5279f8fd 726 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 727 }
8a4e3a85 728 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
729
730 return error;
72865317
BP
731}
732
733static int
4e022ec0 734dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
735{
736 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
737 int error;
738
8a4e3a85 739 ovs_rwlock_wrlock(&dp->port_rwlock);
5279f8fd 740 error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
8a4e3a85 741 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
742
743 return error;
72865317
BP
744}
745
746static bool
4e022ec0 747is_valid_port_number(odp_port_t port_no)
72865317 748{
ff073a71
BP
749 return port_no != ODPP_NONE;
750}
751
752static struct dp_netdev_port *
753dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
8a4e3a85 754 OVS_REQ_RDLOCK(dp->port_rwlock)
ff073a71
BP
755{
756 struct dp_netdev_port *port;
757
758 HMAP_FOR_EACH_IN_BUCKET (port, node, hash_int(odp_to_u32(port_no), 0),
759 &dp->ports) {
760 if (port->port_no == port_no) {
761 return port;
762 }
763 }
764 return NULL;
72865317
BP
765}
766
767static int
768get_port_by_number(struct dp_netdev *dp,
4e022ec0 769 odp_port_t port_no, struct dp_netdev_port **portp)
8a4e3a85 770 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
771{
772 if (!is_valid_port_number(port_no)) {
773 *portp = NULL;
774 return EINVAL;
775 } else {
ff073a71 776 *portp = dp_netdev_lookup_port(dp, port_no);
72865317
BP
777 return *portp ? 0 : ENOENT;
778 }
779}
780
781static int
782get_port_by_name(struct dp_netdev *dp,
783 const char *devname, struct dp_netdev_port **portp)
8a4e3a85 784 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
785{
786 struct dp_netdev_port *port;
787
ff073a71 788 HMAP_FOR_EACH (port, node, &dp->ports) {
3efb6063 789 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
790 *portp = port;
791 return 0;
792 }
793 }
794 return ENOENT;
795}
796
797static int
4e022ec0 798do_del_port(struct dp_netdev *dp, odp_port_t port_no)
8a4e3a85 799 OVS_REQ_WRLOCK(dp->port_rwlock)
72865317
BP
800{
801 struct dp_netdev_port *port;
802 int error;
803
804 error = get_port_by_number(dp, port_no, &port);
805 if (error) {
806 return error;
807 }
808
ff073a71 809 hmap_remove(&dp->ports, &port->node);
d33ed218 810 seq_change(dp->port_seq);
72865317
BP
811
812 netdev_close(port->netdev);
4b609110 813 netdev_restore_flags(port->sf);
796223f5 814 netdev_rx_close(port->rx);
0cbfe35d 815 free(port->type);
72865317
BP
816 free(port);
817
818 return 0;
819}
820
821static void
4c738a8d
BP
822answer_port_query(const struct dp_netdev_port *port,
823 struct dpif_port *dpif_port)
72865317 824{
3efb6063 825 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 826 dpif_port->type = xstrdup(port->type);
4c738a8d 827 dpif_port->port_no = port->port_no;
72865317
BP
828}
829
830static int
4e022ec0 831dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 832 struct dpif_port *dpif_port)
72865317
BP
833{
834 struct dp_netdev *dp = get_dp_netdev(dpif);
835 struct dp_netdev_port *port;
836 int error;
837
8a4e3a85 838 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 839 error = get_port_by_number(dp, port_no, &port);
4afba28d 840 if (!error && dpif_port) {
4c738a8d 841 answer_port_query(port, dpif_port);
72865317 842 }
8a4e3a85 843 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 844
72865317
BP
845 return error;
846}
847
848static int
849dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 850 struct dpif_port *dpif_port)
72865317
BP
851{
852 struct dp_netdev *dp = get_dp_netdev(dpif);
853 struct dp_netdev_port *port;
854 int error;
855
8a4e3a85 856 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 857 error = get_port_by_name(dp, devname, &port);
4afba28d 858 if (!error && dpif_port) {
4c738a8d 859 answer_port_query(port, dpif_port);
72865317 860 }
8a4e3a85 861 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 862
72865317
BP
863 return error;
864}
865
866static void
8a4e3a85
BP
867dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
868 OVS_REQ_WRLOCK(dp->cls.rwlock)
869 OVS_REQUIRES(dp->flow_mutex)
72865317 870{
8a4e3a85
BP
871 struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
872 struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);
2c0ea78f 873
8a4e3a85
BP
874 classifier_remove(&dp->cls, cr);
875 hmap_remove(&dp->flow_table, node);
876 dp_netdev_flow_unref(flow);
877}
878
879static struct dp_netdev_flow *
880dp_netdev_flow_ref(const struct dp_netdev_flow *flow_)
881{
882 struct dp_netdev_flow *flow = CONST_CAST(struct dp_netdev_flow *, flow_);
883 if (flow) {
884 ovs_refcount_ref(&flow->ref_cnt);
885 }
886 return flow;
887}
888
889static void
890dp_netdev_flow_unref(struct dp_netdev_flow *flow)
891{
892 if (flow && ovs_refcount_unref(&flow->ref_cnt) == 1) {
893 cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
894 ovs_mutex_lock(&flow->mutex);
895 dp_netdev_actions_unref(flow->actions);
896 ovs_mutex_unlock(&flow->mutex);
897 ovs_mutex_destroy(&flow->mutex);
898 free(flow);
899 }
72865317
BP
900}
901
902static void
903dp_netdev_flow_flush(struct dp_netdev *dp)
904{
1763b4b8 905 struct dp_netdev_flow *netdev_flow, *next;
72865317 906
8a4e3a85 907 ovs_mutex_lock(&dp->flow_mutex);
06f81620 908 fat_rwlock_wrlock(&dp->cls.rwlock);
1763b4b8 909 HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
8a4e3a85 910 dp_netdev_remove_flow(dp, netdev_flow);
72865317 911 }
06f81620 912 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 913 ovs_mutex_unlock(&dp->flow_mutex);
72865317
BP
914}
915
/* Removes every flow from the datapath behind 'dpif'.  Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
924
/* State of a port dump in progress; see dpif_netdev_port_dump_start(). */
struct dp_netdev_port_state {
    uint32_t bucket;            /* Position in 'ports', for hmap_at_position(). */
    uint32_t offset;            /* Position in 'ports', for hmap_at_position(). */
    char *name;                 /* Copy of the last reported port's name. */
};
930
931static int
932dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
933{
934 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
935 return 0;
936}
937
72865317 938static int
b0ec0f27 939dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 940 struct dpif_port *dpif_port)
72865317 941{
b0ec0f27 942 struct dp_netdev_port_state *state = state_;
72865317 943 struct dp_netdev *dp = get_dp_netdev(dpif);
ff073a71
BP
944 struct hmap_node *node;
945 int retval;
72865317 946
8a4e3a85 947 ovs_rwlock_rdlock(&dp->port_rwlock);
ff073a71
BP
948 node = hmap_at_position(&dp->ports, &state->bucket, &state->offset);
949 if (node) {
950 struct dp_netdev_port *port;
5279f8fd 951
ff073a71
BP
952 port = CONTAINER_OF(node, struct dp_netdev_port, node);
953
954 free(state->name);
955 state->name = xstrdup(netdev_get_name(port->netdev));
956 dpif_port->name = state->name;
957 dpif_port->type = port->type;
958 dpif_port->port_no = port->port_no;
959
960 retval = 0;
961 } else {
962 retval = EOF;
72865317 963 }
8a4e3a85 964 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 965
ff073a71 966 return retval;
b0ec0f27
BP
967}
968
969static int
4c738a8d 970dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 971{
4c738a8d
BP
972 struct dp_netdev_port_state *state = state_;
973 free(state->name);
b0ec0f27
BP
974 free(state);
975 return 0;
72865317
BP
976}
977
978static int
67a4917b 979dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
980{
981 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 982 uint64_t new_port_seq;
5279f8fd
BP
983 int error;
984
d33ed218
BP
985 new_port_seq = seq_read(dpif->dp->port_seq);
986 if (dpif->last_port_seq != new_port_seq) {
987 dpif->last_port_seq = new_port_seq;
5279f8fd 988 error = ENOBUFS;
72865317 989 } else {
5279f8fd 990 error = EAGAIN;
72865317 991 }
5279f8fd
BP
992
993 return error;
72865317
BP
994}
995
996static void
997dpif_netdev_port_poll_wait(const struct dpif *dpif_)
998{
999 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd 1000
d33ed218 1001 seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
8a4e3a85
BP
1002}
1003
1004static struct dp_netdev_flow *
1005dp_netdev_flow_cast(const struct cls_rule *cr)
1006{
1007 return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
72865317
BP
1008}
1009
72865317 1010static struct dp_netdev_flow *
2c0ea78f 1011dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 1012 OVS_EXCLUDED(dp->cls.rwlock)
2c0ea78f 1013{
8a4e3a85 1014 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1015
06f81620 1016 fat_rwlock_rdlock(&dp->cls.rwlock);
8a4e3a85
BP
1017 netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
1018 dp_netdev_flow_ref(netdev_flow);
06f81620 1019 fat_rwlock_unlock(&dp->cls.rwlock);
2c0ea78f 1020
8a4e3a85 1021 return netdev_flow;
2c0ea78f
GS
1022}
1023
1024static struct dp_netdev_flow *
1025dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 1026 OVS_REQ_RDLOCK(dp->cls.rwlock)
72865317 1027{
1763b4b8 1028 struct dp_netdev_flow *netdev_flow;
72865317 1029
2c0ea78f 1030 HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
1763b4b8 1031 &dp->flow_table) {
2c0ea78f 1032 if (flow_equal(&netdev_flow->flow, flow)) {
8a4e3a85 1033 return dp_netdev_flow_ref(netdev_flow);
72865317
BP
1034 }
1035 }
8a4e3a85 1036
72865317
BP
1037 return NULL;
1038}
1039
1040static void
1763b4b8
GS
1041get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
1042 struct dpif_flow_stats *stats)
8a4e3a85 1043 OVS_REQ_RDLOCK(netdev_flow->mutex)
feebdea2 1044{
1763b4b8
GS
1045 stats->n_packets = netdev_flow->packet_count;
1046 stats->n_bytes = netdev_flow->byte_count;
1047 stats->used = netdev_flow->used;
1048 stats->tcp_flags = netdev_flow->tcp_flags;
72865317
BP
1049}
1050
36956a7d 1051static int
8c301900
JR
1052dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1053 const struct nlattr *mask_key,
1054 uint32_t mask_key_len, const struct flow *flow,
1055 struct flow *mask)
1056{
1057 if (mask_key_len) {
80e44883
BP
1058 enum odp_key_fitness fitness;
1059
1060 fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
1061 if (fitness) {
8c301900
JR
1062 /* This should not happen: it indicates that
1063 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1064 * disagree on the acceptable form of a mask. Log the problem
1065 * as an error, with enough details to enable debugging. */
1066 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1067
1068 if (!VLOG_DROP_ERR(&rl)) {
1069 struct ds s;
1070
1071 ds_init(&s);
1072 odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
1073 true);
80e44883
BP
1074 VLOG_ERR("internal error parsing flow mask %s (%s)",
1075 ds_cstr(&s), odp_key_fitness_to_string(fitness));
8c301900
JR
1076 ds_destroy(&s);
1077 }
1078
1079 return EINVAL;
1080 }
1081 /* Force unwildcard the in_port. */
1082 mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
1083 } else {
1084 enum mf_field_id id;
1085 /* No mask key, unwildcard everything except fields whose
1086 * prerequisities are not met. */
1087 memset(mask, 0x0, sizeof *mask);
1088
1089 for (id = 0; id < MFF_N_IDS; ++id) {
1090 /* Skip registers and metadata. */
1091 if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
1092 && id != MFF_METADATA) {
1093 const struct mf_field *mf = mf_from_id(id);
1094 if (mf_are_prereqs_ok(mf, flow)) {
1095 mf_mask_field(mf, mask);
1096 }
1097 }
1098 }
1099 }
1100
1101 return 0;
1102}
1103
1104static int
1105dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1106 struct flow *flow)
36956a7d 1107{
586ddea5
BP
1108 odp_port_t in_port;
1109
8c301900 1110 if (odp_flow_key_to_flow(key, key_len, flow)) {
36956a7d 1111 /* This should not happen: it indicates that odp_flow_key_from_flow()
8c301900
JR
1112 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1113 * flow. Log the problem as an error, with enough details to enable
1114 * debugging. */
36956a7d
BP
1115 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1116
1117 if (!VLOG_DROP_ERR(&rl)) {
1118 struct ds s;
1119
1120 ds_init(&s);
8c301900 1121 odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
36956a7d
BP
1122 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
1123 ds_destroy(&s);
1124 }
1125
1126 return EINVAL;
1127 }
1128
586ddea5
BP
1129 in_port = flow->in_port.odp_port;
1130 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
1131 return EINVAL;
1132 }
1133
36956a7d
BP
1134 return 0;
1135}
1136
72865317 1137static int
693c4a01 1138dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 1139 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 1140 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
1141{
1142 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1143 struct dp_netdev_flow *netdev_flow;
bc4a05c6
BP
1144 struct flow key;
1145 int error;
36956a7d 1146
feebdea2 1147 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
1148 if (error) {
1149 return error;
1150 }
14608a15 1151
06f81620 1152 fat_rwlock_rdlock(&dp->cls.rwlock);
2c0ea78f 1153 netdev_flow = dp_netdev_find_flow(dp, &key);
06f81620 1154 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1155
1763b4b8 1156 if (netdev_flow) {
8a4e3a85
BP
1157 struct dp_netdev_actions *actions = NULL;
1158
1159 ovs_mutex_lock(&netdev_flow->mutex);
5279f8fd 1160 if (stats) {
1763b4b8 1161 get_dpif_flow_stats(netdev_flow, stats);
5279f8fd
BP
1162 }
1163 if (actionsp) {
8a4e3a85
BP
1164 actions = dp_netdev_actions_ref(netdev_flow->actions);
1165 }
1166 ovs_mutex_unlock(&netdev_flow->mutex);
1167
1168 dp_netdev_flow_unref(netdev_flow);
1169
1170 if (actionsp) {
1171 *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
1172 dp_netdev_actions_unref(actions);
5279f8fd
BP
1173 }
1174 } else {
1175 error = ENOENT;
72865317 1176 }
bc4a05c6 1177
5279f8fd 1178 return error;
72865317
BP
1179}
1180
72865317 1181static int
2c0ea78f
GS
1182dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
1183 const struct flow_wildcards *wc,
1184 const struct nlattr *actions,
1185 size_t actions_len)
8a4e3a85 1186 OVS_REQUIRES(dp->flow_mutex)
72865317 1187{
1763b4b8 1188 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1189 struct match match;
72865317 1190
1763b4b8 1191 netdev_flow = xzalloc(sizeof *netdev_flow);
8a4e3a85
BP
1192 *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
1193 ovs_refcount_init(&netdev_flow->ref_cnt);
1194
1195 ovs_mutex_init(&netdev_flow->mutex);
1196 ovs_mutex_lock(&netdev_flow->mutex);
1197
a84cb64a 1198 netdev_flow->actions = dp_netdev_actions_create(actions, actions_len);
2c0ea78f
GS
1199
1200 match_init(&match, flow, wc);
8a4e3a85
BP
1201 cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
1202 &match, NETDEV_RULE_PRIORITY);
06f81620 1203 fat_rwlock_wrlock(&dp->cls.rwlock);
8a4e3a85
BP
1204 classifier_insert(&dp->cls,
1205 CONST_CAST(struct cls_rule *, &netdev_flow->cr));
1206 hmap_insert(&dp->flow_table,
1207 CONST_CAST(struct hmap_node *, &netdev_flow->node),
1208 flow_hash(flow, 0));
06f81620 1209 fat_rwlock_unlock(&dp->cls.rwlock);
72865317 1210
8a4e3a85
BP
1211 ovs_mutex_unlock(&netdev_flow->mutex);
1212
72865317
BP
1213 return 0;
1214}
1215
1216static void
1763b4b8 1217clear_stats(struct dp_netdev_flow *netdev_flow)
8a4e3a85 1218 OVS_REQUIRES(netdev_flow->mutex)
72865317 1219{
1763b4b8
GS
1220 netdev_flow->used = 0;
1221 netdev_flow->packet_count = 0;
1222 netdev_flow->byte_count = 0;
1223 netdev_flow->tcp_flags = 0;
72865317
BP
1224}
1225
1226static int
89625d1e 1227dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
1228{
1229 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1230 struct dp_netdev_flow *netdev_flow;
2c0ea78f
GS
1231 struct flow flow;
1232 struct flow_wildcards wc;
36956a7d
BP
1233 int error;
1234
8c301900
JR
1235 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
1236 if (error) {
1237 return error;
1238 }
1239 error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
1240 put->mask, put->mask_len,
1241 &flow, &wc.masks);
36956a7d
BP
1242 if (error) {
1243 return error;
1244 }
72865317 1245
8a4e3a85 1246 ovs_mutex_lock(&dp->flow_mutex);
2c0ea78f 1247 netdev_flow = dp_netdev_lookup_flow(dp, &flow);
1763b4b8 1248 if (!netdev_flow) {
89625d1e 1249 if (put->flags & DPIF_FP_CREATE) {
72865317 1250 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
1251 if (put->stats) {
1252 memset(put->stats, 0, sizeof *put->stats);
feebdea2 1253 }
2c0ea78f 1254 error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
5279f8fd 1255 put->actions_len);
72865317 1256 } else {
5279f8fd 1257 error = EFBIG;
72865317
BP
1258 }
1259 } else {
5279f8fd 1260 error = ENOENT;
72865317
BP
1261 }
1262 } else {
2c0ea78f
GS
1263 if (put->flags & DPIF_FP_MODIFY
1264 && flow_equal(&flow, &netdev_flow->flow)) {
8a4e3a85
BP
1265 struct dp_netdev_actions *new_actions;
1266 struct dp_netdev_actions *old_actions;
1267
1268 new_actions = dp_netdev_actions_create(put->actions,
1269 put->actions_len);
1270
1271 ovs_mutex_lock(&netdev_flow->mutex);
1272 old_actions = netdev_flow->actions;
1273 netdev_flow->actions = new_actions;
a84cb64a
BP
1274 if (put->stats) {
1275 get_dpif_flow_stats(netdev_flow, put->stats);
1276 }
1277 if (put->flags & DPIF_FP_ZERO_STATS) {
1278 clear_stats(netdev_flow);
72865317 1279 }
8a4e3a85
BP
1280 ovs_mutex_unlock(&netdev_flow->mutex);
1281
1282 dp_netdev_actions_unref(old_actions);
2c0ea78f 1283 } else if (put->flags & DPIF_FP_CREATE) {
5279f8fd 1284 error = EEXIST;
2c0ea78f
GS
1285 } else {
1286 /* Overlapping flow. */
1287 error = EINVAL;
72865317 1288 }
8a4e3a85 1289 dp_netdev_flow_unref(netdev_flow);
72865317 1290 }
8a4e3a85 1291 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1292
1293 return error;
72865317
BP
1294}
1295
72865317 1296static int
b99d3cee 1297dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
1298{
1299 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1300 struct dp_netdev_flow *netdev_flow;
14608a15 1301 struct flow key;
36956a7d
BP
1302 int error;
1303
b99d3cee 1304 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
1305 if (error) {
1306 return error;
1307 }
72865317 1308
8a4e3a85 1309 ovs_mutex_lock(&dp->flow_mutex);
06f81620 1310 fat_rwlock_wrlock(&dp->cls.rwlock);
2c0ea78f 1311 netdev_flow = dp_netdev_find_flow(dp, &key);
1763b4b8 1312 if (netdev_flow) {
b99d3cee 1313 if (del->stats) {
8a4e3a85 1314 ovs_mutex_lock(&netdev_flow->mutex);
1763b4b8 1315 get_dpif_flow_stats(netdev_flow, del->stats);
8a4e3a85 1316 ovs_mutex_unlock(&netdev_flow->mutex);
feebdea2 1317 }
8a4e3a85 1318 dp_netdev_remove_flow(dp, netdev_flow);
71c24bb0 1319 dp_netdev_flow_unref(netdev_flow);
72865317 1320 } else {
5279f8fd 1321 error = ENOENT;
72865317 1322 }
06f81620 1323 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1324 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1325
1326 return error;
72865317
BP
1327}
1328
704a1e09 1329struct dp_netdev_flow_state {
a84cb64a 1330 struct dp_netdev_actions *actions;
19cf4069 1331 struct odputil_keybuf keybuf;
2c0ea78f 1332 struct odputil_keybuf maskbuf;
c97fb132 1333 struct dpif_flow_stats stats;
704a1e09
BP
1334};
1335
e723fd32
JS
1336struct dp_netdev_flow_iter {
1337 uint32_t bucket;
1338 uint32_t offset;
d2ad7ef1
JS
1339 int status;
1340 struct ovs_mutex mutex;
e723fd32
JS
1341};
1342
1343static void
1344dpif_netdev_flow_dump_state_init(void **statep)
72865317 1345{
feebdea2
BP
1346 struct dp_netdev_flow_state *state;
1347
1348 *statep = state = xmalloc(sizeof *state);
feebdea2 1349 state->actions = NULL;
e723fd32
JS
1350}
1351
1352static void
1353dpif_netdev_flow_dump_state_uninit(void *state_)
1354{
1355 struct dp_netdev_flow_state *state = state_;
1356
1357 dp_netdev_actions_unref(state->actions);
1358 free(state);
1359}
1360
1361static int
1362dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp)
1363{
1364 struct dp_netdev_flow_iter *iter;
1365
1366 *iterp = iter = xmalloc(sizeof *iter);
1367 iter->bucket = 0;
1368 iter->offset = 0;
d2ad7ef1
JS
1369 iter->status = 0;
1370 ovs_mutex_init(&iter->mutex);
704a1e09
BP
1371 return 0;
1372}
1373
1374static int
d2ad7ef1 1375dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_,
feebdea2 1376 const struct nlattr **key, size_t *key_len,
e6cc0bab 1377 const struct nlattr **mask, size_t *mask_len,
feebdea2 1378 const struct nlattr **actions, size_t *actions_len,
c97fb132 1379 const struct dpif_flow_stats **stats)
704a1e09 1380{
e723fd32 1381 struct dp_netdev_flow_iter *iter = iter_;
d2ad7ef1 1382 struct dp_netdev_flow_state *state = state_;
72865317 1383 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1384 struct dp_netdev_flow *netdev_flow;
d2ad7ef1 1385 int error;
14608a15 1386
d2ad7ef1
JS
1387 ovs_mutex_lock(&iter->mutex);
1388 error = iter->status;
1389 if (!error) {
1390 struct hmap_node *node;
1391
1392 fat_rwlock_rdlock(&dp->cls.rwlock);
1393 node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset);
1394 if (node) {
1395 netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
1396 dp_netdev_flow_ref(netdev_flow);
1397 }
1398 fat_rwlock_unlock(&dp->cls.rwlock);
1399 if (!node) {
1400 iter->status = error = EOF;
1401 }
8a4e3a85 1402 }
d2ad7ef1
JS
1403 ovs_mutex_unlock(&iter->mutex);
1404 if (error) {
1405 return error;
72865317 1406 }
704a1e09 1407
feebdea2
BP
1408 if (key) {
1409 struct ofpbuf buf;
1410
19cf4069 1411 ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
2c0ea78f
GS
1412 odp_flow_key_from_flow(&buf, &netdev_flow->flow,
1413 netdev_flow->flow.in_port.odp_port);
36956a7d 1414
feebdea2
BP
1415 *key = buf.data;
1416 *key_len = buf.size;
1417 }
1418
2c0ea78f
GS
1419 if (key && mask) {
1420 struct ofpbuf buf;
1421 struct flow_wildcards wc;
1422
1423 ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
1424 minimask_expand(&netdev_flow->cr.match.mask, &wc);
1425 odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
8bfd0fda
BP
1426 odp_to_u32(wc.masks.in_port.odp_port),
1427 SIZE_MAX);
2c0ea78f
GS
1428
1429 *mask = buf.data;
1430 *mask_len = buf.size;
e6cc0bab
AZ
1431 }
1432
8a4e3a85 1433 if (actions || stats) {
a84cb64a 1434 dp_netdev_actions_unref(state->actions);
8a4e3a85 1435 state->actions = NULL;
feebdea2 1436
8a4e3a85
BP
1437 ovs_mutex_lock(&netdev_flow->mutex);
1438 if (actions) {
1439 state->actions = dp_netdev_actions_ref(netdev_flow->actions);
1440 *actions = state->actions->actions;
1441 *actions_len = state->actions->size;
1442 }
1443 if (stats) {
1444 get_dpif_flow_stats(netdev_flow, &state->stats);
1445 *stats = &state->stats;
1446 }
1447 ovs_mutex_unlock(&netdev_flow->mutex);
feebdea2 1448 }
704a1e09 1449
8a4e3a85
BP
1450 dp_netdev_flow_unref(netdev_flow);
1451
704a1e09
BP
1452 return 0;
1453}
1454
1455static int
e723fd32 1456dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_)
704a1e09 1457{
e723fd32 1458 struct dp_netdev_flow_iter *iter = iter_;
feebdea2 1459
d2ad7ef1 1460 ovs_mutex_destroy(&iter->mutex);
e723fd32 1461 free(iter);
704a1e09 1462 return 0;
72865317
BP
1463}
1464
1465static int
758c456d 1466dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
72865317
BP
1467{
1468 struct dp_netdev *dp = get_dp_netdev(dpif);
758c456d
JR
1469 struct pkt_metadata *md = &execute->md;
1470 struct flow key;
72865317 1471
89625d1e
BP
1472 if (execute->packet->size < ETH_HEADER_LEN ||
1473 execute->packet->size > UINT16_MAX) {
72865317
BP
1474 return EINVAL;
1475 }
1476
758c456d 1477 /* Extract flow key. */
b5e7e61a 1478 flow_extract(execute->packet, md, &key);
8a4e3a85
BP
1479
1480 ovs_rwlock_rdlock(&dp->port_rwlock);
758c456d
JR
1481 dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions,
1482 execute->actions_len);
8a4e3a85
BP
1483 ovs_rwlock_unlock(&dp->port_rwlock);
1484
758c456d 1485 return 0;
72865317
BP
1486}
1487
1488static int
a12b3ead 1489dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
72865317 1490{
82272ede 1491 return 0;
72865317
BP
1492}
1493
5bf93d67
EJ
1494static int
1495dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1496 uint32_t queue_id, uint32_t *priority)
1497{
1498 *priority = queue_id;
1499 return 0;
1500}
1501
856081f6 1502static struct dp_netdev_queue *
f5126b57
BP
1503find_nonempty_queue(struct dp_netdev *dp)
1504 OVS_REQUIRES(dp->queue_mutex)
72865317 1505{
72865317
BP
1506 int i;
1507
1508 for (i = 0; i < N_QUEUES; i++) {
856081f6 1509 struct dp_netdev_queue *q = &dp->queues[i];
a12b3ead 1510 if (q->head != q->tail) {
856081f6 1511 return q;
72865317
BP
1512 }
1513 }
856081f6 1514 return NULL;
72865317
BP
1515}
1516
1517static int
90a7c55e
BP
1518dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
1519 struct ofpbuf *buf)
72865317 1520{
f5126b57 1521 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
1522 struct dp_netdev_queue *q;
1523 int error;
1524
f5126b57
BP
1525 ovs_mutex_lock(&dp->queue_mutex);
1526 q = find_nonempty_queue(dp);
856081f6 1527 if (q) {
d88b629b
BP
1528 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1529
1530 *upcall = u->upcall;
b3907fbc 1531
90a7c55e 1532 ofpbuf_uninit(buf);
d88b629b 1533 *buf = u->buf;
90a7c55e 1534
5279f8fd 1535 error = 0;
72865317 1536 } else {
5279f8fd 1537 error = EAGAIN;
72865317 1538 }
f5126b57 1539 ovs_mutex_unlock(&dp->queue_mutex);
5279f8fd
BP
1540
1541 return error;
72865317
BP
1542}
1543
1544static void
1545dpif_netdev_recv_wait(struct dpif *dpif)
1546{
d33ed218
BP
1547 struct dp_netdev *dp = get_dp_netdev(dpif);
1548 uint64_t seq;
5279f8fd 1549
f5126b57 1550 ovs_mutex_lock(&dp->queue_mutex);
d33ed218 1551 seq = seq_read(dp->queue_seq);
f5126b57 1552 if (find_nonempty_queue(dp)) {
72865317 1553 poll_immediate_wake();
d33ed218
BP
1554 } else {
1555 seq_wait(dp->queue_seq, seq);
72865317 1556 }
f5126b57 1557 ovs_mutex_unlock(&dp->queue_mutex);
72865317 1558}
1ba530f4
BP
1559
1560static void
1561dpif_netdev_recv_purge(struct dpif *dpif)
1562{
1563 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
f5126b57 1564
1ba530f4
BP
1565 dp_netdev_purge_queues(dpif_netdev->dp);
1566}
72865317 1567\f
a84cb64a
BP
1568/* Creates and returns a new 'struct dp_netdev_actions', with a reference count
1569 * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of
1570 * 'ofpacts'. */
1571struct dp_netdev_actions *
1572dp_netdev_actions_create(const struct nlattr *actions, size_t size)
1573{
1574 struct dp_netdev_actions *netdev_actions;
1575
1576 netdev_actions = xmalloc(sizeof *netdev_actions);
1577 ovs_refcount_init(&netdev_actions->ref_cnt);
1578 netdev_actions->actions = xmemdup(actions, size);
1579 netdev_actions->size = size;
1580
1581 return netdev_actions;
1582}
1583
1584/* Increments 'actions''s refcount. */
1585struct dp_netdev_actions *
1586dp_netdev_actions_ref(const struct dp_netdev_actions *actions_)
1587{
1588 struct dp_netdev_actions *actions;
1589
1590 actions = CONST_CAST(struct dp_netdev_actions *, actions_);
1591 if (actions) {
1592 ovs_refcount_ref(&actions->ref_cnt);
1593 }
1594 return actions;
1595}
1596
1597/* Decrements 'actions''s refcount and frees 'actions' if the refcount reaches
1598 * 0. */
1599void
1600dp_netdev_actions_unref(struct dp_netdev_actions *actions)
1601{
1602 if (actions && ovs_refcount_unref(&actions->ref_cnt) == 1) {
1603 free(actions->actions);
1604 free(actions);
1605 }
1606}
1607\f
6c3eee82
BP
1608static void *
1609dp_forwarder_main(void *f_)
1610{
1611 struct dp_forwarder *f = f_;
1612 struct dp_netdev *dp = f->dp;
1613 struct ofpbuf packet;
1614
1615 f->name = xasprintf("forwarder_%u", ovsthread_id_self());
1616 set_subprogram_name("%s", f->name);
1617
1618 ofpbuf_init(&packet, 0);
1619 while (!latch_is_set(&dp->exit_latch)) {
1620 bool received_anything;
1621 int i;
1622
1623 ovs_rwlock_rdlock(&dp->port_rwlock);
1624 for (i = 0; i < 50; i++) {
1625 struct dp_netdev_port *port;
1626
1627 received_anything = false;
1628 HMAP_FOR_EACH (port, node, &f->dp->ports) {
1629 if (port->rx
1630 && port->node.hash >= f->min_hash
1631 && port->node.hash <= f->max_hash) {
1632 int buf_size;
1633 int error;
1634 int mtu;
1635
1636 if (netdev_get_mtu(port->netdev, &mtu)) {
1637 mtu = ETH_PAYLOAD_MAX;
1638 }
1639 buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu;
1640
1641 ofpbuf_clear(&packet);
1642 ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM,
1643 buf_size);
1644
1645 error = netdev_rx_recv(port->rx, &packet);
1646 if (!error) {
1647 struct pkt_metadata md
1648 = PKT_METADATA_INITIALIZER(port->port_no);
6c3eee82 1649
b5e7e61a 1650 dp_netdev_port_input(dp, &packet, &md);
6c3eee82
BP
1651 received_anything = true;
1652 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1653 static struct vlog_rate_limit rl
1654 = VLOG_RATE_LIMIT_INIT(1, 5);
1655
1656 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
1657 netdev_get_name(port->netdev),
1658 ovs_strerror(error));
1659 }
1660 }
1661 }
1662
1663 if (!received_anything) {
1664 break;
1665 }
1666 }
1667
1668 if (received_anything) {
1669 poll_immediate_wake();
1670 } else {
1671 struct dp_netdev_port *port;
1672
1673 HMAP_FOR_EACH (port, node, &f->dp->ports)
1674 if (port->rx
1675 && port->node.hash >= f->min_hash
1676 && port->node.hash <= f->max_hash) {
1677 netdev_rx_wait(port->rx);
1678 }
1679 seq_wait(dp->port_seq, seq_read(dp->port_seq));
1680 latch_wait(&dp->exit_latch);
1681 }
1682 ovs_rwlock_unlock(&dp->port_rwlock);
1683
1684 poll_block();
1685 }
1686 ofpbuf_uninit(&packet);
1687
1688 free(f->name);
1689
1690 return NULL;
1691}
1692
1693static void
1694dp_netdev_set_threads(struct dp_netdev *dp, int n)
1695{
1696 int i;
1697
1698 if (n == dp->n_forwarders) {
1699 return;
1700 }
1701
1702 /* Stop existing threads. */
1703 latch_set(&dp->exit_latch);
1704 for (i = 0; i < dp->n_forwarders; i++) {
1705 struct dp_forwarder *f = &dp->forwarders[i];
1706
1707 xpthread_join(f->thread, NULL);
1708 }
1709 latch_poll(&dp->exit_latch);
1710 free(dp->forwarders);
1711
1712 /* Start new threads. */
1713 dp->forwarders = xmalloc(n * sizeof *dp->forwarders);
1714 dp->n_forwarders = n;
1715 for (i = 0; i < n; i++) {
1716 struct dp_forwarder *f = &dp->forwarders[i];
1717
1718 f->dp = dp;
1719 f->min_hash = UINT32_MAX / n * i;
1720 f->max_hash = UINT32_MAX / n * (i + 1) - 1;
1721 if (i == n - 1) {
1722 f->max_hash = UINT32_MAX;
1723 }
1724 xpthread_create(&f->thread, NULL, dp_forwarder_main, f);
1725 }
1726}
1727\f
72865317 1728static void
1763b4b8
GS
1729dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
1730 const struct ofpbuf *packet)
8a4e3a85 1731 OVS_REQUIRES(netdev_flow->mutex)
72865317 1732{
1763b4b8
GS
1733 netdev_flow->used = time_msec();
1734 netdev_flow->packet_count++;
1735 netdev_flow->byte_count += packet->size;
2c0ea78f 1736 netdev_flow->tcp_flags |= packet_get_tcp_flags(packet, &netdev_flow->flow);
72865317
BP
1737}
1738
51852a57
BP
1739static void *
1740dp_netdev_stats_new_cb(void)
1741{
1742 struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1743 ovs_mutex_init(&bucket->mutex);
1744 return bucket;
1745}
1746
1747static void
1748dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
1749{
1750 struct dp_netdev_stats *bucket;
1751
1752 bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
1753 ovs_mutex_lock(&bucket->mutex);
1754 bucket->n[type]++;
1755 ovs_mutex_unlock(&bucket->mutex);
1756}
1757
72865317 1758static void
758c456d
JR
1759dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
1760 struct pkt_metadata *md)
8a4e3a85 1761 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317 1762{
1763b4b8 1763 struct dp_netdev_flow *netdev_flow;
14608a15 1764 struct flow key;
72865317 1765
1805876e
BP
1766 if (packet->size < ETH_HEADER_LEN) {
1767 return;
1768 }
b5e7e61a 1769 flow_extract(packet, md, &key);
1763b4b8
GS
1770 netdev_flow = dp_netdev_lookup_flow(dp, &key);
1771 if (netdev_flow) {
a84cb64a
BP
1772 struct dp_netdev_actions *actions;
1773
8a4e3a85 1774 ovs_mutex_lock(&netdev_flow->mutex);
1763b4b8 1775 dp_netdev_flow_used(netdev_flow, packet);
a84cb64a 1776 actions = dp_netdev_actions_ref(netdev_flow->actions);
8a4e3a85
BP
1777 ovs_mutex_unlock(&netdev_flow->mutex);
1778
758c456d 1779 dp_netdev_execute_actions(dp, &key, packet, md,
a84cb64a
BP
1780 actions->actions, actions->size);
1781 dp_netdev_actions_unref(actions);
71c24bb0 1782 dp_netdev_flow_unref(netdev_flow);
51852a57 1783 dp_netdev_count_packet(dp, DP_STAT_HIT);
72865317 1784 } else {
51852a57 1785 dp_netdev_count_packet(dp, DP_STAT_MISS);
e995e3df 1786 dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
72865317
BP
1787 }
1788}
1789
72865317 1790static int
da546e07 1791dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
e995e3df
BP
1792 int queue_no, const struct flow *flow,
1793 const struct nlattr *userdata)
f5126b57 1794 OVS_EXCLUDED(dp->queue_mutex)
72865317 1795{
856081f6 1796 struct dp_netdev_queue *q = &dp->queues[queue_no];
f5126b57
BP
1797 int error;
1798
1799 ovs_mutex_lock(&dp->queue_mutex);
e995e3df
BP
1800 if (q->head - q->tail < MAX_QUEUE_LEN) {
1801 struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
1802 struct dpif_upcall *upcall = &u->upcall;
1803 struct ofpbuf *buf = &u->buf;
1804 size_t buf_size;
1805
1806 upcall->type = queue_no;
1807
1808 /* Allocate buffer big enough for everything. */
da546e07 1809 buf_size = ODPUTIL_FLOW_KEY_BYTES;
e995e3df
BP
1810 if (userdata) {
1811 buf_size += NLA_ALIGN(userdata->nla_len);
1812 }
1813 ofpbuf_init(buf, buf_size);
72865317 1814
e995e3df 1815 /* Put ODP flow. */
4e022ec0 1816 odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
e995e3df
BP
1817 upcall->key = buf->data;
1818 upcall->key_len = buf->size;
d88b629b 1819
e995e3df
BP
1820 /* Put userdata. */
1821 if (userdata) {
1822 upcall->userdata = ofpbuf_put(buf, userdata,
1823 NLA_ALIGN(userdata->nla_len));
1824 }
856081f6 1825
da546e07
JR
1826 /* Steal packet data. */
1827 ovs_assert(packet->source == OFPBUF_MALLOC);
1828 upcall->packet = *packet;
1829 ofpbuf_use(packet, NULL, 0);
856081f6 1830
d33ed218
BP
1831 seq_change(dp->queue_seq);
1832
f5126b57 1833 error = 0;
e995e3df 1834 } else {
51852a57 1835 dp_netdev_count_packet(dp, DP_STAT_LOST);
f5126b57 1836 error = ENOBUFS;
e995e3df 1837 }
f5126b57
BP
1838 ovs_mutex_unlock(&dp->queue_mutex);
1839
1840 return error;
72865317
BP
1841}
1842
9080a111
JR
/* Context handed to dp_execute_cb() through odp_execute_actions(). */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;       /* Datapath the actions run in. */
    const struct flow *key;     /* Flow key passed along with upcalls. */
};
1847
1848static void
758c456d
JR
1849dp_execute_cb(void *aux_, struct ofpbuf *packet,
1850 const struct pkt_metadata *md OVS_UNUSED,
09f9da0b 1851 const struct nlattr *a, bool may_steal)
8a4e3a85 1852 OVS_NO_THREAD_SAFETY_ANALYSIS
9080a111
JR
1853{
1854 struct dp_netdev_execute_aux *aux = aux_;
09f9da0b 1855 int type = nl_attr_type(a);
8a4e3a85 1856 struct dp_netdev_port *p;
9080a111 1857
09f9da0b
JR
1858 switch ((enum ovs_action_attr)type) {
1859 case OVS_ACTION_ATTR_OUTPUT:
8a4e3a85
BP
1860 p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
1861 if (p) {
1862 netdev_send(p->netdev, packet);
1863 }
09f9da0b
JR
1864 break;
1865
1866 case OVS_ACTION_ATTR_USERSPACE: {
1867 const struct nlattr *userdata;
4fc65926 1868
09f9da0b 1869 userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
da546e07 1870
09f9da0b
JR
1871 /* Make a copy if we are not allowed to steal the packet's data. */
1872 if (!may_steal) {
1873 packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
1874 }
1875 dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
1876 userdata);
1877 if (!may_steal) {
1878 ofpbuf_uninit(packet);
1879 }
1880 break;
da546e07 1881 }
09f9da0b
JR
1882 case OVS_ACTION_ATTR_PUSH_VLAN:
1883 case OVS_ACTION_ATTR_POP_VLAN:
1884 case OVS_ACTION_ATTR_PUSH_MPLS:
1885 case OVS_ACTION_ATTR_POP_MPLS:
1886 case OVS_ACTION_ATTR_SET:
1887 case OVS_ACTION_ATTR_SAMPLE:
1888 case OVS_ACTION_ATTR_UNSPEC:
1889 case __OVS_ACTION_ATTR_MAX:
1890 OVS_NOT_REACHED();
da546e07 1891 }
98403001
BP
1892}
1893
4edb9ae9 1894static void
9080a111 1895dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key,
758c456d 1896 struct ofpbuf *packet, struct pkt_metadata *md,
9080a111 1897 const struct nlattr *actions, size_t actions_len)
8a4e3a85 1898 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317 1899{
9080a111 1900 struct dp_netdev_execute_aux aux = {dp, key};
9080a111 1901
758c456d 1902 odp_execute_actions(&aux, packet, md, actions, actions_len, dp_execute_cb);
72865317
BP
1903}
1904
1905const struct dpif_class dpif_netdev_class = {
72865317 1906 "netdev",
2197d7ab 1907 dpif_netdev_enumerate,
0aeaabc8 1908 dpif_netdev_port_open_type,
72865317
BP
1909 dpif_netdev_open,
1910 dpif_netdev_close,
7dab847a 1911 dpif_netdev_destroy,
6c3eee82
BP
1912 NULL, /* run */
1913 NULL, /* wait */
72865317 1914 dpif_netdev_get_stats,
72865317
BP
1915 dpif_netdev_port_add,
1916 dpif_netdev_port_del,
1917 dpif_netdev_port_query_by_number,
1918 dpif_netdev_port_query_by_name,
98403001 1919 NULL, /* port_get_pid */
b0ec0f27
BP
1920 dpif_netdev_port_dump_start,
1921 dpif_netdev_port_dump_next,
1922 dpif_netdev_port_dump_done,
72865317
BP
1923 dpif_netdev_port_poll,
1924 dpif_netdev_port_poll_wait,
72865317
BP
1925 dpif_netdev_flow_get,
1926 dpif_netdev_flow_put,
1927 dpif_netdev_flow_del,
1928 dpif_netdev_flow_flush,
e723fd32 1929 dpif_netdev_flow_dump_state_init,
704a1e09
BP
1930 dpif_netdev_flow_dump_start,
1931 dpif_netdev_flow_dump_next,
bdeadfdd 1932 NULL,
704a1e09 1933 dpif_netdev_flow_dump_done,
e723fd32 1934 dpif_netdev_flow_dump_state_uninit,
72865317 1935 dpif_netdev_execute,
6bc60024 1936 NULL, /* operate */
a12b3ead 1937 dpif_netdev_recv_set,
5bf93d67 1938 dpif_netdev_queue_to_priority,
72865317
BP
1939 dpif_netdev_recv,
1940 dpif_netdev_recv_wait,
1ba530f4 1941 dpif_netdev_recv_purge,
72865317 1942};
614c4892 1943
74cc3969
BP
1944static void
1945dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
1946 const char *argv[], void *aux OVS_UNUSED)
1947{
1948 struct dp_netdev_port *port;
1949 struct dp_netdev *dp;
ff073a71 1950 odp_port_t port_no;
74cc3969 1951
8a4e3a85 1952 ovs_mutex_lock(&dp_netdev_mutex);
74cc3969
BP
1953 dp = shash_find_data(&dp_netdevs, argv[1]);
1954 if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
8a4e3a85 1955 ovs_mutex_unlock(&dp_netdev_mutex);
74cc3969
BP
1956 unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
1957 return;
1958 }
8a4e3a85
BP
1959 ovs_refcount_ref(&dp->ref_cnt);
1960 ovs_mutex_unlock(&dp_netdev_mutex);
74cc3969 1961
8a4e3a85 1962 ovs_rwlock_wrlock(&dp->port_rwlock);
74cc3969
BP
1963 if (get_port_by_name(dp, argv[2], &port)) {
1964 unixctl_command_reply_error(conn, "unknown port");
8a4e3a85 1965 goto exit;
74cc3969
BP
1966 }
1967
ff073a71
BP
1968 port_no = u32_to_odp(atoi(argv[3]));
1969 if (!port_no || port_no == ODPP_NONE) {
74cc3969 1970 unixctl_command_reply_error(conn, "bad port number");
8a4e3a85 1971 goto exit;
74cc3969 1972 }
ff073a71 1973 if (dp_netdev_lookup_port(dp, port_no)) {
74cc3969 1974 unixctl_command_reply_error(conn, "port number already in use");
8a4e3a85 1975 goto exit;
74cc3969 1976 }
ff073a71
BP
1977 hmap_remove(&dp->ports, &port->node);
1978 port->port_no = port_no;
1979 hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
d33ed218 1980 seq_change(dp->port_seq);
74cc3969 1981 unixctl_command_reply(conn, NULL);
8a4e3a85
BP
1982
1983exit:
1984 ovs_rwlock_unlock(&dp->port_rwlock);
1985 dp_netdev_unref(dp);
74cc3969
BP
1986}
1987
0cbfe35d
BP
1988static void
1989dpif_dummy_register__(const char *type)
1990{
1991 struct dpif_class *class;
1992
1993 class = xmalloc(sizeof *class);
1994 *class = dpif_netdev_class;
1995 class->type = xstrdup(type);
1996 dp_register_provider(class);
1997}
1998
614c4892 1999void
0cbfe35d 2000dpif_dummy_register(bool override)
614c4892 2001{
0cbfe35d
BP
2002 if (override) {
2003 struct sset types;
2004 const char *type;
2005
2006 sset_init(&types);
2007 dp_enumerate_types(&types);
2008 SSET_FOR_EACH (type, &types) {
2009 if (!dp_unregister_provider(type)) {
2010 dpif_dummy_register__(type);
2011 }
2012 }
2013 sset_destroy(&types);
614c4892 2014 }
0cbfe35d
BP
2015
2016 dpif_dummy_register__("dummy");
74cc3969
BP
2017
2018 unixctl_command_register("dpif-dummy/change-port-number",
2019 "DP PORT NEW-NUMBER",
2020 3, 3, dpif_dummy_change_port_number, NULL);
614c4892 2021}