/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "dpif.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "classifier.h"
#include "cmap.h"
#include "csum.h"
#include "dpif.h"
#include "dpif-provider.h"
#include "dummy.h"
#include "dynamic-string.h"
#include "flow.h"
#include "hmap.h"
#include "latch.h"
#include "list.h"
#include "meta-flow.h"
#include "netdev.h"
#include "netdev-dpdk.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-execute.h"
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"
#include "seq.h"
#include "shash.h"
#include "sset.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(dpif_netdev);

/* By default, choose a priority in the middle. */
#define NETDEV_RULE_PRIORITY 0x8000

#define NR_THREADS 1
/* Use a per-thread recirc_depth to prevent recirculation loops. */
#define MAX_RECIRC_DEPTH 5
DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)

/* Configuration parameters. */
enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */

/* Queues. */
enum { MAX_QUEUE_LEN = 128 };   /* Maximum number of packets per queue. */
enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));

/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;

/* Contains all 'struct dp_netdev's. */
static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
    = SHASH_INITIALIZER(&dp_netdevs);

struct dp_netdev_upcall {
    struct dpif_upcall upcall;  /* Queued upcall information. */
    struct ofpbuf buf;          /* ofpbuf instance for upcall.packet. */
};

/* A queue passing packets from a struct dp_netdev to its clients (handlers).
 *
 *
 * Thread-safety
 * =============
 *
 * Any access at all requires the owning 'dp_netdev''s queue_rwlock and
 * its own mutex. */
struct dp_netdev_queue {
    struct ovs_mutex mutex;
    struct seq *seq;            /* Incremented whenever a packet is queued. */
    struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
    unsigned int head OVS_GUARDED;
    unsigned int tail OVS_GUARDED;
};

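/* (Illustrative note, not part of the original tree: 'head' and 'tail' are
 * free-running counters, so a slot is always addressed as
 * upcalls[counter & QUEUE_MASK]; the BUILD_ASSERT_DECL above guarantees that
 * MAX_QUEUE_LEN is a power of two, which is what makes the mask valid.  A
 * sketch of the consumer side, matching dpif_netdev_recv() below:
 *
 *     if (q->head != q->tail) {                        // queue non-empty
 *         struct dp_netdev_upcall *u
 *             = &q->upcalls[q->tail++ & QUEUE_MASK];   // pop oldest slot
 *         ...
 *     }
 */
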
/* Datapath based on the network device interface from netdev.h.
 *
 *
 * Thread-safety
 * =============
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 *
 * Acquisition order is, from outermost to innermost:
 *
 *    dp_netdev_mutex (global)
 *    port_mutex
 *    flow_mutex
 *    cls.rwlock
 *    queue_rwlock
 */
struct dp_netdev {
    const struct dpif_class *const class;
    const char *const name;
    struct ovs_refcount ref_cnt;
    atomic_flag destroyed;

    /* Flows.
     *
     * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
     *
     * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
     * the 'cls->rwlock' write lock.  (The outer 'flow_mutex' allows writers
     * to atomically perform multiple operations on 'cls' and 'flow_table'.)
     */
    struct ovs_mutex flow_mutex;
    struct classifier cls;      /* Classifier.  Protected by cls.rwlock. */
    struct hmap flow_table OVS_GUARDED; /* Flow table. */

    /* Queues.
     *
     * 'queue_rwlock' protects the modification of 'handler_queues' and
     * 'n_handlers'.  Each queue element is protected by its own mutex. */
    struct fat_rwlock queue_rwlock;
    struct dp_netdev_queue *handler_queues;
    uint32_t n_handlers;

    /* Statistics.
     *
     * ovsthread_stats is internally synchronized. */
    struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */

    /* Ports.
     *
     * Protected by RCU.  Take the mutex to add or remove ports. */
    struct ovs_mutex port_mutex;
    struct cmap ports;
    struct seq *port_seq;       /* Incremented whenever a port changes. */

    /* Forwarding threads. */
    struct latch exit_latch;
    struct pmd_thread *pmd_threads;
    size_t n_pmd_threads;
    int pmd_count;
};

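/* (Illustrative sketch, not part of the original tree: following the
 * acquisition order documented above, a writer that had to touch both the
 * port set and the flow table would nest the locks outermost-first:
 *
 *     ovs_mutex_lock(&dp->port_mutex);      // outermost of the three
 *     ovs_mutex_lock(&dp->flow_mutex);
 *     fat_rwlock_wrlock(&dp->cls.rwlock);   // innermost
 *     ...
 *     fat_rwlock_unlock(&dp->cls.rwlock);
 *     ovs_mutex_unlock(&dp->flow_mutex);
 *     ovs_mutex_unlock(&dp->port_mutex);
 *
 * Taking them in any other order could deadlock against the functions
 * below, e.g. dp_netdev_flow_flush(), which takes flow_mutex before
 * cls.rwlock.) */
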
static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                    odp_port_t);

enum dp_stat_type {
    DP_STAT_HIT,                /* Packets that matched in the flow table. */
    DP_STAT_MISS,               /* Packets that did not match. */
    DP_STAT_LOST,               /* Packets not passed up to the client. */
    DP_N_STATS
};

/* Contained by struct dp_netdev's 'stats' member. */
struct dp_netdev_stats {
    struct ovs_mutex mutex;     /* Protects 'n'. */

    /* Indexed by DP_STAT_*, protected by 'mutex'. */
    unsigned long long int n[DP_N_STATS] OVS_GUARDED;
};

/* A port in a netdev-based datapath. */
struct dp_netdev_port {
    struct cmap_node node;      /* Node in dp_netdev's 'ports'. */
    odp_port_t port_no;
    struct netdev *netdev;
    struct netdev_saved_flags *sf;
    struct netdev_rxq **rxq;
    struct ovs_refcount ref_cnt;
    char *type;                 /* Port type as requested by user. */
};

/* A flow in dp_netdev's 'flow_table'.
 *
 *
 * Thread-safety
 * =============
 *
 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 *
 * Motivation
 * ----------
 *
 * The thread safety rules described here for "struct dp_netdev_flow" are
 * motivated by two goals:
 *
 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 *      reading bad data due to changes by some thread concurrently modifying
 *      those members.
 *
 *    - Prevent two threads making changes to members of a given "struct
 *      dp_netdev_flow" from interfering with each other.
 *
 *
 * Rules
 * -----
 *
 * A flow 'flow' may be accessed without a risk of being freed by code that
 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
 * 'flow->ref_cnt' (or both).  Code that needs to hold onto a flow for a while
 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
 *
 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
 * protect members of 'flow' from modification.
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 */
struct dp_netdev_flow {
    /* Packet classification. */
    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */

    /* Hash table index by unmasked flow. */
    const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
    const struct flow flow;      /* The flow that created this entry. */

    /* Statistics.
     *
     * Reading or writing these members requires 'mutex'. */
    struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */

    /* Actions. */
    OVSRCU_TYPE(struct dp_netdev_actions *) actions;
};

static void dp_netdev_flow_free(struct dp_netdev_flow *);

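/* (Illustrative sketch, not part of the original tree: per the "Rules"
 * above, a reader that only needs a flow briefly can rely on the classifier
 * lock alone, as dpif_netdev_flow_get() below does:
 *
 *     fat_rwlock_rdlock(&dp->cls.rwlock);
 *     flow = dp_netdev_find_flow(dp, &key);  // safe while rwlock is held
 *     ...read flow->flow, flow->cr...
 *     fat_rwlock_unlock(&dp->cls.rwlock);
 */
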
/* Contained by struct dp_netdev_flow's 'stats' member. */
struct dp_netdev_flow_stats {
    struct ovs_mutex mutex;         /* Guards all the other members. */

    long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
    uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
};

/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' is protected with RCU. */
struct dp_netdev_actions {
    /* These members are immutable: they do not change during the struct's
     * lifetime. */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};

struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
                                                   size_t);
struct dp_netdev_actions *dp_netdev_flow_get_actions(
    const struct dp_netdev_flow *);
static void dp_netdev_actions_free(struct dp_netdev_actions *);

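/* (Illustrative sketch, not part of the original tree: the RCU protocol
 * that dpif_netdev_flow_put() below uses to replace a flow's actions
 * without blocking readers:
 *
 *     new = dp_netdev_actions_create(acts, len);    // build replacement
 *     old = dp_netdev_flow_get_actions(flow);       // ovsrcu_get() snapshot
 *     ovsrcu_set(&flow->actions, new);              // publish new version
 *     ovsrcu_postpone(dp_netdev_actions_free, old); // free after grace period
 *
 * Readers take a snapshot with dp_netdev_flow_get_actions() and may keep
 * using it until they quiesce.) */
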
/* PMD: Poll mode drivers.  A PMD accesses devices by polling, which
 * eliminates the performance overhead of interrupt processing; as a
 * consequence, a netdev cannot implement rx-wait for these devices.
 * Instead, dpif-netdev polls each device to check its receive buffer, and
 * each pmd thread polls the devices assigned to it.
 *
 * DPDK uses PMDs for accessing NICs.
 *
 * A thread that receives packets from PMD ports, looks them up in the flow
 * table, and executes the actions it finds.
 */
struct pmd_thread {
    struct dp_netdev *dp;
    pthread_t thread;
    int id;
    atomic_uint change_seq;
};

/* Interface to netdev-based datapath. */
struct dpif_netdev {
    struct dpif dpif;
    struct dp_netdev *dp;
    uint64_t last_port_seq;
};

static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
                              struct dp_netdev_port **portp);
static int get_port_by_name(struct dp_netdev *dp, const char *devname,
                            struct dp_netdev_port **portp);
static void dp_netdev_free(struct dp_netdev *)
    OVS_REQUIRES(dp_netdev_mutex);
static void dp_netdev_flow_flush(struct dp_netdev *);
static int do_add_port(struct dp_netdev *dp, const char *devname,
                       const char *type, odp_port_t port_no)
    OVS_REQUIRES(dp->port_mutex);
static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
    OVS_REQUIRES(dp->port_mutex);
static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
                            bool create, struct dpif **);
static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
                                      int queue_no, int type,
                                      const struct miniflow *,
                                      const struct nlattr *userdata);
static void dp_netdev_execute_actions(struct dp_netdev *dp,
                                      const struct miniflow *,
                                      struct ofpbuf *, bool may_steal,
                                      struct pkt_metadata *,
                                      const struct nlattr *actions,
                                      size_t actions_len);
static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
                                 struct pkt_metadata *);

static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);

static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
    return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
}

static struct dp_netdev *
get_dp_netdev(const struct dpif *dpif)
{
    return dpif_netdev_cast(dpif)->dp;
}

static int
dpif_netdev_enumerate(struct sset *all_dps)
{
    struct shash_node *node;

    ovs_mutex_lock(&dp_netdev_mutex);
    SHASH_FOR_EACH(node, &dp_netdevs) {
        sset_add(all_dps, node->name);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return 0;
}

static bool
dpif_netdev_class_is_dummy(const struct dpif_class *class)
{
    return class != &dpif_netdev_class;
}

static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    return strcmp(type, "internal") ? type
                  : dpif_netdev_class_is_dummy(class) ? "dummy"
                  : "tap";
}

static struct dpif *
create_dpif_netdev(struct dp_netdev *dp)
{
    uint16_t netflow_id = hash_string(dp->name, 0);
    struct dpif_netdev *dpif;

    ovs_refcount_ref(&dp->ref_cnt);

    dpif = xmalloc(sizeof *dpif);
    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
    dpif->dp = dp;
    dpif->last_port_seq = seq_read(dp->port_seq);

    return &dpif->dpif;
}

/* Choose an unused, non-zero port number and return it on success.
 * Return ODPP_NONE on failure. */
static odp_port_t
choose_port(struct dp_netdev *dp, const char *name)
    OVS_REQUIRES(dp->port_mutex)
{
    uint32_t port_no;

    if (dp->class != &dpif_netdev_class) {
        const char *p;
        int start_no = 0;

        /* If the port name begins with "br", start the number search at
         * 100 to make writing tests easier. */
        if (!strncmp(name, "br", 2)) {
            start_no = 100;
        }

        /* If the port name contains a number, try to assign that port number.
         * This can make writing unit tests easier because port numbers are
         * predictable. */
        for (p = name; *p != '\0'; p++) {
            if (isdigit((unsigned char) *p)) {
                port_no = start_no + strtol(p, NULL, 10);
                if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
                    && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
                    return u32_to_odp(port_no);
                }
                break;
            }
        }
    }

    for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
        if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
            return u32_to_odp(port_no);
        }
    }

    return ODPP_NONE;
}

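/* (Illustrative note, not part of the original tree: with the heuristics
 * above, a non-dummy-class port named "eth2" is assigned port number 2 and
 * "br1" is assigned 100 + 1 = 101, falling back to the first free number
 * between 1 and UINT16_MAX if the guessed number is already taken.) */
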
static int
create_dp_netdev(const char *name, const struct dpif_class *class,
                 struct dp_netdev **dpp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev *dp;
    int error;

    dp = xzalloc(sizeof *dp);
    shash_add(&dp_netdevs, name, dp);

    *CONST_CAST(const struct dpif_class **, &dp->class) = class;
    *CONST_CAST(const char **, &dp->name) = xstrdup(name);
    ovs_refcount_init(&dp->ref_cnt);
    atomic_flag_clear(&dp->destroyed);

    ovs_mutex_init(&dp->flow_mutex);
    classifier_init(&dp->cls, NULL);
    hmap_init(&dp->flow_table);

    fat_rwlock_init(&dp->queue_rwlock);

    ovsthread_stats_init(&dp->stats);

    ovs_mutex_init(&dp->port_mutex);
    cmap_init(&dp->ports);
    dp->port_seq = seq_create();
    latch_init(&dp->exit_latch);

    ovs_mutex_lock(&dp->port_mutex);
    error = do_add_port(dp, name, "internal", ODPP_LOCAL);
    ovs_mutex_unlock(&dp->port_mutex);
    if (error) {
        dp_netdev_free(dp);
        return error;
    }

    *dpp = dp;
    return 0;
}

static int
dpif_netdev_open(const struct dpif_class *class, const char *name,
                 bool create, struct dpif **dpifp)
{
    struct dp_netdev *dp;
    int error;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, name);
    if (!dp) {
        error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
    } else {
        error = (dp->class != class ? EINVAL
                 : create ? EEXIST
                 : 0);
    }
    if (!error) {
        *dpifp = create_dpif_netdev(dp);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return error;
}

static void
dp_netdev_purge_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    int i;

    for (i = 0; i < dp->n_handlers; i++) {
        struct dp_netdev_queue *q = &dp->handler_queues[i];

        ovs_mutex_lock(&q->mutex);
        while (q->tail != q->head) {
            struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
            ofpbuf_uninit(&u->upcall.packet);
            ofpbuf_uninit(&u->buf);
        }
        ovs_mutex_unlock(&q->mutex);
    }
}

/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 * through the 'dp_netdevs' shash while freeing 'dp'. */
static void
dp_netdev_free(struct dp_netdev *dp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev_port *port;
    struct dp_netdev_stats *bucket;
    struct cmap_cursor cursor;
    int i;

    shash_find_and_delete(&dp_netdevs, dp->name);

    dp_netdev_set_pmd_threads(dp, 0);
    free(dp->pmd_threads);

    dp_netdev_flow_flush(dp);
    ovs_mutex_lock(&dp->port_mutex);
    CMAP_FOR_EACH (port, node, &cursor, &dp->ports) {
        do_del_port(dp, port);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
        ovs_mutex_destroy(&bucket->mutex);
        free_cacheline(bucket);
    }
    ovsthread_stats_destroy(&dp->stats);

    fat_rwlock_wrlock(&dp->queue_rwlock);
    dp_netdev_destroy_all_queues(dp);
    fat_rwlock_unlock(&dp->queue_rwlock);

    fat_rwlock_destroy(&dp->queue_rwlock);

    classifier_destroy(&dp->cls);
    hmap_destroy(&dp->flow_table);
    ovs_mutex_destroy(&dp->flow_mutex);
    seq_destroy(dp->port_seq);
    cmap_destroy(&dp->ports);
    latch_destroy(&dp->exit_latch);
    free(CONST_CAST(char *, dp->name));
    free(dp);
}

static void
dp_netdev_unref(struct dp_netdev *dp)
{
    if (dp) {
        /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
         * get a new reference to 'dp' through the 'dp_netdevs' shash. */
        ovs_mutex_lock(&dp_netdev_mutex);
        if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
            dp_netdev_free(dp);
        }
        ovs_mutex_unlock(&dp_netdev_mutex);
    }
}

static void
dpif_netdev_close(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_unref(dp);
    free(dpif);
}

static int
dpif_netdev_destroy(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    if (!atomic_flag_test_and_set(&dp->destroyed)) {
        if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
            /* Can't happen: 'dpif' still owns a reference to 'dp'. */
            OVS_NOT_REACHED();
        }
    }

    return 0;
}

static int
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_stats *bucket;
    size_t i;

    fat_rwlock_rdlock(&dp->cls.rwlock);
    stats->n_flows = hmap_count(&dp->flow_table);
    fat_rwlock_unlock(&dp->cls.rwlock);

    stats->n_hit = stats->n_missed = stats->n_lost = 0;
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
        ovs_mutex_lock(&bucket->mutex);
        stats->n_hit += bucket->n[DP_STAT_HIT];
        stats->n_missed += bucket->n[DP_STAT_MISS];
        stats->n_lost += bucket->n[DP_STAT_LOST];
        ovs_mutex_unlock(&bucket->mutex);
    }
    stats->n_masks = UINT32_MAX;
    stats->n_mask_hit = UINT64_MAX;

    return 0;
}

static void
dp_netdev_reload_pmd_threads(struct dp_netdev *dp)
{
    int i;

    for (i = 0; i < dp->n_pmd_threads; i++) {
        struct pmd_thread *f = &dp->pmd_threads[i];
        int id;

        atomic_add(&f->change_seq, 1, &id);
    }
}

static uint32_t
hash_port_no(odp_port_t port_no)
{
    return hash_int(odp_to_u32(port_no), 0);
}

static int
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
            odp_port_t port_no)
    OVS_REQUIRES(dp->port_mutex)
{
    struct netdev_saved_flags *sf;
    struct dp_netdev_port *port;
    struct netdev *netdev;
    enum netdev_flags flags;
    const char *open_type;
    int error;
    int i;

    /* XXX reject devices already in some dp_netdev. */

    /* Open and validate network device. */
    open_type = dpif_netdev_port_open_type(dp->class, type);
    error = netdev_open(devname, open_type, &netdev);
    if (error) {
        return error;
    }
    /* XXX reject non-Ethernet devices */

    netdev_get_flags(netdev, &flags);
    if (flags & NETDEV_LOOPBACK) {
        VLOG_ERR("%s: cannot add a loopback device", devname);
        netdev_close(netdev);
        return EINVAL;
    }

    port = xzalloc(sizeof *port);
    port->port_no = port_no;
    port->netdev = netdev;
    port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
    port->type = xstrdup(type);
    for (i = 0; i < netdev_n_rxq(netdev); i++) {
        error = netdev_rxq_open(netdev, &port->rxq[i], i);
        if (error
            && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
            /* Report the error returned by netdev_rxq_open(), not the stale
             * global 'errno'. */
            VLOG_ERR("%s: cannot receive packets on this network device (%s)",
                     devname, ovs_strerror(error));
            netdev_close(netdev);
            return error;
        }
    }

    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
    if (error) {
        for (i = 0; i < netdev_n_rxq(netdev); i++) {
            netdev_rxq_close(port->rxq[i]);
        }
        netdev_close(netdev);
        free(port->rxq);
        free(port);
        return error;
    }
    port->sf = sf;

    if (netdev_is_pmd(netdev)) {
        dp->pmd_count++;
        dp_netdev_set_pmd_threads(dp, NR_THREADS);
        dp_netdev_reload_pmd_threads(dp);
    }
    ovs_refcount_init(&port->ref_cnt);

    cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
    seq_change(dp->port_seq);

    return 0;
}

static int
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
                     odp_port_t *port_nop)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dpif_port;
    odp_port_t port_no;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (*port_nop != ODPP_NONE) {
        port_no = *port_nop;
        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
    } else {
        port_no = choose_port(dp, dpif_port);
        error = port_no == ODPP_NONE ? EFBIG : 0;
    }
    if (!error) {
        *port_nop = port_no;
        error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

static int
dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    if (port_no == ODPP_LOCAL) {
        error = EINVAL;
    } else {
        struct dp_netdev_port *port;

        error = get_port_by_number(dp, port_no, &port);
        if (!error) {
            do_del_port(dp, port);
        }
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

static bool
is_valid_port_number(odp_port_t port_no)
{
    return port_no != ODPP_NONE;
}

static struct dp_netdev_port *
dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
{
    struct dp_netdev_port *port;

    CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
        if (port->port_no == port_no) {
            return port;
        }
    }
    return NULL;
}

static int
get_port_by_number(struct dp_netdev *dp,
                   odp_port_t port_no, struct dp_netdev_port **portp)
{
    if (!is_valid_port_number(port_no)) {
        *portp = NULL;
        return EINVAL;
    } else {
        *portp = dp_netdev_lookup_port(dp, port_no);
        return *portp ? 0 : ENOENT;
    }
}

static void
port_ref(struct dp_netdev_port *port)
{
    if (port) {
        ovs_refcount_ref(&port->ref_cnt);
    }
}

static void
port_destroy__(struct dp_netdev_port *port)
{
    int n_rxq = netdev_n_rxq(port->netdev);
    int i;

    netdev_close(port->netdev);
    netdev_restore_flags(port->sf);

    for (i = 0; i < n_rxq; i++) {
        netdev_rxq_close(port->rxq[i]);
    }
    free(port->rxq);
    free(port->type);
    free(port);
}

static void
port_unref(struct dp_netdev_port *port)
{
    if (port && ovs_refcount_unref(&port->ref_cnt) == 1) {
        ovsrcu_postpone(port_destroy__, port);
    }
}

static int
get_port_by_name(struct dp_netdev *dp,
                 const char *devname, struct dp_netdev_port **portp)
    OVS_REQUIRES(dp->port_mutex)
{
    struct dp_netdev_port *port;
    struct cmap_cursor cursor;

    CMAP_FOR_EACH (port, node, &cursor, &dp->ports) {
        if (!strcmp(netdev_get_name(port->netdev), devname)) {
            *portp = port;
            return 0;
        }
    }
    return ENOENT;
}

static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
    OVS_REQUIRES(dp->port_mutex)
{
    cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
    seq_change(dp->port_seq);
    if (netdev_is_pmd(port->netdev)) {
        dp_netdev_reload_pmd_threads(dp);
    }

    port_unref(port);
}

static void
answer_port_query(const struct dp_netdev_port *port,
                  struct dpif_port *dpif_port)
{
    dpif_port->name = xstrdup(netdev_get_name(port->netdev));
    dpif_port->type = xstrdup(port->type);
    dpif_port->port_no = port->port_no;
}

static int
dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
                                 struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    error = get_port_by_number(dp, port_no, &port);
    if (!error && dpif_port) {
        answer_port_query(port, dpif_port);
    }

    return error;
}

static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    ovs_mutex_lock(&dp->port_mutex);
    error = get_port_by_name(dp, devname, &port);
    if (!error && dpif_port) {
        answer_port_query(port, dpif_port);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    return error;
}

BP
918static void
919dp_netdev_flow_free(struct dp_netdev_flow *flow)
920{
921 struct dp_netdev_flow_stats *bucket;
922 size_t i;
923
924 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
925 ovs_mutex_destroy(&bucket->mutex);
926 free_cacheline(bucket);
927 }
928 ovsthread_stats_destroy(&flow->stats);
929
930 cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
931 dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
61e7deb1
BP
932 free(flow);
933}
934
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQ_WRLOCK(dp->cls.rwlock)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
    struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);

    classifier_remove(&dp->cls, cr);
    hmap_remove(&dp->flow_table, node);
    ovsrcu_postpone(dp_netdev_flow_free, flow);
}

static void
dp_netdev_flow_flush(struct dp_netdev *dp)
{
    struct dp_netdev_flow *netdev_flow, *next;

    ovs_mutex_lock(&dp->flow_mutex);
    fat_rwlock_wrlock(&dp->cls.rwlock);
    HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
        dp_netdev_remove_flow(dp, netdev_flow);
    }
    fat_rwlock_unlock(&dp->cls.rwlock);
    ovs_mutex_unlock(&dp->flow_mutex);
}

static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_flow_flush(dp);
    return 0;
}

struct dp_netdev_port_state {
    struct cmap_position position;
    char *name;
};

static int
dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    *statep = xzalloc(sizeof(struct dp_netdev_port_state));
    return 0;
}

static int
dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
                           struct dpif_port *dpif_port)
{
    struct dp_netdev_port_state *state = state_;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct cmap_node *node;
    int retval;

    node = cmap_next_position(&dp->ports, &state->position);
    if (node) {
        struct dp_netdev_port *port;

        port = CONTAINER_OF(node, struct dp_netdev_port, node);

        free(state->name);
        state->name = xstrdup(netdev_get_name(port->netdev));
        dpif_port->name = state->name;
        dpif_port->type = port->type;
        dpif_port->port_no = port->port_no;

        retval = 0;
    } else {
        retval = EOF;
    }

    return retval;
}

static int
dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dp_netdev_port_state *state = state_;
    free(state->name);
    free(state);
    return 0;
}

static int
dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
    uint64_t new_port_seq;
    int error;

    new_port_seq = seq_read(dpif->dp->port_seq);
    if (dpif->last_port_seq != new_port_seq) {
        dpif->last_port_seq = new_port_seq;
        error = ENOBUFS;
    } else {
        error = EAGAIN;
    }

    return error;
}

static void
dpif_netdev_port_poll_wait(const struct dpif *dpif_)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);

    seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
}

static struct dp_netdev_flow *
dp_netdev_flow_cast(const struct cls_rule *cr)
{
    return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
}

static struct dp_netdev_flow *
dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key)
    OVS_EXCLUDED(dp->cls.rwlock)
{
    struct dp_netdev_flow *netdev_flow;
    struct cls_rule *rule;

    fat_rwlock_rdlock(&dp->cls.rwlock);
    rule = classifier_lookup_miniflow_first(&dp->cls, key);
    netdev_flow = dp_netdev_flow_cast(rule);
    fat_rwlock_unlock(&dp->cls.rwlock);

    return netdev_flow;
}

static struct dp_netdev_flow *
dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
    OVS_REQ_RDLOCK(dp->cls.rwlock)
{
    struct dp_netdev_flow *netdev_flow;

    HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
                             &dp->flow_table) {
        if (flow_equal(&netdev_flow->flow, flow)) {
            return netdev_flow;
        }
    }

    return NULL;
}

static void
get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
                    struct dpif_flow_stats *stats)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    memset(stats, 0, sizeof *stats);
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
        ovs_mutex_lock(&bucket->mutex);
        stats->n_packets += bucket->packet_count;
        stats->n_bytes += bucket->byte_count;
        stats->used = MAX(stats->used, bucket->used);
        stats->tcp_flags |= bucket->tcp_flags;
        ovs_mutex_unlock(&bucket->mutex);
    }
}

static int
dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              const struct nlattr *mask_key,
                              uint32_t mask_key_len, const struct flow *flow,
                              struct flow *mask)
{
    if (mask_key_len) {
        enum odp_key_fitness fitness;

        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
        if (fitness) {
            /* This should not happen: it indicates that
             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
             * disagree on the acceptable form of a mask.  Log the problem
             * as an error, with enough details to enable debugging. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

            if (!VLOG_DROP_ERR(&rl)) {
                struct ds s;

                ds_init(&s);
                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
                                true);
                VLOG_ERR("internal error parsing flow mask %s (%s)",
                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
                ds_destroy(&s);
            }

            return EINVAL;
        }
    } else {
        enum mf_field_id id;
        /* No mask key, unwildcard everything except fields whose
         * prerequisites are not met. */
        memset(mask, 0x0, sizeof *mask);

        for (id = 0; id < MFF_N_IDS; ++id) {
            /* Skip registers and metadata. */
            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
                && id != MFF_METADATA) {
                const struct mf_field *mf = mf_from_id(id);
                if (mf_are_prereqs_ok(mf, flow)) {
                    mf_mask_field(mf, mask);
                }
            }
        }
    }

    /* Force unwildcard the in_port.
     *
     * We need to do this even in the case where we unwildcard "everything"
     * above because "everything" only includes the 16-bit OpenFlow port number
     * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
     * port number mask->in_port.odp_port. */
    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);

    return 0;
}

static int
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              struct flow *flow)
{
    odp_port_t in_port;

    if (odp_flow_key_to_flow(key, key_len, flow)) {
        /* This should not happen: it indicates that odp_flow_key_from_flow()
         * and odp_flow_key_to_flow() disagree on the acceptable form of a
         * flow.  Log the problem as an error, with enough details to enable
         * debugging. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    in_port = flow->in_port.odp_port;
    if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
        return EINVAL;
    }

    return 0;
}

static int
dpif_netdev_flow_get(const struct dpif *dpif,
                     const struct nlattr *nl_key, size_t nl_key_len,
                     struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
    if (error) {
        return error;
    }

    fat_rwlock_rdlock(&dp->cls.rwlock);
    netdev_flow = dp_netdev_find_flow(dp, &key);
    fat_rwlock_unlock(&dp->cls.rwlock);

    if (netdev_flow) {
        if (stats) {
            get_dpif_flow_stats(netdev_flow, stats);
        }

        if (actionsp) {
            struct dp_netdev_actions *actions;

            actions = dp_netdev_flow_get_actions(netdev_flow);
            *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
        }
    } else {
        error = ENOENT;
    }

    return error;
}

static int
dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
                   const struct flow_wildcards *wc,
                   const struct nlattr *actions,
                   size_t actions_len)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct dp_netdev_flow *netdev_flow;
    struct match match;

    netdev_flow = xzalloc(sizeof *netdev_flow);
    *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;

    ovsthread_stats_init(&netdev_flow->stats);

    ovsrcu_set(&netdev_flow->actions,
               dp_netdev_actions_create(actions, actions_len));

    match_init(&match, flow, wc);
    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
                  &match, NETDEV_RULE_PRIORITY);
    fat_rwlock_wrlock(&dp->cls.rwlock);
    classifier_insert(&dp->cls,
                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));
    hmap_insert(&dp->flow_table,
                CONST_CAST(struct hmap_node *, &netdev_flow->node),
                flow_hash(flow, 0));
    fat_rwlock_unlock(&dp->cls.rwlock);

    return 0;
}

static void
clear_stats(struct dp_netdev_flow *netdev_flow)
{
    struct dp_netdev_flow_stats *bucket;
    size_t i;

    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
        ovs_mutex_lock(&bucket->mutex);
        bucket->used = 0;
        bucket->packet_count = 0;
        bucket->byte_count = 0;
        bucket->tcp_flags = 0;
        ovs_mutex_unlock(&bucket->mutex);
    }
}

static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow flow;
    struct miniflow miniflow;
    struct flow_wildcards wc;
    int error;

    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
    if (error) {
        return error;
    }
    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
                                          put->mask, put->mask_len,
                                          &flow, &wc.masks);
    if (error) {
        return error;
    }
    miniflow_init(&miniflow, &flow);

    ovs_mutex_lock(&dp->flow_mutex);
    netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
    if (!netdev_flow) {
        if (put->flags & DPIF_FP_CREATE) {
            if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
                if (put->stats) {
                    memset(put->stats, 0, sizeof *put->stats);
                }
                error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
                                           put->actions_len);
            } else {
                error = EFBIG;
            }
        } else {
            error = ENOENT;
        }
    } else {
        if (put->flags & DPIF_FP_MODIFY
            && flow_equal(&flow, &netdev_flow->flow)) {
            struct dp_netdev_actions *new_actions;
            struct dp_netdev_actions *old_actions;

            new_actions = dp_netdev_actions_create(put->actions,
                                                   put->actions_len);

            old_actions = dp_netdev_flow_get_actions(netdev_flow);
            ovsrcu_set(&netdev_flow->actions, new_actions);

            if (put->stats) {
                get_dpif_flow_stats(netdev_flow, put->stats);
            }
            if (put->flags & DPIF_FP_ZERO_STATS) {
                clear_stats(netdev_flow);
            }

            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
        } else if (put->flags & DPIF_FP_CREATE) {
            error = EEXIST;
        } else {
            /* Overlapping flow. */
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}

static int
dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    fat_rwlock_wrlock(&dp->cls.rwlock);
    netdev_flow = dp_netdev_find_flow(dp, &key);
    if (netdev_flow) {
        if (del->stats) {
            get_dpif_flow_stats(netdev_flow, del->stats);
        }
        dp_netdev_remove_flow(dp, netdev_flow);
    } else {
        error = ENOENT;
    }
    fat_rwlock_unlock(&dp->cls.rwlock);
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}

struct dpif_netdev_flow_dump {
    struct dpif_flow_dump up;
    uint32_t bucket;
    uint32_t offset;
    int status;
    struct ovs_mutex mutex;
};

static struct dpif_netdev_flow_dump *
dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
{
    return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
}

static struct dpif_flow_dump *
dpif_netdev_flow_dump_create(const struct dpif *dpif_)
{
    struct dpif_netdev_flow_dump *dump;

    dump = xmalloc(sizeof *dump);
    dpif_flow_dump_init(&dump->up, dpif_);
    dump->bucket = 0;
    dump->offset = 0;
    dump->status = 0;
    ovs_mutex_init(&dump->mutex);

    return &dump->up;
}

static int
dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
{
    struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);

    ovs_mutex_destroy(&dump->mutex);
    free(dump);
    return 0;
}

struct dpif_netdev_flow_dump_thread {
    struct dpif_flow_dump_thread up;
    struct dpif_netdev_flow_dump *dump;
    struct odputil_keybuf keybuf;
    struct odputil_keybuf maskbuf;
};

static struct dpif_netdev_flow_dump_thread *
dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
{
    return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up);
}

static struct dpif_flow_dump_thread *
dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
{
    struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
    struct dpif_netdev_flow_dump_thread *thread;

    thread = xmalloc(sizeof *thread);
    dpif_flow_dump_thread_init(&thread->up, &dump->up);
    thread->dump = dump;
    return &thread->up;
}

static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    struct dpif_netdev_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_cast(thread_);

    free(thread);
}

/* XXX the caller must use 'actions' without quiescing */
static int
dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                           struct dpif_flow *f, int max_flows OVS_UNUSED)
{
    struct dpif_netdev_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_cast(thread_);
    struct dpif_netdev_flow_dump *dump = thread->dump;
    struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
    struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow_wildcards wc;
    struct dp_netdev_actions *dp_actions;
    struct ofpbuf buf;
    int error;

    ovs_mutex_lock(&dump->mutex);
    error = dump->status;
    if (!error) {
        struct hmap_node *node;

        fat_rwlock_rdlock(&dp->cls.rwlock);
        node = hmap_at_position(&dp->flow_table, &dump->bucket, &dump->offset);
        if (node) {
            netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
        }
        fat_rwlock_unlock(&dp->cls.rwlock);
        if (!node) {
            dump->status = error = EOF;
        }
    }
    ovs_mutex_unlock(&dump->mutex);
    if (error) {
        return 0;
    }

    minimask_expand(&netdev_flow->cr.match.mask, &wc);

    /* Key. */
    ofpbuf_use_stack(&buf, &thread->keybuf, sizeof thread->keybuf);
    odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
                           netdev_flow->flow.in_port.odp_port, true);
    f->key = ofpbuf_data(&buf);
    f->key_len = ofpbuf_size(&buf);

    /* Mask. */
    ofpbuf_use_stack(&buf, &thread->maskbuf, sizeof thread->maskbuf);
    odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
                           odp_to_u32(wc.masks.in_port.odp_port),
                           SIZE_MAX, true);
    f->mask = ofpbuf_data(&buf);
    f->mask_len = ofpbuf_size(&buf);

    /* Actions. */
    dp_actions = dp_netdev_flow_get_actions(netdev_flow);
    f->actions = dp_actions->actions;
    f->actions_len = dp_actions->size;

    /* Stats. */
    get_dpif_flow_stats(netdev_flow, &f->stats);

    return 1;
}

static int
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct pkt_metadata *md = &execute->md;
    struct {
        struct miniflow flow;
        uint32_t buf[FLOW_U32S];
    } key;

    if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
        ofpbuf_size(execute->packet) > UINT16_MAX) {
        return EINVAL;
    }

    /* Extract flow key. */
    miniflow_initialize(&key.flow, key.buf);
    miniflow_extract(execute->packet, md, &key.flow);

    dp_netdev_execute_actions(dp, &key.flow, execute->packet, false, md,
                              execute->actions, execute->actions_len);

    return 0;
}

static void
dp_netdev_destroy_all_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    size_t i;

    dp_netdev_purge_queues(dp);

    for (i = 0; i < dp->n_handlers; i++) {
        struct dp_netdev_queue *q = &dp->handler_queues[i];

        ovs_mutex_destroy(&q->mutex);
        seq_destroy(q->seq);
    }
    free(dp->handler_queues);
    dp->handler_queues = NULL;
    dp->n_handlers = 0;
}

static void
dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    if (dp->n_handlers != n_handlers) {
        size_t i;

        dp_netdev_destroy_all_queues(dp);

        dp->n_handlers = n_handlers;
        dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues);

        for (i = 0; i < n_handlers; i++) {
            struct dp_netdev_queue *q = &dp->handler_queues[i];

            ovs_mutex_init(&q->mutex);
            q->seq = seq_create();
        }
    }
}

static int
dpif_netdev_recv_set(struct dpif *dpif, bool enable)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    if ((dp->handler_queues != NULL) == enable) {
        return 0;
    }

    fat_rwlock_wrlock(&dp->queue_rwlock);
    if (!enable) {
        dp_netdev_destroy_all_queues(dp);
    } else {
        dp_netdev_refresh_queues(dp, 1);
    }
    fat_rwlock_unlock(&dp->queue_rwlock);

    return 0;
}

static int
dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    fat_rwlock_wrlock(&dp->queue_rwlock);
    if (dp->handler_queues) {
        dp_netdev_refresh_queues(dp, n_handlers);
    }
    fat_rwlock_unlock(&dp->queue_rwlock);

    return 0;
}

static int
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                              uint32_t queue_id, uint32_t *priority)
{
    *priority = queue_id;
    return 0;
}

1622
63be20be
AW
1623static bool
1624dp_netdev_recv_check(const struct dp_netdev *dp, const uint32_t handler_id)
1625 OVS_REQ_RDLOCK(dp->queue_rwlock)
72865317 1626{
63be20be 1627 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
72865317 1628
63be20be
AW
1629 if (!dp->handler_queues) {
1630 VLOG_WARN_RL(&rl, "receiving upcall disabled");
1631 return false;
72865317 1632 }
63be20be
AW
1633
1634 if (handler_id >= dp->n_handlers) {
1635 VLOG_WARN_RL(&rl, "handler index out of bound");
1636 return false;
1637 }
1638
1639 return true;
72865317
BP
1640}
1641
static int
dpif_netdev_recv(struct dpif *dpif, uint32_t handler_id,
                 struct dpif_upcall *upcall, struct ofpbuf *buf)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_queue *q;
    int error = 0;

    fat_rwlock_rdlock(&dp->queue_rwlock);

    if (!dp_netdev_recv_check(dp, handler_id)) {
        error = EAGAIN;
        goto out;
    }

    q = &dp->handler_queues[handler_id];
    ovs_mutex_lock(&q->mutex);
    if (q->head != q->tail) {
        struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];

        *upcall = u->upcall;

        ofpbuf_uninit(buf);
        *buf = u->buf;
    } else {
        error = EAGAIN;
    }
    ovs_mutex_unlock(&q->mutex);

out:
    fat_rwlock_unlock(&dp->queue_rwlock);

    return error;
}

static void
dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_queue *q;
    uint64_t seq;

    fat_rwlock_rdlock(&dp->queue_rwlock);

    if (!dp_netdev_recv_check(dp, handler_id)) {
        goto out;
    }

    q = &dp->handler_queues[handler_id];
    ovs_mutex_lock(&q->mutex);
    seq = seq_read(q->seq);
    if (q->head != q->tail) {
        poll_immediate_wake();
    } else {
        seq_wait(q->seq, seq);
    }

    ovs_mutex_unlock(&q->mutex);

out:
    fat_rwlock_unlock(&dp->queue_rwlock);
}

static void
dpif_netdev_recv_purge(struct dpif *dpif)
{
    struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);

    fat_rwlock_wrlock(&dpif_netdev->dp->queue_rwlock);
    dp_netdev_purge_queues(dpif_netdev->dp);
    fat_rwlock_unlock(&dpif_netdev->dp->queue_rwlock);
}
\f
a84cb64a
BP
1715/* Creates and returns a new 'struct dp_netdev_actions', with a reference count
1716 * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of
1717 * 'ofpacts'. */
1718struct dp_netdev_actions *
1719dp_netdev_actions_create(const struct nlattr *actions, size_t size)
1720{
1721 struct dp_netdev_actions *netdev_actions;
1722
1723 netdev_actions = xmalloc(sizeof *netdev_actions);
a84cb64a
BP
1724 netdev_actions->actions = xmemdup(actions, size);
1725 netdev_actions->size = size;
1726
1727 return netdev_actions;
1728}

struct dp_netdev_actions *
dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
{
    return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
}

static void
dp_netdev_actions_free(struct dp_netdev_actions *actions)
{
    free(actions->actions);
    free(actions);
}
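
/* Editor's note: an illustrative sketch, not part of the original file,
 * of the RCU discipline these three functions support.  A writer replaces
 * a flow's actions by publishing a new copy and deferring the free of the
 * old one; concurrent readers calling dp_netdev_flow_get_actions() never
 * see freed memory:
 *
 *     struct dp_netdev_actions *new_acts
 *         = dp_netdev_actions_create(actions, size);
 *     struct dp_netdev_actions *old_acts
 *         = dp_netdev_flow_get_actions(flow);
 *
 *     ovsrcu_set(&flow->actions, new_acts);
 *     ovsrcu_postpone(dp_netdev_actions_free, old_acts);
 *
 * ovsrcu_postpone() delays dp_netdev_actions_free() until every thread
 * has quiesced at least once. */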
\f

static void
dp_netdev_process_rxq_port(struct dp_netdev *dp,
                           struct dp_netdev_port *port,
                           struct netdev_rxq *rxq)
{
    struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
    int error, c;

    error = netdev_rxq_recv(rxq, packet, &c);
    if (!error) {
        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
        int i;

        for (i = 0; i < c; i++) {
            dp_netdev_port_input(dp, packet[i], &md);
        }
    } else if (error != EAGAIN && error != EOPNOTSUPP) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
                    netdev_get_name(port->netdev), ovs_strerror(error));
    }
}

static void
dpif_netdev_run(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct cmap_cursor cursor;

    CMAP_FOR_EACH (port, node, &cursor, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                dp_netdev_process_rxq_port(dp, port, port->rxq[i]);
            }
        }
    }
}

static void
dpif_netdev_wait(struct dpif *dpif)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct cmap_cursor cursor;

    ovs_mutex_lock(&dp_netdev_mutex);
    CMAP_FOR_EACH (port, node, &cursor, &dp->ports) {
        if (!netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                netdev_rxq_wait(port->rxq[i]);
            }
        }
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}

struct rxq_poll {
    struct dp_netdev_port *port;
    struct netdev_rxq *rx;
};

static int
pmd_load_queues(struct pmd_thread *f,
                struct rxq_poll **ppoll_list, int poll_cnt)
{
    struct dp_netdev *dp = f->dp;
    struct rxq_poll *poll_list = *ppoll_list;
    struct dp_netdev_port *port;
    struct cmap_cursor cursor;
    int id = f->id;
    int index;
    int i;

    /* Simple scheduler for netdev rx polling. */
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    poll_cnt = 0;
    index = 0;

    CMAP_FOR_EACH (port, node, &cursor, &f->dp->ports) {
        if (netdev_is_pmd(port->netdev)) {
            int i;

            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
                if ((index % dp->n_pmd_threads) == id) {
                    poll_list = xrealloc(poll_list,
                                         sizeof *poll_list * (poll_cnt + 1));

                    port_ref(port);
                    poll_list[poll_cnt].port = port;
                    poll_list[poll_cnt].rx = port->rxq[i];
                    poll_cnt++;
                }
                index++;
            }
        }
    }

    *ppoll_list = poll_list;
    return poll_cnt;
}
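
/* Editor's note: a worked example of the scheduling above.  With
 * dp->n_pmd_threads == 2 and a single pmd device carrying four rx queues,
 * the global index runs 0..3, so thread 0 polls queues 0 and 2 while
 * thread 1 polls queues 1 and 3.  Because 'index' keeps counting across
 * devices, the queues of all pmd devices are spread round-robin over the
 * threads rather than balanced per device. */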

static void *
pmd_thread_main(void *f_)
{
    struct pmd_thread *f = f_;
    struct dp_netdev *dp = f->dp;
    unsigned int lc = 0;
    struct rxq_poll *poll_list;
    unsigned int port_seq;
    int poll_cnt;
    int i;

    poll_cnt = 0;
    poll_list = NULL;

    pmd_thread_setaffinity_cpu(f->id);
reload:
    poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);
    atomic_read(&f->change_seq, &port_seq);

    for (;;) {
        unsigned int c_port_seq;
        int i;

        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(dp, poll_list[i].port, poll_list[i].rx);
        }

        if (lc++ > 1024) {
            ovsrcu_quiesce();

            /* TODO: need a completely userspace-based signaling method to
             * keep this thread entirely in userspace.  For now, poll an
             * atomic counter. */
            lc = 0;
            atomic_read_explicit(&f->change_seq, &c_port_seq,
                                 memory_order_consume);
            if (c_port_seq != port_seq) {
                break;
            }
        }
    }

    if (!latch_is_set(&f->dp->exit_latch)) {
        goto reload;
    }

    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    free(poll_list);
    return NULL;
}
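
/* Editor's note: the reload protocol implied above, spelled out.  The pmd
 * thread samples 'change_seq' once per reload and re-reads it only every
 * 1024 polling iterations, keeping the atomic read off the per-packet
 * path.  When the value changes (the main thread bumps it, e.g. via
 * dp_netdev_reload_pmd_threads()), the inner loop breaks and the thread
 * either rebuilds its poll list or, if 'exit_latch' is set, drops its
 * port references and exits. */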

static void
dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
{
    int i;

    if (n == dp->n_pmd_threads) {
        return;
    }

    /* Stop existing threads. */
    latch_set(&dp->exit_latch);
    dp_netdev_reload_pmd_threads(dp);
    for (i = 0; i < dp->n_pmd_threads; i++) {
        struct pmd_thread *f = &dp->pmd_threads[i];

        xpthread_join(f->thread, NULL);
    }
    latch_poll(&dp->exit_latch);
    free(dp->pmd_threads);

    /* Start new threads. */
    dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
    dp->n_pmd_threads = n;

    for (i = 0; i < n; i++) {
        struct pmd_thread *f = &dp->pmd_threads[i];

        f->dp = dp;
        f->id = i;
        atomic_store(&f->change_seq, 1);

        /* The threads will distribute all devices' rx queues among
         * themselves. */
        f->thread = ovs_thread_create("pmd", pmd_thread_main, f);
    }
}

\f
static void *
dp_netdev_flow_stats_new_cb(void)
{
    struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
    ovs_mutex_init(&bucket->mutex);
    return bucket;
}

static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
                    const struct ofpbuf *packet,
                    const struct miniflow *key)
{
    uint16_t tcp_flags = miniflow_get_tcp_flags(key);
    long long int now = time_msec();
    struct dp_netdev_flow_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
                                        dp_netdev_flow_stats_new_cb);

    ovs_mutex_lock(&bucket->mutex);
    bucket->used = MAX(now, bucket->used);
    bucket->packet_count++;
    bucket->byte_count += ofpbuf_size(packet);
    bucket->tcp_flags |= tcp_flags;
    ovs_mutex_unlock(&bucket->mutex);
}

static void *
dp_netdev_stats_new_cb(void)
{
    struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
    ovs_mutex_init(&bucket->mutex);
    return bucket;
}

static void
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
{
    struct dp_netdev_stats *bucket;

    bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
    ovs_mutex_lock(&bucket->mutex);
    bucket->n[type]++;
    ovs_mutex_unlock(&bucket->mutex);
}
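
/* Editor's sketch, not part of the original file: the per-thread buckets
 * keep the increment path above nearly contention-free, and a reader
 * totals a counter by walking every bucket, e.g.:
 *
 *     struct dp_netdev_stats *bucket;
 *     uint64_t total = 0;
 *     size_t i;
 *
 *     OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
 *         ovs_mutex_lock(&bucket->mutex);
 *         total += bucket->n[DP_STAT_HIT];
 *         ovs_mutex_unlock(&bucket->mutex);
 *     }
 */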

static void
dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
                struct pkt_metadata *md)
{
    struct dp_netdev_flow *netdev_flow;
    struct {
        struct miniflow flow;
        uint32_t buf[FLOW_U32S];
    } key;

    if (ofpbuf_size(packet) < ETH_HEADER_LEN) {
        ofpbuf_delete(packet);
        return;
    }
    miniflow_initialize(&key.flow, key.buf);
    miniflow_extract(packet, md, &key.flow);

    netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
    if (netdev_flow) {
        struct dp_netdev_actions *actions;

        dp_netdev_flow_used(netdev_flow, packet, &key.flow);

        actions = dp_netdev_flow_get_actions(netdev_flow);
        dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
                                  actions->actions, actions->size);
        dp_netdev_count_packet(dp, DP_STAT_HIT);
    } else if (dp->handler_queues) {
        dp_netdev_count_packet(dp, DP_STAT_MISS);
        dp_netdev_output_userspace(dp, packet,
                                   miniflow_hash_5tuple(&key.flow, 0)
                                   % dp->n_handlers,
                                   DPIF_UC_MISS, &key.flow, NULL);
    }
}

static void
dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
                     struct pkt_metadata *md)
{
    uint32_t *recirc_depth = recirc_depth_get();

    /* A packet freshly received from a port starts at recirculation
     * depth 0. */
    *recirc_depth = 0;
    dp_netdev_input(dp, packet, md);
}

static int
dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                           int queue_no, int type, const struct miniflow *key,
                           const struct nlattr *userdata)
{
    struct dp_netdev_queue *q;
    int error;

    fat_rwlock_rdlock(&dp->queue_rwlock);
    q = &dp->handler_queues[queue_no];
    ovs_mutex_lock(&q->mutex);
    if (q->head - q->tail < MAX_QUEUE_LEN) {
        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
        struct dpif_upcall *upcall = &u->upcall;
        struct ofpbuf *buf = &u->buf;
        size_t buf_size;
        struct flow flow;

        upcall->type = type;

        /* Allocate buffer big enough for everything. */
        buf_size = ODPUTIL_FLOW_KEY_BYTES;
        if (userdata) {
            buf_size += NLA_ALIGN(userdata->nla_len);
        }
        ofpbuf_init(buf, buf_size);

        /* Put ODP flow. */
        miniflow_expand(key, &flow);
        odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
        upcall->key = ofpbuf_data(buf);
        upcall->key_len = ofpbuf_size(buf);

        /* Put userdata. */
        if (userdata) {
            upcall->userdata = ofpbuf_put(buf, userdata,
                                          NLA_ALIGN(userdata->nla_len));
        }

        upcall->packet = *packet;

        seq_change(q->seq);

        error = 0;
    } else {
        dp_netdev_count_packet(dp, DP_STAT_LOST);
        ofpbuf_delete(packet);
        error = ENOBUFS;
    }
    ovs_mutex_unlock(&q->mutex);
    fat_rwlock_unlock(&dp->queue_rwlock);

    return error;
}

struct dp_netdev_execute_aux {
    struct dp_netdev *dp;
    const struct miniflow *key;
};

static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
              struct pkt_metadata *md,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;
    uint32_t *depth = recirc_depth_get();

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
        if (p) {
            netdev_send(p->netdev, packet, may_steal);
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE: {
        struct ofpbuf *userspace_packet;
        const struct nlattr *userdata;

        userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);

        dp_netdev_output_userspace(aux->dp, userspace_packet,
                                   miniflow_hash_5tuple(aux->key, 0)
                                   % aux->dp->n_handlers,
                                   DPIF_UC_ACTION, aux->key,
                                   userdata);
        break;
    }

    case OVS_ACTION_ATTR_HASH: {
        const struct ovs_action_hash *hash_act;
        uint32_t hash;

        hash_act = nl_attr_get(a);
        if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
            /* Hash need not be symmetric, nor does it need to include
             * L2 fields. */
            hash = miniflow_hash_5tuple(aux->key, hash_act->hash_basis);
            if (!hash) {
                hash = 1; /* 0 is not valid */
            }

        } else {
            VLOG_WARN("Unknown hash algorithm specified for the hash action.");
            hash = 2;
        }

        md->dp_hash = hash;
        break;
    }

    case OVS_ACTION_ATTR_RECIRC:
        if (*depth < MAX_RECIRC_DEPTH) {
            struct pkt_metadata recirc_md = *md;
            struct ofpbuf *recirc_packet;

            recirc_packet = may_steal ? packet : ofpbuf_clone(packet);
            recirc_md.recirc_id = nl_attr_get_u32(a);

            (*depth)++;
            dp_netdev_input(aux->dp, recirc_packet, &recirc_md);
            (*depth)--;

            break;
        } else {
            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
        }
        break;

    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }
}

static void
dp_netdev_execute_actions(struct dp_netdev *dp, const struct miniflow *key,
                          struct ofpbuf *packet, bool may_steal,
                          struct pkt_metadata *md,
                          const struct nlattr *actions, size_t actions_len)
{
    struct dp_netdev_execute_aux aux = {dp, key};

    odp_execute_actions(&aux, packet, may_steal, md,
                        actions, actions_len, dp_execute_cb);
}
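
/* Editor's sketch, not part of the original file: a minimal direct
 * invocation, mirroring what dp_netdev_input() does once it has looked up
 * a flow.  'dp', 'packet', 'md', 'actions' and 'actions_len' are assumed
 * to be set up by the caller:
 *
 *     struct {
 *         struct miniflow flow;
 *         uint32_t buf[FLOW_U32S];
 *     } key;
 *
 *     miniflow_initialize(&key.flow, key.buf);
 *     miniflow_extract(packet, md, &key.flow);
 *     dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
 *                               actions, actions_len);
 *
 * With 'may_steal' true the action chain may consume 'packet', so the
 * caller must not reuse it afterward. */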

const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_get,
    dpif_netdev_flow_put,
    dpif_netdev_flow_del,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_execute,
    NULL,                       /* operate */
    dpif_netdev_recv_set,
    dpif_netdev_handlers_set,
    dpif_netdev_queue_to_priority,
    dpif_netdev_recv,
    dpif_netdev_recv_wait,
    dpif_netdev_recv_purge,
};

static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *old_port;
    struct dp_netdev_port *new_port;
    struct dp_netdev *dp;
    odp_port_t port_no;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }
    ovs_refcount_ref(&dp->ref_cnt);
    ovs_mutex_unlock(&dp_netdev_mutex);

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &old_port)) {
        unixctl_command_reply_error(conn, "unknown port");
        goto exit;
    }

    port_no = u32_to_odp(atoi(argv[3]));
    if (!port_no || port_no == ODPP_NONE) {
        unixctl_command_reply_error(conn, "bad port number");
        goto exit;
    }
    if (dp_netdev_lookup_port(dp, port_no)) {
        unixctl_command_reply_error(conn, "port number already in use");
        goto exit;
    }

    /* Remove old port. */
    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
    ovsrcu_postpone(free, old_port);

    /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
    new_port = xmemdup(old_port, sizeof *old_port);
    new_port->port_no = port_no;
    cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));

    seq_change(dp->port_seq);
    unixctl_command_reply(conn, NULL);

exit:
    ovs_mutex_unlock(&dp->port_mutex);
    dp_netdev_unref(dp);
}

static void
dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
                       const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *port;
    struct dp_netdev *dp;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }
    ovs_refcount_ref(&dp->ref_cnt);
    ovs_mutex_unlock(&dp_netdev_mutex);

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &port)) {
        unixctl_command_reply_error(conn, "unknown port");
    } else if (port->port_no == ODPP_LOCAL) {
        unixctl_command_reply_error(conn, "can't delete local port");
    } else {
        do_del_port(dp, port);
        unixctl_command_reply(conn, NULL);
    }
    ovs_mutex_unlock(&dp->port_mutex);

    dp_netdev_unref(dp);
}

static void
dpif_dummy_register__(const char *type)
{
    struct dpif_class *class;

    class = xmalloc(sizeof *class);
    *class = dpif_netdev_class;
    class->type = xstrdup(type);
    dp_register_provider(class);
}

void
dpif_dummy_register(bool override)
{
    if (override) {
        struct sset types;
        const char *type;

        sset_init(&types);
        dp_enumerate_types(&types);
        SSET_FOR_EACH (type, &types) {
            if (!dp_unregister_provider(type)) {
                dpif_dummy_register__(type);
            }
        }
        sset_destroy(&types);
    }

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "DP PORT NEW-NUMBER",
                             3, 3, dpif_dummy_change_port_number, NULL);
    unixctl_command_register("dpif-dummy/delete-port", "DP PORT",
                             2, 2, dpif_dummy_delete_port, NULL);
}
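
/* Editor's note: once registered, these commands are driven through
 * ovs-appctl.  With a dummy datapath named, say, "ovs-dummy" and a port
 * "p1" (illustrative names, not from this file), plausible invocations
 * matching the registered argument counts are:
 *
 *     ovs-appctl dpif-dummy/change-port-number ovs-dummy p1 5
 *     ovs-appctl dpif-dummy/delete-port ovs-dummy p1
 */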