]> git.proxmox.com Git - ovs.git/blame - lib/dpif-netdev.c
ofproto-dpif: Correctly refresh all ports on ENOBUFS from dpif_port_poll().
[ovs.git] / lib / dpif-netdev.c
CommitLineData
72865317 1/*
de281153 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
34#include "csum.h"
614c4892 35#include "dpif.h"
72865317 36#include "dpif-provider.h"
614c4892 37#include "dummy.h"
36956a7d 38#include "dynamic-string.h"
72865317
BP
39#include "flow.h"
40#include "hmap.h"
41#include "list.h"
42#include "netdev.h"
de281153 43#include "netdev-vport.h"
cdee00fd 44#include "netlink.h"
f094af7b 45#include "odp-execute.h"
72865317
BP
46#include "odp-util.h"
47#include "ofp-print.h"
48#include "ofpbuf.h"
49#include "packets.h"
50#include "poll-loop.h"
26c6b6cd 51#include "random.h"
462278db 52#include "shash.h"
0cbfe35d 53#include "sset.h"
72865317
BP
54#include "timeval.h"
55#include "util.h"
72865317 56#include "vlog.h"
5136ce49 57
d98e6007 58VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317
BP
59
60/* Configuration parameters. */
72865317
BP
61enum { MAX_PORTS = 256 }; /* Maximum number of ports. */
62enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
63
64/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
65 * headers to be aligned on a 4-byte boundary. */
66enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
67
856081f6
BP
68/* Queues. */
69enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
70enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
71enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
72BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
73
d88b629b
BP
74struct dp_netdev_upcall {
75 struct dpif_upcall upcall; /* Queued upcall information. */
76 struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */
77};
78
856081f6 79struct dp_netdev_queue {
d88b629b 80 struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN];
856081f6
BP
81 unsigned int head, tail;
82};
83
72865317
BP
84/* Datapath based on the network device interface from netdev.h. */
85struct dp_netdev {
614c4892 86 const struct dpif_class *class;
462278db 87 char *name;
72865317 88 int open_cnt;
7dab847a 89 bool destroyed;
3b0aab93 90 int max_mtu; /* Maximum MTU of any port added so far. */
72865317 91
856081f6 92 struct dp_netdev_queue queues[N_QUEUES];
72865317 93 struct hmap flow_table; /* Flow table. */
72865317
BP
94
95 /* Statistics. */
72865317
BP
96 long long int n_hit; /* Number of flow table matches. */
97 long long int n_missed; /* Number of flow table misses. */
98 long long int n_lost; /* Number of misses not passed to client. */
99
100 /* Ports. */
72865317
BP
101 struct dp_netdev_port *ports[MAX_PORTS];
102 struct list port_list;
103 unsigned int serial;
104};
105
106/* A port in a netdev-based datapath. */
107struct dp_netdev_port {
4e022ec0 108 odp_port_t port_no; /* Index into dp_netdev's 'ports'. */
72865317
BP
109 struct list node; /* Element in dp_netdev's 'port_list'. */
110 struct netdev *netdev;
4b609110 111 struct netdev_saved_flags *sf;
796223f5 112 struct netdev_rx *rx;
0cbfe35d 113 char *type; /* Port type as requested by user. */
72865317
BP
114};
115
116/* A flow in dp_netdev's 'flow_table'. */
117struct dp_netdev_flow {
118 struct hmap_node node; /* Element in dp_netdev's 'flow_table'. */
14608a15 119 struct flow key;
72865317
BP
120
121 /* Statistics. */
c97fb132 122 long long int used; /* Last used time, in monotonic msecs. */
2105ccc8
BP
123 long long int packet_count; /* Number of packets matched. */
124 long long int byte_count; /* Number of bytes matched. */
7c808e39 125 uint8_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
72865317
BP
126
127 /* Actions. */
cdee00fd 128 struct nlattr *actions;
cf22f8cb 129 size_t actions_len;
72865317
BP
130};
131
132/* Interface to netdev-based datapath. */
133struct dpif_netdev {
134 struct dpif dpif;
135 struct dp_netdev *dp;
72865317
BP
136 unsigned int dp_serial;
137};
138
139/* All netdev-based datapaths. */
462278db 140static struct shash dp_netdevs = SHASH_INITIALIZER(&dp_netdevs);
72865317 141
5279f8fd
BP
142/* Global lock for all data. */
143static pthread_mutex_t dp_netdev_mutex = PTHREAD_MUTEX_INITIALIZER;
144
4e022ec0 145static int get_port_by_number(struct dp_netdev *, odp_port_t port_no,
72865317
BP
146 struct dp_netdev_port **portp);
147static int get_port_by_name(struct dp_netdev *, const char *devname,
148 struct dp_netdev_port **portp);
149static void dp_netdev_free(struct dp_netdev *);
150static void dp_netdev_flow_flush(struct dp_netdev *);
c3827f61 151static int do_add_port(struct dp_netdev *, const char *devname,
4e022ec0
AW
152 const char *type, odp_port_t port_no);
153static int do_del_port(struct dp_netdev *, odp_port_t port_no);
614c4892
BP
154static int dpif_netdev_open(const struct dpif_class *, const char *name,
155 bool create, struct dpif **);
b85d8d61 156static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *,
856081f6 157 int queue_no, const struct flow *,
e995e3df 158 const struct nlattr *userdata);
4edb9ae9
PS
159static void dp_netdev_execute_actions(struct dp_netdev *,
160 struct ofpbuf *, struct flow *,
161 const struct nlattr *actions,
162 size_t actions_len);
6c13071b
SH
163static void dp_netdev_port_input(struct dp_netdev *dp,
164 struct dp_netdev_port *port,
165 struct ofpbuf *packet, uint32_t skb_priority,
166 uint32_t skb_mark, const struct flow_tnl *tnl);
72865317
BP
167
168static struct dpif_netdev *
169dpif_netdev_cast(const struct dpif *dpif)
170{
cb22974d 171 ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
72865317
BP
172 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
173}
174
175static struct dp_netdev *
176get_dp_netdev(const struct dpif *dpif)
177{
178 return dpif_netdev_cast(dpif)->dp;
179}
180
2197d7ab
GL
181static int
182dpif_netdev_enumerate(struct sset *all_dps)
183{
184 struct shash_node *node;
185
5279f8fd 186 xpthread_mutex_lock(&dp_netdev_mutex);
2197d7ab
GL
187 SHASH_FOR_EACH(node, &dp_netdevs) {
188 sset_add(all_dps, node->name);
189 }
5279f8fd
BP
190 xpthread_mutex_unlock(&dp_netdev_mutex);
191
2197d7ab
GL
192 return 0;
193}
194
add90f6f
EJ
195static bool
196dpif_netdev_class_is_dummy(const struct dpif_class *class)
197{
198 return class != &dpif_netdev_class;
199}
200
0aeaabc8
JP
/* Maps a port 'type' to the netdev type to open for it.  Every type except
 * "internal" is returned unchanged; "internal" becomes "dummy" for dummy
 * classes and "tap" otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }
    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}
208
72865317
BP
209static struct dpif *
210create_dpif_netdev(struct dp_netdev *dp)
211{
462278db 212 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 213 struct dpif_netdev *dpif;
72865317
BP
214
215 dp->open_cnt++;
216
72865317 217 dpif = xmalloc(sizeof *dpif);
614c4892 218 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 219 dpif->dp = dp;
72865317 220 dpif->dp_serial = dp->serial;
72865317
BP
221
222 return &dpif->dpif;
223}
224
4e022ec0
AW
225/* Choose an unused, non-zero port number and return it on success.
226 * Return ODPP_NONE on failure. */
227static odp_port_t
e44768b7
JP
228choose_port(struct dp_netdev *dp, const char *name)
229{
4e022ec0 230 uint32_t port_no;
e44768b7
JP
231
232 if (dp->class != &dpif_netdev_class) {
233 const char *p;
234 int start_no = 0;
235
236 /* If the port name begins with "br", start the number search at
237 * 100 to make writing tests easier. */
238 if (!strncmp(name, "br", 2)) {
239 start_no = 100;
240 }
241
242 /* If the port name contains a number, try to assign that port number.
243 * This can make writing unit tests easier because port numbers are
244 * predictable. */
245 for (p = name; *p != '\0'; p++) {
246 if (isdigit((unsigned char) *p)) {
247 port_no = start_no + strtol(p, NULL, 10);
248 if (port_no > 0 && port_no < MAX_PORTS
249 && !dp->ports[port_no]) {
4e022ec0 250 return u32_to_odp(port_no);
e44768b7
JP
251 }
252 break;
253 }
254 }
255 }
256
257 for (port_no = 1; port_no < MAX_PORTS; port_no++) {
258 if (!dp->ports[port_no]) {
4e022ec0 259 return u32_to_odp(port_no);
e44768b7
JP
260 }
261 }
262
4e022ec0 263 return ODPP_NONE;
e44768b7
JP
264}
265
72865317 266static int
614c4892
BP
267create_dp_netdev(const char *name, const struct dpif_class *class,
268 struct dp_netdev **dpp)
72865317
BP
269{
270 struct dp_netdev *dp;
271 int error;
272 int i;
273
462278db 274 dp = xzalloc(sizeof *dp);
614c4892 275 dp->class = class;
462278db 276 dp->name = xstrdup(name);
72865317 277 dp->open_cnt = 0;
3b0aab93 278 dp->max_mtu = ETH_PAYLOAD_MAX;
72865317 279 for (i = 0; i < N_QUEUES; i++) {
856081f6 280 dp->queues[i].head = dp->queues[i].tail = 0;
72865317
BP
281 }
282 hmap_init(&dp->flow_table);
72865317 283 list_init(&dp->port_list);
e44768b7 284
4e022ec0 285 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
72865317
BP
286 if (error) {
287 dp_netdev_free(dp);
462278db 288 return error;
72865317
BP
289 }
290
462278db
BP
291 shash_add(&dp_netdevs, name, dp);
292
293 *dpp = dp;
72865317
BP
294 return 0;
295}
296
297static int
614c4892 298dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 299 bool create, struct dpif **dpifp)
72865317 300{
462278db 301 struct dp_netdev *dp;
5279f8fd 302 int error;
462278db 303
5279f8fd 304 xpthread_mutex_lock(&dp_netdev_mutex);
462278db
BP
305 dp = shash_find_data(&dp_netdevs, name);
306 if (!dp) {
5279f8fd 307 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 308 } else {
5279f8fd
BP
309 error = (dp->class != class ? EINVAL
310 : create ? EEXIST
311 : 0);
312 }
313 if (!error) {
314 *dpifp = create_dpif_netdev(dp);
72865317 315 }
5279f8fd 316 xpthread_mutex_unlock(&dp_netdev_mutex);
462278db 317
5279f8fd 318 return error;
72865317
BP
319}
320
321static void
1ba530f4 322dp_netdev_purge_queues(struct dp_netdev *dp)
72865317
BP
323{
324 int i;
325
72865317 326 for (i = 0; i < N_QUEUES; i++) {
856081f6 327 struct dp_netdev_queue *q = &dp->queues[i];
856081f6 328
1ba530f4 329 while (q->tail != q->head) {
d88b629b
BP
330 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
331 ofpbuf_uninit(&u->buf);
856081f6 332 }
72865317 333 }
1ba530f4
BP
334}
335
336static void
337dp_netdev_free(struct dp_netdev *dp)
338{
4ad28026
BP
339 struct dp_netdev_port *port, *next;
340
1ba530f4 341 dp_netdev_flow_flush(dp);
4ad28026 342 LIST_FOR_EACH_SAFE (port, next, node, &dp->port_list) {
1ba530f4
BP
343 do_del_port(dp, port->port_no);
344 }
345 dp_netdev_purge_queues(dp);
72865317 346 hmap_destroy(&dp->flow_table);
462278db 347 free(dp->name);
72865317
BP
348 free(dp);
349}
350
351static void
352dpif_netdev_close(struct dpif *dpif)
353{
354 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
355
356 xpthread_mutex_lock(&dp_netdev_mutex);
357
cb22974d 358 ovs_assert(dp->open_cnt > 0);
7dab847a 359 if (--dp->open_cnt == 0 && dp->destroyed) {
462278db 360 shash_find_and_delete(&dp_netdevs, dp->name);
72865317
BP
361 dp_netdev_free(dp);
362 }
363 free(dpif);
5279f8fd
BP
364
365 xpthread_mutex_unlock(&dp_netdev_mutex);
72865317
BP
366}
367
368static int
7dab847a 369dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
370{
371 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
372
373 xpthread_mutex_lock(&dp_netdev_mutex);
7dab847a 374 dp->destroyed = true;
5279f8fd
BP
375 xpthread_mutex_unlock(&dp_netdev_mutex);
376
72865317
BP
377 return 0;
378}
379
380static int
a8d9304d 381dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
382{
383 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
384
385 xpthread_mutex_lock(&dp_netdev_mutex);
f180c2e2 386 stats->n_flows = hmap_count(&dp->flow_table);
72865317
BP
387 stats->n_hit = dp->n_hit;
388 stats->n_missed = dp->n_missed;
389 stats->n_lost = dp->n_lost;
5279f8fd
BP
390 xpthread_mutex_unlock(&dp_netdev_mutex);
391
72865317
BP
392 return 0;
393}
394
72865317 395static int
c3827f61 396do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 397 odp_port_t port_no)
72865317 398{
4b609110 399 struct netdev_saved_flags *sf;
72865317
BP
400 struct dp_netdev_port *port;
401 struct netdev *netdev;
796223f5 402 struct netdev_rx *rx;
0cbfe35d 403 const char *open_type;
72865317
BP
404 int mtu;
405 int error;
406
407 /* XXX reject devices already in some dp_netdev. */
408
409 /* Open and validate network device. */
0aeaabc8 410 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 411 error = netdev_open(devname, open_type, &netdev);
72865317
BP
412 if (error) {
413 return error;
414 }
415 /* XXX reject loopback devices */
416 /* XXX reject non-Ethernet devices */
417
796223f5 418 error = netdev_rx_open(netdev, &rx);
add90f6f
EJ
419 if (error
420 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
7b6b0ef4 421 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
10a89ef0 422 devname, ovs_strerror(errno));
7b6b0ef4
BP
423 netdev_close(netdev);
424 return error;
425 }
426
4b609110 427 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 428 if (error) {
796223f5 429 netdev_rx_close(rx);
72865317
BP
430 netdev_close(netdev);
431 return error;
432 }
433
434 port = xmalloc(sizeof *port);
435 port->port_no = port_no;
436 port->netdev = netdev;
4b609110 437 port->sf = sf;
796223f5 438 port->rx = rx;
0cbfe35d 439 port->type = xstrdup(type);
72865317 440
9b020780 441 error = netdev_get_mtu(netdev, &mtu);
3b0aab93
BP
442 if (!error && mtu > dp->max_mtu) {
443 dp->max_mtu = mtu;
72865317
BP
444 }
445
446 list_push_back(&dp->port_list, &port->node);
4e022ec0 447 dp->ports[odp_to_u32(port_no)] = port;
72865317
BP
448 dp->serial++;
449
450 return 0;
451}
452
247527db
BP
453static int
454dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 455 odp_port_t *port_nop)
247527db
BP
456{
457 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
458 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
459 const char *dpif_port;
4e022ec0 460 odp_port_t port_no;
5279f8fd 461 int error;
247527db 462
5279f8fd 463 xpthread_mutex_lock(&dp_netdev_mutex);
3aa30359 464 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0
AW
465 if (*port_nop != ODPP_NONE) {
466 uint32_t port_idx = odp_to_u32(*port_nop);
467 if (port_idx >= MAX_PORTS) {
5279f8fd 468 error = EFBIG;
4e022ec0 469 } else if (dp->ports[port_idx]) {
5279f8fd
BP
470 error = EBUSY;
471 } else {
472 error = 0;
473 port_no = *port_nop;
232dfa4a 474 }
232dfa4a 475 } else {
3aa30359 476 port_no = choose_port(dp, dpif_port);
5279f8fd 477 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 478 }
5279f8fd 479 if (!error) {
247527db 480 *port_nop = port_no;
5279f8fd 481 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 482 }
5279f8fd
BP
483 xpthread_mutex_unlock(&dp_netdev_mutex);
484
485 return error;
72865317
BP
486}
487
488static int
4e022ec0 489dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
490{
491 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
492 int error;
493
494 xpthread_mutex_lock(&dp_netdev_mutex);
495 error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
496 xpthread_mutex_unlock(&dp_netdev_mutex);
497
498 return error;
72865317
BP
499}
500
501static bool
4e022ec0 502is_valid_port_number(odp_port_t port_no)
72865317 503{
4e022ec0 504 return odp_to_u32(port_no) < MAX_PORTS;
72865317
BP
505}
506
507static int
508get_port_by_number(struct dp_netdev *dp,
4e022ec0 509 odp_port_t port_no, struct dp_netdev_port **portp)
72865317
BP
510{
511 if (!is_valid_port_number(port_no)) {
512 *portp = NULL;
513 return EINVAL;
514 } else {
4e022ec0 515 *portp = dp->ports[odp_to_u32(port_no)];
72865317
BP
516 return *portp ? 0 : ENOENT;
517 }
518}
519
520static int
521get_port_by_name(struct dp_netdev *dp,
522 const char *devname, struct dp_netdev_port **portp)
523{
524 struct dp_netdev_port *port;
525
4e8e4213 526 LIST_FOR_EACH (port, node, &dp->port_list) {
3efb6063 527 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
528 *portp = port;
529 return 0;
530 }
531 }
532 return ENOENT;
533}
534
535static int
4e022ec0 536do_del_port(struct dp_netdev *dp, odp_port_t port_no)
72865317
BP
537{
538 struct dp_netdev_port *port;
539 int error;
540
541 error = get_port_by_number(dp, port_no, &port);
542 if (error) {
543 return error;
544 }
545
546 list_remove(&port->node);
4e022ec0 547 dp->ports[odp_to_u32(port_no)] = NULL;
72865317
BP
548 dp->serial++;
549
550 netdev_close(port->netdev);
4b609110 551 netdev_restore_flags(port->sf);
796223f5 552 netdev_rx_close(port->rx);
0cbfe35d 553 free(port->type);
72865317
BP
554 free(port);
555
556 return 0;
557}
558
559static void
4c738a8d
BP
560answer_port_query(const struct dp_netdev_port *port,
561 struct dpif_port *dpif_port)
72865317 562{
3efb6063 563 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 564 dpif_port->type = xstrdup(port->type);
4c738a8d 565 dpif_port->port_no = port->port_no;
72865317
BP
566}
567
568static int
4e022ec0 569dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 570 struct dpif_port *dpif_port)
72865317
BP
571{
572 struct dp_netdev *dp = get_dp_netdev(dpif);
573 struct dp_netdev_port *port;
574 int error;
575
5279f8fd 576 xpthread_mutex_lock(&dp_netdev_mutex);
72865317 577 error = get_port_by_number(dp, port_no, &port);
4afba28d 578 if (!error && dpif_port) {
4c738a8d 579 answer_port_query(port, dpif_port);
72865317 580 }
5279f8fd
BP
581 xpthread_mutex_unlock(&dp_netdev_mutex);
582
72865317
BP
583 return error;
584}
585
586static int
587dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 588 struct dpif_port *dpif_port)
72865317
BP
589{
590 struct dp_netdev *dp = get_dp_netdev(dpif);
591 struct dp_netdev_port *port;
592 int error;
593
5279f8fd 594 xpthread_mutex_lock(&dp_netdev_mutex);
72865317 595 error = get_port_by_name(dp, devname, &port);
4afba28d 596 if (!error && dpif_port) {
4c738a8d 597 answer_port_query(port, dpif_port);
72865317 598 }
5279f8fd
BP
599 xpthread_mutex_unlock(&dp_netdev_mutex);
600
72865317
BP
601 return error;
602}
603
4e022ec0 604static odp_port_t
996c1b3d
BP
605dpif_netdev_get_max_ports(const struct dpif *dpif OVS_UNUSED)
606{
4e022ec0 607 return u32_to_odp(MAX_PORTS);
996c1b3d
BP
608}
609
72865317
BP
610static void
611dp_netdev_free_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
612{
613 hmap_remove(&dp->flow_table, &flow->node);
614 free(flow->actions);
615 free(flow);
616}
617
618static void
619dp_netdev_flow_flush(struct dp_netdev *dp)
620{
621 struct dp_netdev_flow *flow, *next;
622
4e8e4213 623 HMAP_FOR_EACH_SAFE (flow, next, node, &dp->flow_table) {
72865317
BP
624 dp_netdev_free_flow(dp, flow);
625 }
626}
627
628static int
629dpif_netdev_flow_flush(struct dpif *dpif)
630{
631 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
632
633 xpthread_mutex_lock(&dp_netdev_mutex);
72865317 634 dp_netdev_flow_flush(dp);
5279f8fd
BP
635 xpthread_mutex_unlock(&dp_netdev_mutex);
636
72865317
BP
637 return 0;
638}
639
b0ec0f27 640struct dp_netdev_port_state {
4e022ec0 641 odp_port_t port_no;
4c738a8d 642 char *name;
b0ec0f27
BP
643};
644
645static int
646dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
647{
648 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
649 return 0;
650}
651
72865317 652static int
b0ec0f27 653dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 654 struct dpif_port *dpif_port)
72865317 655{
b0ec0f27 656 struct dp_netdev_port_state *state = state_;
72865317 657 struct dp_netdev *dp = get_dp_netdev(dpif);
4e022ec0 658 uint32_t port_idx;
72865317 659
5279f8fd 660 xpthread_mutex_lock(&dp_netdev_mutex);
4e022ec0
AW
661 for (port_idx = odp_to_u32(state->port_no);
662 port_idx < MAX_PORTS; port_idx++) {
663 struct dp_netdev_port *port = dp->ports[port_idx];
b0ec0f27 664 if (port) {
4c738a8d 665 free(state->name);
3efb6063 666 state->name = xstrdup(netdev_get_name(port->netdev));
4c738a8d 667 dpif_port->name = state->name;
0cbfe35d 668 dpif_port->type = port->type;
4c738a8d 669 dpif_port->port_no = port->port_no;
4e022ec0 670 state->port_no = u32_to_odp(port_idx + 1);
5279f8fd
BP
671 xpthread_mutex_unlock(&dp_netdev_mutex);
672
b0ec0f27 673 return 0;
72865317 674 }
72865317 675 }
5279f8fd
BP
676 xpthread_mutex_unlock(&dp_netdev_mutex);
677
b0ec0f27
BP
678 return EOF;
679}
680
681static int
4c738a8d 682dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 683{
4c738a8d
BP
684 struct dp_netdev_port_state *state = state_;
685 free(state->name);
b0ec0f27
BP
686 free(state);
687 return 0;
72865317
BP
688}
689
690static int
67a4917b 691dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
692{
693 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd
BP
694 int error;
695
696 xpthread_mutex_lock(&dp_netdev_mutex);
72865317
BP
697 if (dpif->dp_serial != dpif->dp->serial) {
698 dpif->dp_serial = dpif->dp->serial;
5279f8fd 699 error = ENOBUFS;
72865317 700 } else {
5279f8fd 701 error = EAGAIN;
72865317 702 }
5279f8fd
BP
703 xpthread_mutex_unlock(&dp_netdev_mutex);
704
705 return error;
72865317
BP
706}
707
708static void
709dpif_netdev_port_poll_wait(const struct dpif *dpif_)
710{
711 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd
BP
712
713 /* XXX In a multithreaded process, there is a race window between this
714 * function and the poll_block() in one thread and a change in
715 * dpif->dp->serial in another thread. */
716
717 xpthread_mutex_lock(&dp_netdev_mutex);
72865317
BP
718 if (dpif->dp_serial != dpif->dp->serial) {
719 poll_immediate_wake();
720 }
5279f8fd 721 xpthread_mutex_unlock(&dp_netdev_mutex);
72865317
BP
722}
723
72865317 724static struct dp_netdev_flow *
14608a15 725dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *key)
72865317
BP
726{
727 struct dp_netdev_flow *flow;
728
4e8e4213 729 HMAP_FOR_EACH_WITH_HASH (flow, node, flow_hash(key, 0), &dp->flow_table) {
72865317
BP
730 if (flow_equal(&flow->key, key)) {
731 return flow;
732 }
733 }
734 return NULL;
735}
736
737static void
c97fb132 738get_dpif_flow_stats(struct dp_netdev_flow *flow, struct dpif_flow_stats *stats)
feebdea2
BP
739{
740 stats->n_packets = flow->packet_count;
741 stats->n_bytes = flow->byte_count;
c97fb132 742 stats->used = flow->used;
734ec5ec 743 stats->tcp_flags = flow->tcp_flags;
72865317
BP
744}
745
36956a7d
BP
746static int
747dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
748 struct flow *flow)
749{
586ddea5
BP
750 odp_port_t in_port;
751
0135dc8b 752 if (odp_flow_key_to_flow(key, key_len, flow) != ODP_FIT_PERFECT) {
36956a7d
BP
753 /* This should not happen: it indicates that odp_flow_key_from_flow()
754 * and odp_flow_key_to_flow() disagree on the acceptable form of a
755 * flow. Log the problem as an error, with enough details to enable
756 * debugging. */
757 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
758
759 if (!VLOG_DROP_ERR(&rl)) {
760 struct ds s;
761
762 ds_init(&s);
763 odp_flow_key_format(key, key_len, &s);
764 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
765 ds_destroy(&s);
766 }
767
768 return EINVAL;
769 }
770
586ddea5
BP
771 in_port = flow->in_port.odp_port;
772 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
773 return EINVAL;
774 }
775
36956a7d
BP
776 return 0;
777}
778
72865317 779static int
693c4a01 780dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 781 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 782 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
783{
784 struct dp_netdev *dp = get_dp_netdev(dpif);
bc4a05c6
BP
785 struct dp_netdev_flow *flow;
786 struct flow key;
787 int error;
36956a7d 788
feebdea2 789 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
790 if (error) {
791 return error;
792 }
14608a15 793
5279f8fd 794 xpthread_mutex_lock(&dp_netdev_mutex);
bc4a05c6 795 flow = dp_netdev_lookup_flow(dp, &key);
5279f8fd
BP
796 if (flow) {
797 if (stats) {
798 get_dpif_flow_stats(flow, stats);
799 }
800 if (actionsp) {
801 *actionsp = ofpbuf_clone_data(flow->actions, flow->actions_len);
802 }
803 } else {
804 error = ENOENT;
72865317 805 }
5279f8fd 806 xpthread_mutex_unlock(&dp_netdev_mutex);
bc4a05c6 807
5279f8fd 808 return error;
72865317
BP
809}
810
72865317 811static int
feebdea2
BP
812set_flow_actions(struct dp_netdev_flow *flow,
813 const struct nlattr *actions, size_t actions_len)
72865317 814{
feebdea2
BP
815 flow->actions = xrealloc(flow->actions, actions_len);
816 flow->actions_len = actions_len;
817 memcpy(flow->actions, actions, actions_len);
72865317
BP
818 return 0;
819}
820
821static int
e1fef0f9
AS
822dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *key,
823 const struct nlattr *actions, size_t actions_len)
72865317 824{
72865317
BP
825 struct dp_netdev_flow *flow;
826 int error;
827
ec6fde61 828 flow = xzalloc(sizeof *flow);
36956a7d 829 flow->key = *key;
72865317 830
feebdea2 831 error = set_flow_actions(flow, actions, actions_len);
72865317
BP
832 if (error) {
833 free(flow);
834 return error;
835 }
836
837 hmap_insert(&dp->flow_table, &flow->node, flow_hash(&flow->key, 0));
838 return 0;
839}
840
841static void
842clear_stats(struct dp_netdev_flow *flow)
843{
c97fb132 844 flow->used = 0;
72865317
BP
845 flow->packet_count = 0;
846 flow->byte_count = 0;
734ec5ec 847 flow->tcp_flags = 0;
72865317
BP
848}
849
850static int
89625d1e 851dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
852{
853 struct dp_netdev *dp = get_dp_netdev(dpif);
854 struct dp_netdev_flow *flow;
14608a15 855 struct flow key;
36956a7d
BP
856 int error;
857
89625d1e 858 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &key);
36956a7d
BP
859 if (error) {
860 return error;
861 }
72865317 862
5279f8fd 863 xpthread_mutex_lock(&dp_netdev_mutex);
14608a15 864 flow = dp_netdev_lookup_flow(dp, &key);
72865317 865 if (!flow) {
89625d1e 866 if (put->flags & DPIF_FP_CREATE) {
72865317 867 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
868 if (put->stats) {
869 memset(put->stats, 0, sizeof *put->stats);
feebdea2 870 }
5279f8fd
BP
871 error = dp_netdev_flow_add(dp, &key, put->actions,
872 put->actions_len);
72865317 873 } else {
5279f8fd 874 error = EFBIG;
72865317
BP
875 }
876 } else {
5279f8fd 877 error = ENOENT;
72865317
BP
878 }
879 } else {
89625d1e 880 if (put->flags & DPIF_FP_MODIFY) {
5279f8fd 881 error = set_flow_actions(flow, put->actions, put->actions_len);
feebdea2 882 if (!error) {
89625d1e
BP
883 if (put->stats) {
884 get_dpif_flow_stats(flow, put->stats);
feebdea2 885 }
89625d1e 886 if (put->flags & DPIF_FP_ZERO_STATS) {
feebdea2
BP
887 clear_stats(flow);
888 }
72865317 889 }
72865317 890 } else {
5279f8fd 891 error = EEXIST;
72865317
BP
892 }
893 }
5279f8fd
BP
894 xpthread_mutex_unlock(&dp_netdev_mutex);
895
896 return error;
72865317
BP
897}
898
72865317 899static int
b99d3cee 900dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
901{
902 struct dp_netdev *dp = get_dp_netdev(dpif);
903 struct dp_netdev_flow *flow;
14608a15 904 struct flow key;
36956a7d
BP
905 int error;
906
b99d3cee 907 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
908 if (error) {
909 return error;
910 }
72865317 911
5279f8fd 912 xpthread_mutex_lock(&dp_netdev_mutex);
14608a15 913 flow = dp_netdev_lookup_flow(dp, &key);
72865317 914 if (flow) {
b99d3cee
BP
915 if (del->stats) {
916 get_dpif_flow_stats(flow, del->stats);
feebdea2 917 }
72865317 918 dp_netdev_free_flow(dp, flow);
72865317 919 } else {
5279f8fd 920 error = ENOENT;
72865317 921 }
5279f8fd
BP
922 xpthread_mutex_unlock(&dp_netdev_mutex);
923
924 return error;
72865317
BP
925}
926
704a1e09
BP
927struct dp_netdev_flow_state {
928 uint32_t bucket;
929 uint32_t offset;
feebdea2 930 struct nlattr *actions;
19cf4069 931 struct odputil_keybuf keybuf;
c97fb132 932 struct dpif_flow_stats stats;
704a1e09
BP
933};
934
72865317 935static int
704a1e09 936dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
72865317 937{
feebdea2
BP
938 struct dp_netdev_flow_state *state;
939
940 *statep = state = xmalloc(sizeof *state);
941 state->bucket = 0;
942 state->offset = 0;
943 state->actions = NULL;
704a1e09
BP
944 return 0;
945}
946
947static int
948dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
feebdea2 949 const struct nlattr **key, size_t *key_len,
e6cc0bab 950 const struct nlattr **mask, size_t *mask_len,
feebdea2 951 const struct nlattr **actions, size_t *actions_len,
c97fb132 952 const struct dpif_flow_stats **stats)
704a1e09
BP
953{
954 struct dp_netdev_flow_state *state = state_;
72865317
BP
955 struct dp_netdev *dp = get_dp_netdev(dpif);
956 struct dp_netdev_flow *flow;
704a1e09 957 struct hmap_node *node;
14608a15 958
5279f8fd 959 xpthread_mutex_lock(&dp_netdev_mutex);
704a1e09
BP
960 node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
961 if (!node) {
5279f8fd 962 xpthread_mutex_unlock(&dp_netdev_mutex);
704a1e09 963 return EOF;
72865317 964 }
704a1e09
BP
965
966 flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
36956a7d 967
feebdea2
BP
968 if (key) {
969 struct ofpbuf buf;
970
19cf4069 971 ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
4e022ec0 972 odp_flow_key_from_flow(&buf, &flow->key, flow->key.in_port.odp_port);
36956a7d 973
feebdea2
BP
974 *key = buf.data;
975 *key_len = buf.size;
976 }
977
e6cc0bab
AZ
978 if (mask) {
979 *mask = NULL;
980 *mask_len = 0;
981 }
982
feebdea2
BP
983 if (actions) {
984 free(state->actions);
985 state->actions = xmemdup(flow->actions, flow->actions_len);
986
987 *actions = state->actions;
988 *actions_len = flow->actions_len;
989 }
990
991 if (stats) {
c97fb132 992 get_dpif_flow_stats(flow, &state->stats);
feebdea2
BP
993 *stats = &state->stats;
994 }
704a1e09 995
5279f8fd 996 xpthread_mutex_unlock(&dp_netdev_mutex);
704a1e09
BP
997 return 0;
998}
999
1000static int
feebdea2 1001dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
704a1e09 1002{
feebdea2
BP
1003 struct dp_netdev_flow_state *state = state_;
1004
1005 free(state->actions);
704a1e09
BP
1006 free(state);
1007 return 0;
72865317
BP
1008}
1009
1010static int
89625d1e 1011dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute)
72865317
BP
1012{
1013 struct dp_netdev *dp = get_dp_netdev(dpif);
1014 struct ofpbuf copy;
ae412e7d 1015 struct flow key;
72865317
BP
1016 int error;
1017
89625d1e
BP
1018 if (execute->packet->size < ETH_HEADER_LEN ||
1019 execute->packet->size > UINT16_MAX) {
72865317
BP
1020 return EINVAL;
1021 }
1022
109ee281 1023 /* Make a deep copy of 'packet', because we might modify its data. */
89625d1e 1024 ofpbuf_init(&copy, DP_NETDEV_HEADROOM + execute->packet->size);
109ee281 1025 ofpbuf_reserve(&copy, DP_NETDEV_HEADROOM);
89625d1e 1026 ofpbuf_put(&copy, execute->packet->data, execute->packet->size);
80e5eed9 1027
4e022ec0 1028 flow_extract(&copy, 0, 0, NULL, NULL, &key);
89625d1e
BP
1029 error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len,
1030 &key);
18886b60 1031 if (!error) {
5279f8fd 1032 xpthread_mutex_lock(&dp_netdev_mutex);
4edb9ae9 1033 dp_netdev_execute_actions(dp, &copy, &key,
89625d1e 1034 execute->actions, execute->actions_len);
5279f8fd 1035 xpthread_mutex_unlock(&dp_netdev_mutex);
18886b60 1036 }
109ee281
BP
1037
1038 ofpbuf_uninit(&copy);
72865317
BP
1039 return error;
1040}
1041
1042static int
a12b3ead 1043dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
72865317 1044{
82272ede 1045 return 0;
72865317
BP
1046}
1047
5bf93d67
EJ
1048static int
1049dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1050 uint32_t queue_id, uint32_t *priority)
1051{
1052 *priority = queue_id;
1053 return 0;
1054}
1055
856081f6 1056static struct dp_netdev_queue *
72865317
BP
1057find_nonempty_queue(struct dpif *dpif)
1058{
72865317 1059 struct dp_netdev *dp = get_dp_netdev(dpif);
72865317
BP
1060 int i;
1061
1062 for (i = 0; i < N_QUEUES; i++) {
856081f6 1063 struct dp_netdev_queue *q = &dp->queues[i];
a12b3ead 1064 if (q->head != q->tail) {
856081f6 1065 return q;
72865317
BP
1066 }
1067 }
856081f6 1068 return NULL;
72865317
BP
1069}
1070
1071static int
90a7c55e
BP
1072dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
1073 struct ofpbuf *buf)
72865317 1074{
5279f8fd
BP
1075 struct dp_netdev_queue *q;
1076 int error;
1077
1078 xpthread_mutex_lock(&dp_netdev_mutex);
1079 q = find_nonempty_queue(dpif);
856081f6 1080 if (q) {
d88b629b
BP
1081 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1082
1083 *upcall = u->upcall;
1084 upcall->packet = buf;
b3907fbc 1085
90a7c55e 1086 ofpbuf_uninit(buf);
d88b629b 1087 *buf = u->buf;
90a7c55e 1088
5279f8fd 1089 error = 0;
72865317 1090 } else {
5279f8fd 1091 error = EAGAIN;
72865317 1092 }
5279f8fd
BP
1093 xpthread_mutex_unlock(&dp_netdev_mutex);
1094
1095 return error;
72865317
BP
1096}
1097
1098static void
1099dpif_netdev_recv_wait(struct dpif *dpif)
1100{
5279f8fd
BP
1101 /* XXX In a multithreaded process, there is a race window between this
1102 * function and the poll_block() in one thread and a packet being queued in
1103 * another thread. */
1104
1105 xpthread_mutex_lock(&dp_netdev_mutex);
856081f6 1106 if (find_nonempty_queue(dpif)) {
72865317 1107 poll_immediate_wake();
72865317 1108 }
5279f8fd 1109 xpthread_mutex_unlock(&dp_netdev_mutex);
72865317 1110}
1ba530f4
BP
1111
1112static void
1113dpif_netdev_recv_purge(struct dpif *dpif)
1114{
1115 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
5279f8fd 1116 xpthread_mutex_lock(&dp_netdev_mutex);
1ba530f4 1117 dp_netdev_purge_queues(dpif_netdev->dp);
5279f8fd 1118 xpthread_mutex_unlock(&dp_netdev_mutex);
1ba530f4 1119}
72865317
BP
1120\f
1121static void
c1fe014d 1122dp_netdev_flow_used(struct dp_netdev_flow *flow, const struct ofpbuf *packet)
72865317 1123{
c97fb132 1124 flow->used = time_msec();
72865317
BP
1125 flow->packet_count++;
1126 flow->byte_count += packet->size;
c1fe014d 1127 flow->tcp_flags |= packet_get_tcp_flags(packet, &flow->key);
72865317
BP
1128}
1129
1130static void
1131dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port,
6c13071b
SH
1132 struct ofpbuf *packet, uint32_t skb_priority,
1133 uint32_t skb_mark, const struct flow_tnl *tnl)
72865317
BP
1134{
1135 struct dp_netdev_flow *flow;
14608a15 1136 struct flow key;
4e022ec0 1137 union flow_in_port in_port_;
72865317 1138
1805876e
BP
1139 if (packet->size < ETH_HEADER_LEN) {
1140 return;
1141 }
4e022ec0
AW
1142 in_port_.odp_port = port->port_no;
1143 flow_extract(packet, skb_priority, skb_mark, tnl, &in_port_, &key);
72865317
BP
1144 flow = dp_netdev_lookup_flow(dp, &key);
1145 if (flow) {
c1fe014d 1146 dp_netdev_flow_used(flow, packet);
72865317 1147 dp_netdev_execute_actions(dp, packet, &key,
cdee00fd 1148 flow->actions, flow->actions_len);
72865317
BP
1149 dp->n_hit++;
1150 } else {
1151 dp->n_missed++;
e995e3df 1152 dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
72865317
BP
1153 }
1154}
1155
1156static void
640e1b20 1157dpif_netdev_run(struct dpif *dpif)
72865317 1158{
640e1b20 1159 struct dp_netdev_port *port;
5279f8fd 1160 struct dp_netdev *dp;
72865317 1161 struct ofpbuf packet;
72865317 1162
5279f8fd
BP
1163 xpthread_mutex_lock(&dp_netdev_mutex);
1164 dp = get_dp_netdev(dpif);
3b0aab93
BP
1165 ofpbuf_init(&packet,
1166 DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + dp->max_mtu);
72865317 1167
640e1b20
BP
1168 LIST_FOR_EACH (port, node, &dp->port_list) {
1169 int error;
1170
1171 /* Reset packet contents. */
1172 ofpbuf_clear(&packet);
1173 ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
1174
796223f5 1175 error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
640e1b20 1176 if (!error) {
6c13071b 1177 dp_netdev_port_input(dp, port, &packet, 0, 0, NULL);
640e1b20
BP
1178 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1179 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3aa30359 1180
640e1b20 1181 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
10a89ef0 1182 netdev_get_name(port->netdev), ovs_strerror(error));
72865317
BP
1183 }
1184 }
1185 ofpbuf_uninit(&packet);
5279f8fd 1186 xpthread_mutex_unlock(&dp_netdev_mutex);
72865317
BP
1187}
1188
1189static void
640e1b20 1190dpif_netdev_wait(struct dpif *dpif)
72865317 1191{
640e1b20 1192 struct dp_netdev_port *port;
462278db 1193
5279f8fd
BP
1194 /* There is a race here, if thread A calls dpif_netdev_wait(dpif) and
1195 * thread B calls dpif_port_add(dpif) or dpif_port_remove(dpif) before
1196 * A makes it to poll_block().
1197 *
1198 * But I think it doesn't matter:
1199 *
1200 * - In the dpif_port_add() case, A will not wake up when a packet
1201 * arrives on the new port, but this would also happen if the
1202 * ordering were reversed.
1203 *
1204 * - In the dpif_port_remove() case, A might wake up spuriously, but
1205 * that is harmless. */
1206
1207 xpthread_mutex_lock(&dp_netdev_mutex);
1208 LIST_FOR_EACH (port, node, &get_dp_netdev(dpif)->port_list) {
796223f5
BP
1209 if (port->rx) {
1210 netdev_rx_wait(port->rx);
1211 }
72865317 1212 }
5279f8fd 1213 xpthread_mutex_unlock(&dp_netdev_mutex);
72865317
BP
1214}
1215
72865317 1216static void
f094af7b 1217dp_netdev_output_port(void *dp_, struct ofpbuf *packet, uint32_t out_port)
72865317 1218{
f094af7b 1219 struct dp_netdev *dp = dp_;
2105ccc8 1220 struct dp_netdev_port *p = dp->ports[out_port];
72865317
BP
1221 if (p) {
1222 netdev_send(p->netdev, packet);
1223 }
1224}
1225
72865317 1226static int
b85d8d61 1227dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet,
e995e3df
BP
1228 int queue_no, const struct flow *flow,
1229 const struct nlattr *userdata)
72865317 1230{
856081f6 1231 struct dp_netdev_queue *q = &dp->queues[queue_no];
e995e3df
BP
1232 if (q->head - q->tail < MAX_QUEUE_LEN) {
1233 struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
1234 struct dpif_upcall *upcall = &u->upcall;
1235 struct ofpbuf *buf = &u->buf;
1236 size_t buf_size;
1237
1238 upcall->type = queue_no;
1239
1240 /* Allocate buffer big enough for everything. */
1241 buf_size = ODPUTIL_FLOW_KEY_BYTES + 2 + packet->size;
1242 if (userdata) {
1243 buf_size += NLA_ALIGN(userdata->nla_len);
1244 }
1245 ofpbuf_init(buf, buf_size);
72865317 1246
e995e3df 1247 /* Put ODP flow. */
4e022ec0 1248 odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
e995e3df
BP
1249 upcall->key = buf->data;
1250 upcall->key_len = buf->size;
d88b629b 1251
e995e3df
BP
1252 /* Put userdata. */
1253 if (userdata) {
1254 upcall->userdata = ofpbuf_put(buf, userdata,
1255 NLA_ALIGN(userdata->nla_len));
1256 }
856081f6 1257
e995e3df
BP
1258 /* Put packet.
1259 *
1260 * We adjust 'data' and 'size' in 'buf' so that only the packet itself
1261 * is visible in 'upcall->packet'. The ODP flow and (if present)
1262 * userdata become part of the headroom. */
1263 ofpbuf_put_zeros(buf, 2);
1264 buf->data = ofpbuf_put(buf, packet->data, packet->size);
1265 buf->size = packet->size;
1266 upcall->packet = buf;
856081f6 1267
e995e3df
BP
1268 return 0;
1269 } else {
1270 dp->n_lost++;
1271 return ENOBUFS;
1272 }
72865317
BP
1273}
1274
26c6b6cd 1275static void
f094af7b
SH
1276dp_netdev_action_userspace(void *dp, struct ofpbuf *packet,
1277 const struct flow *key,
1278 const struct nlattr *userdata)
98403001 1279{
98403001
BP
1280 dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata);
1281}
1282
4edb9ae9 1283static void
72865317 1284dp_netdev_execute_actions(struct dp_netdev *dp,
14608a15 1285 struct ofpbuf *packet, struct flow *key,
cdee00fd 1286 const struct nlattr *actions,
cf22f8cb 1287 size_t actions_len)
72865317 1288{
f094af7b
SH
1289 odp_execute_actions(dp, packet, key, actions, actions_len,
1290 dp_netdev_output_port, dp_netdev_action_userspace);
72865317
BP
1291}
1292
1293const struct dpif_class dpif_netdev_class = {
72865317 1294 "netdev",
2197d7ab 1295 dpif_netdev_enumerate,
0aeaabc8 1296 dpif_netdev_port_open_type,
72865317
BP
1297 dpif_netdev_open,
1298 dpif_netdev_close,
7dab847a 1299 dpif_netdev_destroy,
640e1b20
BP
1300 dpif_netdev_run,
1301 dpif_netdev_wait,
72865317 1302 dpif_netdev_get_stats,
72865317
BP
1303 dpif_netdev_port_add,
1304 dpif_netdev_port_del,
1305 dpif_netdev_port_query_by_number,
1306 dpif_netdev_port_query_by_name,
996c1b3d 1307 dpif_netdev_get_max_ports,
98403001 1308 NULL, /* port_get_pid */
b0ec0f27
BP
1309 dpif_netdev_port_dump_start,
1310 dpif_netdev_port_dump_next,
1311 dpif_netdev_port_dump_done,
72865317
BP
1312 dpif_netdev_port_poll,
1313 dpif_netdev_port_poll_wait,
72865317
BP
1314 dpif_netdev_flow_get,
1315 dpif_netdev_flow_put,
1316 dpif_netdev_flow_del,
1317 dpif_netdev_flow_flush,
704a1e09
BP
1318 dpif_netdev_flow_dump_start,
1319 dpif_netdev_flow_dump_next,
1320 dpif_netdev_flow_dump_done,
72865317 1321 dpif_netdev_execute,
6bc60024 1322 NULL, /* operate */
a12b3ead 1323 dpif_netdev_recv_set,
5bf93d67 1324 dpif_netdev_queue_to_priority,
72865317
BP
1325 dpif_netdev_recv,
1326 dpif_netdev_recv_wait,
1ba530f4 1327 dpif_netdev_recv_purge,
72865317 1328};
614c4892 1329
0cbfe35d
BP
1330static void
1331dpif_dummy_register__(const char *type)
1332{
1333 struct dpif_class *class;
1334
1335 class = xmalloc(sizeof *class);
1336 *class = dpif_netdev_class;
1337 class->type = xstrdup(type);
1338 dp_register_provider(class);
1339}
1340
614c4892 1341void
0cbfe35d 1342dpif_dummy_register(bool override)
614c4892 1343{
0cbfe35d
BP
1344 if (override) {
1345 struct sset types;
1346 const char *type;
1347
1348 sset_init(&types);
1349 dp_enumerate_types(&types);
1350 SSET_FOR_EACH (type, &types) {
1351 if (!dp_unregister_provider(type)) {
1352 dpif_dummy_register__(type);
1353 }
1354 }
1355 sset_destroy(&types);
614c4892 1356 }
0cbfe35d
BP
1357
1358 dpif_dummy_register__("dummy");
614c4892 1359}