]> git.proxmox.com Git - ovs.git/blame - lib/dpif-netdev.c
netdev: update IFF_LOOPBACK flag for linux and bsd devices
[ovs.git] / lib / dpif-netdev.c
CommitLineData
72865317 1/*
de281153 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
34#include "csum.h"
614c4892 35#include "dpif.h"
72865317 36#include "dpif-provider.h"
614c4892 37#include "dummy.h"
36956a7d 38#include "dynamic-string.h"
72865317
BP
39#include "flow.h"
40#include "hmap.h"
41#include "list.h"
42#include "netdev.h"
de281153 43#include "netdev-vport.h"
cdee00fd 44#include "netlink.h"
f094af7b 45#include "odp-execute.h"
72865317
BP
46#include "odp-util.h"
47#include "ofp-print.h"
48#include "ofpbuf.h"
49#include "packets.h"
50#include "poll-loop.h"
26c6b6cd 51#include "random.h"
d33ed218 52#include "seq.h"
462278db 53#include "shash.h"
0cbfe35d 54#include "sset.h"
72865317 55#include "timeval.h"
74cc3969 56#include "unixctl.h"
72865317 57#include "util.h"
72865317 58#include "vlog.h"
5136ce49 59
d98e6007 60VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317
BP
61
62/* Configuration parameters. */
72865317
BP
63enum { MAX_PORTS = 256 }; /* Maximum number of ports. */
64enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
65
66/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
67 * headers to be aligned on a 4-byte boundary. */
68enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
69
856081f6
BP
70/* Queues. */
71enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
72enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
73enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
74BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
75
d88b629b
BP
76struct dp_netdev_upcall {
77 struct dpif_upcall upcall; /* Queued upcall information. */
78 struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */
79};
80
856081f6 81struct dp_netdev_queue {
d88b629b 82 struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN];
856081f6
BP
83 unsigned int head, tail;
84};
85
72865317
BP
86/* Datapath based on the network device interface from netdev.h. */
87struct dp_netdev {
614c4892 88 const struct dpif_class *class;
462278db 89 char *name;
72865317 90 int open_cnt;
7dab847a 91 bool destroyed;
3b0aab93 92 int max_mtu; /* Maximum MTU of any port added so far. */
72865317 93
856081f6 94 struct dp_netdev_queue queues[N_QUEUES];
72865317 95 struct hmap flow_table; /* Flow table. */
d33ed218 96 struct seq *queue_seq; /* Incremented whenever a packet is queued. */
72865317
BP
97
98 /* Statistics. */
72865317
BP
99 long long int n_hit; /* Number of flow table matches. */
100 long long int n_missed; /* Number of flow table misses. */
101 long long int n_lost; /* Number of misses not passed to client. */
102
103 /* Ports. */
72865317
BP
104 struct dp_netdev_port *ports[MAX_PORTS];
105 struct list port_list;
d33ed218 106 struct seq *port_seq; /* Incremented whenever a port changes. */
72865317
BP
107};
108
109/* A port in a netdev-based datapath. */
110struct dp_netdev_port {
4e022ec0 111 odp_port_t port_no; /* Index into dp_netdev's 'ports'. */
72865317
BP
112 struct list node; /* Element in dp_netdev's 'port_list'. */
113 struct netdev *netdev;
4b609110 114 struct netdev_saved_flags *sf;
796223f5 115 struct netdev_rx *rx;
0cbfe35d 116 char *type; /* Port type as requested by user. */
72865317
BP
117};
118
119/* A flow in dp_netdev's 'flow_table'. */
120struct dp_netdev_flow {
121 struct hmap_node node; /* Element in dp_netdev's 'flow_table'. */
14608a15 122 struct flow key;
72865317
BP
123
124 /* Statistics. */
c97fb132 125 long long int used; /* Last used time, in monotonic msecs. */
2105ccc8
BP
126 long long int packet_count; /* Number of packets matched. */
127 long long int byte_count; /* Number of bytes matched. */
7c808e39 128 uint8_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
72865317
BP
129
130 /* Actions. */
cdee00fd 131 struct nlattr *actions;
cf22f8cb 132 size_t actions_len;
72865317
BP
133};
134
135/* Interface to netdev-based datapath. */
136struct dpif_netdev {
137 struct dpif dpif;
138 struct dp_netdev *dp;
d33ed218 139 uint64_t last_port_seq;
72865317
BP
140};
141
142/* All netdev-based datapaths. */
462278db 143static struct shash dp_netdevs = SHASH_INITIALIZER(&dp_netdevs);
72865317 144
5279f8fd 145/* Global lock for all data. */
97be1538 146static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
5279f8fd 147
4e022ec0 148static int get_port_by_number(struct dp_netdev *, odp_port_t port_no,
72865317
BP
149 struct dp_netdev_port **portp);
150static int get_port_by_name(struct dp_netdev *, const char *devname,
151 struct dp_netdev_port **portp);
152static void dp_netdev_free(struct dp_netdev *);
153static void dp_netdev_flow_flush(struct dp_netdev *);
c3827f61 154static int do_add_port(struct dp_netdev *, const char *devname,
4e022ec0
AW
155 const char *type, odp_port_t port_no);
156static int do_del_port(struct dp_netdev *, odp_port_t port_no);
614c4892
BP
157static int dpif_netdev_open(const struct dpif_class *, const char *name,
158 bool create, struct dpif **);
b85d8d61 159static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *,
856081f6 160 int queue_no, const struct flow *,
e995e3df 161 const struct nlattr *userdata);
4edb9ae9
PS
162static void dp_netdev_execute_actions(struct dp_netdev *,
163 struct ofpbuf *, struct flow *,
164 const struct nlattr *actions,
165 size_t actions_len);
6c13071b
SH
166static void dp_netdev_port_input(struct dp_netdev *dp,
167 struct dp_netdev_port *port,
168 struct ofpbuf *packet, uint32_t skb_priority,
1362e248 169 uint32_t pkt_mark, const struct flow_tnl *tnl);
72865317
BP
170
171static struct dpif_netdev *
172dpif_netdev_cast(const struct dpif *dpif)
173{
cb22974d 174 ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
72865317
BP
175 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
176}
177
178static struct dp_netdev *
179get_dp_netdev(const struct dpif *dpif)
180{
181 return dpif_netdev_cast(dpif)->dp;
182}
183
2197d7ab
GL
184static int
185dpif_netdev_enumerate(struct sset *all_dps)
186{
187 struct shash_node *node;
188
97be1538 189 ovs_mutex_lock(&dp_netdev_mutex);
2197d7ab
GL
190 SHASH_FOR_EACH(node, &dp_netdevs) {
191 sset_add(all_dps, node->name);
192 }
97be1538 193 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 194
2197d7ab
GL
195 return 0;
196}
197
add90f6f
EJ
198static bool
199dpif_netdev_class_is_dummy(const struct dpif_class *class)
200{
201 return class != &dpif_netdev_class;
202}
203
0aeaabc8
JP
/* Returns the netdev type to open for a port whose requested type is 'type'.
 *
 * Every type except "internal" is passed through unchanged.  "internal" is
 * mapped to "dummy" for dummy classes and to "tap" otherwise. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }
    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}
211
72865317
BP
212static struct dpif *
213create_dpif_netdev(struct dp_netdev *dp)
214{
462278db 215 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 216 struct dpif_netdev *dpif;
72865317
BP
217
218 dp->open_cnt++;
219
72865317 220 dpif = xmalloc(sizeof *dpif);
614c4892 221 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 222 dpif->dp = dp;
d33ed218 223 dpif->last_port_seq = seq_read(dp->port_seq);
72865317
BP
224
225 return &dpif->dpif;
226}
227
4e022ec0
AW
228/* Choose an unused, non-zero port number and return it on success.
229 * Return ODPP_NONE on failure. */
230static odp_port_t
e44768b7
JP
231choose_port(struct dp_netdev *dp, const char *name)
232{
4e022ec0 233 uint32_t port_no;
e44768b7
JP
234
235 if (dp->class != &dpif_netdev_class) {
236 const char *p;
237 int start_no = 0;
238
239 /* If the port name begins with "br", start the number search at
240 * 100 to make writing tests easier. */
241 if (!strncmp(name, "br", 2)) {
242 start_no = 100;
243 }
244
245 /* If the port name contains a number, try to assign that port number.
246 * This can make writing unit tests easier because port numbers are
247 * predictable. */
248 for (p = name; *p != '\0'; p++) {
249 if (isdigit((unsigned char) *p)) {
250 port_no = start_no + strtol(p, NULL, 10);
251 if (port_no > 0 && port_no < MAX_PORTS
252 && !dp->ports[port_no]) {
4e022ec0 253 return u32_to_odp(port_no);
e44768b7
JP
254 }
255 break;
256 }
257 }
258 }
259
260 for (port_no = 1; port_no < MAX_PORTS; port_no++) {
261 if (!dp->ports[port_no]) {
4e022ec0 262 return u32_to_odp(port_no);
e44768b7
JP
263 }
264 }
265
4e022ec0 266 return ODPP_NONE;
e44768b7
JP
267}
268
72865317 269static int
614c4892
BP
270create_dp_netdev(const char *name, const struct dpif_class *class,
271 struct dp_netdev **dpp)
72865317
BP
272{
273 struct dp_netdev *dp;
274 int error;
275 int i;
276
462278db 277 dp = xzalloc(sizeof *dp);
614c4892 278 dp->class = class;
462278db 279 dp->name = xstrdup(name);
72865317 280 dp->open_cnt = 0;
3b0aab93 281 dp->max_mtu = ETH_PAYLOAD_MAX;
72865317 282 for (i = 0; i < N_QUEUES; i++) {
856081f6 283 dp->queues[i].head = dp->queues[i].tail = 0;
72865317 284 }
d33ed218 285 dp->queue_seq = seq_create();
72865317 286 hmap_init(&dp->flow_table);
72865317 287 list_init(&dp->port_list);
d33ed218 288 dp->port_seq = seq_create();
e44768b7 289
4e022ec0 290 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
72865317
BP
291 if (error) {
292 dp_netdev_free(dp);
462278db 293 return error;
72865317
BP
294 }
295
462278db
BP
296 shash_add(&dp_netdevs, name, dp);
297
298 *dpp = dp;
72865317
BP
299 return 0;
300}
301
302static int
614c4892 303dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 304 bool create, struct dpif **dpifp)
72865317 305{
462278db 306 struct dp_netdev *dp;
5279f8fd 307 int error;
462278db 308
97be1538 309 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
310 dp = shash_find_data(&dp_netdevs, name);
311 if (!dp) {
5279f8fd 312 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 313 } else {
5279f8fd
BP
314 error = (dp->class != class ? EINVAL
315 : create ? EEXIST
316 : 0);
317 }
318 if (!error) {
319 *dpifp = create_dpif_netdev(dp);
72865317 320 }
97be1538 321 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 322
5279f8fd 323 return error;
72865317
BP
324}
325
326static void
1ba530f4 327dp_netdev_purge_queues(struct dp_netdev *dp)
72865317
BP
328{
329 int i;
330
72865317 331 for (i = 0; i < N_QUEUES; i++) {
856081f6 332 struct dp_netdev_queue *q = &dp->queues[i];
856081f6 333
1ba530f4 334 while (q->tail != q->head) {
d88b629b
BP
335 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
336 ofpbuf_uninit(&u->buf);
856081f6 337 }
72865317 338 }
1ba530f4
BP
339}
340
341static void
342dp_netdev_free(struct dp_netdev *dp)
343{
4ad28026
BP
344 struct dp_netdev_port *port, *next;
345
1ba530f4 346 dp_netdev_flow_flush(dp);
4ad28026 347 LIST_FOR_EACH_SAFE (port, next, node, &dp->port_list) {
1ba530f4
BP
348 do_del_port(dp, port->port_no);
349 }
350 dp_netdev_purge_queues(dp);
d33ed218 351 seq_destroy(dp->queue_seq);
72865317 352 hmap_destroy(&dp->flow_table);
d33ed218 353 seq_destroy(dp->port_seq);
462278db 354 free(dp->name);
72865317
BP
355 free(dp);
356}
357
358static void
359dpif_netdev_close(struct dpif *dpif)
360{
361 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 362
97be1538 363 ovs_mutex_lock(&dp_netdev_mutex);
5279f8fd 364
cb22974d 365 ovs_assert(dp->open_cnt > 0);
7dab847a 366 if (--dp->open_cnt == 0 && dp->destroyed) {
462278db 367 shash_find_and_delete(&dp_netdevs, dp->name);
72865317
BP
368 dp_netdev_free(dp);
369 }
370 free(dpif);
5279f8fd 371
97be1538 372 ovs_mutex_unlock(&dp_netdev_mutex);
72865317
BP
373}
374
375static int
7dab847a 376dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
377{
378 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 379
97be1538 380 ovs_mutex_lock(&dp_netdev_mutex);
7dab847a 381 dp->destroyed = true;
97be1538 382 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 383
72865317
BP
384 return 0;
385}
386
387static int
a8d9304d 388dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
389{
390 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 391
97be1538 392 ovs_mutex_lock(&dp_netdev_mutex);
f180c2e2 393 stats->n_flows = hmap_count(&dp->flow_table);
72865317
BP
394 stats->n_hit = dp->n_hit;
395 stats->n_missed = dp->n_missed;
396 stats->n_lost = dp->n_lost;
97be1538 397 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 398
72865317
BP
399 return 0;
400}
401
72865317 402static int
c3827f61 403do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 404 odp_port_t port_no)
72865317 405{
4b609110 406 struct netdev_saved_flags *sf;
72865317
BP
407 struct dp_netdev_port *port;
408 struct netdev *netdev;
796223f5 409 struct netdev_rx *rx;
0cbfe35d 410 const char *open_type;
72865317
BP
411 int mtu;
412 int error;
413
414 /* XXX reject devices already in some dp_netdev. */
415
416 /* Open and validate network device. */
0aeaabc8 417 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 418 error = netdev_open(devname, open_type, &netdev);
72865317
BP
419 if (error) {
420 return error;
421 }
422 /* XXX reject loopback devices */
423 /* XXX reject non-Ethernet devices */
424
796223f5 425 error = netdev_rx_open(netdev, &rx);
add90f6f
EJ
426 if (error
427 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
7b6b0ef4 428 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
10a89ef0 429 devname, ovs_strerror(errno));
7b6b0ef4
BP
430 netdev_close(netdev);
431 return error;
432 }
433
4b609110 434 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 435 if (error) {
796223f5 436 netdev_rx_close(rx);
72865317
BP
437 netdev_close(netdev);
438 return error;
439 }
440
441 port = xmalloc(sizeof *port);
442 port->port_no = port_no;
443 port->netdev = netdev;
4b609110 444 port->sf = sf;
796223f5 445 port->rx = rx;
0cbfe35d 446 port->type = xstrdup(type);
72865317 447
9b020780 448 error = netdev_get_mtu(netdev, &mtu);
3b0aab93
BP
449 if (!error && mtu > dp->max_mtu) {
450 dp->max_mtu = mtu;
72865317
BP
451 }
452
453 list_push_back(&dp->port_list, &port->node);
4e022ec0 454 dp->ports[odp_to_u32(port_no)] = port;
d33ed218 455 seq_change(dp->port_seq);
72865317
BP
456
457 return 0;
458}
459
247527db
BP
460static int
461dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 462 odp_port_t *port_nop)
247527db
BP
463{
464 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
465 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
466 const char *dpif_port;
4e022ec0 467 odp_port_t port_no;
5279f8fd 468 int error;
247527db 469
97be1538 470 ovs_mutex_lock(&dp_netdev_mutex);
3aa30359 471 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0
AW
472 if (*port_nop != ODPP_NONE) {
473 uint32_t port_idx = odp_to_u32(*port_nop);
474 if (port_idx >= MAX_PORTS) {
5279f8fd 475 error = EFBIG;
4e022ec0 476 } else if (dp->ports[port_idx]) {
5279f8fd
BP
477 error = EBUSY;
478 } else {
479 error = 0;
480 port_no = *port_nop;
232dfa4a 481 }
232dfa4a 482 } else {
3aa30359 483 port_no = choose_port(dp, dpif_port);
5279f8fd 484 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 485 }
5279f8fd 486 if (!error) {
247527db 487 *port_nop = port_no;
5279f8fd 488 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 489 }
97be1538 490 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
491
492 return error;
72865317
BP
493}
494
495static int
4e022ec0 496dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
497{
498 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
499 int error;
500
97be1538 501 ovs_mutex_lock(&dp_netdev_mutex);
5279f8fd 502 error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
97be1538 503 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
504
505 return error;
72865317
BP
506}
507
508static bool
4e022ec0 509is_valid_port_number(odp_port_t port_no)
72865317 510{
4e022ec0 511 return odp_to_u32(port_no) < MAX_PORTS;
72865317
BP
512}
513
514static int
515get_port_by_number(struct dp_netdev *dp,
4e022ec0 516 odp_port_t port_no, struct dp_netdev_port **portp)
72865317
BP
517{
518 if (!is_valid_port_number(port_no)) {
519 *portp = NULL;
520 return EINVAL;
521 } else {
4e022ec0 522 *portp = dp->ports[odp_to_u32(port_no)];
72865317
BP
523 return *portp ? 0 : ENOENT;
524 }
525}
526
527static int
528get_port_by_name(struct dp_netdev *dp,
529 const char *devname, struct dp_netdev_port **portp)
530{
531 struct dp_netdev_port *port;
532
4e8e4213 533 LIST_FOR_EACH (port, node, &dp->port_list) {
3efb6063 534 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
535 *portp = port;
536 return 0;
537 }
538 }
539 return ENOENT;
540}
541
542static int
4e022ec0 543do_del_port(struct dp_netdev *dp, odp_port_t port_no)
72865317
BP
544{
545 struct dp_netdev_port *port;
546 int error;
547
548 error = get_port_by_number(dp, port_no, &port);
549 if (error) {
550 return error;
551 }
552
553 list_remove(&port->node);
4e022ec0 554 dp->ports[odp_to_u32(port_no)] = NULL;
d33ed218 555 seq_change(dp->port_seq);
72865317
BP
556
557 netdev_close(port->netdev);
4b609110 558 netdev_restore_flags(port->sf);
796223f5 559 netdev_rx_close(port->rx);
0cbfe35d 560 free(port->type);
72865317
BP
561 free(port);
562
563 return 0;
564}
565
566static void
4c738a8d
BP
567answer_port_query(const struct dp_netdev_port *port,
568 struct dpif_port *dpif_port)
72865317 569{
3efb6063 570 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 571 dpif_port->type = xstrdup(port->type);
4c738a8d 572 dpif_port->port_no = port->port_no;
72865317
BP
573}
574
575static int
4e022ec0 576dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 577 struct dpif_port *dpif_port)
72865317
BP
578{
579 struct dp_netdev *dp = get_dp_netdev(dpif);
580 struct dp_netdev_port *port;
581 int error;
582
97be1538 583 ovs_mutex_lock(&dp_netdev_mutex);
72865317 584 error = get_port_by_number(dp, port_no, &port);
4afba28d 585 if (!error && dpif_port) {
4c738a8d 586 answer_port_query(port, dpif_port);
72865317 587 }
97be1538 588 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 589
72865317
BP
590 return error;
591}
592
593static int
594dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 595 struct dpif_port *dpif_port)
72865317
BP
596{
597 struct dp_netdev *dp = get_dp_netdev(dpif);
598 struct dp_netdev_port *port;
599 int error;
600
97be1538 601 ovs_mutex_lock(&dp_netdev_mutex);
72865317 602 error = get_port_by_name(dp, devname, &port);
4afba28d 603 if (!error && dpif_port) {
4c738a8d 604 answer_port_query(port, dpif_port);
72865317 605 }
97be1538 606 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 607
72865317
BP
608 return error;
609}
610
1dd16b9a 611static uint32_t
996c1b3d
BP
612dpif_netdev_get_max_ports(const struct dpif *dpif OVS_UNUSED)
613{
1dd16b9a 614 return MAX_PORTS;
996c1b3d
BP
615}
616
72865317
BP
617static void
618dp_netdev_free_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
619{
620 hmap_remove(&dp->flow_table, &flow->node);
621 free(flow->actions);
622 free(flow);
623}
624
625static void
626dp_netdev_flow_flush(struct dp_netdev *dp)
627{
628 struct dp_netdev_flow *flow, *next;
629
4e8e4213 630 HMAP_FOR_EACH_SAFE (flow, next, node, &dp->flow_table) {
72865317
BP
631 dp_netdev_free_flow(dp, flow);
632 }
633}
634
635static int
636dpif_netdev_flow_flush(struct dpif *dpif)
637{
638 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 639
97be1538 640 ovs_mutex_lock(&dp_netdev_mutex);
72865317 641 dp_netdev_flow_flush(dp);
97be1538 642 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 643
72865317
BP
644 return 0;
645}
646
b0ec0f27 647struct dp_netdev_port_state {
4e022ec0 648 odp_port_t port_no;
4c738a8d 649 char *name;
b0ec0f27
BP
650};
651
652static int
653dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
654{
655 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
656 return 0;
657}
658
72865317 659static int
b0ec0f27 660dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 661 struct dpif_port *dpif_port)
72865317 662{
b0ec0f27 663 struct dp_netdev_port_state *state = state_;
72865317 664 struct dp_netdev *dp = get_dp_netdev(dpif);
4e022ec0 665 uint32_t port_idx;
72865317 666
97be1538 667 ovs_mutex_lock(&dp_netdev_mutex);
4e022ec0
AW
668 for (port_idx = odp_to_u32(state->port_no);
669 port_idx < MAX_PORTS; port_idx++) {
670 struct dp_netdev_port *port = dp->ports[port_idx];
b0ec0f27 671 if (port) {
4c738a8d 672 free(state->name);
3efb6063 673 state->name = xstrdup(netdev_get_name(port->netdev));
4c738a8d 674 dpif_port->name = state->name;
0cbfe35d 675 dpif_port->type = port->type;
4c738a8d 676 dpif_port->port_no = port->port_no;
4e022ec0 677 state->port_no = u32_to_odp(port_idx + 1);
97be1538 678 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 679
b0ec0f27 680 return 0;
72865317 681 }
72865317 682 }
97be1538 683 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 684
b0ec0f27
BP
685 return EOF;
686}
687
688static int
4c738a8d 689dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 690{
4c738a8d
BP
691 struct dp_netdev_port_state *state = state_;
692 free(state->name);
b0ec0f27
BP
693 free(state);
694 return 0;
72865317
BP
695}
696
697static int
67a4917b 698dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
699{
700 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 701 uint64_t new_port_seq;
5279f8fd
BP
702 int error;
703
97be1538 704 ovs_mutex_lock(&dp_netdev_mutex);
d33ed218
BP
705 new_port_seq = seq_read(dpif->dp->port_seq);
706 if (dpif->last_port_seq != new_port_seq) {
707 dpif->last_port_seq = new_port_seq;
5279f8fd 708 error = ENOBUFS;
72865317 709 } else {
5279f8fd 710 error = EAGAIN;
72865317 711 }
97be1538 712 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
713
714 return error;
72865317
BP
715}
716
717static void
718dpif_netdev_port_poll_wait(const struct dpif *dpif_)
719{
720 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd 721
97be1538 722 ovs_mutex_lock(&dp_netdev_mutex);
d33ed218 723 seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
97be1538 724 ovs_mutex_unlock(&dp_netdev_mutex);
72865317
BP
725}
726
72865317 727static struct dp_netdev_flow *
14608a15 728dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *key)
72865317
BP
729{
730 struct dp_netdev_flow *flow;
731
4e8e4213 732 HMAP_FOR_EACH_WITH_HASH (flow, node, flow_hash(key, 0), &dp->flow_table) {
72865317
BP
733 if (flow_equal(&flow->key, key)) {
734 return flow;
735 }
736 }
737 return NULL;
738}
739
740static void
c97fb132 741get_dpif_flow_stats(struct dp_netdev_flow *flow, struct dpif_flow_stats *stats)
feebdea2
BP
742{
743 stats->n_packets = flow->packet_count;
744 stats->n_bytes = flow->byte_count;
c97fb132 745 stats->used = flow->used;
734ec5ec 746 stats->tcp_flags = flow->tcp_flags;
72865317
BP
747}
748
36956a7d
BP
749static int
750dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
751 struct flow *flow)
752{
586ddea5
BP
753 odp_port_t in_port;
754
0135dc8b 755 if (odp_flow_key_to_flow(key, key_len, flow) != ODP_FIT_PERFECT) {
36956a7d
BP
756 /* This should not happen: it indicates that odp_flow_key_from_flow()
757 * and odp_flow_key_to_flow() disagree on the acceptable form of a
758 * flow. Log the problem as an error, with enough details to enable
759 * debugging. */
760 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
761
762 if (!VLOG_DROP_ERR(&rl)) {
763 struct ds s;
764
765 ds_init(&s);
766 odp_flow_key_format(key, key_len, &s);
767 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
768 ds_destroy(&s);
769 }
770
771 return EINVAL;
772 }
773
586ddea5
BP
774 in_port = flow->in_port.odp_port;
775 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
776 return EINVAL;
777 }
778
36956a7d
BP
779 return 0;
780}
781
72865317 782static int
693c4a01 783dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 784 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 785 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
786{
787 struct dp_netdev *dp = get_dp_netdev(dpif);
bc4a05c6
BP
788 struct dp_netdev_flow *flow;
789 struct flow key;
790 int error;
36956a7d 791
feebdea2 792 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
793 if (error) {
794 return error;
795 }
14608a15 796
97be1538 797 ovs_mutex_lock(&dp_netdev_mutex);
bc4a05c6 798 flow = dp_netdev_lookup_flow(dp, &key);
5279f8fd
BP
799 if (flow) {
800 if (stats) {
801 get_dpif_flow_stats(flow, stats);
802 }
803 if (actionsp) {
804 *actionsp = ofpbuf_clone_data(flow->actions, flow->actions_len);
805 }
806 } else {
807 error = ENOENT;
72865317 808 }
97be1538 809 ovs_mutex_unlock(&dp_netdev_mutex);
bc4a05c6 810
5279f8fd 811 return error;
72865317
BP
812}
813
72865317 814static int
feebdea2
BP
815set_flow_actions(struct dp_netdev_flow *flow,
816 const struct nlattr *actions, size_t actions_len)
72865317 817{
feebdea2
BP
818 flow->actions = xrealloc(flow->actions, actions_len);
819 flow->actions_len = actions_len;
820 memcpy(flow->actions, actions, actions_len);
72865317
BP
821 return 0;
822}
823
824static int
e1fef0f9
AS
825dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *key,
826 const struct nlattr *actions, size_t actions_len)
72865317 827{
72865317
BP
828 struct dp_netdev_flow *flow;
829 int error;
830
ec6fde61 831 flow = xzalloc(sizeof *flow);
36956a7d 832 flow->key = *key;
72865317 833
feebdea2 834 error = set_flow_actions(flow, actions, actions_len);
72865317
BP
835 if (error) {
836 free(flow);
837 return error;
838 }
839
840 hmap_insert(&dp->flow_table, &flow->node, flow_hash(&flow->key, 0));
841 return 0;
842}
843
844static void
845clear_stats(struct dp_netdev_flow *flow)
846{
c97fb132 847 flow->used = 0;
72865317
BP
848 flow->packet_count = 0;
849 flow->byte_count = 0;
734ec5ec 850 flow->tcp_flags = 0;
72865317
BP
851}
852
853static int
89625d1e 854dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
855{
856 struct dp_netdev *dp = get_dp_netdev(dpif);
857 struct dp_netdev_flow *flow;
14608a15 858 struct flow key;
36956a7d
BP
859 int error;
860
89625d1e 861 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &key);
36956a7d
BP
862 if (error) {
863 return error;
864 }
72865317 865
97be1538 866 ovs_mutex_lock(&dp_netdev_mutex);
14608a15 867 flow = dp_netdev_lookup_flow(dp, &key);
72865317 868 if (!flow) {
89625d1e 869 if (put->flags & DPIF_FP_CREATE) {
72865317 870 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
871 if (put->stats) {
872 memset(put->stats, 0, sizeof *put->stats);
feebdea2 873 }
5279f8fd
BP
874 error = dp_netdev_flow_add(dp, &key, put->actions,
875 put->actions_len);
72865317 876 } else {
5279f8fd 877 error = EFBIG;
72865317
BP
878 }
879 } else {
5279f8fd 880 error = ENOENT;
72865317
BP
881 }
882 } else {
89625d1e 883 if (put->flags & DPIF_FP_MODIFY) {
5279f8fd 884 error = set_flow_actions(flow, put->actions, put->actions_len);
feebdea2 885 if (!error) {
89625d1e
BP
886 if (put->stats) {
887 get_dpif_flow_stats(flow, put->stats);
feebdea2 888 }
89625d1e 889 if (put->flags & DPIF_FP_ZERO_STATS) {
feebdea2
BP
890 clear_stats(flow);
891 }
72865317 892 }
72865317 893 } else {
5279f8fd 894 error = EEXIST;
72865317
BP
895 }
896 }
97be1538 897 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
898
899 return error;
72865317
BP
900}
901
72865317 902static int
b99d3cee 903dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
904{
905 struct dp_netdev *dp = get_dp_netdev(dpif);
906 struct dp_netdev_flow *flow;
14608a15 907 struct flow key;
36956a7d
BP
908 int error;
909
b99d3cee 910 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
911 if (error) {
912 return error;
913 }
72865317 914
97be1538 915 ovs_mutex_lock(&dp_netdev_mutex);
14608a15 916 flow = dp_netdev_lookup_flow(dp, &key);
72865317 917 if (flow) {
b99d3cee
BP
918 if (del->stats) {
919 get_dpif_flow_stats(flow, del->stats);
feebdea2 920 }
72865317 921 dp_netdev_free_flow(dp, flow);
72865317 922 } else {
5279f8fd 923 error = ENOENT;
72865317 924 }
97be1538 925 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
926
927 return error;
72865317
BP
928}
929
704a1e09
BP
930struct dp_netdev_flow_state {
931 uint32_t bucket;
932 uint32_t offset;
feebdea2 933 struct nlattr *actions;
19cf4069 934 struct odputil_keybuf keybuf;
c97fb132 935 struct dpif_flow_stats stats;
704a1e09
BP
936};
937
72865317 938static int
704a1e09 939dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
72865317 940{
feebdea2
BP
941 struct dp_netdev_flow_state *state;
942
943 *statep = state = xmalloc(sizeof *state);
944 state->bucket = 0;
945 state->offset = 0;
946 state->actions = NULL;
704a1e09
BP
947 return 0;
948}
949
950static int
951dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
feebdea2 952 const struct nlattr **key, size_t *key_len,
e6cc0bab 953 const struct nlattr **mask, size_t *mask_len,
feebdea2 954 const struct nlattr **actions, size_t *actions_len,
c97fb132 955 const struct dpif_flow_stats **stats)
704a1e09
BP
956{
957 struct dp_netdev_flow_state *state = state_;
72865317
BP
958 struct dp_netdev *dp = get_dp_netdev(dpif);
959 struct dp_netdev_flow *flow;
704a1e09 960 struct hmap_node *node;
14608a15 961
97be1538 962 ovs_mutex_lock(&dp_netdev_mutex);
704a1e09
BP
963 node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
964 if (!node) {
97be1538 965 ovs_mutex_unlock(&dp_netdev_mutex);
704a1e09 966 return EOF;
72865317 967 }
704a1e09
BP
968
969 flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
36956a7d 970
feebdea2
BP
971 if (key) {
972 struct ofpbuf buf;
973
19cf4069 974 ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
4e022ec0 975 odp_flow_key_from_flow(&buf, &flow->key, flow->key.in_port.odp_port);
36956a7d 976
feebdea2
BP
977 *key = buf.data;
978 *key_len = buf.size;
979 }
980
e6cc0bab
AZ
981 if (mask) {
982 *mask = NULL;
983 *mask_len = 0;
984 }
985
feebdea2
BP
986 if (actions) {
987 free(state->actions);
988 state->actions = xmemdup(flow->actions, flow->actions_len);
989
990 *actions = state->actions;
991 *actions_len = flow->actions_len;
992 }
993
994 if (stats) {
c97fb132 995 get_dpif_flow_stats(flow, &state->stats);
feebdea2
BP
996 *stats = &state->stats;
997 }
704a1e09 998
97be1538 999 ovs_mutex_unlock(&dp_netdev_mutex);
704a1e09
BP
1000 return 0;
1001}
1002
1003static int
feebdea2 1004dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
704a1e09 1005{
feebdea2
BP
1006 struct dp_netdev_flow_state *state = state_;
1007
1008 free(state->actions);
704a1e09
BP
1009 free(state);
1010 return 0;
72865317
BP
1011}
1012
1013static int
89625d1e 1014dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute)
72865317
BP
1015{
1016 struct dp_netdev *dp = get_dp_netdev(dpif);
1017 struct ofpbuf copy;
ae412e7d 1018 struct flow key;
72865317
BP
1019 int error;
1020
89625d1e
BP
1021 if (execute->packet->size < ETH_HEADER_LEN ||
1022 execute->packet->size > UINT16_MAX) {
72865317
BP
1023 return EINVAL;
1024 }
1025
109ee281 1026 /* Make a deep copy of 'packet', because we might modify its data. */
89625d1e 1027 ofpbuf_init(&copy, DP_NETDEV_HEADROOM + execute->packet->size);
109ee281 1028 ofpbuf_reserve(&copy, DP_NETDEV_HEADROOM);
89625d1e 1029 ofpbuf_put(&copy, execute->packet->data, execute->packet->size);
80e5eed9 1030
4e022ec0 1031 flow_extract(&copy, 0, 0, NULL, NULL, &key);
89625d1e
BP
1032 error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len,
1033 &key);
18886b60 1034 if (!error) {
97be1538 1035 ovs_mutex_lock(&dp_netdev_mutex);
4edb9ae9 1036 dp_netdev_execute_actions(dp, &copy, &key,
89625d1e 1037 execute->actions, execute->actions_len);
97be1538 1038 ovs_mutex_unlock(&dp_netdev_mutex);
18886b60 1039 }
109ee281
BP
1040
1041 ofpbuf_uninit(&copy);
72865317
BP
1042 return error;
1043}
1044
1045static int
a12b3ead 1046dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
72865317 1047{
82272ede 1048 return 0;
72865317
BP
1049}
1050
5bf93d67
EJ
1051static int
1052dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1053 uint32_t queue_id, uint32_t *priority)
1054{
1055 *priority = queue_id;
1056 return 0;
1057}
1058
856081f6 1059static struct dp_netdev_queue *
72865317
BP
1060find_nonempty_queue(struct dpif *dpif)
1061{
72865317 1062 struct dp_netdev *dp = get_dp_netdev(dpif);
72865317
BP
1063 int i;
1064
1065 for (i = 0; i < N_QUEUES; i++) {
856081f6 1066 struct dp_netdev_queue *q = &dp->queues[i];
a12b3ead 1067 if (q->head != q->tail) {
856081f6 1068 return q;
72865317
BP
1069 }
1070 }
856081f6 1071 return NULL;
72865317
BP
1072}
1073
1074static int
90a7c55e
BP
1075dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
1076 struct ofpbuf *buf)
72865317 1077{
5279f8fd
BP
1078 struct dp_netdev_queue *q;
1079 int error;
1080
97be1538 1081 ovs_mutex_lock(&dp_netdev_mutex);
5279f8fd 1082 q = find_nonempty_queue(dpif);
856081f6 1083 if (q) {
d88b629b
BP
1084 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1085
1086 *upcall = u->upcall;
1087 upcall->packet = buf;
b3907fbc 1088
90a7c55e 1089 ofpbuf_uninit(buf);
d88b629b 1090 *buf = u->buf;
90a7c55e 1091
5279f8fd 1092 error = 0;
72865317 1093 } else {
5279f8fd 1094 error = EAGAIN;
72865317 1095 }
97be1538 1096 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd
BP
1097
1098 return error;
72865317
BP
1099}
1100
1101static void
1102dpif_netdev_recv_wait(struct dpif *dpif)
1103{
d33ed218
BP
1104 struct dp_netdev *dp = get_dp_netdev(dpif);
1105 uint64_t seq;
5279f8fd 1106
97be1538 1107 ovs_mutex_lock(&dp_netdev_mutex);
d33ed218 1108 seq = seq_read(dp->queue_seq);
856081f6 1109 if (find_nonempty_queue(dpif)) {
72865317 1110 poll_immediate_wake();
d33ed218
BP
1111 } else {
1112 seq_wait(dp->queue_seq, seq);
72865317 1113 }
97be1538 1114 ovs_mutex_unlock(&dp_netdev_mutex);
72865317 1115}
1ba530f4
BP
1116
1117static void
1118dpif_netdev_recv_purge(struct dpif *dpif)
1119{
1120 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
97be1538 1121 ovs_mutex_lock(&dp_netdev_mutex);
1ba530f4 1122 dp_netdev_purge_queues(dpif_netdev->dp);
97be1538 1123 ovs_mutex_unlock(&dp_netdev_mutex);
1ba530f4 1124}
72865317
BP
1125\f
1126static void
c1fe014d 1127dp_netdev_flow_used(struct dp_netdev_flow *flow, const struct ofpbuf *packet)
72865317 1128{
c97fb132 1129 flow->used = time_msec();
72865317
BP
1130 flow->packet_count++;
1131 flow->byte_count += packet->size;
c1fe014d 1132 flow->tcp_flags |= packet_get_tcp_flags(packet, &flow->key);
72865317
BP
1133}
1134
1135static void
1136dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port,
6c13071b 1137 struct ofpbuf *packet, uint32_t skb_priority,
1362e248 1138 uint32_t pkt_mark, const struct flow_tnl *tnl)
72865317
BP
1139{
1140 struct dp_netdev_flow *flow;
14608a15 1141 struct flow key;
4e022ec0 1142 union flow_in_port in_port_;
72865317 1143
1805876e
BP
1144 if (packet->size < ETH_HEADER_LEN) {
1145 return;
1146 }
4e022ec0 1147 in_port_.odp_port = port->port_no;
1362e248 1148 flow_extract(packet, skb_priority, pkt_mark, tnl, &in_port_, &key);
72865317
BP
1149 flow = dp_netdev_lookup_flow(dp, &key);
1150 if (flow) {
c1fe014d 1151 dp_netdev_flow_used(flow, packet);
72865317 1152 dp_netdev_execute_actions(dp, packet, &key,
cdee00fd 1153 flow->actions, flow->actions_len);
72865317
BP
1154 dp->n_hit++;
1155 } else {
1156 dp->n_missed++;
e995e3df 1157 dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
72865317
BP
1158 }
1159}
1160
1161static void
640e1b20 1162dpif_netdev_run(struct dpif *dpif)
72865317 1163{
640e1b20 1164 struct dp_netdev_port *port;
5279f8fd 1165 struct dp_netdev *dp;
72865317 1166 struct ofpbuf packet;
72865317 1167
97be1538 1168 ovs_mutex_lock(&dp_netdev_mutex);
5279f8fd 1169 dp = get_dp_netdev(dpif);
3b0aab93
BP
1170 ofpbuf_init(&packet,
1171 DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + dp->max_mtu);
72865317 1172
640e1b20
BP
1173 LIST_FOR_EACH (port, node, &dp->port_list) {
1174 int error;
1175
1176 /* Reset packet contents. */
1177 ofpbuf_clear(&packet);
1178 ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM);
1179
796223f5 1180 error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
640e1b20 1181 if (!error) {
6c13071b 1182 dp_netdev_port_input(dp, port, &packet, 0, 0, NULL);
640e1b20
BP
1183 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1184 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3aa30359 1185
640e1b20 1186 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
10a89ef0 1187 netdev_get_name(port->netdev), ovs_strerror(error));
72865317
BP
1188 }
1189 }
1190 ofpbuf_uninit(&packet);
97be1538 1191 ovs_mutex_unlock(&dp_netdev_mutex);
72865317
BP
1192}
1193
1194static void
640e1b20 1195dpif_netdev_wait(struct dpif *dpif)
72865317 1196{
640e1b20 1197 struct dp_netdev_port *port;
462278db 1198
5279f8fd
BP
1199 /* There is a race here, if thread A calls dpif_netdev_wait(dpif) and
1200 * thread B calls dpif_port_add(dpif) or dpif_port_remove(dpif) before
1201 * A makes it to poll_block().
1202 *
1203 * But I think it doesn't matter:
1204 *
1205 * - In the dpif_port_add() case, A will not wake up when a packet
1206 * arrives on the new port, but this would also happen if the
1207 * ordering were reversed.
1208 *
1209 * - In the dpif_port_remove() case, A might wake up spuriously, but
1210 * that is harmless. */
1211
97be1538 1212 ovs_mutex_lock(&dp_netdev_mutex);
5279f8fd 1213 LIST_FOR_EACH (port, node, &get_dp_netdev(dpif)->port_list) {
796223f5
BP
1214 if (port->rx) {
1215 netdev_rx_wait(port->rx);
1216 }
72865317 1217 }
97be1538 1218 ovs_mutex_unlock(&dp_netdev_mutex);
72865317
BP
1219}
1220
72865317 1221static void
f094af7b 1222dp_netdev_output_port(void *dp_, struct ofpbuf *packet, uint32_t out_port)
72865317 1223{
f094af7b 1224 struct dp_netdev *dp = dp_;
2105ccc8 1225 struct dp_netdev_port *p = dp->ports[out_port];
72865317
BP
1226 if (p) {
1227 netdev_send(p->netdev, packet);
1228 }
1229}
1230
72865317 1231static int
b85d8d61 1232dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet,
e995e3df
BP
1233 int queue_no, const struct flow *flow,
1234 const struct nlattr *userdata)
72865317 1235{
856081f6 1236 struct dp_netdev_queue *q = &dp->queues[queue_no];
e995e3df
BP
1237 if (q->head - q->tail < MAX_QUEUE_LEN) {
1238 struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
1239 struct dpif_upcall *upcall = &u->upcall;
1240 struct ofpbuf *buf = &u->buf;
1241 size_t buf_size;
1242
1243 upcall->type = queue_no;
1244
1245 /* Allocate buffer big enough for everything. */
1246 buf_size = ODPUTIL_FLOW_KEY_BYTES + 2 + packet->size;
1247 if (userdata) {
1248 buf_size += NLA_ALIGN(userdata->nla_len);
1249 }
1250 ofpbuf_init(buf, buf_size);
72865317 1251
e995e3df 1252 /* Put ODP flow. */
4e022ec0 1253 odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
e995e3df
BP
1254 upcall->key = buf->data;
1255 upcall->key_len = buf->size;
d88b629b 1256
e995e3df
BP
1257 /* Put userdata. */
1258 if (userdata) {
1259 upcall->userdata = ofpbuf_put(buf, userdata,
1260 NLA_ALIGN(userdata->nla_len));
1261 }
856081f6 1262
e995e3df
BP
1263 /* Put packet.
1264 *
1265 * We adjust 'data' and 'size' in 'buf' so that only the packet itself
1266 * is visible in 'upcall->packet'. The ODP flow and (if present)
1267 * userdata become part of the headroom. */
1268 ofpbuf_put_zeros(buf, 2);
1269 buf->data = ofpbuf_put(buf, packet->data, packet->size);
1270 buf->size = packet->size;
1271 upcall->packet = buf;
856081f6 1272
d33ed218
BP
1273 seq_change(dp->queue_seq);
1274
e995e3df
BP
1275 return 0;
1276 } else {
1277 dp->n_lost++;
1278 return ENOBUFS;
1279 }
72865317
BP
1280}
1281
26c6b6cd 1282static void
f094af7b
SH
1283dp_netdev_action_userspace(void *dp, struct ofpbuf *packet,
1284 const struct flow *key,
1285 const struct nlattr *userdata)
98403001 1286{
98403001
BP
1287 dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata);
1288}
1289
4edb9ae9 1290static void
72865317 1291dp_netdev_execute_actions(struct dp_netdev *dp,
14608a15 1292 struct ofpbuf *packet, struct flow *key,
cdee00fd 1293 const struct nlattr *actions,
cf22f8cb 1294 size_t actions_len)
72865317 1295{
f094af7b
SH
1296 odp_execute_actions(dp, packet, key, actions, actions_len,
1297 dp_netdev_output_port, dp_netdev_action_userspace);
72865317
BP
1298}
1299
1300const struct dpif_class dpif_netdev_class = {
72865317 1301 "netdev",
2197d7ab 1302 dpif_netdev_enumerate,
0aeaabc8 1303 dpif_netdev_port_open_type,
72865317
BP
1304 dpif_netdev_open,
1305 dpif_netdev_close,
7dab847a 1306 dpif_netdev_destroy,
640e1b20
BP
1307 dpif_netdev_run,
1308 dpif_netdev_wait,
72865317 1309 dpif_netdev_get_stats,
72865317
BP
1310 dpif_netdev_port_add,
1311 dpif_netdev_port_del,
1312 dpif_netdev_port_query_by_number,
1313 dpif_netdev_port_query_by_name,
996c1b3d 1314 dpif_netdev_get_max_ports,
98403001 1315 NULL, /* port_get_pid */
b0ec0f27
BP
1316 dpif_netdev_port_dump_start,
1317 dpif_netdev_port_dump_next,
1318 dpif_netdev_port_dump_done,
72865317
BP
1319 dpif_netdev_port_poll,
1320 dpif_netdev_port_poll_wait,
72865317
BP
1321 dpif_netdev_flow_get,
1322 dpif_netdev_flow_put,
1323 dpif_netdev_flow_del,
1324 dpif_netdev_flow_flush,
704a1e09
BP
1325 dpif_netdev_flow_dump_start,
1326 dpif_netdev_flow_dump_next,
1327 dpif_netdev_flow_dump_done,
72865317 1328 dpif_netdev_execute,
6bc60024 1329 NULL, /* operate */
a12b3ead 1330 dpif_netdev_recv_set,
5bf93d67 1331 dpif_netdev_queue_to_priority,
72865317
BP
1332 dpif_netdev_recv,
1333 dpif_netdev_recv_wait,
1ba530f4 1334 dpif_netdev_recv_purge,
72865317 1335};
614c4892 1336
74cc3969
BP
1337static void
1338dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
1339 const char *argv[], void *aux OVS_UNUSED)
1340{
1341 struct dp_netdev_port *port;
1342 struct dp_netdev *dp;
1343 int port_no;
1344
1345 dp = shash_find_data(&dp_netdevs, argv[1]);
1346 if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
1347 unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
1348 return;
1349 }
1350
1351 if (get_port_by_name(dp, argv[2], &port)) {
1352 unixctl_command_reply_error(conn, "unknown port");
1353 return;
1354 }
1355
1356 port_no = atoi(argv[3]);
1357 if (port_no <= 0 || port_no >= MAX_PORTS) {
1358 unixctl_command_reply_error(conn, "bad port number");
1359 return;
1360 }
1361 if (dp->ports[port_no]) {
1362 unixctl_command_reply_error(conn, "port number already in use");
1363 return;
1364 }
1365 dp->ports[odp_to_u32(port->port_no)] = NULL;
1366 dp->ports[port_no] = port;
1367 port->port_no = u32_to_odp(port_no);
d33ed218 1368 seq_change(dp->port_seq);
74cc3969
BP
1369 unixctl_command_reply(conn, NULL);
1370}
1371
0cbfe35d
BP
1372static void
1373dpif_dummy_register__(const char *type)
1374{
1375 struct dpif_class *class;
1376
1377 class = xmalloc(sizeof *class);
1378 *class = dpif_netdev_class;
1379 class->type = xstrdup(type);
1380 dp_register_provider(class);
1381}
1382
614c4892 1383void
0cbfe35d 1384dpif_dummy_register(bool override)
614c4892 1385{
0cbfe35d
BP
1386 if (override) {
1387 struct sset types;
1388 const char *type;
1389
1390 sset_init(&types);
1391 dp_enumerate_types(&types);
1392 SSET_FOR_EACH (type, &types) {
1393 if (!dp_unregister_provider(type)) {
1394 dpif_dummy_register__(type);
1395 }
1396 }
1397 sset_destroy(&types);
614c4892 1398 }
0cbfe35d
BP
1399
1400 dpif_dummy_register__("dummy");
74cc3969
BP
1401
1402 unixctl_command_register("dpif-dummy/change-port-number",
1403 "DP PORT NEW-NUMBER",
1404 3, 3, dpif_dummy_change_port_number, NULL);
614c4892 1405}