/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

#include "classifier.h"
#include "dpif-provider.h"
#include "dynamic-string.h"
#include "meta-flow.h"
#include "netdev-vport.h"
#include "odp-execute.h"
#include "ofp-print.h"
#include "poll-loop.h"
63 VLOG_DEFINE_THIS_MODULE(dpif_netdev
);
65 /* By default, choose a priority in the middle. */
66 #define NETDEV_RULE_PRIORITY 0x8000
68 /* Configuration parameters. */
69 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
71 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
72 * headers to be aligned on a 4-byte boundary. */
73 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
76 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
77 enum { MAX_QUEUE_LEN
= 128 }; /* Maximum number of packets per queue. */
78 enum { QUEUE_MASK
= MAX_QUEUE_LEN
- 1 };
79 BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN
));
81 /* Protects against changes to 'dp_netdevs'. */
82 static struct ovs_mutex dp_netdev_mutex
= OVS_MUTEX_INITIALIZER
;
84 /* Contains all 'struct dp_netdev's. */
85 static struct shash dp_netdevs
OVS_GUARDED_BY(dp_netdev_mutex
)
86 = SHASH_INITIALIZER(&dp_netdevs
);
88 struct dp_netdev_upcall
{
89 struct dpif_upcall upcall
; /* Queued upcall information. */
90 struct ofpbuf buf
; /* ofpbuf instance for upcall.packet. */
93 /* A queue passing packets from a struct dp_netdev to its clients.
99 * Any access at all requires the owning 'dp_netdev''s queue_mutex. */
100 struct dp_netdev_queue
{
101 struct dp_netdev_upcall upcalls
[MAX_QUEUE_LEN
] OVS_GUARDED
;
102 unsigned int head OVS_GUARDED
;
103 unsigned int tail OVS_GUARDED
;
106 /* Datapath based on the network device interface from netdev.h.
112 * Some members, marked 'const', are immutable. Accessing other members
113 * requires synchronization, as noted in more detail below.
115 * Acquisition order is, from outermost to innermost:
117 * dp_netdev_mutex (global)
124 const struct dpif_class
*const class;
125 const char *const name
;
126 struct ovs_refcount ref_cnt
;
127 atomic_flag destroyed
;
131 * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
133 * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
134 * the 'cls->rwlock' write lock. (The outer 'flow_mutex' allows writers to
135 * atomically perform multiple operations on 'cls' and 'flow_table'.)
137 struct ovs_mutex flow_mutex
;
138 struct classifier cls
; /* Classifier. Protected by cls.rwlock. */
139 struct hmap flow_table OVS_GUARDED
; /* Flow table. */
143 * Everything in 'queues' is protected by 'queue_mutex'. */
144 struct ovs_mutex queue_mutex
;
145 struct dp_netdev_queue queues
[N_QUEUES
];
146 struct seq
*queue_seq
; /* Incremented whenever a packet is queued. */
150 * ovsthread_counter is internally synchronized. */
151 struct ovsthread_counter
*n_hit
; /* Number of flow table matches. */
152 struct ovsthread_counter
*n_missed
; /* Number of flow table misses. */
153 struct ovsthread_counter
*n_lost
; /* Number of misses not passed up. */
157 * Any lookup into 'ports' or any access to the dp_netdev_ports found
158 * through 'ports' requires taking 'port_rwlock'. */
159 struct ovs_rwlock port_rwlock
;
160 struct hmap ports OVS_GUARDED
;
161 struct seq
*port_seq
; /* Incremented whenever a port changes. */
163 /* Forwarding threads. */
164 struct latch exit_latch
;
165 struct dp_forwarder
*forwarders
;
169 static struct dp_netdev_port
*dp_netdev_lookup_port(const struct dp_netdev
*dp
,
171 OVS_REQ_RDLOCK(dp
->port_rwlock
);
173 /* A port in a netdev-based datapath. */
174 struct dp_netdev_port
{
175 struct hmap_node node
; /* Node in dp_netdev's 'ports'. */
177 struct netdev
*netdev
;
178 struct netdev_saved_flags
*sf
;
179 struct netdev_rx
*rx
;
180 char *type
; /* Port type as requested by user. */
183 /* A flow in dp_netdev's 'flow_table'.
189 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
190 * its dp_netdev's classifier. The text below calls this classifier 'cls'.
195 * The thread safety rules described here for "struct dp_netdev_flow" are
196 * motivated by two goals:
198 * - Prevent threads that read members of "struct dp_netdev_flow" from
199 * reading bad data due to changes by some thread concurrently modifying
202 * - Prevent two threads making changes to members of a given "struct
203 * dp_netdev_flow" from interfering with each other.
209 * A flow 'flow' may be accessed without a risk of being freed by code that
210 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
211 * 'flow->ref_cnt' (or both). Code that needs to hold onto a flow for a while
212 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
213 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
215 * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the
216 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
217 * protect members of 'flow' from modification (that's 'flow->mutex').
219 * 'flow->mutex' protects the members of 'flow' from modification. It doesn't
220 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
221 * doesn't prevent the flow from being freed (that's 'flow->ref_cnt').
223 * Some members, marked 'const', are immutable. Accessing other members
224 * requires synchronization, as noted in more detail below.
226 struct dp_netdev_flow
{
227 /* Packet classification. */
228 const struct cls_rule cr
; /* In owning dp_netdev's 'cls'. */
230 /* Hash table index by unmasked flow. */
231 const struct hmap_node node
; /* In owning dp_netdev's 'flow_table'. */
232 const struct flow flow
; /* The flow that created this entry. */
234 /* Number of references.
235 * The classifier owns one reference.
236 * Any thread trying to keep a rule from being freed should hold its own
238 struct ovs_refcount ref_cnt
;
240 /* Protects members marked OVS_GUARDED.
242 * Acquire after datapath's flow_mutex. */
243 struct ovs_mutex mutex
OVS_ACQ_AFTER(dp_netdev_mutex
);
247 * Reading or writing these members requires 'mutex'. */
248 long long int used OVS_GUARDED
; /* Last used time, in monotonic msecs. */
249 long long int packet_count OVS_GUARDED
; /* Number of packets matched. */
250 long long int byte_count OVS_GUARDED
; /* Number of bytes matched. */
251 uint16_t tcp_flags OVS_GUARDED
; /* Bitwise-OR of seen tcp_flags values. */
255 * Reading 'actions' requires 'mutex'.
256 * Writing 'actions' requires 'mutex' and (to allow for transactions) the
257 * datapath's flow_mutex. */
258 struct dp_netdev_actions
*actions OVS_GUARDED
;
261 static struct dp_netdev_flow
*dp_netdev_flow_ref(
262 const struct dp_netdev_flow
*);
263 static void dp_netdev_flow_unref(struct dp_netdev_flow
*);
265 /* A set of datapath actions within a "struct dp_netdev_flow".
271 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
272 * freed by code that holds a read-lock or write-lock on 'flow->mutex' (where
273 * 'flow' is the dp_netdev_flow for which 'flow->actions == actions') or that
274 * owns a reference to 'actions->ref_cnt' (or both). */
275 struct dp_netdev_actions
{
276 struct ovs_refcount ref_cnt
;
278 /* These members are immutable: they do not change during the struct's
280 struct nlattr
*actions
; /* Sequence of OVS_ACTION_ATTR_* attributes. */
281 unsigned int size
; /* Size of 'actions', in bytes. */
284 struct dp_netdev_actions
*dp_netdev_actions_create(const struct nlattr
*,
286 struct dp_netdev_actions
*dp_netdev_actions_ref(
287 const struct dp_netdev_actions
*);
288 void dp_netdev_actions_unref(struct dp_netdev_actions
*);
290 /* A thread that receives packets from some ports, looks them up in the flow
291 * table, and executes the actions it finds. */
292 struct dp_forwarder
{
293 struct dp_netdev
*dp
;
296 uint32_t min_hash
, max_hash
;
299 /* Interface to netdev-based datapath. */
302 struct dp_netdev
*dp
;
303 uint64_t last_port_seq
;
306 static int get_port_by_number(struct dp_netdev
*dp
, odp_port_t port_no
,
307 struct dp_netdev_port
**portp
)
308 OVS_REQ_RDLOCK(dp
->port_rwlock
);
309 static int get_port_by_name(struct dp_netdev
*dp
, const char *devname
,
310 struct dp_netdev_port
**portp
)
311 OVS_REQ_RDLOCK(dp
->port_rwlock
);
312 static void dp_netdev_free(struct dp_netdev
*)
313 OVS_REQUIRES(dp_netdev_mutex
);
314 static void dp_netdev_flow_flush(struct dp_netdev
*);
315 static int do_add_port(struct dp_netdev
*dp
, const char *devname
,
316 const char *type
, odp_port_t port_no
)
317 OVS_REQ_WRLOCK(dp
->port_rwlock
);
318 static int do_del_port(struct dp_netdev
*dp
, odp_port_t port_no
)
319 OVS_REQ_WRLOCK(dp
->port_rwlock
);
320 static int dpif_netdev_open(const struct dpif_class
*, const char *name
,
321 bool create
, struct dpif
**);
322 static int dp_netdev_output_userspace(struct dp_netdev
*dp
, struct ofpbuf
*,
323 int queue_no
, const struct flow
*,
324 const struct nlattr
*userdata
)
325 OVS_EXCLUDED(dp
->queue_mutex
);
326 static void dp_netdev_execute_actions(struct dp_netdev
*dp
,
327 const struct flow
*, struct ofpbuf
*,
328 struct pkt_metadata
*,
329 const struct nlattr
*actions
,
331 OVS_REQ_RDLOCK(dp
->port_rwlock
);
332 static void dp_netdev_port_input(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
333 struct pkt_metadata
*)
334 OVS_REQ_RDLOCK(dp
->port_rwlock
);
335 static void dp_netdev_set_threads(struct dp_netdev
*, int n
);
337 static struct dpif_netdev
*
338 dpif_netdev_cast(const struct dpif
*dpif
)
340 ovs_assert(dpif
->dpif_class
->open
== dpif_netdev_open
);
341 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
344 static struct dp_netdev
*
345 get_dp_netdev(const struct dpif
*dpif
)
347 return dpif_netdev_cast(dpif
)->dp
;
351 dpif_netdev_enumerate(struct sset
*all_dps
)
353 struct shash_node
*node
;
355 ovs_mutex_lock(&dp_netdev_mutex
);
356 SHASH_FOR_EACH(node
, &dp_netdevs
) {
357 sset_add(all_dps
, node
->name
);
359 ovs_mutex_unlock(&dp_netdev_mutex
);
365 dpif_netdev_class_is_dummy(const struct dpif_class
*class)
367 return class != &dpif_netdev_class
;
/* Maps the user-requested port 'type' to the netdev type to open:
 * non-"internal" types pass through unchanged; "internal" becomes "dummy" for
 * dummy classes.  The final fallback ("tap" upstream) was lost in this copy
 * and is reconstructed — confirm against the upstream source. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    return strcmp(type, "internal") ? type
           : dpif_netdev_class_is_dummy(class) ? "dummy"
           : "tap";
}
379 create_dpif_netdev(struct dp_netdev
*dp
)
381 uint16_t netflow_id
= hash_string(dp
->name
, 0);
382 struct dpif_netdev
*dpif
;
384 ovs_refcount_ref(&dp
->ref_cnt
);
386 dpif
= xmalloc(sizeof *dpif
);
387 dpif_init(&dpif
->dpif
, dp
->class, dp
->name
, netflow_id
>> 8, netflow_id
);
389 dpif
->last_port_seq
= seq_read(dp
->port_seq
);
394 /* Choose an unused, non-zero port number and return it on success.
395 * Return ODPP_NONE on failure. */
397 choose_port(struct dp_netdev
*dp
, const char *name
)
398 OVS_REQ_RDLOCK(dp
->port_rwlock
)
402 if (dp
->class != &dpif_netdev_class
) {
406 /* If the port name begins with "br", start the number search at
407 * 100 to make writing tests easier. */
408 if (!strncmp(name
, "br", 2)) {
412 /* If the port name contains a number, try to assign that port number.
413 * This can make writing unit tests easier because port numbers are
415 for (p
= name
; *p
!= '\0'; p
++) {
416 if (isdigit((unsigned char) *p
)) {
417 port_no
= start_no
+ strtol(p
, NULL
, 10);
418 if (port_no
> 0 && port_no
!= odp_to_u32(ODPP_NONE
)
419 && !dp_netdev_lookup_port(dp
, u32_to_odp(port_no
))) {
420 return u32_to_odp(port_no
);
427 for (port_no
= 1; port_no
<= UINT16_MAX
; port_no
++) {
428 if (!dp_netdev_lookup_port(dp
, u32_to_odp(port_no
))) {
429 return u32_to_odp(port_no
);
437 create_dp_netdev(const char *name
, const struct dpif_class
*class,
438 struct dp_netdev
**dpp
)
439 OVS_REQUIRES(dp_netdev_mutex
)
441 struct dp_netdev
*dp
;
445 dp
= xzalloc(sizeof *dp
);
446 shash_add(&dp_netdevs
, name
, dp
);
448 *CONST_CAST(const struct dpif_class
**, &dp
->class) = class;
449 *CONST_CAST(const char **, &dp
->name
) = xstrdup(name
);
450 ovs_refcount_init(&dp
->ref_cnt
);
451 atomic_flag_init(&dp
->destroyed
);
453 ovs_mutex_init(&dp
->flow_mutex
);
454 classifier_init(&dp
->cls
, NULL
);
455 hmap_init(&dp
->flow_table
);
457 ovs_mutex_init(&dp
->queue_mutex
);
458 ovs_mutex_lock(&dp
->queue_mutex
);
459 for (i
= 0; i
< N_QUEUES
; i
++) {
460 dp
->queues
[i
].head
= dp
->queues
[i
].tail
= 0;
462 ovs_mutex_unlock(&dp
->queue_mutex
);
463 dp
->queue_seq
= seq_create();
465 dp
->n_hit
= ovsthread_counter_create();
466 dp
->n_missed
= ovsthread_counter_create();
467 dp
->n_lost
= ovsthread_counter_create();
469 ovs_rwlock_init(&dp
->port_rwlock
);
470 hmap_init(&dp
->ports
);
471 dp
->port_seq
= seq_create();
472 latch_init(&dp
->exit_latch
);
474 ovs_rwlock_wrlock(&dp
->port_rwlock
);
475 error
= do_add_port(dp
, name
, "internal", ODPP_LOCAL
);
476 ovs_rwlock_unlock(&dp
->port_rwlock
);
481 dp_netdev_set_threads(dp
, 2);
488 dpif_netdev_open(const struct dpif_class
*class, const char *name
,
489 bool create
, struct dpif
**dpifp
)
491 struct dp_netdev
*dp
;
494 ovs_mutex_lock(&dp_netdev_mutex
);
495 dp
= shash_find_data(&dp_netdevs
, name
);
497 error
= create
? create_dp_netdev(name
, class, &dp
) : ENODEV
;
499 error
= (dp
->class != class ? EINVAL
504 *dpifp
= create_dpif_netdev(dp
);
506 ovs_mutex_unlock(&dp_netdev_mutex
);
512 dp_netdev_purge_queues(struct dp_netdev
*dp
)
516 ovs_mutex_lock(&dp
->queue_mutex
);
517 for (i
= 0; i
< N_QUEUES
; i
++) {
518 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
520 while (q
->tail
!= q
->head
) {
521 struct dp_netdev_upcall
*u
= &q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
522 ofpbuf_uninit(&u
->upcall
.packet
);
523 ofpbuf_uninit(&u
->buf
);
526 ovs_mutex_unlock(&dp
->queue_mutex
);
529 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
530 * through the 'dp_netdevs' shash while freeing 'dp'. */
532 dp_netdev_free(struct dp_netdev
*dp
)
533 OVS_REQUIRES(dp_netdev_mutex
)
535 struct dp_netdev_port
*port
, *next
;
537 shash_find_and_delete(&dp_netdevs
, dp
->name
);
539 dp_netdev_set_threads(dp
, 0);
540 free(dp
->forwarders
);
542 dp_netdev_flow_flush(dp
);
543 ovs_rwlock_wrlock(&dp
->port_rwlock
);
544 HMAP_FOR_EACH_SAFE (port
, next
, node
, &dp
->ports
) {
545 do_del_port(dp
, port
->port_no
);
547 ovs_rwlock_unlock(&dp
->port_rwlock
);
548 ovsthread_counter_destroy(dp
->n_hit
);
549 ovsthread_counter_destroy(dp
->n_missed
);
550 ovsthread_counter_destroy(dp
->n_lost
);
552 dp_netdev_purge_queues(dp
);
553 seq_destroy(dp
->queue_seq
);
554 ovs_mutex_destroy(&dp
->queue_mutex
);
556 classifier_destroy(&dp
->cls
);
557 hmap_destroy(&dp
->flow_table
);
558 ovs_mutex_destroy(&dp
->flow_mutex
);
559 seq_destroy(dp
->port_seq
);
560 hmap_destroy(&dp
->ports
);
561 atomic_flag_destroy(&dp
->destroyed
);
562 ovs_refcount_destroy(&dp
->ref_cnt
);
563 latch_destroy(&dp
->exit_latch
);
564 free(CONST_CAST(char *, dp
->name
));
569 dp_netdev_unref(struct dp_netdev
*dp
)
572 /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
573 * get a new reference to 'dp' through the 'dp_netdevs' shash. */
574 ovs_mutex_lock(&dp_netdev_mutex
);
575 if (ovs_refcount_unref(&dp
->ref_cnt
) == 1) {
578 ovs_mutex_unlock(&dp_netdev_mutex
);
/* Closes 'dpif', dropping its reference to the underlying datapath. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_unref(dp);
    free(dpif);
}
592 dpif_netdev_destroy(struct dpif
*dpif
)
594 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
596 if (!atomic_flag_test_and_set(&dp
->destroyed
)) {
597 if (ovs_refcount_unref(&dp
->ref_cnt
) == 1) {
598 /* Can't happen: 'dpif' still owns a reference to 'dp'. */
607 dpif_netdev_get_stats(const struct dpif
*dpif
, struct dpif_dp_stats
*stats
)
609 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
611 fat_rwlock_rdlock(&dp
->cls
.rwlock
);
612 stats
->n_flows
= hmap_count(&dp
->flow_table
);
613 fat_rwlock_unlock(&dp
->cls
.rwlock
);
615 stats
->n_hit
= ovsthread_counter_read(dp
->n_hit
);
616 stats
->n_missed
= ovsthread_counter_read(dp
->n_missed
);
617 stats
->n_lost
= ovsthread_counter_read(dp
->n_lost
);
618 stats
->n_masks
= UINT32_MAX
;
619 stats
->n_mask_hit
= UINT64_MAX
;
625 do_add_port(struct dp_netdev
*dp
, const char *devname
, const char *type
,
627 OVS_REQ_WRLOCK(dp
->port_rwlock
)
629 struct netdev_saved_flags
*sf
;
630 struct dp_netdev_port
*port
;
631 struct netdev
*netdev
;
632 struct netdev_rx
*rx
;
633 enum netdev_flags flags
;
634 const char *open_type
;
637 /* XXX reject devices already in some dp_netdev. */
639 /* Open and validate network device. */
640 open_type
= dpif_netdev_port_open_type(dp
->class, type
);
641 error
= netdev_open(devname
, open_type
, &netdev
);
645 /* XXX reject non-Ethernet devices */
647 netdev_get_flags(netdev
, &flags
);
648 if (flags
& NETDEV_LOOPBACK
) {
649 VLOG_ERR("%s: cannot add a loopback device", devname
);
650 netdev_close(netdev
);
654 error
= netdev_rx_open(netdev
, &rx
);
656 && !(error
== EOPNOTSUPP
&& dpif_netdev_class_is_dummy(dp
->class))) {
657 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
658 devname
, ovs_strerror(errno
));
659 netdev_close(netdev
);
663 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, &sf
);
666 netdev_close(netdev
);
670 port
= xmalloc(sizeof *port
);
671 port
->port_no
= port_no
;
672 port
->netdev
= netdev
;
675 port
->type
= xstrdup(type
);
677 hmap_insert(&dp
->ports
, &port
->node
, hash_int(odp_to_u32(port_no
), 0));
678 seq_change(dp
->port_seq
);
684 dpif_netdev_port_add(struct dpif
*dpif
, struct netdev
*netdev
,
685 odp_port_t
*port_nop
)
687 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
688 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
689 const char *dpif_port
;
693 ovs_rwlock_wrlock(&dp
->port_rwlock
);
694 dpif_port
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
695 if (*port_nop
!= ODPP_NONE
) {
697 error
= dp_netdev_lookup_port(dp
, *port_nop
) ? EBUSY
: 0;
699 port_no
= choose_port(dp
, dpif_port
);
700 error
= port_no
== ODPP_NONE
? EFBIG
: 0;
704 error
= do_add_port(dp
, dpif_port
, netdev_get_type(netdev
), port_no
);
706 ovs_rwlock_unlock(&dp
->port_rwlock
);
712 dpif_netdev_port_del(struct dpif
*dpif
, odp_port_t port_no
)
714 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
717 ovs_rwlock_wrlock(&dp
->port_rwlock
);
718 error
= port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
719 ovs_rwlock_unlock(&dp
->port_rwlock
);
725 is_valid_port_number(odp_port_t port_no
)
727 return port_no
!= ODPP_NONE
;
730 static struct dp_netdev_port
*
731 dp_netdev_lookup_port(const struct dp_netdev
*dp
, odp_port_t port_no
)
732 OVS_REQ_RDLOCK(dp
->port_rwlock
)
734 struct dp_netdev_port
*port
;
736 HMAP_FOR_EACH_IN_BUCKET (port
, node
, hash_int(odp_to_u32(port_no
), 0),
738 if (port
->port_no
== port_no
) {
746 get_port_by_number(struct dp_netdev
*dp
,
747 odp_port_t port_no
, struct dp_netdev_port
**portp
)
748 OVS_REQ_RDLOCK(dp
->port_rwlock
)
750 if (!is_valid_port_number(port_no
)) {
754 *portp
= dp_netdev_lookup_port(dp
, port_no
);
755 return *portp
? 0 : ENOENT
;
760 get_port_by_name(struct dp_netdev
*dp
,
761 const char *devname
, struct dp_netdev_port
**portp
)
762 OVS_REQ_RDLOCK(dp
->port_rwlock
)
764 struct dp_netdev_port
*port
;
766 HMAP_FOR_EACH (port
, node
, &dp
->ports
) {
767 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
776 do_del_port(struct dp_netdev
*dp
, odp_port_t port_no
)
777 OVS_REQ_WRLOCK(dp
->port_rwlock
)
779 struct dp_netdev_port
*port
;
782 error
= get_port_by_number(dp
, port_no
, &port
);
787 hmap_remove(&dp
->ports
, &port
->node
);
788 seq_change(dp
->port_seq
);
790 netdev_close(port
->netdev
);
791 netdev_restore_flags(port
->sf
);
792 netdev_rx_close(port
->rx
);
800 answer_port_query(const struct dp_netdev_port
*port
,
801 struct dpif_port
*dpif_port
)
803 dpif_port
->name
= xstrdup(netdev_get_name(port
->netdev
));
804 dpif_port
->type
= xstrdup(port
->type
);
805 dpif_port
->port_no
= port
->port_no
;
809 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, odp_port_t port_no
,
810 struct dpif_port
*dpif_port
)
812 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
813 struct dp_netdev_port
*port
;
816 ovs_rwlock_rdlock(&dp
->port_rwlock
);
817 error
= get_port_by_number(dp
, port_no
, &port
);
818 if (!error
&& dpif_port
) {
819 answer_port_query(port
, dpif_port
);
821 ovs_rwlock_unlock(&dp
->port_rwlock
);
827 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
828 struct dpif_port
*dpif_port
)
830 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
831 struct dp_netdev_port
*port
;
834 ovs_rwlock_rdlock(&dp
->port_rwlock
);
835 error
= get_port_by_name(dp
, devname
, &port
);
836 if (!error
&& dpif_port
) {
837 answer_port_query(port
, dpif_port
);
839 ovs_rwlock_unlock(&dp
->port_rwlock
);
845 dp_netdev_remove_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
846 OVS_REQ_WRLOCK(dp
->cls
.rwlock
)
847 OVS_REQUIRES(dp
->flow_mutex
)
849 struct cls_rule
*cr
= CONST_CAST(struct cls_rule
*, &flow
->cr
);
850 struct hmap_node
*node
= CONST_CAST(struct hmap_node
*, &flow
->node
);
852 classifier_remove(&dp
->cls
, cr
);
853 hmap_remove(&dp
->flow_table
, node
);
854 dp_netdev_flow_unref(flow
);
857 static struct dp_netdev_flow
*
858 dp_netdev_flow_ref(const struct dp_netdev_flow
*flow_
)
860 struct dp_netdev_flow
*flow
= CONST_CAST(struct dp_netdev_flow
*, flow_
);
862 ovs_refcount_ref(&flow
->ref_cnt
);
868 dp_netdev_flow_unref(struct dp_netdev_flow
*flow
)
870 if (flow
&& ovs_refcount_unref(&flow
->ref_cnt
) == 1) {
871 cls_rule_destroy(CONST_CAST(struct cls_rule
*, &flow
->cr
));
872 ovs_mutex_lock(&flow
->mutex
);
873 dp_netdev_actions_unref(flow
->actions
);
874 ovs_mutex_unlock(&flow
->mutex
);
875 ovs_mutex_destroy(&flow
->mutex
);
881 dp_netdev_flow_flush(struct dp_netdev
*dp
)
883 struct dp_netdev_flow
*netdev_flow
, *next
;
885 ovs_mutex_lock(&dp
->flow_mutex
);
886 fat_rwlock_wrlock(&dp
->cls
.rwlock
);
887 HMAP_FOR_EACH_SAFE (netdev_flow
, next
, node
, &dp
->flow_table
) {
888 dp_netdev_remove_flow(dp
, netdev_flow
);
890 fat_rwlock_unlock(&dp
->cls
.rwlock
);
891 ovs_mutex_unlock(&dp
->flow_mutex
);
/* dpif 'flow_flush' callback. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_flow_flush(dp);
    return 0;
}
/* Iterator state for the port dump; 'bucket'/'offset' track the hmap
 * position (see hmap_at_position()) and 'name' owns the last returned port
 * name. */
struct dp_netdev_port_state {
    uint32_t bucket;
    uint32_t offset;
    char *name;
};
910 dpif_netdev_port_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
912 *statep
= xzalloc(sizeof(struct dp_netdev_port_state
));
917 dpif_netdev_port_dump_next(const struct dpif
*dpif
, void *state_
,
918 struct dpif_port
*dpif_port
)
920 struct dp_netdev_port_state
*state
= state_
;
921 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
922 struct hmap_node
*node
;
925 ovs_rwlock_rdlock(&dp
->port_rwlock
);
926 node
= hmap_at_position(&dp
->ports
, &state
->bucket
, &state
->offset
);
928 struct dp_netdev_port
*port
;
930 port
= CONTAINER_OF(node
, struct dp_netdev_port
, node
);
933 state
->name
= xstrdup(netdev_get_name(port
->netdev
));
934 dpif_port
->name
= state
->name
;
935 dpif_port
->type
= port
->type
;
936 dpif_port
->port_no
= port
->port_no
;
942 ovs_rwlock_unlock(&dp
->port_rwlock
);
948 dpif_netdev_port_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
950 struct dp_netdev_port_state
*state
= state_
;
957 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
959 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
960 uint64_t new_port_seq
;
963 new_port_seq
= seq_read(dpif
->dp
->port_seq
);
964 if (dpif
->last_port_seq
!= new_port_seq
) {
965 dpif
->last_port_seq
= new_port_seq
;
975 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
977 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
979 seq_wait(dpif
->dp
->port_seq
, dpif
->last_port_seq
);
982 static struct dp_netdev_flow
*
983 dp_netdev_flow_cast(const struct cls_rule
*cr
)
985 return cr
? CONTAINER_OF(cr
, struct dp_netdev_flow
, cr
) : NULL
;
988 static struct dp_netdev_flow
*
989 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const struct flow
*flow
)
990 OVS_EXCLUDED(dp
->cls
.rwlock
)
992 struct dp_netdev_flow
*netdev_flow
;
994 fat_rwlock_rdlock(&dp
->cls
.rwlock
);
995 netdev_flow
= dp_netdev_flow_cast(classifier_lookup(&dp
->cls
, flow
, NULL
));
996 dp_netdev_flow_ref(netdev_flow
);
997 fat_rwlock_unlock(&dp
->cls
.rwlock
);
1002 static struct dp_netdev_flow
*
1003 dp_netdev_find_flow(const struct dp_netdev
*dp
, const struct flow
*flow
)
1004 OVS_REQ_RDLOCK(dp
->cls
.rwlock
)
1006 struct dp_netdev_flow
*netdev_flow
;
1008 HMAP_FOR_EACH_WITH_HASH (netdev_flow
, node
, flow_hash(flow
, 0),
1010 if (flow_equal(&netdev_flow
->flow
, flow
)) {
1011 return dp_netdev_flow_ref(netdev_flow
);
1019 get_dpif_flow_stats(struct dp_netdev_flow
*netdev_flow
,
1020 struct dpif_flow_stats
*stats
)
1021 OVS_REQ_RDLOCK(netdev_flow
->mutex
)
1023 stats
->n_packets
= netdev_flow
->packet_count
;
1024 stats
->n_bytes
= netdev_flow
->byte_count
;
1025 stats
->used
= netdev_flow
->used
;
1026 stats
->tcp_flags
= netdev_flow
->tcp_flags
;
1030 dpif_netdev_mask_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
1031 const struct nlattr
*mask_key
,
1032 uint32_t mask_key_len
, const struct flow
*flow
,
1036 enum odp_key_fitness fitness
;
1038 fitness
= odp_flow_key_to_mask(mask_key
, mask_key_len
, mask
, flow
);
1040 /* This should not happen: it indicates that
1041 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1042 * disagree on the acceptable form of a mask. Log the problem
1043 * as an error, with enough details to enable debugging. */
1044 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1046 if (!VLOG_DROP_ERR(&rl
)) {
1050 odp_flow_format(key
, key_len
, mask_key
, mask_key_len
, NULL
, &s
,
1052 VLOG_ERR("internal error parsing flow mask %s (%s)",
1053 ds_cstr(&s
), odp_key_fitness_to_string(fitness
));
1059 /* Force unwildcard the in_port. */
1060 mask
->in_port
.odp_port
= u32_to_odp(UINT32_MAX
);
1062 enum mf_field_id id
;
1063 /* No mask key, unwildcard everything except fields whose
1064 * prerequisities are not met. */
1065 memset(mask
, 0x0, sizeof *mask
);
1067 for (id
= 0; id
< MFF_N_IDS
; ++id
) {
1068 /* Skip registers and metadata. */
1069 if (!(id
>= MFF_REG0
&& id
< MFF_REG0
+ FLOW_N_REGS
)
1070 && id
!= MFF_METADATA
) {
1071 const struct mf_field
*mf
= mf_from_id(id
);
1072 if (mf_are_prereqs_ok(mf
, flow
)) {
1073 mf_mask_field(mf
, mask
);
1083 dpif_netdev_flow_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
1088 if (odp_flow_key_to_flow(key
, key_len
, flow
)) {
1089 /* This should not happen: it indicates that odp_flow_key_from_flow()
1090 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1091 * flow. Log the problem as an error, with enough details to enable
1093 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1095 if (!VLOG_DROP_ERR(&rl
)) {
1099 odp_flow_format(key
, key_len
, NULL
, 0, NULL
, &s
, true);
1100 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s
));
1107 in_port
= flow
->in_port
.odp_port
;
1108 if (!is_valid_port_number(in_port
) && in_port
!= ODPP_NONE
) {
1116 dpif_netdev_flow_get(const struct dpif
*dpif
,
1117 const struct nlattr
*nl_key
, size_t nl_key_len
,
1118 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
1120 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1121 struct dp_netdev_flow
*netdev_flow
;
1125 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
1130 fat_rwlock_rdlock(&dp
->cls
.rwlock
);
1131 netdev_flow
= dp_netdev_find_flow(dp
, &key
);
1132 fat_rwlock_unlock(&dp
->cls
.rwlock
);
1135 struct dp_netdev_actions
*actions
= NULL
;
1137 ovs_mutex_lock(&netdev_flow
->mutex
);
1139 get_dpif_flow_stats(netdev_flow
, stats
);
1142 actions
= dp_netdev_actions_ref(netdev_flow
->actions
);
1144 ovs_mutex_unlock(&netdev_flow
->mutex
);
1146 dp_netdev_flow_unref(netdev_flow
);
1149 *actionsp
= ofpbuf_clone_data(actions
->actions
, actions
->size
);
1150 dp_netdev_actions_unref(actions
);
1160 dp_netdev_flow_add(struct dp_netdev
*dp
, const struct flow
*flow
,
1161 const struct flow_wildcards
*wc
,
1162 const struct nlattr
*actions
,
1164 OVS_REQUIRES(dp
->flow_mutex
)
1166 struct dp_netdev_flow
*netdev_flow
;
1169 netdev_flow
= xzalloc(sizeof *netdev_flow
);
1170 *CONST_CAST(struct flow
*, &netdev_flow
->flow
) = *flow
;
1171 ovs_refcount_init(&netdev_flow
->ref_cnt
);
1173 ovs_mutex_init(&netdev_flow
->mutex
);
1174 ovs_mutex_lock(&netdev_flow
->mutex
);
1176 netdev_flow
->actions
= dp_netdev_actions_create(actions
, actions_len
);
1178 match_init(&match
, flow
, wc
);
1179 cls_rule_init(CONST_CAST(struct cls_rule
*, &netdev_flow
->cr
),
1180 &match
, NETDEV_RULE_PRIORITY
);
1181 fat_rwlock_wrlock(&dp
->cls
.rwlock
);
1182 classifier_insert(&dp
->cls
,
1183 CONST_CAST(struct cls_rule
*, &netdev_flow
->cr
));
1184 hmap_insert(&dp
->flow_table
,
1185 CONST_CAST(struct hmap_node
*, &netdev_flow
->node
),
1186 flow_hash(flow
, 0));
1187 fat_rwlock_unlock(&dp
->cls
.rwlock
);
1189 ovs_mutex_unlock(&netdev_flow
->mutex
);
1195 clear_stats(struct dp_netdev_flow
*netdev_flow
)
1196 OVS_REQUIRES(netdev_flow
->mutex
)
1198 netdev_flow
->used
= 0;
1199 netdev_flow
->packet_count
= 0;
1200 netdev_flow
->byte_count
= 0;
1201 netdev_flow
->tcp_flags
= 0;
1205 dpif_netdev_flow_put(struct dpif
*dpif
, const struct dpif_flow_put
*put
)
1207 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1208 struct dp_netdev_flow
*netdev_flow
;
1210 struct flow_wildcards wc
;
1213 error
= dpif_netdev_flow_from_nlattrs(put
->key
, put
->key_len
, &flow
);
1217 error
= dpif_netdev_mask_from_nlattrs(put
->key
, put
->key_len
,
1218 put
->mask
, put
->mask_len
,
1224 ovs_mutex_lock(&dp
->flow_mutex
);
1225 netdev_flow
= dp_netdev_lookup_flow(dp
, &flow
);
1227 if (put
->flags
& DPIF_FP_CREATE
) {
1228 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
1230 memset(put
->stats
, 0, sizeof *put
->stats
);
1232 error
= dp_netdev_flow_add(dp
, &flow
, &wc
, put
->actions
,
1241 if (put
->flags
& DPIF_FP_MODIFY
1242 && flow_equal(&flow
, &netdev_flow
->flow
)) {
1243 struct dp_netdev_actions
*new_actions
;
1244 struct dp_netdev_actions
*old_actions
;
1246 new_actions
= dp_netdev_actions_create(put
->actions
,
1249 ovs_mutex_lock(&netdev_flow
->mutex
);
1250 old_actions
= netdev_flow
->actions
;
1251 netdev_flow
->actions
= new_actions
;
1253 get_dpif_flow_stats(netdev_flow
, put
->stats
);
1255 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
1256 clear_stats(netdev_flow
);
1258 ovs_mutex_unlock(&netdev_flow
->mutex
);
1260 dp_netdev_actions_unref(old_actions
);
1261 } else if (put
->flags
& DPIF_FP_CREATE
) {
1264 /* Overlapping flow. */
1267 dp_netdev_flow_unref(netdev_flow
);
1269 ovs_mutex_unlock(&dp
->flow_mutex
);
1275 dpif_netdev_flow_del(struct dpif
*dpif
, const struct dpif_flow_del
*del
)
1277 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1278 struct dp_netdev_flow
*netdev_flow
;
1282 error
= dpif_netdev_flow_from_nlattrs(del
->key
, del
->key_len
, &key
);
1287 ovs_mutex_lock(&dp
->flow_mutex
);
1288 fat_rwlock_wrlock(&dp
->cls
.rwlock
);
1289 netdev_flow
= dp_netdev_find_flow(dp
, &key
);
1292 ovs_mutex_lock(&netdev_flow
->mutex
);
1293 get_dpif_flow_stats(netdev_flow
, del
->stats
);
1294 ovs_mutex_unlock(&netdev_flow
->mutex
);
1296 dp_netdev_remove_flow(dp
, netdev_flow
);
1300 fat_rwlock_unlock(&dp
->cls
.rwlock
);
1301 ovs_mutex_unlock(&dp
->flow_mutex
);
1306 struct dp_netdev_flow_state
{
1309 struct dp_netdev_actions
*actions
;
1310 struct odputil_keybuf keybuf
;
1311 struct odputil_keybuf maskbuf
;
1312 struct dpif_flow_stats stats
;
1316 dpif_netdev_flow_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
1318 struct dp_netdev_flow_state
*state
;
1320 *statep
= state
= xmalloc(sizeof *state
);
1323 state
->actions
= NULL
;
1328 dpif_netdev_flow_dump_next(const struct dpif
*dpif
, void *state_
,
1329 const struct nlattr
**key
, size_t *key_len
,
1330 const struct nlattr
**mask
, size_t *mask_len
,
1331 const struct nlattr
**actions
, size_t *actions_len
,
1332 const struct dpif_flow_stats
**stats
)
1334 struct dp_netdev_flow_state
*state
= state_
;
1335 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1336 struct dp_netdev_flow
*netdev_flow
;
1337 struct hmap_node
*node
;
1339 fat_rwlock_rdlock(&dp
->cls
.rwlock
);
1340 node
= hmap_at_position(&dp
->flow_table
, &state
->bucket
, &state
->offset
);
1342 netdev_flow
= CONTAINER_OF(node
, struct dp_netdev_flow
, node
);
1343 dp_netdev_flow_ref(netdev_flow
);
1345 fat_rwlock_unlock(&dp
->cls
.rwlock
);
1353 ofpbuf_use_stack(&buf
, &state
->keybuf
, sizeof state
->keybuf
);
1354 odp_flow_key_from_flow(&buf
, &netdev_flow
->flow
,
1355 netdev_flow
->flow
.in_port
.odp_port
);
1358 *key_len
= buf
.size
;
1363 struct flow_wildcards wc
;
1365 ofpbuf_use_stack(&buf
, &state
->maskbuf
, sizeof state
->maskbuf
);
1366 minimask_expand(&netdev_flow
->cr
.match
.mask
, &wc
);
1367 odp_flow_key_from_mask(&buf
, &wc
.masks
, &netdev_flow
->flow
,
1368 odp_to_u32(wc
.masks
.in_port
.odp_port
));
1371 *mask_len
= buf
.size
;
1374 if (actions
|| stats
) {
1375 dp_netdev_actions_unref(state
->actions
);
1376 state
->actions
= NULL
;
1378 ovs_mutex_lock(&netdev_flow
->mutex
);
1380 state
->actions
= dp_netdev_actions_ref(netdev_flow
->actions
);
1381 *actions
= state
->actions
->actions
;
1382 *actions_len
= state
->actions
->size
;
1385 get_dpif_flow_stats(netdev_flow
, &state
->stats
);
1386 *stats
= &state
->stats
;
1388 ovs_mutex_unlock(&netdev_flow
->mutex
);
1391 dp_netdev_flow_unref(netdev_flow
);
1397 dpif_netdev_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
1399 struct dp_netdev_flow_state
*state
= state_
;
1401 dp_netdev_actions_unref(state
->actions
);
1407 dpif_netdev_execute(struct dpif
*dpif
, struct dpif_execute
*execute
)
1409 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1410 struct pkt_metadata
*md
= &execute
->md
;
1413 if (execute
->packet
->size
< ETH_HEADER_LEN
||
1414 execute
->packet
->size
> UINT16_MAX
) {
1418 /* Extract flow key. */
1419 flow_extract(execute
->packet
, md
->skb_priority
, md
->pkt_mark
, &md
->tunnel
,
1420 (union flow_in_port
*)&md
->in_port
, &key
);
1422 ovs_rwlock_rdlock(&dp
->port_rwlock
);
1423 dp_netdev_execute_actions(dp
, &key
, execute
->packet
, md
, execute
->actions
,
1424 execute
->actions_len
);
1425 ovs_rwlock_unlock(&dp
->port_rwlock
);
1431 dpif_netdev_recv_set(struct dpif
*dpif OVS_UNUSED
, bool enable OVS_UNUSED
)
1437 dpif_netdev_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
1438 uint32_t queue_id
, uint32_t *priority
)
1440 *priority
= queue_id
;
1444 static struct dp_netdev_queue
*
1445 find_nonempty_queue(struct dp_netdev
*dp
)
1446 OVS_REQUIRES(dp
->queue_mutex
)
1450 for (i
= 0; i
< N_QUEUES
; i
++) {
1451 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
1452 if (q
->head
!= q
->tail
) {
1460 dpif_netdev_recv(struct dpif
*dpif
, struct dpif_upcall
*upcall
,
1463 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1464 struct dp_netdev_queue
*q
;
1467 ovs_mutex_lock(&dp
->queue_mutex
);
1468 q
= find_nonempty_queue(dp
);
1470 struct dp_netdev_upcall
*u
= &q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
1472 *upcall
= u
->upcall
;
1481 ovs_mutex_unlock(&dp
->queue_mutex
);
1487 dpif_netdev_recv_wait(struct dpif
*dpif
)
1489 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1492 ovs_mutex_lock(&dp
->queue_mutex
);
1493 seq
= seq_read(dp
->queue_seq
);
1494 if (find_nonempty_queue(dp
)) {
1495 poll_immediate_wake();
1497 seq_wait(dp
->queue_seq
, seq
);
1499 ovs_mutex_unlock(&dp
->queue_mutex
);
1503 dpif_netdev_recv_purge(struct dpif
*dpif
)
1505 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1507 dp_netdev_purge_queues(dpif_netdev
->dp
);
1510 /* Creates and returns a new 'struct dp_netdev_actions', with a reference count
1511 * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of
1513 struct dp_netdev_actions
*
1514 dp_netdev_actions_create(const struct nlattr
*actions
, size_t size
)
1516 struct dp_netdev_actions
*netdev_actions
;
1518 netdev_actions
= xmalloc(sizeof *netdev_actions
);
1519 ovs_refcount_init(&netdev_actions
->ref_cnt
);
1520 netdev_actions
->actions
= xmemdup(actions
, size
);
1521 netdev_actions
->size
= size
;
1523 return netdev_actions
;
1526 /* Increments 'actions''s refcount. */
1527 struct dp_netdev_actions
*
1528 dp_netdev_actions_ref(const struct dp_netdev_actions
*actions_
)
1530 struct dp_netdev_actions
*actions
;
1532 actions
= CONST_CAST(struct dp_netdev_actions
*, actions_
);
1534 ovs_refcount_ref(&actions
->ref_cnt
);
1539 /* Decrements 'actions''s refcount and frees 'actions' if the refcount reaches
1542 dp_netdev_actions_unref(struct dp_netdev_actions
*actions
)
1544 if (actions
&& ovs_refcount_unref(&actions
->ref_cnt
) == 1) {
1545 free(actions
->actions
);
1551 dp_forwarder_main(void *f_
)
1553 struct dp_forwarder
*f
= f_
;
1554 struct dp_netdev
*dp
= f
->dp
;
1555 struct ofpbuf packet
;
1557 f
->name
= xasprintf("forwarder_%u", ovsthread_id_self());
1558 set_subprogram_name("%s", f
->name
);
1560 ofpbuf_init(&packet
, 0);
1561 while (!latch_is_set(&dp
->exit_latch
)) {
1562 bool received_anything
;
1565 ovs_rwlock_rdlock(&dp
->port_rwlock
);
1566 for (i
= 0; i
< 50; i
++) {
1567 struct dp_netdev_port
*port
;
1569 received_anything
= false;
1570 HMAP_FOR_EACH (port
, node
, &f
->dp
->ports
) {
1572 && port
->node
.hash
>= f
->min_hash
1573 && port
->node
.hash
<= f
->max_hash
) {
1578 if (netdev_get_mtu(port
->netdev
, &mtu
)) {
1579 mtu
= ETH_PAYLOAD_MAX
;
1581 buf_size
= DP_NETDEV_HEADROOM
+ VLAN_ETH_HEADER_LEN
+ mtu
;
1583 ofpbuf_clear(&packet
);
1584 ofpbuf_reserve_with_tailroom(&packet
, DP_NETDEV_HEADROOM
,
1587 error
= netdev_rx_recv(port
->rx
, &packet
);
1589 struct pkt_metadata md
1590 = PKT_METADATA_INITIALIZER(port
->port_no
);
1591 dp_netdev_port_input(dp
, &packet
, &md
);
1593 received_anything
= true;
1594 } else if (error
!= EAGAIN
&& error
!= EOPNOTSUPP
) {
1595 static struct vlog_rate_limit rl
1596 = VLOG_RATE_LIMIT_INIT(1, 5);
1598 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1599 netdev_get_name(port
->netdev
),
1600 ovs_strerror(error
));
1605 if (!received_anything
) {
1610 if (received_anything
) {
1611 poll_immediate_wake();
1613 struct dp_netdev_port
*port
;
1615 HMAP_FOR_EACH (port
, node
, &f
->dp
->ports
)
1617 && port
->node
.hash
>= f
->min_hash
1618 && port
->node
.hash
<= f
->max_hash
) {
1619 netdev_rx_wait(port
->rx
);
1621 seq_wait(dp
->port_seq
, seq_read(dp
->port_seq
));
1622 latch_wait(&dp
->exit_latch
);
1624 ovs_rwlock_unlock(&dp
->port_rwlock
);
1628 ofpbuf_uninit(&packet
);
1636 dp_netdev_set_threads(struct dp_netdev
*dp
, int n
)
1640 if (n
== dp
->n_forwarders
) {
1644 /* Stop existing threads. */
1645 latch_set(&dp
->exit_latch
);
1646 for (i
= 0; i
< dp
->n_forwarders
; i
++) {
1647 struct dp_forwarder
*f
= &dp
->forwarders
[i
];
1649 xpthread_join(f
->thread
, NULL
);
1651 latch_poll(&dp
->exit_latch
);
1652 free(dp
->forwarders
);
1654 /* Start new threads. */
1655 dp
->forwarders
= xmalloc(n
* sizeof *dp
->forwarders
);
1656 dp
->n_forwarders
= n
;
1657 for (i
= 0; i
< n
; i
++) {
1658 struct dp_forwarder
*f
= &dp
->forwarders
[i
];
1661 f
->min_hash
= UINT32_MAX
/ n
* i
;
1662 f
->max_hash
= UINT32_MAX
/ n
* (i
+ 1) - 1;
1664 f
->max_hash
= UINT32_MAX
;
1666 xpthread_create(&f
->thread
, NULL
, dp_forwarder_main
, f
);
1671 dp_netdev_flow_used(struct dp_netdev_flow
*netdev_flow
,
1672 const struct ofpbuf
*packet
)
1673 OVS_REQUIRES(netdev_flow
->mutex
)
1675 netdev_flow
->used
= time_msec();
1676 netdev_flow
->packet_count
++;
1677 netdev_flow
->byte_count
+= packet
->size
;
1678 netdev_flow
->tcp_flags
|= packet_get_tcp_flags(packet
, &netdev_flow
->flow
);
1682 dp_netdev_port_input(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1683 struct pkt_metadata
*md
)
1684 OVS_REQ_RDLOCK(dp
->port_rwlock
)
1686 struct dp_netdev_flow
*netdev_flow
;
1689 if (packet
->size
< ETH_HEADER_LEN
) {
1692 flow_extract(packet
, md
->skb_priority
, md
->pkt_mark
, &md
->tunnel
,
1693 (union flow_in_port
*)&md
->in_port
, &key
);
1694 netdev_flow
= dp_netdev_lookup_flow(dp
, &key
);
1696 struct dp_netdev_actions
*actions
;
1698 ovs_mutex_lock(&netdev_flow
->mutex
);
1699 dp_netdev_flow_used(netdev_flow
, packet
);
1700 actions
= dp_netdev_actions_ref(netdev_flow
->actions
);
1701 ovs_mutex_unlock(&netdev_flow
->mutex
);
1703 dp_netdev_execute_actions(dp
, &key
, packet
, md
,
1704 actions
->actions
, actions
->size
);
1705 dp_netdev_actions_unref(actions
);
1706 ovsthread_counter_inc(dp
->n_hit
, 1);
1708 ovsthread_counter_inc(dp
->n_missed
, 1);
1709 dp_netdev_output_userspace(dp
, packet
, DPIF_UC_MISS
, &key
, NULL
);
1714 dp_netdev_output_userspace(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1715 int queue_no
, const struct flow
*flow
,
1716 const struct nlattr
*userdata
)
1717 OVS_EXCLUDED(dp
->queue_mutex
)
1719 struct dp_netdev_queue
*q
= &dp
->queues
[queue_no
];
1722 ovs_mutex_lock(&dp
->queue_mutex
);
1723 if (q
->head
- q
->tail
< MAX_QUEUE_LEN
) {
1724 struct dp_netdev_upcall
*u
= &q
->upcalls
[q
->head
++ & QUEUE_MASK
];
1725 struct dpif_upcall
*upcall
= &u
->upcall
;
1726 struct ofpbuf
*buf
= &u
->buf
;
1729 upcall
->type
= queue_no
;
1731 /* Allocate buffer big enough for everything. */
1732 buf_size
= ODPUTIL_FLOW_KEY_BYTES
;
1734 buf_size
+= NLA_ALIGN(userdata
->nla_len
);
1736 ofpbuf_init(buf
, buf_size
);
1739 odp_flow_key_from_flow(buf
, flow
, flow
->in_port
.odp_port
);
1740 upcall
->key
= buf
->data
;
1741 upcall
->key_len
= buf
->size
;
1745 upcall
->userdata
= ofpbuf_put(buf
, userdata
,
1746 NLA_ALIGN(userdata
->nla_len
));
1749 /* Steal packet data. */
1750 ovs_assert(packet
->source
== OFPBUF_MALLOC
);
1751 upcall
->packet
= *packet
;
1752 ofpbuf_use(packet
, NULL
, 0);
1754 seq_change(dp
->queue_seq
);
1758 ovsthread_counter_inc(dp
->n_lost
, 1);
1761 ovs_mutex_unlock(&dp
->queue_mutex
);
/* Context threaded through odp_execute_actions() to dp_execute_cb(). */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;       /* Datapath executing the actions. */
    const struct flow *key;     /* Extracted flow of the packet. */
};
1772 dp_execute_cb(void *aux_
, struct ofpbuf
*packet
,
1773 const struct pkt_metadata
*md OVS_UNUSED
,
1774 const struct nlattr
*a
, bool may_steal
)
1775 OVS_NO_THREAD_SAFETY_ANALYSIS
1777 struct dp_netdev_execute_aux
*aux
= aux_
;
1778 int type
= nl_attr_type(a
);
1779 struct dp_netdev_port
*p
;
1781 switch ((enum ovs_action_attr
)type
) {
1782 case OVS_ACTION_ATTR_OUTPUT
:
1783 p
= dp_netdev_lookup_port(aux
->dp
, u32_to_odp(nl_attr_get_u32(a
)));
1785 netdev_send(p
->netdev
, packet
);
1789 case OVS_ACTION_ATTR_USERSPACE
: {
1790 const struct nlattr
*userdata
;
1792 userdata
= nl_attr_find_nested(a
, OVS_USERSPACE_ATTR_USERDATA
);
1794 /* Make a copy if we are not allowed to steal the packet's data. */
1796 packet
= ofpbuf_clone_with_headroom(packet
, DP_NETDEV_HEADROOM
);
1798 dp_netdev_output_userspace(aux
->dp
, packet
, DPIF_UC_ACTION
, aux
->key
,
1801 ofpbuf_uninit(packet
);
1805 case OVS_ACTION_ATTR_PUSH_VLAN
:
1806 case OVS_ACTION_ATTR_POP_VLAN
:
1807 case OVS_ACTION_ATTR_PUSH_MPLS
:
1808 case OVS_ACTION_ATTR_POP_MPLS
:
1809 case OVS_ACTION_ATTR_SET
:
1810 case OVS_ACTION_ATTR_SAMPLE
:
1811 case OVS_ACTION_ATTR_UNSPEC
:
1812 case __OVS_ACTION_ATTR_MAX
:
1818 dp_netdev_execute_actions(struct dp_netdev
*dp
, const struct flow
*key
,
1819 struct ofpbuf
*packet
, struct pkt_metadata
*md
,
1820 const struct nlattr
*actions
, size_t actions_len
)
1821 OVS_REQ_RDLOCK(dp
->port_rwlock
)
1823 struct dp_netdev_execute_aux aux
= {dp
, key
};
1825 odp_execute_actions(&aux
, packet
, md
, actions
, actions_len
, dp_execute_cb
);
1828 const struct dpif_class dpif_netdev_class
= {
1830 dpif_netdev_enumerate
,
1831 dpif_netdev_port_open_type
,
1834 dpif_netdev_destroy
,
1837 dpif_netdev_get_stats
,
1838 dpif_netdev_port_add
,
1839 dpif_netdev_port_del
,
1840 dpif_netdev_port_query_by_number
,
1841 dpif_netdev_port_query_by_name
,
1842 NULL
, /* port_get_pid */
1843 dpif_netdev_port_dump_start
,
1844 dpif_netdev_port_dump_next
,
1845 dpif_netdev_port_dump_done
,
1846 dpif_netdev_port_poll
,
1847 dpif_netdev_port_poll_wait
,
1848 dpif_netdev_flow_get
,
1849 dpif_netdev_flow_put
,
1850 dpif_netdev_flow_del
,
1851 dpif_netdev_flow_flush
,
1852 dpif_netdev_flow_dump_start
,
1853 dpif_netdev_flow_dump_next
,
1854 dpif_netdev_flow_dump_done
,
1855 dpif_netdev_execute
,
1857 dpif_netdev_recv_set
,
1858 dpif_netdev_queue_to_priority
,
1860 dpif_netdev_recv_wait
,
1861 dpif_netdev_recv_purge
,
1865 dpif_dummy_change_port_number(struct unixctl_conn
*conn
, int argc OVS_UNUSED
,
1866 const char *argv
[], void *aux OVS_UNUSED
)
1868 struct dp_netdev_port
*port
;
1869 struct dp_netdev
*dp
;
1872 ovs_mutex_lock(&dp_netdev_mutex
);
1873 dp
= shash_find_data(&dp_netdevs
, argv
[1]);
1874 if (!dp
|| !dpif_netdev_class_is_dummy(dp
->class)) {
1875 ovs_mutex_unlock(&dp_netdev_mutex
);
1876 unixctl_command_reply_error(conn
, "unknown datapath or not a dummy");
1879 ovs_refcount_ref(&dp
->ref_cnt
);
1880 ovs_mutex_unlock(&dp_netdev_mutex
);
1882 ovs_rwlock_wrlock(&dp
->port_rwlock
);
1883 if (get_port_by_name(dp
, argv
[2], &port
)) {
1884 unixctl_command_reply_error(conn
, "unknown port");
1888 port_no
= u32_to_odp(atoi(argv
[3]));
1889 if (!port_no
|| port_no
== ODPP_NONE
) {
1890 unixctl_command_reply_error(conn
, "bad port number");
1893 if (dp_netdev_lookup_port(dp
, port_no
)) {
1894 unixctl_command_reply_error(conn
, "port number already in use");
1897 hmap_remove(&dp
->ports
, &port
->node
);
1898 port
->port_no
= port_no
;
1899 hmap_insert(&dp
->ports
, &port
->node
, hash_int(odp_to_u32(port_no
), 0));
1900 seq_change(dp
->port_seq
);
1901 unixctl_command_reply(conn
, NULL
);
1904 ovs_rwlock_unlock(&dp
->port_rwlock
);
1905 dp_netdev_unref(dp
);
1909 dpif_dummy_register__(const char *type
)
1911 struct dpif_class
*class;
1913 class = xmalloc(sizeof *class);
1914 *class = dpif_netdev_class
;
1915 class->type
= xstrdup(type
);
1916 dp_register_provider(class);
1920 dpif_dummy_register(bool override
)
1927 dp_enumerate_types(&types
);
1928 SSET_FOR_EACH (type
, &types
) {
1929 if (!dp_unregister_provider(type
)) {
1930 dpif_dummy_register__(type
);
1933 sset_destroy(&types
);
1936 dpif_dummy_register__("dummy");
1938 unixctl_command_register("dpif-dummy/change-port-number",
1939 "DP PORT NEW-NUMBER",
1940 3, 3, dpif_dummy_change_port_number
, NULL
);