2 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <netinet/in.h>
26 #include <sys/socket.h>
31 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "dynamic-string.h"
46 #include "ofp-print.h"
49 #include "poll-loop.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_netdev
);
57 /* Configuration parameters. */
58 enum { MAX_PORTS
= 256 }; /* Maximum number of ports. */
59 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
61 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
62 * headers to be aligned on a 4-byte boundary. */
63 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
66 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
67 enum { MAX_QUEUE_LEN
= 128 }; /* Maximum number of packets per queue. */
68 enum { QUEUE_MASK
= MAX_QUEUE_LEN
- 1 };
69 BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN
));
/* A fixed-size ring buffer of upcalls awaiting dpif_recv().
 * 'head' and 'tail' are free-running counters; consumers index
 * 'upcalls' with (counter & QUEUE_MASK) -- see dp_netdev_purge_queues().
 * NOTE(review): this listing elides some original lines (e.g. the
 * closing "};"), so the struct appears incomplete here. */
71 struct dp_netdev_queue
{
72 struct dpif_upcall
*upcalls
[MAX_QUEUE_LEN
];
73 unsigned int head
, tail
;
76 /* Datapath based on the network device interface from netdev.h. */
78 const struct dpif_class
*class;
83 bool drop_frags
; /* Drop all IP fragments, if true. */
84 struct dp_netdev_queue queues
[N_QUEUES
];
85 struct hmap flow_table
; /* Flow table. */
88 long long int n_frags
; /* Number of dropped IP fragments. */
89 long long int n_hit
; /* Number of flow table matches. */
90 long long int n_missed
; /* Number of flow table misses. */
91 long long int n_lost
; /* Number of misses not passed to client. */
94 struct dp_netdev_port
*ports
[MAX_PORTS
];
95 struct list port_list
;
99 /* A port in a netdev-based datapath. */
100 struct dp_netdev_port
{
101 int port_no
; /* Index into dp_netdev's 'ports'. */
102 struct list node
; /* Element in dp_netdev's 'port_list'. */
103 struct netdev
*netdev
;
104 bool internal
; /* Internal port? */
107 /* A flow in dp_netdev's 'flow_table'. */
108 struct dp_netdev_flow
{
109 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
113 long long int used
; /* Last used time, in monotonic msecs. */
114 long long int packet_count
; /* Number of packets matched. */
115 long long int byte_count
; /* Number of bytes matched. */
116 ovs_be16 tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
119 struct nlattr
*actions
;
123 /* Interface to netdev-based datapath. */
126 struct dp_netdev
*dp
;
128 unsigned int dp_serial
;
131 /* All netdev-based datapaths. */
132 static struct shash dp_netdevs
= SHASH_INITIALIZER(&dp_netdevs
);
134 /* Maximum port MTU seen so far. */
135 static int max_mtu
= ETH_PAYLOAD_MAX
;
137 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
138 struct dp_netdev_port
**portp
);
139 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
140 struct dp_netdev_port
**portp
);
141 static void dp_netdev_free(struct dp_netdev
*);
142 static void dp_netdev_flow_flush(struct dp_netdev
*);
143 static int do_add_port(struct dp_netdev
*, const char *devname
,
144 const char *type
, uint16_t port_no
);
145 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
146 static int dpif_netdev_open(const struct dpif_class
*, const char *name
,
147 bool create
, struct dpif
**);
148 static int dp_netdev_output_userspace(struct dp_netdev
*, const struct ofpbuf
*,
149 int queue_no
, const struct flow
*,
151 static int dp_netdev_execute_actions(struct dp_netdev
*,
152 struct ofpbuf
*, struct flow
*,
153 const struct nlattr
*actions
,
156 static struct dpif_class dpif_dummy_class
;
/* Downcasts 'dpif' to its netdev-provider wrapper, struct dpif_netdev.
 * Asserts that 'dpif' really belongs to this provider by checking that
 * its class's ->open callback is dpif_netdev_open. */
158 static struct dpif_netdev
*
159 dpif_netdev_cast(const struct dpif
*dpif
)
161 assert(dpif
->dpif_class
->open
== dpif_netdev_open
);
162 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
/* Returns the struct dp_netdev datapath that backs 'dpif'. */
165 static struct dp_netdev
*
166 get_dp_netdev(const struct dpif
*dpif
)
168 return dpif_netdev_cast(dpif
)->dp
;
/* Allocates and initializes a dpif handle onto datapath 'dp'.  The
 * NetFlow engine type/ID pair passed to dpif_init() is derived from a
 * hash of the datapath name so it is stable across runs.  'dp_serial'
 * is snapshotted so dpif_netdev_port_poll() can later detect port
 * changes.  (Return of the new handle is elided in this listing.) */
172 create_dpif_netdev(struct dp_netdev
*dp
)
174 uint16_t netflow_id
= hash_string(dp
->name
, 0);
175 struct dpif_netdev
*dpif
;
179 dpif
= xmalloc(sizeof *dpif
);
180 dpif_init(&dpif
->dpif
, dp
->class, dp
->name
, netflow_id
>> 8, netflow_id
);
182 dpif
->listen_mask
= 0;
183 dpif
->dp_serial
= dp
->serial
;
/* Creates a new datapath named 'name' with class 'class': zero-
 * initializes the dp_netdev, resets every upcall queue, initializes
 * the flow table and port list, adds the local "internal" port at
 * ODPP_LOCAL, and registers the datapath in the global 'dp_netdevs'
 * shash.  Presumably stores the result through '*dpp' on success --
 * that line and the error-handling branches are elided in this
 * listing. */
189 create_dp_netdev(const char *name
, const struct dpif_class
*class,
190 struct dp_netdev
**dpp
)
192 struct dp_netdev
*dp
;
196 dp
= xzalloc(sizeof *dp
);
198 dp
->name
= xstrdup(name
);
200 dp
->drop_frags
= false;
201 for (i
= 0; i
< N_QUEUES
; i
++) {
202 dp
->queues
[i
].head
= dp
->queues
[i
].tail
= 0;
204 hmap_init(&dp
->flow_table
);
205 list_init(&dp
->port_list
);
206 error
= do_add_port(dp
, name
, "internal", ODPP_LOCAL
);
212 shash_add(&dp_netdevs
, name
, dp
);
/* dpif ->open callback: looks up an existing datapath named 'name' in
 * the global 'dp_netdevs' shash, creating it via create_dp_netdev()
 * when absent; rejects a datapath whose class does not match 'class';
 * on success hands back a new dpif handle through '*dpifp'.
 * NOTE(review): the create/already-exists error branches are elided in
 * this listing. */
219 dpif_netdev_open(const struct dpif_class
*class, const char *name
,
220 bool create
, struct dpif
**dpifp
)
222 struct dp_netdev
*dp
;
224 dp
= shash_find_data(&dp_netdevs
, name
);
229 int error
= create_dp_netdev(name
, class, &dp
);
236 if (dp
->class != class) {
243 *dpifp
= create_dpif_netdev(dp
);
/* Discards every queued upcall in all of 'dp''s receive queues,
 * freeing each upcall's packet buffer.  Advancing 'tail' with
 * (q->tail++ & QUEUE_MASK) relies on MAX_QUEUE_LEN being a power of
 * two (asserted near the top of this file). */
248 dp_netdev_purge_queues(struct dp_netdev
*dp
)
252 for (i
= 0; i
< N_QUEUES
; i
++) {
253 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
255 while (q
->tail
!= q
->head
) {
256 struct dpif_upcall
*upcall
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
258 ofpbuf_delete(upcall
->packet
);
/* Tears down datapath 'dp': flushes all flows, deletes every port
 * (using the _SAFE iterator because do_del_port() unlinks the node),
 * purges queued upcalls, and destroys the flow table.
 * NOTE(review): freeing of 'dp' itself and its name appear in lines
 * elided from this listing. */
265 dp_netdev_free(struct dp_netdev
*dp
)
267 struct dp_netdev_port
*port
, *next
;
269 dp_netdev_flow_flush(dp
);
270 LIST_FOR_EACH_SAFE (port
, next
, node
, &dp
->port_list
) {
271 do_del_port(dp
, port
->port_no
);
273 dp_netdev_purge_queues(dp
);
274 hmap_destroy(&dp
->flow_table
);
/* dpif ->close callback: drops one reference on the underlying
 * datapath; when the last reference goes away and the datapath was
 * marked destroyed (see dpif_netdev_destroy), removes it from the
 * global 'dp_netdevs' table.  Final freeing is in lines elided from
 * this listing. */
280 dpif_netdev_close(struct dpif
*dpif
)
282 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
283 assert(dp
->open_cnt
> 0);
284 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
285 shash_find_and_delete(&dp_netdevs
, dp
->name
);
/* dpif ->destroy callback: only marks the datapath destroyed; actual
 * teardown is deferred until the last open handle is closed
 * (see dpif_netdev_close). */
292 dpif_netdev_destroy(struct dpif
*dpif
)
294 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
295 dp
->destroyed
= true;
/* dpif ->get_stats callback: zeroes '*stats', then reports the current
 * flow-table size plus the datapath's cumulative fragment-drop, hit,
 * miss, and lost counters. */
300 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
302 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
303 memset(stats
, 0, sizeof *stats
);
304 stats
->n_flows
= hmap_count(&dp
->flow_table
);
305 stats
->n_frags
= dp
->n_frags
;
306 stats
->n_hit
= dp
->n_hit
;
307 stats
->n_missed
= dp
->n_missed
;
308 stats
->n_lost
= dp
->n_lost
;
/* dpif ->get_drop_frags callback: reports whether this datapath drops
 * all IP fragments. */
313 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
315 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
316 *drop_fragsp
= dp
->drop_frags
;
/* dpif ->set_drop_frags callback: configures whether this datapath
 * drops all IP fragments (checked in dp_netdev_port_input). */
321 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
323 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
324 dp
->drop_frags
= drop_frags
;
/* Adds a port named 'devname' of the given 'type' ("", "system", or
 * "internal") to 'dp' at index 'port_no': validates the type, opens
 * the netdev (special-cased for the dummy class and internal ports),
 * arranges to receive packets on it, enables promiscuous mode, then
 * links the new dp_netdev_port into both the port list and the
 * 'ports' array.  Also raises the global 'max_mtu' high-water mark
 * used to size the receive buffer in dpif_netdev_run().
 * NOTE(review): several error-return lines are elided from this
 * listing; also the netdev_listen() failure path logs strerror(errno)
 * although the failure code is held in 'error' -- confirm against the
 * full source whether that is intentional. */
329 do_add_port(struct dp_netdev
*dp
, const char *devname
, const char *type
,
332 struct dp_netdev_port
*port
;
333 struct netdev
*netdev
;
338 /* XXX reject devices already in some dp_netdev. */
339 if (type
[0] == '\0' || !strcmp(type
, "system")) {
341 } else if (!strcmp(type
, "internal")) {
344 VLOG_WARN("%s: unsupported port type %s", devname
, type
);
348 /* Open and validate network device. */
349 if (dp
->class == &dpif_dummy_class
) {
351 } else if (internal
) {
355 error
= netdev_open(devname
, type
, &netdev
);
359 /* XXX reject loopback devices */
360 /* XXX reject non-Ethernet devices */
362 error
= netdev_listen(netdev
);
364 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
365 devname
, strerror(errno
));
366 netdev_close(netdev
);
370 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
372 netdev_close(netdev
);
376 port
= xmalloc(sizeof *port
);
377 port
->port_no
= port_no
;
378 port
->netdev
= netdev
;
379 port
->internal
= internal
;
381 netdev_get_mtu(netdev
, &mtu
);
382 if (mtu
!= INT_MAX
&& mtu
> max_mtu
) {
386 list_push_back(&dp
->port_list
, &port
->node
);
387 dp
->ports
[port_no
] = port
;
/* dpif ->port_add callback: scans the 'ports' array for the first
 * free slot and installs 'netdev' there via do_add_port().
 * NOTE(review): the no-free-slot return path is elided from this
 * listing. */
394 dpif_netdev_port_add(struct dpif
*dpif
, struct netdev
*netdev
,
397 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
400 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
401 if (!dp
->ports
[port_no
]) {
403 return do_add_port(dp
, netdev_get_name(netdev
),
404 netdev_get_type(netdev
), port_no
);
/* dpif ->port_del callback: refuses to delete the local port
 * (ODPP_LOCAL, returning EINVAL), otherwise delegates to
 * do_del_port(). */
411 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
413 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
414 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
/* Returns true if 'port_no' can index the datapath's 'ports' array. */
418 is_valid_port_number(uint16_t port_no
)
420 return port_no
< MAX_PORTS
;
/* Looks up port 'port_no' in 'dp', storing the result (possibly NULL)
 * in '*portp'.  Returns 0 on success, ENOENT if the slot is empty.
 * The return value for an out-of-range 'port_no' is in a line elided
 * from this listing. */
424 get_port_by_number(struct dp_netdev
*dp
,
425 uint16_t port_no
, struct dp_netdev_port
**portp
)
427 if (!is_valid_port_number(port_no
)) {
431 *portp
= dp
->ports
[port_no
];
432 return *portp
? 0 : ENOENT
;
/* Finds the port whose netdev is named 'devname' by walking 'dp''s
 * port list.  The store into '*portp' on a match and the not-found
 * return are in lines elided from this listing. */
437 get_port_by_name(struct dp_netdev
*dp
,
438 const char *devname
, struct dp_netdev_port
**portp
)
440 struct dp_netdev_port
*port
;
442 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
443 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
/* Removes port 'port_no' from 'dp': unlinks it from both the port
 * list and the 'ports' array, then closes its netdev.  The device
 * name is duplicated before the close, presumably for use by elided
 * lines (e.g. a change notification) -- confirm against the full
 * source.  Freeing of 'port' itself is also elided here. */
452 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
454 struct dp_netdev_port
*port
;
458 error
= get_port_by_number(dp
, port_no
, &port
);
463 list_remove(&port
->node
);
464 dp
->ports
[port
->port_no
] = NULL
;
467 name
= xstrdup(netdev_get_name(port
->netdev
));
468 netdev_close(port
->netdev
);
477 answer_port_query(const struct dp_netdev_port
*port
,
478 struct dpif_port
*dpif_port
)
480 dpif_port
->name
= xstrdup(netdev_get_name(port
->netdev
));
481 dpif_port
->type
= xstrdup(port
->internal
? "internal" : "system");
482 dpif_port
->port_no
= port
->port_no
;
486 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
487 struct dpif_port
*dpif_port
)
489 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
490 struct dp_netdev_port
*port
;
493 error
= get_port_by_number(dp
, port_no
, &port
);
495 answer_port_query(port
, dpif_port
);
501 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
502 struct dpif_port
*dpif_port
)
504 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
505 struct dp_netdev_port
*port
;
508 error
= get_port_by_name(dp
, devname
, &port
);
510 answer_port_query(port
, dpif_port
);
516 dpif_netdev_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
522 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
524 hmap_remove(&dp
->flow_table
, &flow
->node
);
530 dp_netdev_flow_flush(struct dp_netdev
*dp
)
532 struct dp_netdev_flow
*flow
, *next
;
534 HMAP_FOR_EACH_SAFE (flow
, next
, node
, &dp
->flow_table
) {
535 dp_netdev_free_flow(dp
, flow
);
540 dpif_netdev_flow_flush(struct dpif
*dpif
)
542 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
543 dp_netdev_flow_flush(dp
);
547 struct dp_netdev_port_state
{
553 dpif_netdev_port_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
555 *statep
= xzalloc(sizeof(struct dp_netdev_port_state
));
560 dpif_netdev_port_dump_next(const struct dpif
*dpif
, void *state_
,
561 struct dpif_port
*dpif_port
)
563 struct dp_netdev_port_state
*state
= state_
;
564 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
567 for (port_no
= state
->port_no
; port_no
< MAX_PORTS
; port_no
++) {
568 struct dp_netdev_port
*port
= dp
->ports
[port_no
];
571 state
->name
= xstrdup(netdev_get_name(port
->netdev
));
572 dpif_port
->name
= state
->name
;
573 dpif_port
->type
= port
->internal
? "internal" : "system";
574 dpif_port
->port_no
= port
->port_no
;
575 state
->port_no
= port_no
+ 1;
583 dpif_netdev_port_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
585 struct dp_netdev_port_state
*state
= state_
;
592 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
594 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
595 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
596 dpif
->dp_serial
= dpif
->dp
->serial
;
604 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
606 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
607 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
608 poll_immediate_wake();
/* Finds the flow in 'dp''s flow table that exactly matches 'key',
 * hashing with flow_hash(key, 0) -- the same hash used at insertion
 * in add_flow() -- and comparing with flow_equal().  The found /
 * not-found return statements are in lines elided from this
 * listing. */
612 static struct dp_netdev_flow
*
613 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const struct flow
*key
)
615 struct dp_netdev_flow
*flow
;
617 HMAP_FOR_EACH_WITH_HASH (flow
, node
, flow_hash(key
, 0), &dp
->flow_table
) {
618 if (flow_equal(&flow
->key
, key
)) {
/* Copies 'flow''s accumulated statistics -- packet and byte counts,
 * last-used timestamp, and TCP flags distilled from the OR'd tcp_ctl
 * values -- into '*stats'. */
626 get_dpif_flow_stats(struct dp_netdev_flow
*flow
, struct dpif_flow_stats
*stats
)
628 stats
->n_packets
= flow
->packet_count
;
629 stats
->n_bytes
= flow
->byte_count
;
630 stats
->used
= flow
->used
;
631 stats
->tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
635 dpif_netdev_flow_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
638 if (odp_flow_key_to_flow(key
, key_len
, flow
)) {
639 /* This should not happen: it indicates that odp_flow_key_from_flow()
640 * and odp_flow_key_to_flow() disagree on the acceptable form of a
641 * flow. Log the problem as an error, with enough details to enable
643 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
645 if (!VLOG_DROP_ERR(&rl
)) {
649 odp_flow_key_format(key
, key_len
, &s
);
650 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s
));
661 dpif_netdev_flow_get(const struct dpif
*dpif
,
662 const struct nlattr
*nl_key
, size_t nl_key_len
,
663 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
665 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
666 struct dp_netdev_flow
*flow
;
670 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
675 flow
= dp_netdev_lookup_flow(dp
, &key
);
681 get_dpif_flow_stats(flow
, stats
);
684 *actionsp
= ofpbuf_clone_data(flow
->actions
, flow
->actions_len
);
690 dpif_netdev_validate_actions(const struct nlattr
*actions
,
691 size_t actions_len
, bool *mutates
)
693 const struct nlattr
*a
;
697 NL_ATTR_FOR_EACH (a
, left
, actions
, actions_len
) {
698 uint16_t type
= nl_attr_type(a
);
699 int len
= odp_action_len(type
);
701 if (len
!= nl_attr_get_size(a
)) {
706 case ODP_ACTION_ATTR_OUTPUT
:
707 if (nl_attr_get_u32(a
) >= MAX_PORTS
) {
712 case ODP_ACTION_ATTR_USERSPACE
:
715 case ODP_ACTION_ATTR_SET_DL_TCI
:
717 if (nl_attr_get_be16(a
) & htons(VLAN_CFI
)) {
722 case ODP_ACTION_ATTR_SET_NW_TOS
:
724 if (nl_attr_get_u8(a
) & IP_ECN_MASK
) {
729 case ODP_ACTION_ATTR_STRIP_VLAN
:
730 case ODP_ACTION_ATTR_SET_DL_SRC
:
731 case ODP_ACTION_ATTR_SET_DL_DST
:
732 case ODP_ACTION_ATTR_SET_NW_SRC
:
733 case ODP_ACTION_ATTR_SET_NW_DST
:
734 case ODP_ACTION_ATTR_SET_TP_SRC
:
735 case ODP_ACTION_ATTR_SET_TP_DST
:
739 case ODP_ACTION_ATTR_SET_TUNNEL
:
740 case ODP_ACTION_ATTR_SET_PRIORITY
:
741 case ODP_ACTION_ATTR_POP_PRIORITY
:
750 set_flow_actions(struct dp_netdev_flow
*flow
,
751 const struct nlattr
*actions
, size_t actions_len
)
756 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
761 flow
->actions
= xrealloc(flow
->actions
, actions_len
);
762 flow
->actions_len
= actions_len
;
763 memcpy(flow
->actions
, actions
, actions_len
);
768 add_flow(struct dpif
*dpif
, const struct flow
*key
,
769 const struct nlattr
*actions
, size_t actions_len
)
771 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
772 struct dp_netdev_flow
*flow
;
775 flow
= xzalloc(sizeof *flow
);
778 error
= set_flow_actions(flow
, actions
, actions_len
);
784 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
789 clear_stats(struct dp_netdev_flow
*flow
)
792 flow
->packet_count
= 0;
793 flow
->byte_count
= 0;
798 dpif_netdev_flow_put(struct dpif
*dpif
, enum dpif_flow_put_flags flags
,
799 const struct nlattr
*nl_key
, size_t nl_key_len
,
800 const struct nlattr
*actions
, size_t actions_len
,
801 struct dpif_flow_stats
*stats
)
803 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
804 struct dp_netdev_flow
*flow
;
808 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
813 flow
= dp_netdev_lookup_flow(dp
, &key
);
815 if (flags
& DPIF_FP_CREATE
) {
816 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
818 memset(stats
, 0, sizeof *stats
);
820 return add_flow(dpif
, &key
, actions
, actions_len
);
828 if (flags
& DPIF_FP_MODIFY
) {
829 int error
= set_flow_actions(flow
, actions
, actions_len
);
832 get_dpif_flow_stats(flow
, stats
);
834 if (flags
& DPIF_FP_ZERO_STATS
) {
846 dpif_netdev_flow_del(struct dpif
*dpif
,
847 const struct nlattr
*nl_key
, size_t nl_key_len
,
848 struct dpif_flow_stats
*stats
)
850 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
851 struct dp_netdev_flow
*flow
;
855 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
860 flow
= dp_netdev_lookup_flow(dp
, &key
);
863 get_dpif_flow_stats(flow
, stats
);
865 dp_netdev_free_flow(dp
, flow
);
872 struct dp_netdev_flow_state
{
875 struct nlattr
*actions
;
876 struct odputil_keybuf keybuf
;
877 struct dpif_flow_stats stats
;
881 dpif_netdev_flow_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
883 struct dp_netdev_flow_state
*state
;
885 *statep
= state
= xmalloc(sizeof *state
);
888 state
->actions
= NULL
;
893 dpif_netdev_flow_dump_next(const struct dpif
*dpif
, void *state_
,
894 const struct nlattr
**key
, size_t *key_len
,
895 const struct nlattr
**actions
, size_t *actions_len
,
896 const struct dpif_flow_stats
**stats
)
898 struct dp_netdev_flow_state
*state
= state_
;
899 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
900 struct dp_netdev_flow
*flow
;
901 struct hmap_node
*node
;
903 node
= hmap_at_position(&dp
->flow_table
, &state
->bucket
, &state
->offset
);
908 flow
= CONTAINER_OF(node
, struct dp_netdev_flow
, node
);
913 ofpbuf_use_stack(&buf
, &state
->keybuf
, sizeof state
->keybuf
);
914 odp_flow_key_from_flow(&buf
, &flow
->key
);
921 free(state
->actions
);
922 state
->actions
= xmemdup(flow
->actions
, flow
->actions_len
);
924 *actions
= state
->actions
;
925 *actions_len
= flow
->actions_len
;
929 get_dpif_flow_stats(flow
, &state
->stats
);
930 *stats
= &state
->stats
;
937 dpif_netdev_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
939 struct dp_netdev_flow_state
*state
= state_
;
941 free(state
->actions
);
947 dpif_netdev_execute(struct dpif
*dpif
,
948 const struct nlattr
*key_attrs
, size_t key_len
,
949 const struct nlattr
*actions
, size_t actions_len
,
950 const struct ofpbuf
*packet
)
952 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
958 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
962 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
968 /* We need a deep copy of 'packet' since we're going to modify its
970 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
971 ofpbuf_reserve(©
, DP_NETDEV_HEADROOM
);
972 ofpbuf_put(©
, packet
->data
, packet
->size
);
974 /* We still need a shallow copy of 'packet', even though we won't
975 * modify its data, because flow_extract() modifies packet->l2, etc.
976 * We could probably get away with modifying those but it's more polite
981 flow_extract(©
, 0, -1, &key
);
982 dpif_netdev_flow_from_nlattrs(key_attrs
, key_len
, &key
);
984 error
= dp_netdev_execute_actions(dp
, ©
, &key
, actions
, actions_len
);
986 ofpbuf_uninit(©
);
992 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
994 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
995 *listen_mask
= dpif_netdev
->listen_mask
;
1000 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
1002 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1003 dpif_netdev
->listen_mask
= listen_mask
;
1007 static struct dp_netdev_queue
*
1008 find_nonempty_queue(struct dpif
*dpif
)
1010 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1011 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1012 int mask
= dpif_netdev
->listen_mask
;
1015 for (i
= 0; i
< N_QUEUES
; i
++) {
1016 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
1017 if (q
->head
!= q
->tail
&& mask
& (1u << i
)) {
1025 dpif_netdev_recv(struct dpif
*dpif
, struct dpif_upcall
*upcall
)
1027 struct dp_netdev_queue
*q
= find_nonempty_queue(dpif
);
1029 struct dpif_upcall
*u
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
1040 dpif_netdev_recv_wait(struct dpif
*dpif
)
1042 if (find_nonempty_queue(dpif
)) {
1043 poll_immediate_wake();
1045 /* No messages ready to be received, and dp_wait() will ensure that we
1046 * wake up to queue new messages, so there is nothing to do. */
1051 dpif_netdev_recv_purge(struct dpif
*dpif
)
1053 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1054 dp_netdev_purge_queues(dpif_netdev
->dp
);
1058 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, struct flow
*key
,
1059 const struct ofpbuf
*packet
)
1061 flow
->used
= time_msec();
1062 flow
->packet_count
++;
1063 flow
->byte_count
+= packet
->size
;
1064 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
1065 struct tcp_header
*th
= packet
->l4
;
1066 flow
->tcp_ctl
|= th
->tcp_ctl
;
1071 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1072 struct ofpbuf
*packet
)
1074 struct dp_netdev_flow
*flow
;
1077 if (packet
->size
< ETH_HEADER_LEN
) {
1080 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
1085 flow
= dp_netdev_lookup_flow(dp
, &key
);
1087 dp_netdev_flow_used(flow
, &key
, packet
);
1088 dp_netdev_execute_actions(dp
, packet
, &key
,
1089 flow
->actions
, flow
->actions_len
);
1093 dp_netdev_output_userspace(dp
, packet
, DPIF_UC_MISS
, &key
, 0);
1098 dpif_netdev_run(struct dpif
*dpif
)
1100 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1101 struct dp_netdev_port
*port
;
1102 struct ofpbuf packet
;
1104 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ VLAN_ETH_HEADER_LEN
+ max_mtu
);
1106 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1109 /* Reset packet contents. */
1110 ofpbuf_clear(&packet
);
1111 ofpbuf_reserve(&packet
, DP_NETDEV_HEADROOM
);
1113 error
= netdev_recv(port
->netdev
, &packet
);
1115 dp_netdev_port_input(dp
, port
, &packet
);
1116 } else if (error
!= EAGAIN
&& error
!= EOPNOTSUPP
) {
1117 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1118 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1119 netdev_get_name(port
->netdev
), strerror(error
));
1122 ofpbuf_uninit(&packet
);
1126 dpif_netdev_wait(struct dpif
*dpif
)
1128 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1129 struct dp_netdev_port
*port
;
1131 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1132 netdev_recv_wait(port
->netdev
);
1137 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1139 struct vlan_eth_header
*veh
= packet
->l2
;
1140 if (packet
->size
>= sizeof *veh
1141 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1142 struct eth_header tmp
;
1144 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1145 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1146 tmp
.eth_type
= veh
->veth_next_type
;
1148 ofpbuf_pull(packet
, VLAN_HEADER_LEN
);
1149 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1150 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1155 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1157 struct eth_header
*eh
= packet
->l2
;
1158 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1162 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1164 struct eth_header
*eh
= packet
->l2
;
1165 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1169 is_ip(const struct ofpbuf
*packet
, const struct flow
*key
)
1171 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
1175 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const struct flow
*key
,
1176 const struct nlattr
*a
)
1178 if (is_ip(packet
, key
)) {
1179 struct ip_header
*nh
= packet
->l3
;
1180 ovs_be32 ip
= nl_attr_get_be32(a
);
1181 uint16_t type
= nl_attr_type(a
);
1184 field
= type
== ODP_ACTION_ATTR_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1185 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1186 struct tcp_header
*th
= packet
->l4
;
1187 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, ip
);
1188 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1189 struct udp_header
*uh
= packet
->l4
;
1191 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, ip
);
1192 if (!uh
->udp_csum
) {
1193 uh
->udp_csum
= htons(0xffff);
1197 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, ip
);
1203 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const struct flow
*key
,
1206 if (is_ip(packet
, key
)) {
1207 struct ip_header
*nh
= packet
->l3
;
1208 uint8_t *field
= &nh
->ip_tos
;
1210 /* Set the DSCP bits and preserve the ECN bits. */
1211 uint8_t new = nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1213 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1214 htons((uint16_t) new));
1220 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const struct flow
*key
,
1221 const struct nlattr
*a
)
1223 if (is_ip(packet
, key
)) {
1224 uint16_t type
= nl_attr_type(a
);
1225 ovs_be16 port
= nl_attr_get_be16(a
);
1228 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1229 struct tcp_header
*th
= packet
->l4
;
1230 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1231 ? &th
->tcp_src
: &th
->tcp_dst
);
1232 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, port
);
1234 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1235 struct udp_header
*uh
= packet
->l4
;
1236 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1237 ? &uh
->udp_src
: &uh
->udp_dst
);
1238 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, port
);
1247 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1250 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1252 netdev_send(p
->netdev
, packet
);
/* Queues 'packet' to userspace on queue 'queue_no' (DPIF_UC_MISS or
 * DPIF_UC_ACTION, also reused as the upcall type below): if the queue
 * already holds MAX_QUEUE_LEN entries it is full (the drop path is in
 * elided lines); otherwise builds one ofpbuf holding the flattened
 * flow key followed -- after a 2-byte reserve for alignment -- by a
 * copy of the packet data, wraps it in a dpif_upcall whose 'key'
 * points back into the buffer's base, and appends it at 'head'. */
1257 dp_netdev_output_userspace(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1258 int queue_no
, const struct flow
*flow
, uint64_t arg
)
1260 struct dp_netdev_queue
*q
= &dp
->queues
[queue_no
];
1261 struct dpif_upcall
*upcall
;
1265 if (q
->head
- q
->tail
>= MAX_QUEUE_LEN
) {
1270 buf
= ofpbuf_new(ODPUTIL_FLOW_KEY_BYTES
+ 2 + packet
->size
);
1271 odp_flow_key_from_flow(buf
, flow
);
1272 key_len
= buf
->size
;
1273 ofpbuf_pull(buf
, key_len
);
1274 ofpbuf_reserve(buf
, 2);
1275 ofpbuf_put(buf
, packet
->data
, packet
->size
);
1277 upcall
= xzalloc(sizeof *upcall
);
1278 upcall
->type
= queue_no
;
1279 upcall
->packet
= buf
;
1280 upcall
->key
= buf
->base
;
1281 upcall
->key_len
= key_len
;
1282 upcall
->userdata
= arg
;
1284 q
->upcalls
[q
->head
++ & QUEUE_MASK
] = upcall
;
/* Applies a Netlink-encoded list of datapath actions to 'packet' in
 * order, dispatching on each attribute's type: output to a port, send
 * an upcall to userspace, set/strip the VLAN TCI, rewrite Ethernet
 * addresses, rewrite IP addresses/TOS, or rewrite TCP/UDP ports.
 * NOTE(review): the break statements between cases, the remaining
 * action cases, and the return are in lines elided from this
 * listing. */
1290 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1291 struct ofpbuf
*packet
, struct flow
*key
,
1292 const struct nlattr
*actions
,
1295 const struct nlattr
*a
;
1298 NL_ATTR_FOR_EACH_UNSAFE (a
, left
, actions
, actions_len
) {
1299 switch (nl_attr_type(a
)) {
1300 case ODP_ACTION_ATTR_OUTPUT
:
1301 dp_netdev_output_port(dp
, packet
, nl_attr_get_u32(a
));
1304 case ODP_ACTION_ATTR_USERSPACE
:
1305 dp_netdev_output_userspace(dp
, packet
, DPIF_UC_ACTION
,
1306 key
, nl_attr_get_u64(a
));
1309 case ODP_ACTION_ATTR_SET_DL_TCI
:
1310 eth_set_vlan_tci(packet
, nl_attr_get_be16(a
));
1313 case ODP_ACTION_ATTR_STRIP_VLAN
:
1314 dp_netdev_strip_vlan(packet
);
1317 case ODP_ACTION_ATTR_SET_DL_SRC
:
1318 dp_netdev_set_dl_src(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1321 case ODP_ACTION_ATTR_SET_DL_DST
:
1322 dp_netdev_set_dl_dst(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1325 case ODP_ACTION_ATTR_SET_NW_SRC
:
1326 case ODP_ACTION_ATTR_SET_NW_DST
:
1327 dp_netdev_set_nw_addr(packet
, key
, a
);
1330 case ODP_ACTION_ATTR_SET_NW_TOS
:
1331 dp_netdev_set_nw_tos(packet
, key
, nl_attr_get_u8(a
));
1334 case ODP_ACTION_ATTR_SET_TP_SRC
:
1335 case ODP_ACTION_ATTR_SET_TP_DST
:
1336 dp_netdev_set_tp_port(packet
, key
, a
);
1343 const struct dpif_class dpif_netdev_class
= {
1345 NULL
, /* enumerate */
1348 dpif_netdev_destroy
,
1351 dpif_netdev_get_stats
,
1352 dpif_netdev_get_drop_frags
,
1353 dpif_netdev_set_drop_frags
,
1354 dpif_netdev_port_add
,
1355 dpif_netdev_port_del
,
1356 dpif_netdev_port_query_by_number
,
1357 dpif_netdev_port_query_by_name
,
1358 dpif_netdev_get_max_ports
,
1359 dpif_netdev_port_dump_start
,
1360 dpif_netdev_port_dump_next
,
1361 dpif_netdev_port_dump_done
,
1362 dpif_netdev_port_poll
,
1363 dpif_netdev_port_poll_wait
,
1364 dpif_netdev_flow_get
,
1365 dpif_netdev_flow_put
,
1366 dpif_netdev_flow_del
,
1367 dpif_netdev_flow_flush
,
1368 dpif_netdev_flow_dump_start
,
1369 dpif_netdev_flow_dump_next
,
1370 dpif_netdev_flow_dump_done
,
1371 dpif_netdev_execute
,
1372 dpif_netdev_recv_get_mask
,
1373 dpif_netdev_recv_set_mask
,
1374 NULL
, /* get_sflow_probability */
1375 NULL
, /* set_sflow_probability */
1376 NULL
, /* queue_to_priority */
1378 dpif_netdev_recv_wait
,
1379 dpif_netdev_recv_purge
,
1383 dpif_dummy_register(void)
1385 if (!dpif_dummy_class
.type
) {
1386 dpif_dummy_class
= dpif_netdev_class
;
1387 dpif_dummy_class
.type
= "dummy";
1388 dp_register_provider(&dpif_dummy_class
);