2 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <netinet/in.h>
26 #include <sys/socket.h>
31 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "dynamic-string.h"
46 #include "ofp-print.h"
49 #include "poll-loop.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_netdev
);
57 /* Configuration parameters. */
58 enum { MAX_PORTS
= 256 }; /* Maximum number of ports. */
59 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
61 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
62 * headers to be aligned on a 4-byte boundary. */
63 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
66 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
67 enum { MAX_QUEUE_LEN
= 128 }; /* Maximum number of packets per queue. */
68 enum { QUEUE_MASK
= MAX_QUEUE_LEN
- 1 };
69 BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN
));
71 struct dp_netdev_queue
{
72 struct dpif_upcall
*upcalls
[MAX_QUEUE_LEN
];
73 unsigned int head
, tail
;
76 /* Datapath based on the network device interface from netdev.h. */
78 const struct dpif_class
*class;
83 bool drop_frags
; /* Drop all IP fragments, if true. */
84 struct dp_netdev_queue queues
[N_QUEUES
];
85 struct hmap flow_table
; /* Flow table. */
88 long long int n_frags
; /* Number of dropped IP fragments. */
89 long long int n_hit
; /* Number of flow table matches. */
90 long long int n_missed
; /* Number of flow table misses. */
91 long long int n_lost
; /* Number of misses not passed to client. */
95 struct dp_netdev_port
*ports
[MAX_PORTS
];
96 struct list port_list
;
100 /* A port in a netdev-based datapath. */
101 struct dp_netdev_port
{
102 int port_no
; /* Index into dp_netdev's 'ports'. */
103 struct list node
; /* Element in dp_netdev's 'port_list'. */
104 struct netdev
*netdev
;
105 bool internal
; /* Internal port? */
108 /* A flow in dp_netdev's 'flow_table'. */
109 struct dp_netdev_flow
{
110 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
114 long long int used
; /* Last used time, in monotonic msecs. */
115 long long int packet_count
; /* Number of packets matched. */
116 long long int byte_count
; /* Number of bytes matched. */
117 ovs_be16 tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
120 struct nlattr
*actions
;
124 /* Interface to netdev-based datapath. */
127 struct dp_netdev
*dp
;
129 unsigned int dp_serial
;
132 /* All netdev-based datapaths. */
133 static struct shash dp_netdevs
= SHASH_INITIALIZER(&dp_netdevs
);
135 /* Maximum port MTU seen so far. */
136 static int max_mtu
= ETH_PAYLOAD_MAX
;
138 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
139 struct dp_netdev_port
**portp
);
140 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
141 struct dp_netdev_port
**portp
);
142 static void dp_netdev_free(struct dp_netdev
*);
143 static void dp_netdev_flow_flush(struct dp_netdev
*);
144 static int do_add_port(struct dp_netdev
*, const char *devname
,
145 const char *type
, uint16_t port_no
);
146 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
147 static int dpif_netdev_open(const struct dpif_class
*, const char *name
,
148 bool create
, struct dpif
**);
149 static int dp_netdev_output_userspace(struct dp_netdev
*, const struct ofpbuf
*,
150 int queue_no
, const struct flow
*,
152 static int dp_netdev_execute_actions(struct dp_netdev
*,
153 struct ofpbuf
*, struct flow
*,
154 const struct nlattr
*actions
,
157 static struct dpif_class dpif_dummy_class
;
159 static struct dpif_netdev
*
160 dpif_netdev_cast(const struct dpif
*dpif
)
162 assert(dpif
->dpif_class
->open
== dpif_netdev_open
);
163 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
166 static struct dp_netdev
*
167 get_dp_netdev(const struct dpif
*dpif
)
169 return dpif_netdev_cast(dpif
)->dp
;
173 create_dpif_netdev(struct dp_netdev
*dp
)
175 uint16_t netflow_id
= hash_string(dp
->name
, 0);
176 struct dpif_netdev
*dpif
;
180 dpif
= xmalloc(sizeof *dpif
);
181 dpif_init(&dpif
->dpif
, dp
->class, dp
->name
, netflow_id
>> 8, netflow_id
);
183 dpif
->listen_mask
= 0;
184 dpif
->dp_serial
= dp
->serial
;
190 create_dp_netdev(const char *name
, const struct dpif_class
*class,
191 struct dp_netdev
**dpp
)
193 struct dp_netdev
*dp
;
197 dp
= xzalloc(sizeof *dp
);
199 dp
->name
= xstrdup(name
);
201 dp
->drop_frags
= false;
202 for (i
= 0; i
< N_QUEUES
; i
++) {
203 dp
->queues
[i
].head
= dp
->queues
[i
].tail
= 0;
205 hmap_init(&dp
->flow_table
);
206 list_init(&dp
->port_list
);
207 error
= do_add_port(dp
, name
, "internal", ODPP_LOCAL
);
213 shash_add(&dp_netdevs
, name
, dp
);
220 dpif_netdev_open(const struct dpif_class
*class, const char *name
,
221 bool create
, struct dpif
**dpifp
)
223 struct dp_netdev
*dp
;
225 dp
= shash_find_data(&dp_netdevs
, name
);
230 int error
= create_dp_netdev(name
, class, &dp
);
237 if (dp
->class != class) {
244 *dpifp
= create_dpif_netdev(dp
);
249 dp_netdev_purge_queues(struct dp_netdev
*dp
)
253 for (i
= 0; i
< N_QUEUES
; i
++) {
254 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
256 while (q
->tail
!= q
->head
) {
257 struct dpif_upcall
*upcall
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
259 ofpbuf_delete(upcall
->packet
);
266 dp_netdev_free(struct dp_netdev
*dp
)
268 dp_netdev_flow_flush(dp
);
269 while (dp
->n_ports
> 0) {
270 struct dp_netdev_port
*port
= CONTAINER_OF(
271 dp
->port_list
.next
, struct dp_netdev_port
, node
);
272 do_del_port(dp
, port
->port_no
);
274 dp_netdev_purge_queues(dp
);
275 hmap_destroy(&dp
->flow_table
);
281 dpif_netdev_close(struct dpif
*dpif
)
283 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
284 assert(dp
->open_cnt
> 0);
285 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
286 shash_find_and_delete(&dp_netdevs
, dp
->name
);
293 dpif_netdev_destroy(struct dpif
*dpif
)
295 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
296 dp
->destroyed
= true;
301 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
303 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
304 memset(stats
, 0, sizeof *stats
);
305 stats
->n_flows
= hmap_count(&dp
->flow_table
);
306 stats
->n_frags
= dp
->n_frags
;
307 stats
->n_hit
= dp
->n_hit
;
308 stats
->n_missed
= dp
->n_missed
;
309 stats
->n_lost
= dp
->n_lost
;
314 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
316 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
317 *drop_fragsp
= dp
->drop_frags
;
322 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
324 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
325 dp
->drop_frags
= drop_frags
;
330 do_add_port(struct dp_netdev
*dp
, const char *devname
, const char *type
,
333 struct dp_netdev_port
*port
;
334 struct netdev_options netdev_options
;
335 struct netdev
*netdev
;
340 /* XXX reject devices already in some dp_netdev. */
341 if (type
[0] == '\0' || !strcmp(type
, "system")) {
343 } else if (!strcmp(type
, "internal")) {
346 VLOG_WARN("%s: unsupported port type %s", devname
, type
);
350 /* Open and validate network device. */
351 memset(&netdev_options
, 0, sizeof netdev_options
);
352 netdev_options
.name
= devname
;
353 if (dp
->class == &dpif_dummy_class
) {
354 netdev_options
.type
= "dummy";
355 } else if (internal
) {
356 netdev_options
.type
= "tap";
359 error
= netdev_open(&netdev_options
, &netdev
);
363 /* XXX reject loopback devices */
364 /* XXX reject non-Ethernet devices */
366 error
= netdev_listen(netdev
);
368 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
369 devname
, strerror(errno
));
370 netdev_close(netdev
);
374 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
376 netdev_close(netdev
);
380 port
= xmalloc(sizeof *port
);
381 port
->port_no
= port_no
;
382 port
->netdev
= netdev
;
383 port
->internal
= internal
;
385 netdev_get_mtu(netdev
, &mtu
);
386 if (mtu
!= INT_MAX
&& mtu
> max_mtu
) {
390 list_push_back(&dp
->port_list
, &port
->node
);
391 dp
->ports
[port_no
] = port
;
399 dpif_netdev_port_add(struct dpif
*dpif
, struct netdev
*netdev
,
402 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
405 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
406 if (!dp
->ports
[port_no
]) {
408 return do_add_port(dp
, netdev_get_name(netdev
),
409 netdev_get_type(netdev
), port_no
);
416 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
418 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
419 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
423 is_valid_port_number(uint16_t port_no
)
425 return port_no
< MAX_PORTS
;
429 get_port_by_number(struct dp_netdev
*dp
,
430 uint16_t port_no
, struct dp_netdev_port
**portp
)
432 if (!is_valid_port_number(port_no
)) {
436 *portp
= dp
->ports
[port_no
];
437 return *portp
? 0 : ENOENT
;
442 get_port_by_name(struct dp_netdev
*dp
,
443 const char *devname
, struct dp_netdev_port
**portp
)
445 struct dp_netdev_port
*port
;
447 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
448 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
457 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
459 struct dp_netdev_port
*port
;
463 error
= get_port_by_number(dp
, port_no
, &port
);
468 list_remove(&port
->node
);
469 dp
->ports
[port
->port_no
] = NULL
;
473 name
= xstrdup(netdev_get_name(port
->netdev
));
474 netdev_close(port
->netdev
);
483 answer_port_query(const struct dp_netdev_port
*port
,
484 struct dpif_port
*dpif_port
)
486 dpif_port
->name
= xstrdup(netdev_get_name(port
->netdev
));
487 dpif_port
->type
= xstrdup(port
->internal
? "internal" : "system");
488 dpif_port
->port_no
= port
->port_no
;
492 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
493 struct dpif_port
*dpif_port
)
495 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
496 struct dp_netdev_port
*port
;
499 error
= get_port_by_number(dp
, port_no
, &port
);
501 answer_port_query(port
, dpif_port
);
507 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
508 struct dpif_port
*dpif_port
)
510 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
511 struct dp_netdev_port
*port
;
514 error
= get_port_by_name(dp
, devname
, &port
);
516 answer_port_query(port
, dpif_port
);
522 dpif_netdev_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
528 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
530 hmap_remove(&dp
->flow_table
, &flow
->node
);
536 dp_netdev_flow_flush(struct dp_netdev
*dp
)
538 struct dp_netdev_flow
*flow
, *next
;
540 HMAP_FOR_EACH_SAFE (flow
, next
, node
, &dp
->flow_table
) {
541 dp_netdev_free_flow(dp
, flow
);
/* dpif entry point: flushes all flows from the datapath behind 'dpif'. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
553 struct dp_netdev_port_state
{
559 dpif_netdev_port_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
561 *statep
= xzalloc(sizeof(struct dp_netdev_port_state
));
566 dpif_netdev_port_dump_next(const struct dpif
*dpif
, void *state_
,
567 struct dpif_port
*dpif_port
)
569 struct dp_netdev_port_state
*state
= state_
;
570 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
573 for (port_no
= state
->port_no
; port_no
< MAX_PORTS
; port_no
++) {
574 struct dp_netdev_port
*port
= dp
->ports
[port_no
];
577 state
->name
= xstrdup(netdev_get_name(port
->netdev
));
578 dpif_port
->name
= state
->name
;
579 dpif_port
->type
= port
->internal
? "internal" : "system";
580 dpif_port
->port_no
= port
->port_no
;
581 state
->port_no
= port_no
+ 1;
589 dpif_netdev_port_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
591 struct dp_netdev_port_state
*state
= state_
;
598 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
600 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
601 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
602 dpif
->dp_serial
= dpif
->dp
->serial
;
610 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
612 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
613 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
614 poll_immediate_wake();
618 static struct dp_netdev_flow
*
619 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const struct flow
*key
)
621 struct dp_netdev_flow
*flow
;
623 HMAP_FOR_EACH_WITH_HASH (flow
, node
, flow_hash(key
, 0), &dp
->flow_table
) {
624 if (flow_equal(&flow
->key
, key
)) {
632 get_dpif_flow_stats(struct dp_netdev_flow
*flow
, struct dpif_flow_stats
*stats
)
634 stats
->n_packets
= flow
->packet_count
;
635 stats
->n_bytes
= flow
->byte_count
;
636 stats
->used
= flow
->used
;
637 stats
->tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
641 dpif_netdev_flow_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
644 if (odp_flow_key_to_flow(key
, key_len
, flow
)) {
645 /* This should not happen: it indicates that odp_flow_key_from_flow()
646 * and odp_flow_key_to_flow() disagree on the acceptable form of a
647 * flow. Log the problem as an error, with enough details to enable
649 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
651 if (!VLOG_DROP_ERR(&rl
)) {
655 odp_flow_key_format(key
, key_len
, &s
);
656 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s
));
667 dpif_netdev_flow_get(const struct dpif
*dpif
,
668 const struct nlattr
*nl_key
, size_t nl_key_len
,
669 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
671 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
672 struct dp_netdev_flow
*flow
;
676 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
681 flow
= dp_netdev_lookup_flow(dp
, &key
);
687 get_dpif_flow_stats(flow
, stats
);
690 *actionsp
= ofpbuf_clone_data(flow
->actions
, flow
->actions_len
);
696 dpif_netdev_validate_actions(const struct nlattr
*actions
,
697 size_t actions_len
, bool *mutates
)
699 const struct nlattr
*a
;
703 NL_ATTR_FOR_EACH (a
, left
, actions
, actions_len
) {
704 uint16_t type
= nl_attr_type(a
);
705 int len
= odp_action_len(type
);
707 if (len
!= nl_attr_get_size(a
)) {
712 case ODP_ACTION_ATTR_OUTPUT
:
713 if (nl_attr_get_u32(a
) >= MAX_PORTS
) {
718 case ODP_ACTION_ATTR_USERSPACE
:
721 case ODP_ACTION_ATTR_SET_DL_TCI
:
723 if (nl_attr_get_be16(a
) & htons(VLAN_CFI
)) {
728 case ODP_ACTION_ATTR_SET_NW_TOS
:
730 if (nl_attr_get_u8(a
) & IP_ECN_MASK
) {
735 case ODP_ACTION_ATTR_STRIP_VLAN
:
736 case ODP_ACTION_ATTR_SET_DL_SRC
:
737 case ODP_ACTION_ATTR_SET_DL_DST
:
738 case ODP_ACTION_ATTR_SET_NW_SRC
:
739 case ODP_ACTION_ATTR_SET_NW_DST
:
740 case ODP_ACTION_ATTR_SET_TP_SRC
:
741 case ODP_ACTION_ATTR_SET_TP_DST
:
745 case ODP_ACTION_ATTR_SET_TUNNEL
:
746 case ODP_ACTION_ATTR_SET_PRIORITY
:
747 case ODP_ACTION_ATTR_POP_PRIORITY
:
756 set_flow_actions(struct dp_netdev_flow
*flow
,
757 const struct nlattr
*actions
, size_t actions_len
)
762 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
767 flow
->actions
= xrealloc(flow
->actions
, actions_len
);
768 flow
->actions_len
= actions_len
;
769 memcpy(flow
->actions
, actions
, actions_len
);
774 add_flow(struct dpif
*dpif
, const struct flow
*key
,
775 const struct nlattr
*actions
, size_t actions_len
)
777 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
778 struct dp_netdev_flow
*flow
;
781 flow
= xzalloc(sizeof *flow
);
784 error
= set_flow_actions(flow
, actions
, actions_len
);
790 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
795 clear_stats(struct dp_netdev_flow
*flow
)
798 flow
->packet_count
= 0;
799 flow
->byte_count
= 0;
804 dpif_netdev_flow_put(struct dpif
*dpif
, enum dpif_flow_put_flags flags
,
805 const struct nlattr
*nl_key
, size_t nl_key_len
,
806 const struct nlattr
*actions
, size_t actions_len
,
807 struct dpif_flow_stats
*stats
)
809 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
810 struct dp_netdev_flow
*flow
;
814 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
819 flow
= dp_netdev_lookup_flow(dp
, &key
);
821 if (flags
& DPIF_FP_CREATE
) {
822 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
824 memset(stats
, 0, sizeof *stats
);
826 return add_flow(dpif
, &key
, actions
, actions_len
);
834 if (flags
& DPIF_FP_MODIFY
) {
835 int error
= set_flow_actions(flow
, actions
, actions_len
);
838 get_dpif_flow_stats(flow
, stats
);
840 if (flags
& DPIF_FP_ZERO_STATS
) {
852 dpif_netdev_flow_del(struct dpif
*dpif
,
853 const struct nlattr
*nl_key
, size_t nl_key_len
,
854 struct dpif_flow_stats
*stats
)
856 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
857 struct dp_netdev_flow
*flow
;
861 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
866 flow
= dp_netdev_lookup_flow(dp
, &key
);
869 get_dpif_flow_stats(flow
, stats
);
871 dp_netdev_free_flow(dp
, flow
);
878 struct dp_netdev_flow_state
{
881 struct nlattr
*actions
;
882 struct odputil_keybuf keybuf
;
883 struct dpif_flow_stats stats
;
887 dpif_netdev_flow_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
889 struct dp_netdev_flow_state
*state
;
891 *statep
= state
= xmalloc(sizeof *state
);
894 state
->actions
= NULL
;
899 dpif_netdev_flow_dump_next(const struct dpif
*dpif
, void *state_
,
900 const struct nlattr
**key
, size_t *key_len
,
901 const struct nlattr
**actions
, size_t *actions_len
,
902 const struct dpif_flow_stats
**stats
)
904 struct dp_netdev_flow_state
*state
= state_
;
905 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
906 struct dp_netdev_flow
*flow
;
907 struct hmap_node
*node
;
909 node
= hmap_at_position(&dp
->flow_table
, &state
->bucket
, &state
->offset
);
914 flow
= CONTAINER_OF(node
, struct dp_netdev_flow
, node
);
919 ofpbuf_use_stack(&buf
, &state
->keybuf
, sizeof state
->keybuf
);
920 odp_flow_key_from_flow(&buf
, &flow
->key
);
927 free(state
->actions
);
928 state
->actions
= xmemdup(flow
->actions
, flow
->actions_len
);
930 *actions
= state
->actions
;
931 *actions_len
= flow
->actions_len
;
935 get_dpif_flow_stats(flow
, &state
->stats
);
936 *stats
= &state
->stats
;
943 dpif_netdev_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
945 struct dp_netdev_flow_state
*state
= state_
;
947 free(state
->actions
);
953 dpif_netdev_execute(struct dpif
*dpif
,
954 const struct nlattr
*key_attrs
, size_t key_len
,
955 const struct nlattr
*actions
, size_t actions_len
,
956 const struct ofpbuf
*packet
)
958 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
964 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
968 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
974 /* We need a deep copy of 'packet' since we're going to modify its
976 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
977 ofpbuf_reserve(©
, DP_NETDEV_HEADROOM
);
978 ofpbuf_put(©
, packet
->data
, packet
->size
);
980 /* We still need a shallow copy of 'packet', even though we won't
981 * modify its data, because flow_extract() modifies packet->l2, etc.
982 * We could probably get away with modifying those but it's more polite
987 flow_extract(©
, 0, -1, &key
);
988 dpif_netdev_flow_from_nlattrs(key_attrs
, key_len
, &key
);
990 error
= dp_netdev_execute_actions(dp
, ©
, &key
, actions
, actions_len
);
992 ofpbuf_uninit(©
);
998 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
1000 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1001 *listen_mask
= dpif_netdev
->listen_mask
;
1006 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
1008 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1009 dpif_netdev
->listen_mask
= listen_mask
;
1013 static struct dp_netdev_queue
*
1014 find_nonempty_queue(struct dpif
*dpif
)
1016 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1017 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1018 int mask
= dpif_netdev
->listen_mask
;
1021 for (i
= 0; i
< N_QUEUES
; i
++) {
1022 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
1023 if (q
->head
!= q
->tail
&& mask
& (1u << i
)) {
1031 dpif_netdev_recv(struct dpif
*dpif
, struct dpif_upcall
*upcall
)
1033 struct dp_netdev_queue
*q
= find_nonempty_queue(dpif
);
1035 struct dpif_upcall
*u
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
/* Wakes the caller immediately if an upcall is already queued for this
 * handle. */
static void
dpif_netdev_recv_wait(struct dpif *dpif)
{
    if (find_nonempty_queue(dpif)) {
        poll_immediate_wake();
    } else {
        /* No messages ready to be received, and dp_wait() will ensure that
         * we wake up to queue new messages, so there is nothing to do. */
    }
}
1057 dpif_netdev_recv_purge(struct dpif
*dpif
)
1059 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1060 dp_netdev_purge_queues(dpif_netdev
->dp
);
1064 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, struct flow
*key
,
1065 const struct ofpbuf
*packet
)
1067 flow
->used
= time_msec();
1068 flow
->packet_count
++;
1069 flow
->byte_count
+= packet
->size
;
1070 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
1071 struct tcp_header
*th
= packet
->l4
;
1072 flow
->tcp_ctl
|= th
->tcp_ctl
;
1077 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1078 struct ofpbuf
*packet
)
1080 struct dp_netdev_flow
*flow
;
1083 if (packet
->size
< ETH_HEADER_LEN
) {
1086 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
1091 flow
= dp_netdev_lookup_flow(dp
, &key
);
1093 dp_netdev_flow_used(flow
, &key
, packet
);
1094 dp_netdev_execute_actions(dp
, packet
, &key
,
1095 flow
->actions
, flow
->actions_len
);
1099 dp_netdev_output_userspace(dp
, packet
, DPIF_UC_MISS
, &key
, 0);
1104 dpif_netdev_run(struct dpif
*dpif
)
1106 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1107 struct dp_netdev_port
*port
;
1108 struct ofpbuf packet
;
1110 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ VLAN_ETH_HEADER_LEN
+ max_mtu
);
1112 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1115 /* Reset packet contents. */
1116 ofpbuf_clear(&packet
);
1117 ofpbuf_reserve(&packet
, DP_NETDEV_HEADROOM
);
1119 error
= netdev_recv(port
->netdev
, &packet
);
1121 dp_netdev_port_input(dp
, port
, &packet
);
1122 } else if (error
!= EAGAIN
&& error
!= EOPNOTSUPP
) {
1123 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1124 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1125 netdev_get_name(port
->netdev
), strerror(error
));
1128 ofpbuf_uninit(&packet
);
1132 dpif_netdev_wait(struct dpif
*dpif
)
1134 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1135 struct dp_netdev_port
*port
;
1137 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1138 netdev_recv_wait(port
->netdev
);
1143 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1145 struct vlan_eth_header
*veh
= packet
->l2
;
1146 if (packet
->size
>= sizeof *veh
1147 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1148 struct eth_header tmp
;
1150 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1151 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1152 tmp
.eth_type
= veh
->veth_next_type
;
1154 ofpbuf_pull(packet
, VLAN_HEADER_LEN
);
1155 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1156 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1161 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1163 struct eth_header
*eh
= packet
->l2
;
1164 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1168 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1170 struct eth_header
*eh
= packet
->l2
;
1171 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1175 is_ip(const struct ofpbuf
*packet
, const struct flow
*key
)
1177 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
1181 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const struct flow
*key
,
1182 const struct nlattr
*a
)
1184 if (is_ip(packet
, key
)) {
1185 struct ip_header
*nh
= packet
->l3
;
1186 ovs_be32 ip
= nl_attr_get_be32(a
);
1187 uint16_t type
= nl_attr_type(a
);
1190 field
= type
== ODP_ACTION_ATTR_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1191 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1192 struct tcp_header
*th
= packet
->l4
;
1193 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, ip
);
1194 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1195 struct udp_header
*uh
= packet
->l4
;
1197 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, ip
);
1198 if (!uh
->udp_csum
) {
1199 uh
->udp_csum
= htons(0xffff);
1203 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, ip
);
1209 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const struct flow
*key
,
1212 if (is_ip(packet
, key
)) {
1213 struct ip_header
*nh
= packet
->l3
;
1214 uint8_t *field
= &nh
->ip_tos
;
1216 /* Set the DSCP bits and preserve the ECN bits. */
1217 uint8_t new = nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1219 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1220 htons((uint16_t) new));
1226 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const struct flow
*key
,
1227 const struct nlattr
*a
)
1229 if (is_ip(packet
, key
)) {
1230 uint16_t type
= nl_attr_type(a
);
1231 ovs_be16 port
= nl_attr_get_be16(a
);
1234 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1235 struct tcp_header
*th
= packet
->l4
;
1236 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1237 ? &th
->tcp_src
: &th
->tcp_dst
);
1238 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, port
);
1240 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1241 struct udp_header
*uh
= packet
->l4
;
1242 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1243 ? &uh
->udp_src
: &uh
->udp_dst
);
1244 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, port
);
1253 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1256 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1258 netdev_send(p
->netdev
, packet
);
1263 dp_netdev_output_userspace(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1264 int queue_no
, const struct flow
*flow
, uint64_t arg
)
1266 struct dp_netdev_queue
*q
= &dp
->queues
[queue_no
];
1267 struct dpif_upcall
*upcall
;
1271 if (q
->head
- q
->tail
>= MAX_QUEUE_LEN
) {
1276 buf
= ofpbuf_new(ODPUTIL_FLOW_KEY_BYTES
+ 2 + packet
->size
);
1277 odp_flow_key_from_flow(buf
, flow
);
1278 key_len
= buf
->size
;
1279 ofpbuf_pull(buf
, key_len
);
1280 ofpbuf_reserve(buf
, 2);
1281 ofpbuf_put(buf
, packet
->data
, packet
->size
);
1283 upcall
= xzalloc(sizeof *upcall
);
1284 upcall
->type
= queue_no
;
1285 upcall
->packet
= buf
;
1286 upcall
->key
= buf
->base
;
1287 upcall
->key_len
= key_len
;
1288 upcall
->userdata
= arg
;
1290 q
->upcalls
[q
->head
++ & QUEUE_MASK
] = upcall
;
1296 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1297 struct ofpbuf
*packet
, struct flow
*key
,
1298 const struct nlattr
*actions
,
1301 const struct nlattr
*a
;
1304 NL_ATTR_FOR_EACH_UNSAFE (a
, left
, actions
, actions_len
) {
1305 switch (nl_attr_type(a
)) {
1306 case ODP_ACTION_ATTR_OUTPUT
:
1307 dp_netdev_output_port(dp
, packet
, nl_attr_get_u32(a
));
1310 case ODP_ACTION_ATTR_USERSPACE
:
1311 dp_netdev_output_userspace(dp
, packet
, DPIF_UC_ACTION
,
1312 key
, nl_attr_get_u64(a
));
1315 case ODP_ACTION_ATTR_SET_DL_TCI
:
1316 eth_set_vlan_tci(packet
, nl_attr_get_be16(a
));
1319 case ODP_ACTION_ATTR_STRIP_VLAN
:
1320 dp_netdev_strip_vlan(packet
);
1323 case ODP_ACTION_ATTR_SET_DL_SRC
:
1324 dp_netdev_set_dl_src(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1327 case ODP_ACTION_ATTR_SET_DL_DST
:
1328 dp_netdev_set_dl_dst(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1331 case ODP_ACTION_ATTR_SET_NW_SRC
:
1332 case ODP_ACTION_ATTR_SET_NW_DST
:
1333 dp_netdev_set_nw_addr(packet
, key
, a
);
1336 case ODP_ACTION_ATTR_SET_NW_TOS
:
1337 dp_netdev_set_nw_tos(packet
, key
, nl_attr_get_u8(a
));
1340 case ODP_ACTION_ATTR_SET_TP_SRC
:
1341 case ODP_ACTION_ATTR_SET_TP_DST
:
1342 dp_netdev_set_tp_port(packet
, key
, a
);
1349 const struct dpif_class dpif_netdev_class
= {
1351 NULL
, /* enumerate */
1354 dpif_netdev_destroy
,
1357 dpif_netdev_get_stats
,
1358 dpif_netdev_get_drop_frags
,
1359 dpif_netdev_set_drop_frags
,
1360 dpif_netdev_port_add
,
1361 dpif_netdev_port_del
,
1362 dpif_netdev_port_query_by_number
,
1363 dpif_netdev_port_query_by_name
,
1364 dpif_netdev_get_max_ports
,
1365 dpif_netdev_port_dump_start
,
1366 dpif_netdev_port_dump_next
,
1367 dpif_netdev_port_dump_done
,
1368 dpif_netdev_port_poll
,
1369 dpif_netdev_port_poll_wait
,
1370 dpif_netdev_flow_get
,
1371 dpif_netdev_flow_put
,
1372 dpif_netdev_flow_del
,
1373 dpif_netdev_flow_flush
,
1374 dpif_netdev_flow_dump_start
,
1375 dpif_netdev_flow_dump_next
,
1376 dpif_netdev_flow_dump_done
,
1377 dpif_netdev_execute
,
1378 dpif_netdev_recv_get_mask
,
1379 dpif_netdev_recv_set_mask
,
1380 NULL
, /* get_sflow_probability */
1381 NULL
, /* set_sflow_probability */
1382 NULL
, /* queue_to_priority */
1384 dpif_netdev_recv_wait
,
1385 dpif_netdev_recv_purge
,
1389 dpif_dummy_register(void)
1391 if (!dpif_dummy_class
.type
) {
1392 dpif_dummy_class
= dpif_netdev_class
;
1393 dpif_dummy_class
.type
= "dummy";
1394 dp_register_provider(&dpif_dummy_class
);