2 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <netinet/in.h>
26 #include <sys/socket.h>
31 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "dynamic-string.h"
46 #include "ofp-print.h"
49 #include "poll-loop.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_netdev
);
57 /* Configuration parameters. */
58 enum { MAX_PORTS
= 256 }; /* Maximum number of ports. */
59 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
61 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
62 * headers to be aligned on a 4-byte boundary. */
63 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
66 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
67 enum { MAX_QUEUE_LEN
= 128 }; /* Maximum number of packets per queue. */
68 enum { QUEUE_MASK
= MAX_QUEUE_LEN
- 1 };
69 BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN
));
71 struct dp_netdev_queue
{
72 struct dpif_upcall
*upcalls
[MAX_QUEUE_LEN
];
73 unsigned int head
, tail
;
76 /* Datapath based on the network device interface from netdev.h. */
78 const struct dpif_class
*class;
83 bool drop_frags
; /* Drop all IP fragments, if true. */
84 struct dp_netdev_queue queues
[N_QUEUES
];
85 struct hmap flow_table
; /* Flow table. */
88 long long int n_frags
; /* Number of dropped IP fragments. */
89 long long int n_hit
; /* Number of flow table matches. */
90 long long int n_missed
; /* Number of flow table misses. */
91 long long int n_lost
; /* Number of misses not passed to client. */
95 struct dp_netdev_port
*ports
[MAX_PORTS
];
96 struct list port_list
;
100 /* A port in a netdev-based datapath. */
101 struct dp_netdev_port
{
102 int port_no
; /* Index into dp_netdev's 'ports'. */
103 struct list node
; /* Element in dp_netdev's 'port_list'. */
104 struct netdev
*netdev
;
105 bool internal
; /* Internal port? */
108 /* A flow in dp_netdev's 'flow_table'. */
109 struct dp_netdev_flow
{
110 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
114 long long int used
; /* Last used time, in monotonic msecs. */
115 long long int packet_count
; /* Number of packets matched. */
116 long long int byte_count
; /* Number of bytes matched. */
117 ovs_be16 tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
120 struct nlattr
*actions
;
124 /* Interface to netdev-based datapath. */
127 struct dp_netdev
*dp
;
129 unsigned int dp_serial
;
132 /* All netdev-based datapaths. */
133 static struct shash dp_netdevs
= SHASH_INITIALIZER(&dp_netdevs
);
135 /* Maximum port MTU seen so far. */
136 static int max_mtu
= ETH_PAYLOAD_MAX
;
138 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
139 struct dp_netdev_port
**portp
);
140 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
141 struct dp_netdev_port
**portp
);
142 static void dp_netdev_free(struct dp_netdev
*);
143 static void dp_netdev_flow_flush(struct dp_netdev
*);
144 static int do_add_port(struct dp_netdev
*, const char *devname
,
145 const char *type
, uint16_t port_no
);
146 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
147 static int dpif_netdev_open(const struct dpif_class
*, const char *name
,
148 bool create
, struct dpif
**);
149 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
150 int queue_no
, const struct flow
*,
152 static int dp_netdev_execute_actions(struct dp_netdev
*,
153 struct ofpbuf
*, struct flow
*,
154 const struct nlattr
*actions
,
157 static struct dpif_class dpif_dummy_class
;
159 static struct dpif_netdev
*
160 dpif_netdev_cast(const struct dpif
*dpif
)
162 assert(dpif
->dpif_class
->open
== dpif_netdev_open
);
163 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
166 static struct dp_netdev
*
167 get_dp_netdev(const struct dpif
*dpif
)
169 return dpif_netdev_cast(dpif
)->dp
;
173 create_dpif_netdev(struct dp_netdev
*dp
)
175 uint16_t netflow_id
= hash_string(dp
->name
, 0);
176 struct dpif_netdev
*dpif
;
180 dpif
= xmalloc(sizeof *dpif
);
181 dpif_init(&dpif
->dpif
, dp
->class, dp
->name
, netflow_id
>> 8, netflow_id
);
183 dpif
->listen_mask
= 0;
184 dpif
->dp_serial
= dp
->serial
;
190 create_dp_netdev(const char *name
, const struct dpif_class
*class,
191 struct dp_netdev
**dpp
)
193 struct dp_netdev
*dp
;
197 dp
= xzalloc(sizeof *dp
);
199 dp
->name
= xstrdup(name
);
201 dp
->drop_frags
= false;
202 for (i
= 0; i
< N_QUEUES
; i
++) {
203 dp
->queues
[i
].head
= dp
->queues
[i
].tail
= 0;
205 hmap_init(&dp
->flow_table
);
206 list_init(&dp
->port_list
);
207 error
= do_add_port(dp
, name
, "internal", ODPP_LOCAL
);
213 shash_add(&dp_netdevs
, name
, dp
);
220 dpif_netdev_open(const struct dpif_class
*class, const char *name
,
221 bool create
, struct dpif
**dpifp
)
223 struct dp_netdev
*dp
;
225 dp
= shash_find_data(&dp_netdevs
, name
);
230 int error
= create_dp_netdev(name
, class, &dp
);
237 if (dp
->class != class) {
244 *dpifp
= create_dpif_netdev(dp
);
249 dp_netdev_purge_queues(struct dp_netdev
*dp
)
253 for (i
= 0; i
< N_QUEUES
; i
++) {
254 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
256 while (q
->tail
!= q
->head
) {
257 struct dpif_upcall
*upcall
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
259 ofpbuf_delete(upcall
->packet
);
266 dp_netdev_free(struct dp_netdev
*dp
)
268 dp_netdev_flow_flush(dp
);
269 while (dp
->n_ports
> 0) {
270 struct dp_netdev_port
*port
= CONTAINER_OF(
271 dp
->port_list
.next
, struct dp_netdev_port
, node
);
272 do_del_port(dp
, port
->port_no
);
274 dp_netdev_purge_queues(dp
);
275 hmap_destroy(&dp
->flow_table
);
281 dpif_netdev_close(struct dpif
*dpif
)
283 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
284 assert(dp
->open_cnt
> 0);
285 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
286 shash_find_and_delete(&dp_netdevs
, dp
->name
);
293 dpif_netdev_destroy(struct dpif
*dpif
)
295 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
296 dp
->destroyed
= true;
301 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
303 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
304 memset(stats
, 0, sizeof *stats
);
305 stats
->n_frags
= dp
->n_frags
;
306 stats
->n_hit
= dp
->n_hit
;
307 stats
->n_missed
= dp
->n_missed
;
308 stats
->n_lost
= dp
->n_lost
;
313 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
315 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
316 *drop_fragsp
= dp
->drop_frags
;
321 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
323 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
324 dp
->drop_frags
= drop_frags
;
329 do_add_port(struct dp_netdev
*dp
, const char *devname
, const char *type
,
332 struct dp_netdev_port
*port
;
333 struct netdev_options netdev_options
;
334 struct netdev
*netdev
;
339 /* XXX reject devices already in some dp_netdev. */
340 if (type
[0] == '\0' || !strcmp(type
, "system")) {
342 } else if (!strcmp(type
, "internal")) {
345 VLOG_WARN("%s: unsupported port type %s", devname
, type
);
349 /* Open and validate network device. */
350 memset(&netdev_options
, 0, sizeof netdev_options
);
351 netdev_options
.name
= devname
;
352 netdev_options
.ethertype
= NETDEV_ETH_TYPE_ANY
;
353 if (dp
->class == &dpif_dummy_class
) {
354 netdev_options
.type
= "dummy";
355 } else if (internal
) {
356 netdev_options
.type
= "tap";
359 error
= netdev_open(&netdev_options
, &netdev
);
363 /* XXX reject loopback devices */
364 /* XXX reject non-Ethernet devices */
366 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
368 netdev_close(netdev
);
372 port
= xmalloc(sizeof *port
);
373 port
->port_no
= port_no
;
374 port
->netdev
= netdev
;
375 port
->internal
= internal
;
377 netdev_get_mtu(netdev
, &mtu
);
378 if (mtu
!= INT_MAX
&& mtu
> max_mtu
) {
382 list_push_back(&dp
->port_list
, &port
->node
);
383 dp
->ports
[port_no
] = port
;
391 dpif_netdev_port_add(struct dpif
*dpif
, struct netdev
*netdev
,
394 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
397 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
398 if (!dp
->ports
[port_no
]) {
400 return do_add_port(dp
, netdev_get_name(netdev
),
401 netdev_get_type(netdev
), port_no
);
408 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
410 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
411 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
415 is_valid_port_number(uint16_t port_no
)
417 return port_no
< MAX_PORTS
;
421 get_port_by_number(struct dp_netdev
*dp
,
422 uint16_t port_no
, struct dp_netdev_port
**portp
)
424 if (!is_valid_port_number(port_no
)) {
428 *portp
= dp
->ports
[port_no
];
429 return *portp
? 0 : ENOENT
;
434 get_port_by_name(struct dp_netdev
*dp
,
435 const char *devname
, struct dp_netdev_port
**portp
)
437 struct dp_netdev_port
*port
;
439 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
440 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
449 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
451 struct dp_netdev_port
*port
;
455 error
= get_port_by_number(dp
, port_no
, &port
);
460 list_remove(&port
->node
);
461 dp
->ports
[port
->port_no
] = NULL
;
465 name
= xstrdup(netdev_get_name(port
->netdev
));
466 netdev_close(port
->netdev
);
475 answer_port_query(const struct dp_netdev_port
*port
,
476 struct dpif_port
*dpif_port
)
478 dpif_port
->name
= xstrdup(netdev_get_name(port
->netdev
));
479 dpif_port
->type
= xstrdup(port
->internal
? "internal" : "system");
480 dpif_port
->port_no
= port
->port_no
;
484 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
485 struct dpif_port
*dpif_port
)
487 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
488 struct dp_netdev_port
*port
;
491 error
= get_port_by_number(dp
, port_no
, &port
);
493 answer_port_query(port
, dpif_port
);
499 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
500 struct dpif_port
*dpif_port
)
502 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
503 struct dp_netdev_port
*port
;
506 error
= get_port_by_name(dp
, devname
, &port
);
508 answer_port_query(port
, dpif_port
);
514 dpif_netdev_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
520 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
522 hmap_remove(&dp
->flow_table
, &flow
->node
);
528 dp_netdev_flow_flush(struct dp_netdev
*dp
)
530 struct dp_netdev_flow
*flow
, *next
;
532 HMAP_FOR_EACH_SAFE (flow
, next
, node
, &dp
->flow_table
) {
533 dp_netdev_free_flow(dp
, flow
);
/* dpif 'flow_flush' callback: deletes all flows from the datapath.
 *
 * Fix: the visible body did not return a value; dpif callbacks report
 * success as 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_flow_flush(dp);
    return 0;
}
545 struct dp_netdev_port_state
{
551 dpif_netdev_port_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
553 *statep
= xzalloc(sizeof(struct dp_netdev_port_state
));
558 dpif_netdev_port_dump_next(const struct dpif
*dpif
, void *state_
,
559 struct dpif_port
*dpif_port
)
561 struct dp_netdev_port_state
*state
= state_
;
562 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
565 for (port_no
= state
->port_no
; port_no
< MAX_PORTS
; port_no
++) {
566 struct dp_netdev_port
*port
= dp
->ports
[port_no
];
569 state
->name
= xstrdup(netdev_get_name(port
->netdev
));
570 dpif_port
->name
= state
->name
;
571 dpif_port
->type
= port
->internal
? "internal" : "system";
572 dpif_port
->port_no
= port
->port_no
;
573 state
->port_no
= port_no
+ 1;
581 dpif_netdev_port_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
583 struct dp_netdev_port_state
*state
= state_
;
590 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
592 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
593 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
594 dpif
->dp_serial
= dpif
->dp
->serial
;
602 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
604 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
605 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
606 poll_immediate_wake();
610 static struct dp_netdev_flow
*
611 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const struct flow
*key
)
613 struct dp_netdev_flow
*flow
;
615 HMAP_FOR_EACH_WITH_HASH (flow
, node
, flow_hash(key
, 0), &dp
->flow_table
) {
616 if (flow_equal(&flow
->key
, key
)) {
624 get_dpif_flow_stats(struct dp_netdev_flow
*flow
, struct dpif_flow_stats
*stats
)
626 stats
->n_packets
= flow
->packet_count
;
627 stats
->n_bytes
= flow
->byte_count
;
628 stats
->used
= flow
->used
;
629 stats
->tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
633 dpif_netdev_flow_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
636 if (odp_flow_key_to_flow(key
, key_len
, flow
)) {
637 /* This should not happen: it indicates that odp_flow_key_from_flow()
638 * and odp_flow_key_to_flow() disagree on the acceptable form of a
639 * flow. Log the problem as an error, with enough details to enable
641 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
643 if (!VLOG_DROP_ERR(&rl
)) {
647 odp_flow_key_format(key
, key_len
, &s
);
648 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s
));
659 dpif_netdev_flow_get(const struct dpif
*dpif
,
660 const struct nlattr
*nl_key
, size_t nl_key_len
,
661 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
663 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
664 struct dp_netdev_flow
*flow
;
668 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
673 flow
= dp_netdev_lookup_flow(dp
, &key
);
679 get_dpif_flow_stats(flow
, stats
);
682 *actionsp
= ofpbuf_clone_data(flow
->actions
, flow
->actions_len
);
688 dpif_netdev_validate_actions(const struct nlattr
*actions
,
689 size_t actions_len
, bool *mutates
)
691 const struct nlattr
*a
;
695 NL_ATTR_FOR_EACH (a
, left
, actions
, actions_len
) {
696 uint16_t type
= nl_attr_type(a
);
697 int len
= odp_action_len(type
);
699 if (len
!= nl_attr_get_size(a
)) {
704 case ODP_ACTION_ATTR_OUTPUT
:
705 if (nl_attr_get_u32(a
) >= MAX_PORTS
) {
710 case ODP_ACTION_ATTR_CONTROLLER
:
713 case ODP_ACTION_ATTR_SET_DL_TCI
:
715 if (nl_attr_get_be16(a
) & htons(VLAN_CFI
)) {
720 case ODP_ACTION_ATTR_SET_NW_TOS
:
722 if (nl_attr_get_u8(a
) & IP_ECN_MASK
) {
727 case ODP_ACTION_ATTR_STRIP_VLAN
:
728 case ODP_ACTION_ATTR_SET_DL_SRC
:
729 case ODP_ACTION_ATTR_SET_DL_DST
:
730 case ODP_ACTION_ATTR_SET_NW_SRC
:
731 case ODP_ACTION_ATTR_SET_NW_DST
:
732 case ODP_ACTION_ATTR_SET_TP_SRC
:
733 case ODP_ACTION_ATTR_SET_TP_DST
:
737 case ODP_ACTION_ATTR_SET_TUNNEL
:
738 case ODP_ACTION_ATTR_SET_PRIORITY
:
739 case ODP_ACTION_ATTR_POP_PRIORITY
:
748 set_flow_actions(struct dp_netdev_flow
*flow
,
749 const struct nlattr
*actions
, size_t actions_len
)
754 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
759 flow
->actions
= xrealloc(flow
->actions
, actions_len
);
760 flow
->actions_len
= actions_len
;
761 memcpy(flow
->actions
, actions
, actions_len
);
766 add_flow(struct dpif
*dpif
, const struct flow
*key
,
767 const struct nlattr
*actions
, size_t actions_len
)
769 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
770 struct dp_netdev_flow
*flow
;
773 flow
= xzalloc(sizeof *flow
);
776 error
= set_flow_actions(flow
, actions
, actions_len
);
782 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
787 clear_stats(struct dp_netdev_flow
*flow
)
790 flow
->packet_count
= 0;
791 flow
->byte_count
= 0;
796 dpif_netdev_flow_put(struct dpif
*dpif
, enum dpif_flow_put_flags flags
,
797 const struct nlattr
*nl_key
, size_t nl_key_len
,
798 const struct nlattr
*actions
, size_t actions_len
,
799 struct dpif_flow_stats
*stats
)
801 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
802 struct dp_netdev_flow
*flow
;
806 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
811 flow
= dp_netdev_lookup_flow(dp
, &key
);
813 if (flags
& DPIF_FP_CREATE
) {
814 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
816 memset(stats
, 0, sizeof *stats
);
818 return add_flow(dpif
, &key
, actions
, actions_len
);
826 if (flags
& DPIF_FP_MODIFY
) {
827 int error
= set_flow_actions(flow
, actions
, actions_len
);
830 get_dpif_flow_stats(flow
, stats
);
832 if (flags
& DPIF_FP_ZERO_STATS
) {
844 dpif_netdev_flow_del(struct dpif
*dpif
,
845 const struct nlattr
*nl_key
, size_t nl_key_len
,
846 struct dpif_flow_stats
*stats
)
848 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
849 struct dp_netdev_flow
*flow
;
853 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
858 flow
= dp_netdev_lookup_flow(dp
, &key
);
861 get_dpif_flow_stats(flow
, stats
);
863 dp_netdev_free_flow(dp
, flow
);
870 struct dp_netdev_flow_state
{
873 struct nlattr
*actions
;
874 struct odputil_keybuf keybuf
;
875 struct dpif_flow_stats stats
;
879 dpif_netdev_flow_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
881 struct dp_netdev_flow_state
*state
;
883 *statep
= state
= xmalloc(sizeof *state
);
886 state
->actions
= NULL
;
891 dpif_netdev_flow_dump_next(const struct dpif
*dpif
, void *state_
,
892 const struct nlattr
**key
, size_t *key_len
,
893 const struct nlattr
**actions
, size_t *actions_len
,
894 const struct dpif_flow_stats
**stats
)
896 struct dp_netdev_flow_state
*state
= state_
;
897 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
898 struct dp_netdev_flow
*flow
;
899 struct hmap_node
*node
;
901 node
= hmap_at_position(&dp
->flow_table
, &state
->bucket
, &state
->offset
);
906 flow
= CONTAINER_OF(node
, struct dp_netdev_flow
, node
);
911 ofpbuf_use_stack(&buf
, &state
->keybuf
, sizeof state
->keybuf
);
912 odp_flow_key_from_flow(&buf
, &flow
->key
);
919 free(state
->actions
);
920 state
->actions
= xmemdup(flow
->actions
, flow
->actions_len
);
922 *actions
= state
->actions
;
923 *actions_len
= flow
->actions_len
;
927 get_dpif_flow_stats(flow
, &state
->stats
);
928 *stats
= &state
->stats
;
935 dpif_netdev_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
937 struct dp_netdev_flow_state
*state
= state_
;
939 free(state
->actions
);
945 dpif_netdev_execute(struct dpif
*dpif
,
946 const struct nlattr
*key_attrs
, size_t key_len
,
947 const struct nlattr
*actions
, size_t actions_len
,
948 const struct ofpbuf
*packet
)
950 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
956 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
960 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
966 /* We need a deep copy of 'packet' since we're going to modify its
968 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
969 ofpbuf_reserve(©
, DP_NETDEV_HEADROOM
);
970 ofpbuf_put(©
, packet
->data
, packet
->size
);
972 /* We still need a shallow copy of 'packet', even though we won't
973 * modify its data, because flow_extract() modifies packet->l2, etc.
974 * We could probably get away with modifying those but it's more polite
979 flow_extract(©
, 0, -1, &key
);
980 dpif_netdev_flow_from_nlattrs(key_attrs
, key_len
, &key
);
982 error
= dp_netdev_execute_actions(dp
, ©
, &key
, actions
, actions_len
);
984 ofpbuf_uninit(©
);
990 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
992 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
993 *listen_mask
= dpif_netdev
->listen_mask
;
998 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
1000 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1001 dpif_netdev
->listen_mask
= listen_mask
;
1005 static struct dp_netdev_queue
*
1006 find_nonempty_queue(struct dpif
*dpif
)
1008 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1009 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1010 int mask
= dpif_netdev
->listen_mask
;
1013 for (i
= 0; i
< N_QUEUES
; i
++) {
1014 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
1015 if (q
->head
!= q
->tail
&& mask
& (1u << i
)) {
1023 dpif_netdev_recv(struct dpif
*dpif
, struct dpif_upcall
*upcall
)
1025 struct dp_netdev_queue
*q
= find_nonempty_queue(dpif
);
1027 struct dpif_upcall
*u
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
1038 dpif_netdev_recv_wait(struct dpif
*dpif
)
1040 if (find_nonempty_queue(dpif
)) {
1041 poll_immediate_wake();
1043 /* No messages ready to be received, and dp_wait() will ensure that we
1044 * wake up to queue new messages, so there is nothing to do. */
1049 dpif_netdev_recv_purge(struct dpif
*dpif
)
1051 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1052 dp_netdev_purge_queues(dpif_netdev
->dp
);
1056 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, struct flow
*key
,
1057 const struct ofpbuf
*packet
)
1059 flow
->used
= time_msec();
1060 flow
->packet_count
++;
1061 flow
->byte_count
+= packet
->size
;
1062 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
1063 struct tcp_header
*th
= packet
->l4
;
1064 flow
->tcp_ctl
|= th
->tcp_ctl
;
1069 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1070 struct ofpbuf
*packet
)
1072 struct dp_netdev_flow
*flow
;
1075 if (packet
->size
< ETH_HEADER_LEN
) {
1078 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
1083 flow
= dp_netdev_lookup_flow(dp
, &key
);
1085 dp_netdev_flow_used(flow
, &key
, packet
);
1086 dp_netdev_execute_actions(dp
, packet
, &key
,
1087 flow
->actions
, flow
->actions_len
);
1091 dp_netdev_output_control(dp
, packet
, DPIF_UC_MISS
, &key
, 0);
1096 dpif_netdev_run(struct dpif
*dpif
)
1098 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1099 struct dp_netdev_port
*port
;
1100 struct ofpbuf packet
;
1102 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ VLAN_ETH_HEADER_LEN
+ max_mtu
);
1104 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1107 /* Reset packet contents. */
1108 ofpbuf_clear(&packet
);
1109 ofpbuf_reserve(&packet
, DP_NETDEV_HEADROOM
);
1111 error
= netdev_recv(port
->netdev
, &packet
);
1113 dp_netdev_port_input(dp
, port
, &packet
);
1114 } else if (error
!= EAGAIN
&& error
!= EOPNOTSUPP
) {
1115 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1116 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1117 netdev_get_name(port
->netdev
), strerror(error
));
1120 ofpbuf_uninit(&packet
);
1124 dpif_netdev_wait(struct dpif
*dpif
)
1126 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1127 struct dp_netdev_port
*port
;
1129 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1130 netdev_recv_wait(port
->netdev
);
1135 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1137 struct vlan_eth_header
*veh
= packet
->l2
;
1138 if (packet
->size
>= sizeof *veh
1139 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1140 struct eth_header tmp
;
1142 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1143 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1144 tmp
.eth_type
= veh
->veth_next_type
;
1146 ofpbuf_pull(packet
, VLAN_HEADER_LEN
);
1147 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1148 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1153 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1155 struct eth_header
*eh
= packet
->l2
;
1156 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1160 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1162 struct eth_header
*eh
= packet
->l2
;
1163 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1167 is_ip(const struct ofpbuf
*packet
, const struct flow
*key
)
1169 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
1173 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const struct flow
*key
,
1174 const struct nlattr
*a
)
1176 if (is_ip(packet
, key
)) {
1177 struct ip_header
*nh
= packet
->l3
;
1178 ovs_be32 ip
= nl_attr_get_be32(a
);
1179 uint16_t type
= nl_attr_type(a
);
1182 field
= type
== ODP_ACTION_ATTR_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1183 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1184 struct tcp_header
*th
= packet
->l4
;
1185 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, ip
);
1186 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1187 struct udp_header
*uh
= packet
->l4
;
1189 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, ip
);
1190 if (!uh
->udp_csum
) {
1191 uh
->udp_csum
= htons(0xffff);
1195 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, ip
);
1201 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const struct flow
*key
,
1204 if (is_ip(packet
, key
)) {
1205 struct ip_header
*nh
= packet
->l3
;
1206 uint8_t *field
= &nh
->ip_tos
;
1208 /* Set the DSCP bits and preserve the ECN bits. */
1209 uint8_t new = nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1211 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1212 htons((uint16_t) new));
1218 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const struct flow
*key
,
1219 const struct nlattr
*a
)
1221 if (is_ip(packet
, key
)) {
1222 uint16_t type
= nl_attr_type(a
);
1223 ovs_be16 port
= nl_attr_get_be16(a
);
1226 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1227 struct tcp_header
*th
= packet
->l4
;
1228 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1229 ? &th
->tcp_src
: &th
->tcp_dst
);
1230 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, port
);
1232 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1233 struct udp_header
*uh
= packet
->l4
;
1234 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1235 ? &uh
->udp_src
: &uh
->udp_dst
);
1236 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, port
);
1245 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1248 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1250 netdev_send(p
->netdev
, packet
);
1255 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1256 int queue_no
, const struct flow
*flow
, uint64_t arg
)
1258 struct dp_netdev_queue
*q
= &dp
->queues
[queue_no
];
1259 struct dpif_upcall
*upcall
;
1263 if (q
->head
- q
->tail
>= MAX_QUEUE_LEN
) {
1268 buf
= ofpbuf_new(ODPUTIL_FLOW_KEY_BYTES
+ 2 + packet
->size
);
1269 odp_flow_key_from_flow(buf
, flow
);
1270 key_len
= buf
->size
;
1271 ofpbuf_pull(buf
, key_len
);
1272 ofpbuf_reserve(buf
, 2);
1273 ofpbuf_put(buf
, packet
->data
, packet
->size
);
1275 upcall
= xzalloc(sizeof *upcall
);
1276 upcall
->type
= queue_no
;
1277 upcall
->packet
= buf
;
1278 upcall
->key
= buf
->base
;
1279 upcall
->key_len
= key_len
;
1280 upcall
->userdata
= arg
;
1282 q
->upcalls
[q
->head
++ & QUEUE_MASK
] = upcall
;
1288 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1289 struct ofpbuf
*packet
, struct flow
*key
,
1290 const struct nlattr
*actions
,
1293 const struct nlattr
*a
;
1296 NL_ATTR_FOR_EACH_UNSAFE (a
, left
, actions
, actions_len
) {
1297 switch (nl_attr_type(a
)) {
1298 case ODP_ACTION_ATTR_OUTPUT
:
1299 dp_netdev_output_port(dp
, packet
, nl_attr_get_u32(a
));
1302 case ODP_ACTION_ATTR_CONTROLLER
:
1303 dp_netdev_output_control(dp
, packet
, DPIF_UC_ACTION
,
1304 key
, nl_attr_get_u64(a
));
1307 case ODP_ACTION_ATTR_SET_DL_TCI
:
1308 eth_set_vlan_tci(packet
, nl_attr_get_be16(a
));
1311 case ODP_ACTION_ATTR_STRIP_VLAN
:
1312 dp_netdev_strip_vlan(packet
);
1315 case ODP_ACTION_ATTR_SET_DL_SRC
:
1316 dp_netdev_set_dl_src(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1319 case ODP_ACTION_ATTR_SET_DL_DST
:
1320 dp_netdev_set_dl_dst(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1323 case ODP_ACTION_ATTR_SET_NW_SRC
:
1324 case ODP_ACTION_ATTR_SET_NW_DST
:
1325 dp_netdev_set_nw_addr(packet
, key
, a
);
1328 case ODP_ACTION_ATTR_SET_NW_TOS
:
1329 dp_netdev_set_nw_tos(packet
, key
, nl_attr_get_u8(a
));
1332 case ODP_ACTION_ATTR_SET_TP_SRC
:
1333 case ODP_ACTION_ATTR_SET_TP_DST
:
1334 dp_netdev_set_tp_port(packet
, key
, a
);
1341 const struct dpif_class dpif_netdev_class
= {
1343 NULL
, /* enumerate */
1346 dpif_netdev_destroy
,
1349 dpif_netdev_get_stats
,
1350 dpif_netdev_get_drop_frags
,
1351 dpif_netdev_set_drop_frags
,
1352 dpif_netdev_port_add
,
1353 dpif_netdev_port_del
,
1354 dpif_netdev_port_query_by_number
,
1355 dpif_netdev_port_query_by_name
,
1356 dpif_netdev_get_max_ports
,
1357 dpif_netdev_port_dump_start
,
1358 dpif_netdev_port_dump_next
,
1359 dpif_netdev_port_dump_done
,
1360 dpif_netdev_port_poll
,
1361 dpif_netdev_port_poll_wait
,
1362 dpif_netdev_flow_get
,
1363 dpif_netdev_flow_put
,
1364 dpif_netdev_flow_del
,
1365 dpif_netdev_flow_flush
,
1366 dpif_netdev_flow_dump_start
,
1367 dpif_netdev_flow_dump_next
,
1368 dpif_netdev_flow_dump_done
,
1369 dpif_netdev_execute
,
1370 dpif_netdev_recv_get_mask
,
1371 dpif_netdev_recv_set_mask
,
1372 NULL
, /* get_sflow_probability */
1373 NULL
, /* set_sflow_probability */
1374 NULL
, /* queue_to_priority */
1376 dpif_netdev_recv_wait
,
1377 dpif_netdev_recv_purge
,
1381 dpif_dummy_register(void)
1383 if (!dpif_dummy_class
.type
) {
1384 dpif_dummy_class
= dpif_netdev_class
;
1385 dpif_dummy_class
.type
= "dummy";
1386 dp_register_provider(&dpif_dummy_class
);