/*
 * Copyright (c) 2009, 2010, 2011 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
25 #include <netinet/in.h>
26 #include <sys/socket.h>
31 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "dynamic-string.h"
46 #include "ofp-print.h"
49 #include "poll-loop.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_netdev
);
/* Configuration parameters. */
enum { MAX_PORTS = 256 };       /* Maximum number of ports. */
enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
61 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
62 * headers to be aligned on a 4-byte boundary. */
63 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
enum { N_QUEUES = 2 };          /* Number of queues for dpif_recv(). */
enum { MAX_QUEUE_LEN = 128 };   /* Maximum number of packets per queue. */

/* Bit mask that reduces a free-running queue index to a slot number.  Only
 * valid while MAX_QUEUE_LEN is a power of 2. */
enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
69 BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN
));
71 struct dp_netdev_queue
{
72 struct dpif_upcall
*upcalls
[MAX_QUEUE_LEN
];
73 unsigned int head
, tail
;
76 /* Datapath based on the network device interface from netdev.h. */
78 const struct dpif_class
*class;
83 bool drop_frags
; /* Drop all IP fragments, if true. */
84 struct dp_netdev_queue queues
[N_QUEUES
];
85 struct hmap flow_table
; /* Flow table. */
88 long long int n_frags
; /* Number of dropped IP fragments. */
89 long long int n_hit
; /* Number of flow table matches. */
90 long long int n_missed
; /* Number of flow table misses. */
91 long long int n_lost
; /* Number of misses not passed to client. */
95 struct dp_netdev_port
*ports
[MAX_PORTS
];
96 struct list port_list
;
100 /* A port in a netdev-based datapath. */
101 struct dp_netdev_port
{
102 int port_no
; /* Index into dp_netdev's 'ports'. */
103 struct list node
; /* Element in dp_netdev's 'port_list'. */
104 struct netdev
*netdev
;
105 bool internal
; /* Internal port? */
108 /* A flow in dp_netdev's 'flow_table'. */
109 struct dp_netdev_flow
{
110 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
114 long long int used
; /* Last used time, in monotonic msecs. */
115 long long int packet_count
; /* Number of packets matched. */
116 long long int byte_count
; /* Number of bytes matched. */
117 ovs_be16 tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
120 struct nlattr
*actions
;
124 /* Interface to netdev-based datapath. */
127 struct dp_netdev
*dp
;
129 unsigned int dp_serial
;
132 /* All netdev-based datapaths. */
133 static struct shash dp_netdevs
= SHASH_INITIALIZER(&dp_netdevs
);
135 /* Maximum port MTU seen so far. */
136 static int max_mtu
= ETH_PAYLOAD_MAX
;
/* Forward declarations for file-local helpers that are used before their
 * definitions. */
static int get_port_by_number(struct dp_netdev *, uint16_t port_no,
                              struct dp_netdev_port **portp);
static int get_port_by_name(struct dp_netdev *, const char *devname,
                            struct dp_netdev_port **portp);
static void dp_netdev_free(struct dp_netdev *);
static void dp_netdev_flow_flush(struct dp_netdev *);
static int do_add_port(struct dp_netdev *, const char *devname,
                       const char *type, uint16_t port_no);
static int do_del_port(struct dp_netdev *, uint16_t port_no);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
                            bool create, struct dpif **);
149 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
150 int queue_no
, const struct flow
*,
152 static int dp_netdev_execute_actions(struct dp_netdev
*,
153 struct ofpbuf
*, struct flow
*,
154 const struct nlattr
*actions
,
157 static struct dpif_class dpif_dummy_class
;
159 static struct dpif_netdev
*
160 dpif_netdev_cast(const struct dpif
*dpif
)
162 assert(dpif
->dpif_class
->open
== dpif_netdev_open
);
163 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
166 static struct dp_netdev
*
167 get_dp_netdev(const struct dpif
*dpif
)
169 return dpif_netdev_cast(dpif
)->dp
;
173 create_dpif_netdev(struct dp_netdev
*dp
)
175 uint16_t netflow_id
= hash_string(dp
->name
, 0);
176 struct dpif_netdev
*dpif
;
180 dpif
= xmalloc(sizeof *dpif
);
181 dpif_init(&dpif
->dpif
, dp
->class, dp
->name
, netflow_id
>> 8, netflow_id
);
183 dpif
->listen_mask
= 0;
184 dpif
->dp_serial
= dp
->serial
;
190 create_dp_netdev(const char *name
, const struct dpif_class
*class,
191 struct dp_netdev
**dpp
)
193 struct dp_netdev
*dp
;
197 dp
= xzalloc(sizeof *dp
);
199 dp
->name
= xstrdup(name
);
201 dp
->drop_frags
= false;
202 for (i
= 0; i
< N_QUEUES
; i
++) {
203 dp
->queues
[i
].head
= dp
->queues
[i
].tail
= 0;
205 hmap_init(&dp
->flow_table
);
206 list_init(&dp
->port_list
);
207 error
= do_add_port(dp
, name
, "internal", ODPP_LOCAL
);
213 shash_add(&dp_netdevs
, name
, dp
);
220 dpif_netdev_open(const struct dpif_class
*class, const char *name
,
221 bool create
, struct dpif
**dpifp
)
223 struct dp_netdev
*dp
;
225 dp
= shash_find_data(&dp_netdevs
, name
);
230 int error
= create_dp_netdev(name
, class, &dp
);
237 if (dp
->class != class) {
244 *dpifp
= create_dpif_netdev(dp
);
249 dp_netdev_purge_queues(struct dp_netdev
*dp
)
253 for (i
= 0; i
< N_QUEUES
; i
++) {
254 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
256 while (q
->tail
!= q
->head
) {
257 struct dpif_upcall
*upcall
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
259 ofpbuf_delete(upcall
->packet
);
266 dp_netdev_free(struct dp_netdev
*dp
)
268 dp_netdev_flow_flush(dp
);
269 while (dp
->n_ports
> 0) {
270 struct dp_netdev_port
*port
= CONTAINER_OF(
271 dp
->port_list
.next
, struct dp_netdev_port
, node
);
272 do_del_port(dp
, port
->port_no
);
274 dp_netdev_purge_queues(dp
);
275 hmap_destroy(&dp
->flow_table
);
281 dpif_netdev_close(struct dpif
*dpif
)
283 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
284 assert(dp
->open_cnt
> 0);
285 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
286 shash_find_and_delete(&dp_netdevs
, dp
->name
);
293 dpif_netdev_destroy(struct dpif
*dpif
)
295 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
296 dp
->destroyed
= true;
301 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
303 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
304 memset(stats
, 0, sizeof *stats
);
305 stats
->n_frags
= dp
->n_frags
;
306 stats
->n_hit
= dp
->n_hit
;
307 stats
->n_missed
= dp
->n_missed
;
308 stats
->n_lost
= dp
->n_lost
;
313 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
315 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
316 *drop_fragsp
= dp
->drop_frags
;
321 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
323 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
324 dp
->drop_frags
= drop_frags
;
329 do_add_port(struct dp_netdev
*dp
, const char *devname
, const char *type
,
332 struct dp_netdev_port
*port
;
333 struct netdev_options netdev_options
;
334 struct netdev
*netdev
;
339 /* XXX reject devices already in some dp_netdev. */
340 if (type
[0] == '\0' || !strcmp(type
, "system")) {
342 } else if (!strcmp(type
, "internal")) {
345 VLOG_WARN("%s: unsupported port type %s", devname
, type
);
349 /* Open and validate network device. */
350 memset(&netdev_options
, 0, sizeof netdev_options
);
351 netdev_options
.name
= devname
;
352 netdev_options
.ethertype
= NETDEV_ETH_TYPE_ANY
;
353 if (dp
->class == &dpif_dummy_class
) {
354 netdev_options
.type
= "dummy";
355 } else if (internal
) {
356 netdev_options
.type
= "tap";
359 error
= netdev_open(&netdev_options
, &netdev
);
363 /* XXX reject loopback devices */
364 /* XXX reject non-Ethernet devices */
366 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
368 netdev_close(netdev
);
372 port
= xmalloc(sizeof *port
);
373 port
->port_no
= port_no
;
374 port
->netdev
= netdev
;
375 port
->internal
= internal
;
377 netdev_get_mtu(netdev
, &mtu
);
378 if (mtu
!= INT_MAX
&& mtu
> max_mtu
) {
382 list_push_back(&dp
->port_list
, &port
->node
);
383 dp
->ports
[port_no
] = port
;
391 dpif_netdev_port_add(struct dpif
*dpif
, struct netdev
*netdev
,
394 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
397 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
398 if (!dp
->ports
[port_no
]) {
400 return do_add_port(dp
, netdev_get_name(netdev
),
401 netdev_get_type(netdev
), port_no
);
408 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
410 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
411 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
415 is_valid_port_number(uint16_t port_no
)
417 return port_no
< MAX_PORTS
;
421 get_port_by_number(struct dp_netdev
*dp
,
422 uint16_t port_no
, struct dp_netdev_port
**portp
)
424 if (!is_valid_port_number(port_no
)) {
428 *portp
= dp
->ports
[port_no
];
429 return *portp
? 0 : ENOENT
;
434 get_port_by_name(struct dp_netdev
*dp
,
435 const char *devname
, struct dp_netdev_port
**portp
)
437 struct dp_netdev_port
*port
;
439 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
440 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
449 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
451 struct dp_netdev_port
*port
;
455 error
= get_port_by_number(dp
, port_no
, &port
);
460 list_remove(&port
->node
);
461 dp
->ports
[port
->port_no
] = NULL
;
465 name
= xstrdup(netdev_get_name(port
->netdev
));
466 netdev_close(port
->netdev
);
475 answer_port_query(const struct dp_netdev_port
*port
,
476 struct dpif_port
*dpif_port
)
478 dpif_port
->name
= xstrdup(netdev_get_name(port
->netdev
));
479 dpif_port
->type
= xstrdup(port
->internal
? "internal" : "system");
480 dpif_port
->port_no
= port
->port_no
;
484 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
485 struct dpif_port
*dpif_port
)
487 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
488 struct dp_netdev_port
*port
;
491 error
= get_port_by_number(dp
, port_no
, &port
);
493 answer_port_query(port
, dpif_port
);
499 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
500 struct dpif_port
*dpif_port
)
502 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
503 struct dp_netdev_port
*port
;
506 error
= get_port_by_name(dp
, devname
, &port
);
508 answer_port_query(port
, dpif_port
);
514 dpif_netdev_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
520 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
522 hmap_remove(&dp
->flow_table
, &flow
->node
);
528 dp_netdev_flow_flush(struct dp_netdev
*dp
)
530 struct dp_netdev_flow
*flow
, *next
;
532 HMAP_FOR_EACH_SAFE (flow
, next
, node
, &dp
->flow_table
) {
533 dp_netdev_free_flow(dp
, flow
);
538 dpif_netdev_flow_flush(struct dpif
*dpif
)
540 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
541 dp_netdev_flow_flush(dp
);
545 struct dp_netdev_port_state
{
551 dpif_netdev_port_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
553 *statep
= xzalloc(sizeof(struct dp_netdev_port_state
));
558 dpif_netdev_port_dump_next(const struct dpif
*dpif
, void *state_
,
559 struct dpif_port
*dpif_port
)
561 struct dp_netdev_port_state
*state
= state_
;
562 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
565 for (port_no
= state
->port_no
; port_no
< MAX_PORTS
; port_no
++) {
566 struct dp_netdev_port
*port
= dp
->ports
[port_no
];
569 state
->name
= xstrdup(netdev_get_name(port
->netdev
));
570 dpif_port
->name
= state
->name
;
571 dpif_port
->type
= port
->internal
? "internal" : "system";
572 dpif_port
->port_no
= port
->port_no
;
573 state
->port_no
= port_no
+ 1;
581 dpif_netdev_port_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
583 struct dp_netdev_port_state
*state
= state_
;
590 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
592 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
593 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
594 dpif
->dp_serial
= dpif
->dp
->serial
;
602 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
604 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
605 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
606 poll_immediate_wake();
610 static struct dp_netdev_flow
*
611 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const struct flow
*key
)
613 struct dp_netdev_flow
*flow
;
615 HMAP_FOR_EACH_WITH_HASH (flow
, node
, flow_hash(key
, 0), &dp
->flow_table
) {
616 if (flow_equal(&flow
->key
, key
)) {
624 get_dpif_flow_stats(struct dp_netdev_flow
*flow
, struct dpif_flow_stats
*stats
)
626 stats
->n_packets
= flow
->packet_count
;
627 stats
->n_bytes
= flow
->byte_count
;
628 stats
->used
= flow
->used
;
629 stats
->tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
633 dpif_netdev_flow_from_nlattrs(const struct nlattr
*key
, uint32_t key_len
,
636 if (odp_flow_key_to_flow(key
, key_len
, flow
)) {
637 /* This should not happen: it indicates that odp_flow_key_from_flow()
638 * and odp_flow_key_to_flow() disagree on the acceptable form of a
639 * flow. Log the problem as an error, with enough details to enable
641 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
643 if (!VLOG_DROP_ERR(&rl
)) {
647 odp_flow_key_format(key
, key_len
, &s
);
648 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s
));
659 dpif_netdev_flow_get(const struct dpif
*dpif
,
660 const struct nlattr
*nl_key
, size_t nl_key_len
,
661 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
663 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
664 struct dp_netdev_flow
*flow
;
668 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
673 flow
= dp_netdev_lookup_flow(dp
, &key
);
679 get_dpif_flow_stats(flow
, stats
);
682 *actionsp
= ofpbuf_clone_data(flow
->actions
, flow
->actions_len
);
688 dpif_netdev_validate_actions(const struct nlattr
*actions
,
689 size_t actions_len
, bool *mutates
)
691 const struct nlattr
*a
;
695 NL_ATTR_FOR_EACH (a
, left
, actions
, actions_len
) {
696 uint16_t type
= nl_attr_type(a
);
697 int len
= odp_action_len(type
);
699 if (len
!= nl_attr_get_size(a
)) {
704 case ODP_ACTION_ATTR_OUTPUT
:
705 if (nl_attr_get_u32(a
) >= MAX_PORTS
) {
710 case ODP_ACTION_ATTR_CONTROLLER
:
711 case ODP_ACTION_ATTR_DROP_SPOOFED_ARP
:
714 case ODP_ACTION_ATTR_SET_DL_TCI
:
716 if (nl_attr_get_be16(a
) & htons(VLAN_CFI
)) {
721 case ODP_ACTION_ATTR_SET_NW_TOS
:
723 if (nl_attr_get_u8(a
) & IP_ECN_MASK
) {
728 case ODP_ACTION_ATTR_STRIP_VLAN
:
729 case ODP_ACTION_ATTR_SET_DL_SRC
:
730 case ODP_ACTION_ATTR_SET_DL_DST
:
731 case ODP_ACTION_ATTR_SET_NW_SRC
:
732 case ODP_ACTION_ATTR_SET_NW_DST
:
733 case ODP_ACTION_ATTR_SET_TP_SRC
:
734 case ODP_ACTION_ATTR_SET_TP_DST
:
738 case ODP_ACTION_ATTR_SET_TUNNEL
:
739 case ODP_ACTION_ATTR_SET_PRIORITY
:
740 case ODP_ACTION_ATTR_POP_PRIORITY
:
749 set_flow_actions(struct dp_netdev_flow
*flow
,
750 const struct nlattr
*actions
, size_t actions_len
)
755 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
760 flow
->actions
= xrealloc(flow
->actions
, actions_len
);
761 flow
->actions_len
= actions_len
;
762 memcpy(flow
->actions
, actions
, actions_len
);
767 add_flow(struct dpif
*dpif
, const struct flow
*key
,
768 const struct nlattr
*actions
, size_t actions_len
)
770 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
771 struct dp_netdev_flow
*flow
;
774 flow
= xzalloc(sizeof *flow
);
777 error
= set_flow_actions(flow
, actions
, actions_len
);
783 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
788 clear_stats(struct dp_netdev_flow
*flow
)
791 flow
->packet_count
= 0;
792 flow
->byte_count
= 0;
797 dpif_netdev_flow_put(struct dpif
*dpif
, enum dpif_flow_put_flags flags
,
798 const struct nlattr
*nl_key
, size_t nl_key_len
,
799 const struct nlattr
*actions
, size_t actions_len
,
800 struct dpif_flow_stats
*stats
)
802 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
803 struct dp_netdev_flow
*flow
;
807 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
812 flow
= dp_netdev_lookup_flow(dp
, &key
);
814 if (flags
& DPIF_FP_CREATE
) {
815 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
817 memset(stats
, 0, sizeof *stats
);
819 return add_flow(dpif
, &key
, actions
, actions_len
);
827 if (flags
& DPIF_FP_MODIFY
) {
828 int error
= set_flow_actions(flow
, actions
, actions_len
);
831 get_dpif_flow_stats(flow
, stats
);
833 if (flags
& DPIF_FP_ZERO_STATS
) {
845 dpif_netdev_flow_del(struct dpif
*dpif
,
846 const struct nlattr
*nl_key
, size_t nl_key_len
,
847 struct dpif_flow_stats
*stats
)
849 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
850 struct dp_netdev_flow
*flow
;
854 error
= dpif_netdev_flow_from_nlattrs(nl_key
, nl_key_len
, &key
);
859 flow
= dp_netdev_lookup_flow(dp
, &key
);
862 get_dpif_flow_stats(flow
, stats
);
864 dp_netdev_free_flow(dp
, flow
);
871 struct dp_netdev_flow_state
{
874 struct nlattr
*actions
;
875 struct odputil_keybuf keybuf
;
876 struct dpif_flow_stats stats
;
880 dpif_netdev_flow_dump_start(const struct dpif
*dpif OVS_UNUSED
, void **statep
)
882 struct dp_netdev_flow_state
*state
;
884 *statep
= state
= xmalloc(sizeof *state
);
887 state
->actions
= NULL
;
892 dpif_netdev_flow_dump_next(const struct dpif
*dpif
, void *state_
,
893 const struct nlattr
**key
, size_t *key_len
,
894 const struct nlattr
**actions
, size_t *actions_len
,
895 const struct dpif_flow_stats
**stats
)
897 struct dp_netdev_flow_state
*state
= state_
;
898 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
899 struct dp_netdev_flow
*flow
;
900 struct hmap_node
*node
;
902 node
= hmap_at_position(&dp
->flow_table
, &state
->bucket
, &state
->offset
);
907 flow
= CONTAINER_OF(node
, struct dp_netdev_flow
, node
);
912 ofpbuf_use_stack(&buf
, &state
->keybuf
, sizeof state
->keybuf
);
913 odp_flow_key_from_flow(&buf
, &flow
->key
);
920 free(state
->actions
);
921 state
->actions
= xmemdup(flow
->actions
, flow
->actions_len
);
923 *actions
= state
->actions
;
924 *actions_len
= flow
->actions_len
;
928 get_dpif_flow_stats(flow
, &state
->stats
);
929 *stats
= &state
->stats
;
936 dpif_netdev_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
938 struct dp_netdev_flow_state
*state
= state_
;
940 free(state
->actions
);
946 dpif_netdev_execute(struct dpif
*dpif
,
947 const struct nlattr
*actions
, size_t actions_len
,
948 const struct ofpbuf
*packet
)
950 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
956 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
960 error
= dpif_netdev_validate_actions(actions
, actions_len
, &mutates
);
966 /* We need a deep copy of 'packet' since we're going to modify its
968 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
969 ofpbuf_reserve(©
, DP_NETDEV_HEADROOM
);
970 ofpbuf_put(©
, packet
->data
, packet
->size
);
972 /* We still need a shallow copy of 'packet', even though we won't
973 * modify its data, because flow_extract() modifies packet->l2, etc.
974 * We could probably get away with modifying those but it's more polite
978 flow_extract(©
, 0, -1, &key
);
979 error
= dp_netdev_execute_actions(dp
, ©
, &key
, actions
, actions_len
);
981 ofpbuf_uninit(©
);
987 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
989 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
990 *listen_mask
= dpif_netdev
->listen_mask
;
995 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
997 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
998 dpif_netdev
->listen_mask
= listen_mask
;
1002 static struct dp_netdev_queue
*
1003 find_nonempty_queue(struct dpif
*dpif
)
1005 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1006 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1007 int mask
= dpif_netdev
->listen_mask
;
1010 for (i
= 0; i
< N_QUEUES
; i
++) {
1011 struct dp_netdev_queue
*q
= &dp
->queues
[i
];
1012 if (q
->head
!= q
->tail
&& mask
& (1u << i
)) {
1020 dpif_netdev_recv(struct dpif
*dpif
, struct dpif_upcall
*upcall
)
1022 struct dp_netdev_queue
*q
= find_nonempty_queue(dpif
);
1024 struct dpif_upcall
*u
= q
->upcalls
[q
->tail
++ & QUEUE_MASK
];
1035 dpif_netdev_recv_wait(struct dpif
*dpif
)
1037 if (find_nonempty_queue(dpif
)) {
1038 poll_immediate_wake();
1040 /* No messages ready to be received, and dp_wait() will ensure that we
1041 * wake up to queue new messages, so there is nothing to do. */
1046 dpif_netdev_recv_purge(struct dpif
*dpif
)
1048 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
1049 dp_netdev_purge_queues(dpif_netdev
->dp
);
1053 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, struct flow
*key
,
1054 const struct ofpbuf
*packet
)
1056 flow
->used
= time_msec();
1057 flow
->packet_count
++;
1058 flow
->byte_count
+= packet
->size
;
1059 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
1060 struct tcp_header
*th
= packet
->l4
;
1061 flow
->tcp_ctl
|= th
->tcp_ctl
;
1066 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1067 struct ofpbuf
*packet
)
1069 struct dp_netdev_flow
*flow
;
1072 if (packet
->size
< ETH_HEADER_LEN
) {
1075 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
1080 flow
= dp_netdev_lookup_flow(dp
, &key
);
1082 dp_netdev_flow_used(flow
, &key
, packet
);
1083 dp_netdev_execute_actions(dp
, packet
, &key
,
1084 flow
->actions
, flow
->actions_len
);
1088 dp_netdev_output_control(dp
, packet
, DPIF_UC_MISS
, &key
, 0);
1093 dpif_netdev_run(struct dpif
*dpif
)
1095 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1096 struct dp_netdev_port
*port
;
1097 struct ofpbuf packet
;
1099 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ VLAN_ETH_HEADER_LEN
+ max_mtu
);
1101 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1104 /* Reset packet contents. */
1105 ofpbuf_clear(&packet
);
1106 ofpbuf_reserve(&packet
, DP_NETDEV_HEADROOM
);
1108 error
= netdev_recv(port
->netdev
, &packet
);
1110 dp_netdev_port_input(dp
, port
, &packet
);
1111 } else if (error
!= EAGAIN
&& error
!= EOPNOTSUPP
) {
1112 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1113 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1114 netdev_get_name(port
->netdev
), strerror(error
));
1117 ofpbuf_uninit(&packet
);
1121 dpif_netdev_wait(struct dpif
*dpif
)
1123 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
1124 struct dp_netdev_port
*port
;
1126 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1127 netdev_recv_wait(port
->netdev
);
1132 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1134 struct vlan_eth_header
*veh
= packet
->l2
;
1135 if (packet
->size
>= sizeof *veh
1136 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1137 struct eth_header tmp
;
1139 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1140 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1141 tmp
.eth_type
= veh
->veth_next_type
;
1143 ofpbuf_pull(packet
, VLAN_HEADER_LEN
);
1144 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1145 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1150 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1152 struct eth_header
*eh
= packet
->l2
;
1153 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1157 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1159 struct eth_header
*eh
= packet
->l2
;
1160 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1164 is_ip(const struct ofpbuf
*packet
, const struct flow
*key
)
1166 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
1170 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const struct flow
*key
,
1171 const struct nlattr
*a
)
1173 if (is_ip(packet
, key
)) {
1174 struct ip_header
*nh
= packet
->l3
;
1175 ovs_be32 ip
= nl_attr_get_be32(a
);
1176 uint16_t type
= nl_attr_type(a
);
1179 field
= type
== ODP_ACTION_ATTR_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1180 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1181 struct tcp_header
*th
= packet
->l4
;
1182 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, ip
);
1183 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1184 struct udp_header
*uh
= packet
->l4
;
1186 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, ip
);
1187 if (!uh
->udp_csum
) {
1188 uh
->udp_csum
= htons(0xffff);
1192 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, ip
);
1198 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const struct flow
*key
,
1201 if (is_ip(packet
, key
)) {
1202 struct ip_header
*nh
= packet
->l3
;
1203 uint8_t *field
= &nh
->ip_tos
;
1205 /* Set the DSCP bits and preserve the ECN bits. */
1206 uint8_t new = nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1208 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1209 htons((uint16_t) new));
1215 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const struct flow
*key
,
1216 const struct nlattr
*a
)
1218 if (is_ip(packet
, key
)) {
1219 uint16_t type
= nl_attr_type(a
);
1220 ovs_be16 port
= nl_attr_get_be16(a
);
1223 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1224 struct tcp_header
*th
= packet
->l4
;
1225 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1226 ? &th
->tcp_src
: &th
->tcp_dst
);
1227 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, port
);
1229 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1230 struct udp_header
*uh
= packet
->l4
;
1231 field
= (type
== ODP_ACTION_ATTR_SET_TP_SRC
1232 ? &uh
->udp_src
: &uh
->udp_dst
);
1233 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, port
);
1242 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1245 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1247 netdev_send(p
->netdev
, packet
);
1252 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1253 int queue_no
, const struct flow
*flow
, uint64_t arg
)
1255 struct dp_netdev_queue
*q
= &dp
->queues
[queue_no
];
1256 struct dpif_upcall
*upcall
;
1260 if (q
->head
- q
->tail
>= MAX_QUEUE_LEN
) {
1265 buf
= ofpbuf_new(ODPUTIL_FLOW_KEY_BYTES
+ 2 + packet
->size
);
1266 odp_flow_key_from_flow(buf
, flow
);
1267 key_len
= buf
->size
;
1268 ofpbuf_pull(buf
, key_len
);
1269 ofpbuf_reserve(buf
, 2);
1270 ofpbuf_put(buf
, packet
->data
, packet
->size
);
1272 upcall
= xzalloc(sizeof *upcall
);
1273 upcall
->type
= queue_no
;
1274 upcall
->packet
= buf
;
1275 upcall
->key
= buf
->base
;
1276 upcall
->key_len
= key_len
;
1277 upcall
->userdata
= arg
;
1279 q
->upcalls
[q
->head
++ & QUEUE_MASK
] = upcall
;
1284 /* Returns true if 'packet' is an invalid Ethernet+IPv4 ARP packet: one with
1285 * screwy or truncated header fields or one whose inner and outer Ethernet
1286 * address differ. */
1288 dp_netdev_is_spoofed_arp(struct ofpbuf
*packet
, const struct flow
*key
)
1290 struct arp_eth_header
*arp
;
1291 struct eth_header
*eth
;
1294 if (key
->dl_type
!= htons(ETH_TYPE_ARP
)) {
1298 l3_size
= (char *) ofpbuf_end(packet
) - (char *) packet
->l3
;
1299 if (l3_size
< sizeof(struct arp_eth_header
)) {
1305 return (arp
->ar_hrd
!= htons(ARP_HRD_ETHERNET
)
1306 || arp
->ar_pro
!= htons(ARP_PRO_IP
)
1307 || arp
->ar_hln
!= ETH_HEADER_LEN
1309 || !eth_addr_equals(arp
->ar_sha
, eth
->eth_src
));
1313 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1314 struct ofpbuf
*packet
, struct flow
*key
,
1315 const struct nlattr
*actions
,
1318 const struct nlattr
*a
;
1321 NL_ATTR_FOR_EACH_UNSAFE (a
, left
, actions
, actions_len
) {
1322 switch (nl_attr_type(a
)) {
1323 case ODP_ACTION_ATTR_OUTPUT
:
1324 dp_netdev_output_port(dp
, packet
, nl_attr_get_u32(a
));
1327 case ODP_ACTION_ATTR_CONTROLLER
:
1328 dp_netdev_output_control(dp
, packet
, DPIF_UC_ACTION
,
1329 key
, nl_attr_get_u64(a
));
1332 case ODP_ACTION_ATTR_SET_DL_TCI
:
1333 eth_set_vlan_tci(packet
, nl_attr_get_be16(a
));
1336 case ODP_ACTION_ATTR_STRIP_VLAN
:
1337 dp_netdev_strip_vlan(packet
);
1340 case ODP_ACTION_ATTR_SET_DL_SRC
:
1341 dp_netdev_set_dl_src(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1344 case ODP_ACTION_ATTR_SET_DL_DST
:
1345 dp_netdev_set_dl_dst(packet
, nl_attr_get_unspec(a
, ETH_ADDR_LEN
));
1348 case ODP_ACTION_ATTR_SET_NW_SRC
:
1349 case ODP_ACTION_ATTR_SET_NW_DST
:
1350 dp_netdev_set_nw_addr(packet
, key
, a
);
1353 case ODP_ACTION_ATTR_SET_NW_TOS
:
1354 dp_netdev_set_nw_tos(packet
, key
, nl_attr_get_u8(a
));
1357 case ODP_ACTION_ATTR_SET_TP_SRC
:
1358 case ODP_ACTION_ATTR_SET_TP_DST
:
1359 dp_netdev_set_tp_port(packet
, key
, a
);
1362 case ODP_ACTION_ATTR_DROP_SPOOFED_ARP
:
1363 if (dp_netdev_is_spoofed_arp(packet
, key
)) {
1371 const struct dpif_class dpif_netdev_class
= {
1373 NULL
, /* enumerate */
1376 dpif_netdev_destroy
,
1379 dpif_netdev_get_stats
,
1380 dpif_netdev_get_drop_frags
,
1381 dpif_netdev_set_drop_frags
,
1382 dpif_netdev_port_add
,
1383 dpif_netdev_port_del
,
1384 dpif_netdev_port_query_by_number
,
1385 dpif_netdev_port_query_by_name
,
1386 dpif_netdev_get_max_ports
,
1387 dpif_netdev_port_dump_start
,
1388 dpif_netdev_port_dump_next
,
1389 dpif_netdev_port_dump_done
,
1390 dpif_netdev_port_poll
,
1391 dpif_netdev_port_poll_wait
,
1392 dpif_netdev_flow_get
,
1393 dpif_netdev_flow_put
,
1394 dpif_netdev_flow_del
,
1395 dpif_netdev_flow_flush
,
1396 dpif_netdev_flow_dump_start
,
1397 dpif_netdev_flow_dump_next
,
1398 dpif_netdev_flow_dump_done
,
1399 dpif_netdev_execute
,
1400 dpif_netdev_recv_get_mask
,
1401 dpif_netdev_recv_set_mask
,
1402 NULL
, /* get_sflow_probability */
1403 NULL
, /* set_sflow_probability */
1404 NULL
, /* queue_to_priority */
1406 dpif_netdev_recv_wait
,
1407 dpif_netdev_recv_purge
,
1411 dpif_dummy_register(void)
1413 if (!dpif_dummy_class
.type
) {
1414 dpif_dummy_class
= dpif_netdev_class
;
1415 dpif_dummy_class
.type
= "dummy";
1416 dp_register_provider(&dpif_dummy_class
);