2 * Copyright (c) 2009, 2010 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <netinet/in.h>
26 #include <sys/socket.h>
30 #include <sys/ioctl.h>
35 #include "dpif-provider.h"
41 #include "ofp-print.h"
44 #include "poll-loop.h"
/* Logging module tag plus compile-time sizing limits for the userspace
 * ("netdev") datapath.  NOTE(review): the file is a garbled extraction —
 * statements are split across lines and some physical lines are missing
 * (the embedded original line numbers skip); code below is byte-identical
 * to the surviving lines. */
50 VLOG_DEFINE_THIS_MODULE(dpif_netdev
)
52 /* Configuration parameters. */
53 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
54 enum { MAX_QUEUE_LEN
= 100 }; /* Maximum number of packets per queue. */
55 enum { MAX_PORTS
= 256 }; /* Maximum number of ports. */
56 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
58 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
59 * headers to be aligned on a 4-byte boundary. */
60 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
/* Member fragment of 'struct dp_netdev', the per-datapath state object.
 * NOTE(review): the struct's opening line and several members (orig. lines
 * 63-68, 72-73, 78-80) were lost in extraction — presumably dp_idx, refcount,
 * and serial fields among them; confirm against the upstream file. */
62 /* Datapath based on the network device interface from netdev.h. */
69 bool drop_frags
; /* Drop all IP fragments, if true. */
70 struct ovs_queue queues
[N_QUEUES
]; /* Messages queued for dpif_recv(). */
71 struct hmap flow_table
; /* Flow table. */
/* Statistics fields read back verbatim by dpif_netdev_get_stats(). */
74 long long int n_frags
; /* Number of dropped IP fragments. */
75 long long int n_hit
; /* Number of flow table matches. */
76 long long int n_missed
; /* Number of flow table misses. */
77 long long int n_lost
; /* Number of misses not passed to client. */
/* Port storage: dense array indexed by port number plus a linked list
 * used for iteration (see dpif_netdev_port_list()). */
81 struct dp_netdev_port
*ports
[MAX_PORTS
];
82 struct list port_list
;
/* One port attached to a dp_netdev.  Lives both in the dp's 'ports' array
 * (indexed by 'port_no') and on its 'port_list'.  NOTE(review): closing
 * brace elided by extraction. */
86 /* A port in a netdev-based datapath. */
87 struct dp_netdev_port
{
88 int port_no
; /* Index into dp_netdev's 'ports'. */
89 struct list node
; /* Element in dp_netdev's 'port_list'. */
90 struct netdev
*netdev
;
91 bool internal
; /* Internal port (as ODP_PORT_INTERNAL)? */
/* One installed flow: match key (orig. lines 97-99 elided — presumably the
 * flow_t 'key' member), usage statistics, and the action list to apply to
 * matching packets.  NOTE(review): closing brace elided by extraction. */
94 /* A flow in dp_netdev's 'flow_table'. */
95 struct dp_netdev_flow
{
96 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
100 struct timespec used
; /* Last used time. */
101 long long int packet_count
; /* Number of packets matched. */
102 long long int byte_count
; /* Number of bytes matched. */
103 uint16_t tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
/* Actions owned by this flow; reallocated in set_flow_actions(). */
106 union odp_action
*actions
;
107 unsigned int n_actions
;
/* Per-client handle ('struct dpif_netdev', header line elided): wraps a
 * 'struct dpif' (member elided, orig. lines 111-112/114) plus a pointer to
 * the shared dp_netdev and a serial-number snapshot used by port_poll().
 * Below: the global table of datapaths, indexed by dp_idx. */
110 /* Interface to netdev-based datapath. */
113 struct dp_netdev
*dp
;
115 unsigned int dp_serial
;
118 /* All netdev-based datapaths. */
119 static struct dp_netdev
*dp_netdevs
[256];
/* NOTE(review): 'dp_netdev_list' is non-static here; if it is not meant to
 * be visible outside this file, it should be static like 'dp_netdevs'. */
120 struct list dp_netdev_list
= LIST_INITIALIZER(&dp_netdev_list
);
121 enum { N_DP_NETDEVS
= ARRAY_SIZE(dp_netdevs
) };
123 /* Maximum port MTU seen so far. */
124 static int max_mtu
= ETH_PAYLOAD_MAX
;
/* Forward declarations for helpers defined later in the file.
 * NOTE(review): the do_add_port() prototype is truncated — its trailing
 * parameter line (orig. line 133) was lost in extraction. */
126 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
127 struct dp_netdev_port
**portp
);
128 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
129 struct dp_netdev_port
**portp
);
130 static void dp_netdev_free(struct dp_netdev
*);
131 static void dp_netdev_flow_flush(struct dp_netdev
*);
132 static int do_add_port(struct dp_netdev
*, const char *devname
, uint16_t flags
,
134 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
135 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
136 int queue_no
, int port_no
, uint32_t arg
);
137 static int dp_netdev_execute_actions(struct dp_netdev
*,
138 struct ofpbuf
*, const flow_t
*,
139 const union odp_action
*, int n
);
/* dpif_netdev_cast(): downcasts a generic 'dpif' to this implementation's
 * wrapper struct after asserting it belongs to dpif_netdev_class.
 * get_dp_netdev(): convenience accessor for the wrapped dp_netdev.
 * NOTE(review): function braces (orig. lines 143/146, 150/152) elided. */
141 static struct dpif_netdev
*
142 dpif_netdev_cast(const struct dpif
*dpif
)
144 dpif_assert_class(dpif
, &dpif_netdev_class
);
145 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
148 static struct dp_netdev
*
149 get_dp_netdev(const struct dpif
*dpif
)
151 return dpif_netdev_cast(dpif
)->dp
;
/* name_to_dp_idx(): parses a name of the form "dp<N>" into a datapath index,
 * accepting it only when 0 <= N < N_DP_NETDEVS.  The cast to unsigned char
 * before isdigit() avoids UB for negative char values.  NOTE(review): the
 * return statements and the non-match fallthrough (orig. lines 160-164) were
 * elided — presumably returns dp_idx on success and -1 otherwise; confirm.
 *
 * find_dp_netdev(): resolves 'name' to a dp_netdev either by "dpN" index or,
 * failing that, by scanning every datapath's ports for a device of that name.
 * Orig. lines 168-171, 173, 175-176, 179, 182-186 elided. */
155 name_to_dp_idx(const char *name
)
157 if (!strncmp(name
, "dp", 2) && isdigit((unsigned char)name
[2])) {
158 int dp_idx
= atoi(name
+ 2);
159 if (dp_idx
>= 0 && dp_idx
< N_DP_NETDEVS
) {
166 static struct dp_netdev
*
167 find_dp_netdev(const char *name
)
172 dp_idx
= name_to_dp_idx(name
);
174 return dp_netdevs
[dp_idx
];
177 for (i
= 0; i
< N_DP_NETDEVS
; i
++) {
178 struct dp_netdev
*dp
= dp_netdevs
[i
];
180 struct dp_netdev_port
*port
;
181 if (!get_port_by_name(dp
, name
, &port
)) {
/* Allocates a client-side dpif_netdev handle for 'dp': builds the "dpN"
 * name, initializes the embedded dpif with dpif_netdev_class, and snapshots
 * dp->serial so port_poll() can detect later port changes.
 * NOTE(review): return-type line, braces, reference-count bump and the
 * free(dpname)/return lines (orig. 191, 193-196, 200, 203-205) elided.
 * 'dpname' is heap-allocated by xasprintf(); the elided tail presumably
 * frees it — confirm against upstream. */
190 create_dpif_netdev(struct dp_netdev
*dp
)
192 struct dpif_netdev
*dpif
;
197 dpname
= xasprintf("dp%d", dp
->dp_idx
);
198 dpif
= xmalloc(sizeof *dpif
);
199 dpif_init(&dpif
->dpif
, &dpif_netdev_class
, dpname
, dp
->dp_idx
, dp
->dp_idx
);
201 dpif
->listen_mask
= 0;
202 dpif
->dp_serial
= dp
->serial
;
/* Creates datapath number 'dp_idx': rejects an index already in use
 * (elided branch presumably returns EBUSY), zero-allocates the dp_netdev,
 * registers it in dp_netdevs[] and on dp_netdev_list, initializes queues,
 * flow table and port list, adds the local internal port (ODPP_LOCAL), and
 * hands back a dpif via create_dpif_netdev().  NOTE(review): error handling
 * after do_add_port and several field initializations (orig. lines 212-218,
 * 222-223, 227, 231-235, 237) elided; code below is the surviving text. */
209 create_dp_netdev(const char *name
, int dp_idx
, struct dpif
**dpifp
)
211 struct dp_netdev
*dp
;
215 if (dp_netdevs
[dp_idx
]) {
219 /* Create datapath. */
220 dp_netdevs
[dp_idx
] = dp
= xzalloc(sizeof *dp
);
221 list_push_back(&dp_netdev_list
, &dp
->node
);
224 dp
->drop_frags
= false;
225 for (i
= 0; i
< N_QUEUES
; i
++) {
226 queue_init(&dp
->queues
[i
]);
228 hmap_init(&dp
->flow_table
);
229 list_init(&dp
->port_list
);
230 error
= do_add_port(dp
, name
, ODP_PORT_INTERNAL
, ODPP_LOCAL
);
236 *dpifp
= create_dpif_netdev(dp
);
/* dpif class 'open' hook.  Visible logic: if 'name' is not yet open, create
 * it — using the explicit "dpN" index when name_to_dp_idx() recognizes one,
 * otherwise probing indexes 0..N_DP_NETDEVS-1 until create_dp_netdev()
 * returns something other than EBUSY.  If the datapath already exists, a new
 * dpif handle is created for it.  NOTE(review): the 'create' flag's guard
 * branches and several returns (orig. lines 242-244, 246-247, 249, 251,
 * 256-259, 261-264, 266, 268) were elided; the interplay between 'create'
 * and the existing-datapath path cannot be confirmed from what remains. */
241 dpif_netdev_open(const char *name
, const char *type OVS_UNUSED
, bool create
,
245 if (find_dp_netdev(name
)) {
248 int dp_idx
= name_to_dp_idx(name
);
250 return create_dp_netdev(name
, dp_idx
, dpifp
);
252 /* Scan for unused dp_idx number. */
253 for (dp_idx
= 0; dp_idx
< N_DP_NETDEVS
; dp_idx
++) {
254 int error
= create_dp_netdev(name
, dp_idx
, dpifp
);
255 if (error
!= EBUSY
) {
260 /* All datapath numbers in use. */
265 struct dp_netdev
*dp
= find_dp_netdev(name
);
267 *dpifp
= create_dpif_netdev(dp
);
/* Tears down a datapath: flushes all flows, deletes every port (walking
 * port_list via its head element), destroys the message queues and flow
 * table, clears the dp_netdevs[] slot and unlinks from dp_netdev_list.
 * NOTE(review): the final free(dp) (orig. line 292) and loop-closing braces
 * were elided by extraction. */
276 dp_netdev_free(struct dp_netdev
*dp
)
280 dp_netdev_flow_flush(dp
);
281 while (dp
->n_ports
> 0) {
282 struct dp_netdev_port
*port
= CONTAINER_OF(
283 dp
->port_list
.next
, struct dp_netdev_port
, node
);
284 do_del_port(dp
, port
->port_no
);
286 for (i
= 0; i
< N_QUEUES
; i
++) {
287 queue_destroy(&dp
->queues
[i
]);
289 hmap_destroy(&dp
->flow_table
);
290 dp_netdevs
[dp
->dp_idx
] = NULL
;
291 list_remove(&dp
->node
);
/* dpif_netdev_close(): drops one reference to the shared dp_netdev; when the
 * count hits zero and the datapath was marked destroyed, the elided body
 * (orig. line 301) presumably calls dp_netdev_free().  Also elided: the
 * free of the dpif wrapper itself.
 * dpif_netdev_destroy(): marks the datapath for destruction on last close
 * rather than freeing it immediately; elided tail presumably returns 0. */
296 dpif_netdev_close(struct dpif
*dpif
)
298 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
299 assert(dp
->open_cnt
> 0);
300 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
307 dpif_netdev_destroy(struct dpif
*dpif
)
309 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
310 dp
->destroyed
= true;
/* Fills 'stats' from the datapath's counters.  Zeroes the whole struct
 * first so fields not explicitly set below read as 0.  Capacity limits are
 * the compile-time constants MAX_FLOWS/MAX_PORTS/MAX_QUEUE_LEN.
 * NOTE(review): the 'return 0;' tail (orig. line 330) was elided. */
315 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
317 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
318 memset(stats
, 0, sizeof *stats
);
319 stats
->n_flows
= hmap_count(&dp
->flow_table
);
320 stats
->cur_capacity
= hmap_capacity(&dp
->flow_table
);
321 stats
->max_capacity
= MAX_FLOWS
;
322 stats
->n_ports
= dp
->n_ports
;
323 stats
->max_ports
= MAX_PORTS
;
324 stats
->n_frags
= dp
->n_frags
;
325 stats
->n_hit
= dp
->n_hit
;
326 stats
->n_missed
= dp
->n_missed
;
327 stats
->n_lost
= dp
->n_lost
;
328 stats
->max_miss_queue
= MAX_QUEUE_LEN
;
329 stats
->max_action_queue
= MAX_QUEUE_LEN
;
/* Getter/setter for the datapath's drop-IP-fragments flag; both simply
 * mirror dp->drop_frags.  NOTE(review): 'return 0;' tails (orig. lines
 * 338, 346) elided by extraction. */
334 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
336 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
337 *drop_fragsp
= dp
->drop_frags
;
342 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
344 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
345 dp
->drop_frags
= drop_frags
;
/* Opens device 'devname' and installs it as port 'port_no' (parameter line
 * elided).  ODP_PORT_INTERNAL in 'flags' selects a "tap" netdev type; the
 * device is put into promiscuous mode (non-permanently: the 'false'
 * argument to netdev_turn_flags_on means the flag is not saved/restored).
 * On success the port is linked into both dp->port_list and dp->ports[].
 * NOTE(review): error-return paths after netdev_open()/flags, the max_mtu
 * update that presumably uses the netdev_get_mtu() result, n_ports++ and
 * serial bump (orig. lines 357-359, 366, 368-369, 371-373, 378, 380-382,
 * 389-392, 395-398) were elided.  The netdev_get_mtu() return value is
 * unchecked in the surviving text. */
350 do_add_port(struct dp_netdev
*dp
, const char *devname
, uint16_t flags
,
353 bool internal
= (flags
& ODP_PORT_INTERNAL
) != 0;
354 struct dp_netdev_port
*port
;
355 struct netdev_options netdev_options
;
356 struct netdev
*netdev
;
360 /* XXX reject devices already in some dp_netdev. */
362 /* Open and validate network device. */
363 memset(&netdev_options
, 0, sizeof netdev_options
);
364 netdev_options
.name
= devname
;
365 netdev_options
.ethertype
= NETDEV_ETH_TYPE_ANY
;
367 netdev_options
.type
= "tap";
370 error
= netdev_open(&netdev_options
, &netdev
);
374 /* XXX reject loopback devices */
375 /* XXX reject non-Ethernet devices */
377 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
379 netdev_close(netdev
);
383 port
= xmalloc(sizeof *port
);
384 port
->port_no
= port_no
;
385 port
->netdev
= netdev
;
386 port
->internal
= internal
;
388 netdev_get_mtu(netdev
, &mtu
);
393 list_push_back(&dp
->port_list
, &port
->node
);
394 dp
->ports
[port_no
] = port
;
402 dpif_netdev_port_add(struct dpif
*dpif
, const char *devname
, uint16_t flags
,
405 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
408 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
409 if (!dp
->ports
[port_no
]) {
411 return do_add_port(dp
, devname
, flags
, port_no
);
/* dpif 'port_del' hook: refuses to delete the local port (ODPP_LOCAL),
 * otherwise delegates to do_del_port().
 * is_valid_port_number(): range check against MAX_PORTS; return-type line
 * (orig. line 424) elided by extraction. */
418 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
420 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
421 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
425 is_valid_port_number(uint16_t port_no
)
427 return port_no
< MAX_PORTS
;
/* get_port_by_number(): looks up a port by index; the elided branch after
 * the range check (orig. line 435) presumably returns EINVAL.  On a valid
 * index, stores the slot (possibly NULL) and maps empty slots to ENOENT.
 * get_port_by_name(): linear scan of port_list comparing netdev names;
 * elided lines (orig. 451-455) presumably store the match in *portp and
 * return 0 / ENOENT. */
431 get_port_by_number(struct dp_netdev
*dp
,
432 uint16_t port_no
, struct dp_netdev_port
**portp
)
434 if (!is_valid_port_number(port_no
)) {
438 *portp
= dp
->ports
[port_no
];
439 return *portp
? 0 : ENOENT
;
444 get_port_by_name(struct dp_netdev
*dp
,
445 const char *devname
, struct dp_netdev_port
**portp
)
447 struct dp_netdev_port
*port
;
449 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
450 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
/* Removes port 'port_no': looks it up, unlinks it from port_list and clears
 * its dp->ports[] slot, then closes the underlying netdev.  The device name
 * is duplicated before the close — presumably so it can be reported/freed by
 * elided code (orig. lines 466-469, 472-474, 477-481), which also would
 * decrement n_ports, bump dp->serial, and free 'port'/'name'; confirm
 * against upstream.  free_netdev semantics of the elided tail unknown. */
459 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
461 struct dp_netdev_port
*port
;
465 error
= get_port_by_number(dp
, port_no
, &port
);
470 list_remove(&port
->node
);
471 dp
->ports
[port
->port_no
] = NULL
;
475 name
= xstrdup(netdev_get_name(port
->netdev
));
476 netdev_close(port
->netdev
);
/* Serializes one port into client-facing 'odp_port' form: zeroes the struct,
 * copies the device name (ovs_strlcpy guarantees NUL termination), the port
 * number, and translates the 'internal' flag back to ODP_PORT_INTERNAL.
 * NOTE(review): braces (orig. lines 486/492) elided by extraction. */
485 answer_port_query(const struct dp_netdev_port
*port
, struct odp_port
*odp_port
)
487 memset(odp_port
, 0, sizeof *odp_port
);
488 ovs_strlcpy(odp_port
->devname
, netdev_get_name(port
->netdev
),
489 sizeof odp_port
->devname
);
490 odp_port
->port
= port
->port_no
;
491 odp_port
->flags
= port
->internal
? ODP_PORT_INTERNAL
: 0;
/* dpif port-query hooks: resolve the port by number or by device name, then
 * serialize it with answer_port_query().  NOTE(review): the error-check
 * between lookup and serialization and the 'return error;' tails (orig.
 * lines 503, 505-506, 518, 520-521) were elided — presumably each returns
 * early when the lookup fails. */
495 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
496 struct odp_port
*odp_port
)
498 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
499 struct dp_netdev_port
*port
;
502 error
= get_port_by_number(dp
, port_no
, &port
);
504 answer_port_query(port
, odp_port
);
510 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
511 struct odp_port
*odp_port
)
513 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
514 struct dp_netdev_port
*port
;
517 error
= get_port_by_name(dp
, devname
, &port
);
519 answer_port_query(port
, odp_port
);
/* dp_netdev_free_flow(): unlinks a flow from the table; the elided tail
 * (orig. lines 528-529) presumably frees flow->actions and the flow itself.
 * dp_netdev_flow_flush(): deletes every flow; HMAP_FOR_EACH_SAFE permits
 * removal during iteration.
 * dpif_netdev_flow_flush(): dpif-facing wrapper; elided 'return 0;'. */
525 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
527 hmap_remove(&dp
->flow_table
, &flow
->node
);
533 dp_netdev_flow_flush(struct dp_netdev
*dp
)
535 struct dp_netdev_flow
*flow
, *next
;
537 HMAP_FOR_EACH_SAFE (flow
, next
, node
, &dp
->flow_table
) {
538 dp_netdev_free_flow(dp
, flow
);
543 dpif_netdev_flow_flush(struct dpif
*dpif
)
545 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
546 dp_netdev_flow_flush(dp
);
/* dpif 'port_list' hook: serializes up to 'n' ports into 'ports'.  The
 * elided lines (orig. 555-557, 560-562, 564-566) presumably initialize the
 * index 'i', bound the loop by 'n', increment 'i', and return the total
 * port count — confirm against upstream before relying on semantics. */
551 dpif_netdev_port_list(const struct dpif
*dpif
, struct odp_port
*ports
, int n
)
553 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
554 struct dp_netdev_port
*port
;
558 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
559 struct odp_port
*odp_port
= &ports
[i
];
563 answer_port_query(port
, odp_port
);
/* Port-change notification pair.  port_poll(): compares the handle's cached
 * serial against the datapath's current serial; on mismatch it resyncs the
 * cache (elided lines presumably return ENOBUFS vs. EAGAIN).  Note the
 * 'devnamep' out-parameter is marked OVS_UNUSED — this implementation never
 * reports which device changed.  port_poll_wait(): wakes the poll loop
 * immediately when a change is pending. */
570 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
572 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
573 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
574 dpif
->dp_serial
= dpif
->dp
->serial
;
582 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
584 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
585 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
586 poll_immediate_wake();
/* Exact-match flow lookup: hashes 'key' with flow_hash(key, 0) and walks the
 * bucket comparing full keys.  The assert documents the invariant that
 * callers have zeroed key->reserved[] (see add_flow()).  Elided lines
 * presumably 'return flow;' on match and NULL otherwise. */
590 static struct dp_netdev_flow
*
591 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const flow_t
*key
)
593 struct dp_netdev_flow
*flow
;
595 assert(!key
->reserved
[0] && !key
->reserved
[1] && !key
->reserved
[2]);
596 HMAP_FOR_EACH_WITH_HASH (flow
, node
, flow_hash(key
, 0), &dp
->flow_table
) {
597 if (flow_equal(&flow
->key
, key
)) {
/* Serializes 'flow' into client-facing 'odp_flow': key, packet/byte counts,
 * last-used timestamp, and accumulated TCP flags.  When the client supplied
 * an actions buffer (n_actions > 0) up to MIN(client, flow) actions are
 * copied and the flow's true action count reported.  ODPFF_ZERO_TCP_FLAGS
 * resets the accumulated tcp_ctl (reset statement elided, orig. line 625).
 * The trailing odp_flow->stats.error = ENOENT belongs to the elided
 * 'flow == NULL' else-branch (orig. lines 607-608, 626-628) — in that case
 * only stats.error is meaningful to the caller.  Confirm structure against
 * upstream; braces and the guard are missing from the surviving text. */
605 answer_flow_query(struct dp_netdev_flow
*flow
, uint32_t query_flags
,
606 struct odp_flow
*odp_flow
)
609 odp_flow
->key
= flow
->key
;
610 odp_flow
->stats
.n_packets
= flow
->packet_count
;
611 odp_flow
->stats
.n_bytes
= flow
->byte_count
;
612 odp_flow
->stats
.used_sec
= flow
->used
.tv_sec
;
613 odp_flow
->stats
.used_nsec
= flow
->used
.tv_nsec
;
614 odp_flow
->stats
.tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
615 odp_flow
->stats
.reserved
= 0;
616 odp_flow
->stats
.error
= 0;
617 if (odp_flow
->n_actions
> 0) {
618 unsigned int n
= MIN(odp_flow
->n_actions
, flow
->n_actions
);
619 memcpy(odp_flow
->actions
, flow
->actions
,
620 n
* sizeof *odp_flow
->actions
);
621 odp_flow
->n_actions
= flow
->n_actions
;
624 if (query_flags
& ODPFF_ZERO_TCP_FLAGS
) {
629 odp_flow
->stats
.error
= ENOENT
;
/* dpif 'flow_get' hook: answers each of the 'n' queries independently.  A
 * miss is not an error for the call as a whole — dp_netdev_lookup_flow()'s
 * NULL result makes answer_flow_query() set that entry's stats.error to
 * ENOENT instead.  Elided tail presumably 'return 0;'. */
634 dpif_netdev_flow_get(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
636 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
639 for (i
= 0; i
< n
; i
++) {
640 struct odp_flow
*odp_flow
= &flows
[i
];
641 answer_flow_query(dp_netdev_lookup_flow(dp
, &odp_flow
->key
),
642 odp_flow
->flags
, odp_flow
);
/* Validates an action list before it is installed or executed.  Per-type
 * checks visible below: output port in range, VLAN VID within VLAN_VID_MASK
 * (compared in network byte order), VLAN PCP within its 3-bit field, and
 * ODPAT_SET_NW_TOS forbidden from touching the ECN bits.  The trailing
 * parameter line (orig. 649, presumably 'bool *mutates') and the
 * switch/default/return scaffolding, including *mutates = true markers for
 * the packet-modifying cases, were elided — the cluster of SET_*/STRIP
 * cases at the end presumably shares one such marker.  Confirm against
 * upstream. */
648 dpif_netdev_validate_actions(const union odp_action
*actions
, int n_actions
,
654 for (i
= 0; i
< n_actions
; i
++) {
655 const union odp_action
*a
= &actions
[i
];
658 if (a
->output
.port
>= MAX_PORTS
) {
663 case ODPAT_CONTROLLER
:
666 case ODPAT_SET_VLAN_VID
:
668 if (a
->vlan_vid
.vlan_vid
& htons(~VLAN_VID_MASK
)) {
673 case ODPAT_SET_VLAN_PCP
:
675 if (a
->vlan_pcp
.vlan_pcp
& ~(VLAN_PCP_MASK
>> VLAN_PCP_SHIFT
)) {
680 case ODPAT_SET_NW_TOS
:
682 if (a
->nw_tos
.nw_tos
& IP_ECN_MASK
) {
687 case ODPAT_STRIP_VLAN
:
688 case ODPAT_SET_DL_SRC
:
689 case ODPAT_SET_DL_DST
:
690 case ODPAT_SET_NW_SRC
:
691 case ODPAT_SET_NW_DST
:
692 case ODPAT_SET_TP_SRC
:
693 case ODPAT_SET_TP_DST
:
/* Replaces a flow's action list with the one in 'odp_flow'.  Caps the list
 * at 4096 bytes worth of actions (elided branch presumably returns EINVAL),
 * validates it, then reallocates flow->actions to the exact size and copies.
 * NOTE(review): 'flow->actions = xrealloc(flow->actions, ...)' overwrites
 * the pointer directly — safe only because xrealloc aborts on failure
 * rather than returning NULL.  Error-return scaffolding (orig. lines
 * 706-710, 712-713, 716-719, 724-725) elided. */
705 set_flow_actions(struct dp_netdev_flow
*flow
, struct odp_flow
*odp_flow
)
711 if (odp_flow
->n_actions
>= 4096 / sizeof *odp_flow
->actions
) {
714 error
= dpif_netdev_validate_actions(odp_flow
->actions
,
715 odp_flow
->n_actions
, &mutates
);
720 n_bytes
= odp_flow
->n_actions
* sizeof *flow
->actions
;
721 flow
->actions
= xrealloc(flow
->actions
, n_bytes
);
722 flow
->n_actions
= odp_flow
->n_actions
;
723 memcpy(flow
->actions
, odp_flow
->actions
, n_bytes
);
/* Installs a new flow: zero-allocates it, copies the key, and explicitly
 * zeroes key.reserved[] — establishing the invariant asserted in
 * dp_netdev_lookup_flow().  On a set_flow_actions() failure the elided
 * branch (orig. lines 739-742) presumably frees 'flow' and returns the
 * error; on success the flow is inserted under flow_hash(key, 0), matching
 * the lookup's hash.  Elided tail presumably 'return 0;'. */
728 add_flow(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
730 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
731 struct dp_netdev_flow
*flow
;
734 flow
= xzalloc(sizeof *flow
);
735 flow
->key
= odp_flow
->key
;
736 memset(flow
->key
.reserved
, 0, sizeof flow
->key
.reserved
);
738 error
= set_flow_actions(flow
, odp_flow
);
744 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
/* Resets a flow's usage statistics: last-used timestamp and packet/byte
 * counters.  NOTE(review): braces elided; upstream also clears tcp_ctl
 * here (orig. line 755 missing) — confirm. */
749 clear_stats(struct dp_netdev_flow
*flow
)
751 flow
->used
.tv_sec
= 0;
752 flow
->used
.tv_nsec
= 0;
753 flow
->packet_count
= 0;
754 flow
->byte_count
= 0;
/* dpif 'flow_put' hook.  Visible logic: if the flow does not exist and
 * ODPPF_CREATE is set, add it (subject to the MAX_FLOWS cap — the over-cap
 * branch, presumably EFBIG, is elided); if it exists and ODPPF_MODIFY is
 * set, replace its actions and optionally clear stats via ODPPF_ZERO_STATS.
 * The EEXIST/ENOENT branches for the remaining flag combinations (orig.
 * lines 769-775, 779-784) were elided.  Note 'put->flags & ODPPF_ZERO_STATS'
 * relies on & binding tighter than && — correct but worth parenthesizing. */
759 dpif_netdev_flow_put(struct dpif
*dpif
, struct odp_flow_put
*put
)
761 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
762 struct dp_netdev_flow
*flow
;
764 flow
= dp_netdev_lookup_flow(dp
, &put
->flow
.key
);
766 if (put
->flags
& ODPPF_CREATE
) {
767 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
768 return add_flow(dpif
, &put
->flow
);
776 if (put
->flags
& ODPPF_MODIFY
) {
777 int error
= set_flow_actions(flow
, &put
->flow
);
778 if (!error
&& put
->flags
& ODPPF_ZERO_STATS
) {
/* dpif 'flow_del' hook: looks the flow up, reports its final stats back to
 * the caller via answer_flow_query(flow, 0, ...), then frees it.  The
 * elided guard (orig. line 796) presumably returns ENOENT when the lookup
 * fails — the surviving text would otherwise pass NULL into
 * answer_flow_query()/dp_netdev_free_flow(). */
790 dpif_netdev_flow_del(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
792 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
793 struct dp_netdev_flow
*flow
;
795 flow
= dp_netdev_lookup_flow(dp
, &odp_flow
->key
);
797 answer_flow_query(flow
, 0, odp_flow
);
798 dp_netdev_free_flow(dp
, flow
);
/* dpif 'flow_list' hook: dumps up to 'n' flows into 'flows' (the early-exit
 * when 'i' reaches 'n' is elided, orig. lines 814-816) and returns the
 * total number of flows in the table — which may exceed the number written,
 * signalling truncation to the caller. */
806 dpif_netdev_flow_list(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
808 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
809 struct dp_netdev_flow
*flow
;
813 HMAP_FOR_EACH (flow
, node
, &dp
->flow_table
) {
817 answer_flow_query(flow
, 0, &flows
[i
++]);
819 return hmap_count(&dp
->flow_table
);
/* dpif 'execute' hook: applies 'actions' to one caller-supplied packet.
 * Rejects runts (< ETH_HEADER_LEN) and packets over UINT16_MAX, validates
 * the actions, then works on a deep copy laid out with DP_NETDEV_HEADROOM
 * so VLAN push has room and IP headers stay 4-byte aligned.
 * NOTE(review): the '©' tokens below are extraction damage — an HTML
 * entity swallowed '&copy', i.e. the address of the local 'struct ofpbuf
 * copy'.  Per the surviving comments, upstream only deep-copies when the
 * actions mutate the packet; the non-mutating shallow-copy path (orig.
 * lines 826-832, 834-836, 838-842, 844, 848, 852-854, 857, 859) was
 * elided.  Do not edit this function without restoring those lines. */
823 dpif_netdev_execute(struct dpif
*dpif
,
824 const union odp_action actions
[], int n_actions
,
825 const struct ofpbuf
*packet
)
827 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
833 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
837 error
= dpif_netdev_validate_actions(actions
, n_actions
, &mutates
);
843 /* We need a deep copy of 'packet' since we're going to modify its
845 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
846 copy
.data
= (char*)copy
.base
+ DP_NETDEV_HEADROOM
;
847 ofpbuf_put(©
, packet
->data
, packet
->size
);
849 /* We still need a shallow copy of 'packet', even though we won't
850 * modify its data, because flow_extract() modifies packet->l2, etc.
851 * We could probably get away with modifying those but it's more polite
855 flow_extract(©
, 0, -1, &flow
);
856 error
= dp_netdev_execute_actions(dp
, ©
, &flow
, actions
, n_actions
);
858 ofpbuf_uninit(©
);
/* Get/set the per-handle upcall listen mask (which of the N_QUEUES message
 * classes this client receives).  The setter rejects bits outside ODPL_ALL;
 * elided lines presumably return 0 / EINVAL accordingly. */
864 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
866 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
867 *listen_mask
= dpif_netdev
->listen_mask
;
872 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
874 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
875 if (!(listen_mask
& ~ODPL_ALL
)) {
876 dpif_netdev
->listen_mask
= listen_mask
;
/* Returns the first message queue that both has pending messages (q->n)
 * and is enabled in this handle's listen mask; elided lines presumably
 * 'return q;' inside the hit branch and NULL after the loop.  Note
 * 'mask & (1u << i)' relies on & binding tighter than && — correct. */
883 static struct ovs_queue
*
884 find_nonempty_queue(struct dpif
*dpif
)
886 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
887 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
888 int mask
= dpif_netdev
->listen_mask
;
891 for (i
= 0; i
< N_QUEUES
; i
++) {
892 struct ovs_queue
*q
= &dp
->queues
[i
];
893 if (q
->n
&& mask
& (1u << i
)) {
/* recv(): pops the head message from the first eligible queue; the elided
 * guard (orig. line 904) presumably returns EAGAIN when no queue is ready —
 * ownership of the popped ofpbuf transfers to the caller via *bufp.
 * recv_wait(): wakes immediately if a message is already queued; otherwise
 * relies on dp_netdev_wait()/netdev_recv_wait() to wake the loop when new
 * traffic can generate messages. */
901 dpif_netdev_recv(struct dpif
*dpif
, struct ofpbuf
**bufp
)
903 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
905 *bufp
= queue_pop_head(q
);
913 dpif_netdev_recv_wait(struct dpif
*dpif
)
915 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
917 poll_immediate_wake();
919 /* No messages ready to be received, and dp_wait() will ensure that we
920 * wake up to queue new messages, so there is nothing to do. */
/* Records one packet hit on 'flow': refreshes the last-used timestamp,
 * bumps packet/byte counters, and for IPv4 TCP packets ORs the TCP flags
 * word into the accumulator reported as stats.tcp_flags.
 * NOTE(review): relies on packet->l4 pointing at the TCP header, set by
 * flow_extract() in the caller; no NULL check is visible here. */
925 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, const flow_t
*key
,
926 const struct ofpbuf
*packet
)
928 time_timespec(&flow
->used
);
929 flow
->packet_count
++;
930 flow
->byte_count
+= packet
->size
;
931 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
932 struct tcp_header
*th
= packet
->l4
;
933 flow
->tcp_ctl
|= th
->tcp_ctl
;
/* Datapath fast path for one received packet: drop runts; extract the flow
 * key (flow_extract() returning nonzero indicates an IP fragment, dropped
 * when drop_frags is set — the n_frags++ and return in that branch are
 * elided); on a flow-table hit, update stats and execute the flow's
 * actions; on a miss, queue the packet to userspace on the MISS queue.
 * Elided lines (orig. 940, 942-943, 945-946, 948-951, 953, 957-959, 961)
 * include the n_hit++/n_missed++ counter updates. */
938 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
939 struct ofpbuf
*packet
)
941 struct dp_netdev_flow
*flow
;
944 if (packet
->size
< ETH_HEADER_LEN
) {
947 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
952 flow
= dp_netdev_lookup_flow(dp
, &key
);
954 dp_netdev_flow_used(flow
, &key
, packet
);
955 dp_netdev_execute_actions(dp
, packet
, &key
,
956 flow
->actions
, flow
->n_actions
);
960 dp_netdev_output_control(dp
, packet
, _ODPL_MISS_NR
, port
->port_no
, 0);
/* Body fragments of the datapath's run/wait pair (the function header
 * lines, presumably dp_netdev_run() and dp_netdev_wait(), were elided).
 * Run loop: one reusable receive buffer sized DP_NETDEV_HEADROOM + max_mtu
 * is shared across all ports of all datapaths; before each netdev_recv()
 * the data pointer is reset past the headroom so actions can push a VLAN
 * tag without reallocating.  Receive errors other than EAGAIN are rate-
 * limited to one log message per second, burst five.
 * Wait half: registers a wakeup on every port's netdev. */
967 struct ofpbuf packet
;
968 struct dp_netdev
*dp
;
970 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ max_mtu
);
971 LIST_FOR_EACH (dp
, node
, &dp_netdev_list
) {
972 struct dp_netdev_port
*port
;
974 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
977 /* Reset packet contents. */
978 packet
.data
= (char*)packet
.base
+ DP_NETDEV_HEADROOM
;
981 error
= netdev_recv(port
->netdev
, &packet
);
983 dp_netdev_port_input(dp
, port
, &packet
);
984 } else if (error
!= EAGAIN
) {
985 struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
986 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
987 netdev_get_name(port
->netdev
), strerror(error
));
991 ofpbuf_uninit(&packet
);
997 struct dp_netdev
*dp
;
999 LIST_FOR_EACH (dp
, node
, &dp_netdev_list
) {
1000 struct dp_netdev_port
*port
;
1001 LIST_FOR_EACH (port
, node
, &dp
->port_list
) {
1002 netdev_recv_wait(port
->netdev
);
/* Sets or inserts an 802.1Q tag.  Existing tag: clear 'mask' bits, OR in
 * 'tci' (both converted to network order).  No tag: build the new VLAN
 * header in a local temp from the old Ethernet header, push VLAN_HEADER_LEN
 * bytes of headroom (guaranteed by DP_NETDEV_HEADROOM), copy the temp in,
 * and pull packet->l2 back to the new frame start.  NOTE(review): the
 * assignment initializing 'eh' (orig. lines 1021-1022) is elided, yet 'eh'
 * is dereferenced in the size/type test — the missing line presumably is
 * 'eh = packet->l2;'.  Do not rewrite without restoring it. */
1008 /* Modify the TCI field of 'packet'. If a VLAN tag is not present, one
1009 * is added with the TCI field set to 'tci'. If a VLAN tag is present,
1010 * then 'mask' bits are cleared before 'tci' is logically OR'd into the
1013 * Note that the function does not ensure that 'tci' does not affect
1014 * bits outside of 'mask'.
1017 dp_netdev_modify_vlan_tci(struct ofpbuf
*packet
, uint16_t tci
, uint16_t mask
)
1019 struct vlan_eth_header
*veh
;
1020 struct eth_header
*eh
;
1023 if (packet
->size
>= sizeof(struct vlan_eth_header
)
1024 && eh
->eth_type
== htons(ETH_TYPE_VLAN
)) {
1025 /* Clear 'mask' bits, but maintain other TCI bits. */
1027 veh
->veth_tci
&= ~htons(mask
);
1028 veh
->veth_tci
|= htons(tci
);
1030 /* Insert new 802.1Q header. */
1031 struct vlan_eth_header tmp
;
1032 memcpy(tmp
.veth_dst
, eh
->eth_dst
, ETH_ADDR_LEN
);
1033 memcpy(tmp
.veth_src
, eh
->eth_src
, ETH_ADDR_LEN
);
1034 tmp
.veth_type
= htons(ETH_TYPE_VLAN
);
1035 tmp
.veth_tci
= htons(tci
);
1036 tmp
.veth_next_type
= eh
->eth_type
;
1038 veh
= ofpbuf_push_uninit(packet
, VLAN_HEADER_LEN
);
1039 memcpy(veh
, &tmp
, sizeof tmp
);
1040 packet
->l2
= (char*)packet
->l2
- VLAN_HEADER_LEN
;
/* Removes an 802.1Q tag if present: snapshots the Ethernet header into a
 * local temp with the inner ethertype promoted to eth_type, shrinks the
 * buffer by VLAN_HEADER_LEN from the front (advancing data and l2), and
 * writes the rebuilt header at the new start.  The temp copy is required
 * because source and destination regions overlap. */
1045 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1047 struct vlan_eth_header
*veh
= packet
->l2
;
1048 if (packet
->size
>= sizeof *veh
1049 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1050 struct eth_header tmp
;
1052 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1053 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1054 tmp
.eth_type
= veh
->veth_next_type
;
1056 packet
->size
-= VLAN_HEADER_LEN
;
1057 packet
->data
= (char*)packet
->data
+ VLAN_HEADER_LEN
;
1058 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1059 memcpy(packet
->data
, &tmp
, sizeof tmp
);
/* dp_netdev_set_dl_src()/dst(): overwrite the Ethernet source/destination
 * MAC in place via packet->l2.
 * is_ip(): guard used by the L3/L4 rewrite actions — true when the flow key
 * says IPv4 and flow_extract() located an L4 header (packet->l4 non-NULL),
 * i.e. the packet is a parseable, unfragmented-first-part IP packet. */
1064 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1066 struct eth_header
*eh
= packet
->l2
;
1067 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1071 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1073 struct eth_header
*eh
= packet
->l2
;
1074 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1078 is_ip(const struct ofpbuf
*packet
, const flow_t
*key
)
1080 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
/* Rewrites the IPv4 source or destination address (selected by a->type)
 * with incremental checksum fixups: the TCP or UDP checksum is patched
 * first (while *field still holds the old address), then the IP header
 * checksum, then the address itself.  A UDP checksum of 0 means "none",
 * so recalc is guarded (guard line orig. 1097 elided) and a result of 0 is
 * re-encoded as 0xffff per RFC 768.  NOTE(review): the 'field' declaration
 * (orig. lines 1089-1090) is elided; it is presumably 'uint32_t *field'
 * aliasing into the IP header. */
1084 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const flow_t
*key
,
1085 const struct odp_action_nw_addr
*a
)
1087 if (is_ip(packet
, key
)) {
1088 struct ip_header
*nh
= packet
->l3
;
1091 field
= a
->type
== ODPAT_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1092 if (key
->nw_proto
== IP_TYPE_TCP
&& packet
->l7
) {
1093 struct tcp_header
*th
= packet
->l4
;
1094 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, a
->nw_addr
);
1095 } else if (key
->nw_proto
== IP_TYPE_UDP
&& packet
->l7
) {
1096 struct udp_header
*uh
= packet
->l4
;
1098 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, a
->nw_addr
);
1099 if (!uh
->udp_csum
) {
1100 uh
->udp_csum
= 0xffff;
1104 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, a
->nw_addr
);
1105 *field
= a
->nw_addr
;
/* Rewrites the IPv4 TOS byte, preserving the two ECN bits (the validator
 * already rejected actions touching IP_ECN_MASK).  The 16-bit incremental
 * checksum update widens old/new byte values via htons().  NOTE(review):
 * the computed 'new' value is never stored into *field in the surviving
 * text — the assignment (orig. line 1122, presumably '*field = new;') was
 * elided.  recalc_csum16 is also fed a->nw_tos rather than 'new'; upstream
 * behavior should be confirmed before editing. */
1110 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const flow_t
*key
,
1111 const struct odp_action_nw_tos
*a
)
1113 if (is_ip(packet
, key
)) {
1114 struct ip_header
*nh
= packet
->l3
;
1115 uint8_t *field
= &nh
->ip_tos
;
1117 /* Set the DSCP bits and preserve the ECN bits. */
1118 uint8_t new = a
->nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1120 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1121 htons((uint16_t)a
->nw_tos
));
/* Rewrites the TCP or UDP source/destination port (selected by a->type)
 * with an incremental L4 checksum fixup; the checksum is patched before
 * *field is overwritten so the old value is still available.  Packets that
 * are IP but not parseable TCP/UDP fall through untouched.  NOTE(review):
 * the 'field' declaration (orig. line 1131, presumably 'uint16_t *field')
 * is elided.  Unlike set_nw_addr(), the UDP zero-checksum special case is
 * not handled here in the surviving text — confirm against upstream. */
1127 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const flow_t
*key
,
1128 const struct odp_action_tp_port
*a
)
1130 if (is_ip(packet
, key
)) {
1132 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1133 struct tcp_header
*th
= packet
->l4
;
1134 field
= a
->type
== ODPAT_SET_TP_SRC
? &th
->tcp_src
: &th
->tcp_dst
;
1135 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, a
->tp_port
);
1136 *field
= a
->tp_port
;
1137 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1138 struct udp_header
*uh
= packet
->l4
;
1139 field
= a
->type
== ODPAT_SET_TP_SRC
? &uh
->udp_src
: &uh
->udp_dst
;
1140 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, a
->tp_port
);
1141 *field
= a
->tp_port
;
/* dp_netdev_output_port(): transmits 'packet' on 'out_port' (parameter line
 * elided); the NULL-slot guard (orig. line 1153) is elided — the surviving
 * text would dereference an empty ports[] entry without it.
 * dp_netdev_output_control(): wraps 'packet' in an odp_msg and appends it to
 * queue 'queue_no' for dpif_netdev_recv().  Over-length queues drop the
 * message (elided branch presumably bumps dp->n_lost and returns ENOBUFS).
 * The message is built with DPIF_RECV_MSG_PADDING of headroom; 'arg'
 * (e.g. the controller cookie) is stored by an elided line (orig. 1178,
 * presumably 'header->arg = arg;').  Elided tail presumably 'return 0;'. */
1149 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1152 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1154 netdev_send(p
->netdev
, packet
);
1159 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1160 int queue_no
, int port_no
, uint32_t arg
)
1162 struct ovs_queue
*q
= &dp
->queues
[queue_no
];
1163 struct odp_msg
*header
;
1167 if (q
->n
>= MAX_QUEUE_LEN
) {
1172 msg_size
= sizeof *header
+ packet
->size
;
1173 msg
= ofpbuf_new_with_headroom(msg_size
, DPIF_RECV_MSG_PADDING
);
1174 header
= ofpbuf_put_uninit(msg
, sizeof *header
);
1175 header
->type
= queue_no
;
1176 header
->length
= msg_size
;
1177 header
->port
= port_no
;
1179 ofpbuf_put(msg
, packet
->data
, packet
->size
);
1180 queue_push_tail(q
, msg
);
/* Detects spoofed Ethernet+IPv4 ARP: non-ARP ethertypes return false
 * (elided return); a truncated L3 payload or header fields other than
 * Ethernet/IPv4 with standard lengths, or a sender hardware address that
 * differs from the outer Ethernet source, counts as spoofed.
 * NOTE(review): initializations of 'arp' and 'eth' (orig. lines 1203-1205)
 * are elided, and the surviving comparison uses ETH_HEADER_LEN where the
 * hardware-address length should be ETH_ADDR_LEN (6), not the header
 * length (14) — likely an extraction artifact or upstream line lost;
 * confirm before editing.  The ar_pln/ar_op checks (orig. line 1209) are
 * also elided. */
1185 /* Returns true if 'packet' is an invalid Ethernet+IPv4 ARP packet: one with
1186 * screwy or truncated header fields or one whose inner and outer Ethernet
1187 * address differ. */
1189 dp_netdev_is_spoofed_arp(struct ofpbuf
*packet
, const struct odp_flow_key
*key
)
1191 struct arp_eth_header
*arp
;
1192 struct eth_header
*eth
;
1195 if (key
->dl_type
!= htons(ETH_TYPE_ARP
)) {
1199 l3_size
= (char *) ofpbuf_end(packet
) - (char *) packet
->l3
;
1200 if (l3_size
< sizeof(struct arp_eth_header
)) {
1206 return (arp
->ar_hrd
!= htons(ARP_HRD_ETHERNET
)
1207 || arp
->ar_pro
!= htons(ARP_PRO_IP
)
1208 || arp
->ar_hln
!= ETH_HEADER_LEN
1210 || !eth_addr_equals(arp
->ar_sha
, eth
->eth_src
));
/* Applies each action in order to 'packet'.  Dispatch is a switch on the
 * action type (the 'switch' line and ODPAT_OUTPUT case label, orig. lines
 * 1221-1223, are elided, as are the 'break's and the mask arguments to the
 * VLAN TCI calls — SET_VLAN_VID presumably masks VLAN_VID_MASK, SET_VLAN_PCP
 * masks VLAN_PCP_MASK).  ODPAT_CONTROLLER forwards a copy to userspace on
 * the ACTION queue with the caller-supplied cookie.  DROP_SPOOFED_ARP stops
 * action processing for spoofed ARPs (the 'return' inside the branch is
 * elided).  Elided tail presumably 'return 0;'. */
1214 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1215 struct ofpbuf
*packet
, const flow_t
*key
,
1216 const union odp_action
*actions
, int n_actions
)
1219 for (i
= 0; i
< n_actions
; i
++) {
1220 const union odp_action
*a
= &actions
[i
];
1224 dp_netdev_output_port(dp
, packet
, a
->output
.port
);
1227 case ODPAT_CONTROLLER
:
1228 dp_netdev_output_control(dp
, packet
, _ODPL_ACTION_NR
,
1229 key
->in_port
, a
->controller
.arg
);
1232 case ODPAT_SET_VLAN_VID
:
1233 dp_netdev_modify_vlan_tci(packet
, ntohs(a
->vlan_vid
.vlan_vid
),
1237 case ODPAT_SET_VLAN_PCP
:
1238 dp_netdev_modify_vlan_tci(packet
,
1239 a
->vlan_pcp
.vlan_pcp
<< VLAN_PCP_SHIFT
,
1243 case ODPAT_STRIP_VLAN
:
1244 dp_netdev_strip_vlan(packet
);
1247 case ODPAT_SET_DL_SRC
:
1248 dp_netdev_set_dl_src(packet
, a
->dl_addr
.dl_addr
);
1251 case ODPAT_SET_DL_DST
:
1252 dp_netdev_set_dl_dst(packet
, a
->dl_addr
.dl_addr
);
1255 case ODPAT_SET_NW_SRC
:
1256 case ODPAT_SET_NW_DST
:
1257 dp_netdev_set_nw_addr(packet
, key
, &a
->nw_addr
);
1260 case ODPAT_SET_NW_TOS
:
1261 dp_netdev_set_nw_tos(packet
, key
, &a
->nw_tos
);
1264 case ODPAT_SET_TP_SRC
:
1265 case ODPAT_SET_TP_DST
:
1266 dp_netdev_set_tp_port(packet
, key
, &a
->tp_port
);
1269 case ODPAT_DROP_SPOOFED_ARP
:
1270 if (dp_netdev_is_spoofed_arp(packet
, key
)) {
/* The dpif provider vtable registering this userspace datapath
 * implementation (see dpif-provider.h for slot semantics).  Optional hooks
 * are left NULL.  NOTE(review): several entries were elided by extraction —
 * the name/run/wait slots and dpif_netdev_open/close near the top (orig.
 * lines 1279-1281, 1283-1284), plus the recv slot and closing brace after
 * recv_wait (orig. lines 1308, 1310).  Slot order must match struct
 * dpif_class exactly; do not reorder. */
1278 const struct dpif_class dpif_netdev_class
= {
1282 NULL
, /* enumerate */
1285 NULL
, /* get_all_names */
1286 dpif_netdev_destroy
,
1287 dpif_netdev_get_stats
,
1288 dpif_netdev_get_drop_frags
,
1289 dpif_netdev_set_drop_frags
,
1290 dpif_netdev_port_add
,
1291 dpif_netdev_port_del
,
1292 dpif_netdev_port_query_by_number
,
1293 dpif_netdev_port_query_by_name
,
1294 dpif_netdev_port_list
,
1295 dpif_netdev_port_poll
,
1296 dpif_netdev_port_poll_wait
,
1297 dpif_netdev_flow_get
,
1298 dpif_netdev_flow_put
,
1299 dpif_netdev_flow_del
,
1300 dpif_netdev_flow_flush
,
1301 dpif_netdev_flow_list
,
1302 dpif_netdev_execute
,
1303 dpif_netdev_recv_get_mask
,
1304 dpif_netdev_recv_set_mask
,
1305 NULL
, /* get_sflow_probability */
1306 NULL
, /* set_sflow_probability */
1307 NULL
, /* queue_to_priority */
1309 dpif_netdev_recv_wait
,