2 * Copyright (c) 2009, 2010 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <netinet/in.h>
26 #include <sys/socket.h>
30 #include <sys/ioctl.h>
35 #include "dpif-provider.h"
41 #include "ofp-print.h"
44 #include "poll-loop.h"
50 VLOG_DEFINE_THIS_MODULE(dpif_netdev
)
/* Compile-time limits of the netdev-based datapath. */
enum {
    N_QUEUES = 2,           /* Queues that feed dpif_recv(). */
    MAX_QUEUE_LEN = 100,    /* Most packets a single queue may hold. */
    N_GROUPS = 16,          /* Port groups in a datapath. */
    MAX_PORTS = 256,        /* Ports in a datapath. */
    MAX_FLOWS = 65536       /* Entries allowed in the flow table. */
};
59 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
60 * headers to be aligned on a 4-byte boundary. */
61 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
63 /* Datapath based on the network device interface from netdev.h. */
70 bool drop_frags
; /* Drop all IP fragments, if true. */
71 struct ovs_queue queues
[N_QUEUES
]; /* Messages queued for dpif_recv(). */
72 struct hmap flow_table
; /* Flow table. */
73 struct odp_port_group groups
[N_GROUPS
];
76 long long int n_frags
; /* Number of dropped IP fragments. */
77 long long int n_hit
; /* Number of flow table matches. */
78 long long int n_missed
; /* Number of flow table misses. */
79 long long int n_lost
; /* Number of misses not passed to client. */
83 struct dp_netdev_port
*ports
[MAX_PORTS
];
84 struct list port_list
;
88 /* A port in a netdev-based datapath. */
89 struct dp_netdev_port
{
90 int port_no
; /* Index into dp_netdev's 'ports'. */
91 struct list node
; /* Element in dp_netdev's 'port_list'. */
92 struct netdev
*netdev
;
93 bool internal
; /* Internal port (as ODP_PORT_INTERNAL)? */
96 /* A flow in dp_netdev's 'flow_table'. */
97 struct dp_netdev_flow
{
98 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
102 struct timespec used
; /* Last used time. */
103 long long int packet_count
; /* Number of packets matched. */
104 long long int byte_count
; /* Number of bytes matched. */
105 uint16_t tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
108 union odp_action
*actions
;
109 unsigned int n_actions
;
112 /* Interface to netdev-based datapath. */
115 struct dp_netdev
*dp
;
117 unsigned int dp_serial
;
120 /* All netdev-based datapaths. */
121 static struct dp_netdev
*dp_netdevs
[256];
122 struct list dp_netdev_list
= LIST_INITIALIZER(&dp_netdev_list
);
123 enum { N_DP_NETDEVS
= ARRAY_SIZE(dp_netdevs
) };
125 /* Maximum port MTU seen so far. */
126 static int max_mtu
= ETH_PAYLOAD_MAX
;
128 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
129 struct dp_netdev_port
**portp
);
130 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
131 struct dp_netdev_port
**portp
);
132 static void dp_netdev_free(struct dp_netdev
*);
133 static void dp_netdev_flow_flush(struct dp_netdev
*);
134 static int do_add_port(struct dp_netdev
*, const char *devname
, uint16_t flags
,
136 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
137 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
138 int queue_no
, int port_no
, uint32_t arg
);
139 static int dp_netdev_execute_actions(struct dp_netdev
*,
140 struct ofpbuf
*, const flow_t
*,
141 const union odp_action
*, int n
);
143 static struct dpif_netdev
*
144 dpif_netdev_cast(const struct dpif
*dpif
)
146 dpif_assert_class(dpif
, &dpif_netdev_class
);
147 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
150 static struct dp_netdev
*
151 get_dp_netdev(const struct dpif
*dpif
)
153 return dpif_netdev_cast(dpif
)->dp
;
157 name_to_dp_idx(const char *name
)
159 if (!strncmp(name
, "dp", 2) && isdigit((unsigned char)name
[2])) {
160 int dp_idx
= atoi(name
+ 2);
161 if (dp_idx
>= 0 && dp_idx
< N_DP_NETDEVS
) {
168 static struct dp_netdev
*
169 find_dp_netdev(const char *name
)
174 dp_idx
= name_to_dp_idx(name
);
176 return dp_netdevs
[dp_idx
];
179 for (i
= 0; i
< N_DP_NETDEVS
; i
++) {
180 struct dp_netdev
*dp
= dp_netdevs
[i
];
182 struct dp_netdev_port
*port
;
183 if (!get_port_by_name(dp
, name
, &port
)) {
192 create_dpif_netdev(struct dp_netdev
*dp
)
194 struct dpif_netdev
*dpif
;
199 dpname
= xasprintf("dp%d", dp
->dp_idx
);
200 dpif
= xmalloc(sizeof *dpif
);
201 dpif_init(&dpif
->dpif
, &dpif_netdev_class
, dpname
, dp
->dp_idx
, dp
->dp_idx
);
203 dpif
->listen_mask
= 0;
204 dpif
->dp_serial
= dp
->serial
;
211 create_dp_netdev(const char *name
, int dp_idx
, struct dpif
**dpifp
)
213 struct dp_netdev
*dp
;
217 if (dp_netdevs
[dp_idx
]) {
221 /* Create datapath. */
222 dp_netdevs
[dp_idx
] = dp
= xzalloc(sizeof *dp
);
223 list_push_back(&dp_netdev_list
, &dp
->node
);
226 dp
->drop_frags
= false;
227 for (i
= 0; i
< N_QUEUES
; i
++) {
228 queue_init(&dp
->queues
[i
]);
230 hmap_init(&dp
->flow_table
);
231 for (i
= 0; i
< N_GROUPS
; i
++) {
232 dp
->groups
[i
].ports
= NULL
;
233 dp
->groups
[i
].n_ports
= 0;
234 dp
->groups
[i
].group
= i
;
236 list_init(&dp
->port_list
);
237 error
= do_add_port(dp
, name
, ODP_PORT_INTERNAL
, ODPP_LOCAL
);
243 *dpifp
= create_dpif_netdev(dp
);
248 dpif_netdev_open(const char *name
, const char *type OVS_UNUSED
, bool create
,
252 if (find_dp_netdev(name
)) {
255 int dp_idx
= name_to_dp_idx(name
);
257 return create_dp_netdev(name
, dp_idx
, dpifp
);
259 /* Scan for unused dp_idx number. */
260 for (dp_idx
= 0; dp_idx
< N_DP_NETDEVS
; dp_idx
++) {
261 int error
= create_dp_netdev(name
, dp_idx
, dpifp
);
262 if (error
!= EBUSY
) {
267 /* All datapath numbers in use. */
272 struct dp_netdev
*dp
= find_dp_netdev(name
);
274 *dpifp
= create_dpif_netdev(dp
);
283 dp_netdev_free(struct dp_netdev
*dp
)
287 dp_netdev_flow_flush(dp
);
288 while (dp
->n_ports
> 0) {
289 struct dp_netdev_port
*port
= CONTAINER_OF(
290 dp
->port_list
.next
, struct dp_netdev_port
, node
);
291 do_del_port(dp
, port
->port_no
);
293 for (i
= 0; i
< N_QUEUES
; i
++) {
294 queue_destroy(&dp
->queues
[i
]);
296 hmap_destroy(&dp
->flow_table
);
297 for (i
= 0; i
< N_GROUPS
; i
++) {
298 free(dp
->groups
[i
].ports
);
300 dp_netdevs
[dp
->dp_idx
] = NULL
;
301 list_remove(&dp
->node
);
306 dpif_netdev_close(struct dpif
*dpif
)
308 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
309 assert(dp
->open_cnt
> 0);
310 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
317 dpif_netdev_destroy(struct dpif
*dpif
)
319 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
320 dp
->destroyed
= true;
325 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
327 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
328 memset(stats
, 0, sizeof *stats
);
329 stats
->n_flows
= hmap_count(&dp
->flow_table
);
330 stats
->cur_capacity
= hmap_capacity(&dp
->flow_table
);
331 stats
->max_capacity
= MAX_FLOWS
;
332 stats
->n_ports
= dp
->n_ports
;
333 stats
->max_ports
= MAX_PORTS
;
334 stats
->max_groups
= N_GROUPS
;
335 stats
->n_frags
= dp
->n_frags
;
336 stats
->n_hit
= dp
->n_hit
;
337 stats
->n_missed
= dp
->n_missed
;
338 stats
->n_lost
= dp
->n_lost
;
339 stats
->max_miss_queue
= MAX_QUEUE_LEN
;
340 stats
->max_action_queue
= MAX_QUEUE_LEN
;
345 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
347 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
348 *drop_fragsp
= dp
->drop_frags
;
353 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
355 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
356 dp
->drop_frags
= drop_frags
;
361 do_add_port(struct dp_netdev
*dp
, const char *devname
, uint16_t flags
,
364 bool internal
= (flags
& ODP_PORT_INTERNAL
) != 0;
365 struct dp_netdev_port
*port
;
366 struct netdev_options netdev_options
;
367 struct netdev
*netdev
;
371 /* XXX reject devices already in some dp_netdev. */
373 /* Open and validate network device. */
374 memset(&netdev_options
, 0, sizeof netdev_options
);
375 netdev_options
.name
= devname
;
376 netdev_options
.ethertype
= NETDEV_ETH_TYPE_ANY
;
378 netdev_options
.type
= "tap";
381 error
= netdev_open(&netdev_options
, &netdev
);
385 /* XXX reject loopback devices */
386 /* XXX reject non-Ethernet devices */
388 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
390 netdev_close(netdev
);
394 port
= xmalloc(sizeof *port
);
395 port
->port_no
= port_no
;
396 port
->netdev
= netdev
;
397 port
->internal
= internal
;
399 netdev_get_mtu(netdev
, &mtu
);
404 list_push_back(&dp
->port_list
, &port
->node
);
405 dp
->ports
[port_no
] = port
;
413 dpif_netdev_port_add(struct dpif
*dpif
, const char *devname
, uint16_t flags
,
416 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
419 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
420 if (!dp
->ports
[port_no
]) {
422 return do_add_port(dp
, devname
, flags
, port_no
);
429 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
431 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
432 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
436 is_valid_port_number(uint16_t port_no
)
438 return port_no
< MAX_PORTS
;
442 get_port_by_number(struct dp_netdev
*dp
,
443 uint16_t port_no
, struct dp_netdev_port
**portp
)
445 if (!is_valid_port_number(port_no
)) {
449 *portp
= dp
->ports
[port_no
];
450 return *portp
? 0 : ENOENT
;
455 get_port_by_name(struct dp_netdev
*dp
,
456 const char *devname
, struct dp_netdev_port
**portp
)
458 struct dp_netdev_port
*port
;
460 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
461 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
470 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
472 struct dp_netdev_port
*port
;
476 error
= get_port_by_number(dp
, port_no
, &port
);
481 list_remove(&port
->node
);
482 dp
->ports
[port
->port_no
] = NULL
;
486 name
= xstrdup(netdev_get_name(port
->netdev
));
487 netdev_close(port
->netdev
);
496 answer_port_query(const struct dp_netdev_port
*port
, struct odp_port
*odp_port
)
498 memset(odp_port
, 0, sizeof *odp_port
);
499 ovs_strlcpy(odp_port
->devname
, netdev_get_name(port
->netdev
),
500 sizeof odp_port
->devname
);
501 odp_port
->port
= port
->port_no
;
502 odp_port
->flags
= port
->internal
? ODP_PORT_INTERNAL
: 0;
506 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
507 struct odp_port
*odp_port
)
509 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
510 struct dp_netdev_port
*port
;
513 error
= get_port_by_number(dp
, port_no
, &port
);
515 answer_port_query(port
, odp_port
);
521 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
522 struct odp_port
*odp_port
)
524 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
525 struct dp_netdev_port
*port
;
528 error
= get_port_by_name(dp
, devname
, &port
);
530 answer_port_query(port
, odp_port
);
536 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
538 hmap_remove(&dp
->flow_table
, &flow
->node
);
544 dp_netdev_flow_flush(struct dp_netdev
*dp
)
546 struct dp_netdev_flow
*flow
, *next
;
548 HMAP_FOR_EACH_SAFE (flow
, next
, struct dp_netdev_flow
, node
,
550 dp_netdev_free_flow(dp
, flow
);
555 dpif_netdev_flow_flush(struct dpif
*dpif
)
557 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
558 dp_netdev_flow_flush(dp
);
563 dpif_netdev_port_list(const struct dpif
*dpif
, struct odp_port
*ports
, int n
)
565 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
566 struct dp_netdev_port
*port
;
570 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
571 struct odp_port
*odp_port
= &ports
[i
];
575 answer_port_query(port
, odp_port
);
582 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
584 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
585 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
586 dpif
->dp_serial
= dpif
->dp
->serial
;
594 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
596 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
597 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
598 poll_immediate_wake();
603 get_port_group(const struct dpif
*dpif
, int group_no
,
604 struct odp_port_group
**groupp
)
606 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
608 if (group_no
>= 0 && group_no
< N_GROUPS
) {
609 *groupp
= &dp
->groups
[group_no
];
618 dpif_netdev_port_group_get(const struct dpif
*dpif
, int group_no
,
619 uint16_t ports
[], int n
)
621 struct odp_port_group
*group
;
628 error
= get_port_group(dpif
, group_no
, &group
);
630 memcpy(ports
, group
->ports
, MIN(n
, group
->n_ports
) * sizeof *ports
);
631 return group
->n_ports
;
638 dpif_netdev_port_group_set(struct dpif
*dpif
, int group_no
,
639 const uint16_t ports
[], int n
)
641 struct odp_port_group
*group
;
644 if (n
< 0 || n
> MAX_PORTS
) {
648 error
= get_port_group(dpif
, group_no
, &group
);
651 group
->ports
= xmemdup(ports
, n
* sizeof *group
->ports
);
653 group
->group
= group_no
;
658 static struct dp_netdev_flow
*
659 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const flow_t
*key
)
661 struct dp_netdev_flow
*flow
;
663 assert(!key
->reserved
[0] && !key
->reserved
[1] && !key
->reserved
[2]);
664 HMAP_FOR_EACH_WITH_HASH (flow
, struct dp_netdev_flow
, node
,
665 flow_hash(key
, 0), &dp
->flow_table
) {
666 if (flow_equal(&flow
->key
, key
)) {
674 answer_flow_query(struct dp_netdev_flow
*flow
, uint32_t query_flags
,
675 struct odp_flow
*odp_flow
)
678 odp_flow
->key
= flow
->key
;
679 odp_flow
->stats
.n_packets
= flow
->packet_count
;
680 odp_flow
->stats
.n_bytes
= flow
->byte_count
;
681 odp_flow
->stats
.used_sec
= flow
->used
.tv_sec
;
682 odp_flow
->stats
.used_nsec
= flow
->used
.tv_nsec
;
683 odp_flow
->stats
.tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
684 odp_flow
->stats
.reserved
= 0;
685 odp_flow
->stats
.error
= 0;
686 if (odp_flow
->n_actions
> 0) {
687 unsigned int n
= MIN(odp_flow
->n_actions
, flow
->n_actions
);
688 memcpy(odp_flow
->actions
, flow
->actions
,
689 n
* sizeof *odp_flow
->actions
);
690 odp_flow
->n_actions
= flow
->n_actions
;
693 if (query_flags
& ODPFF_ZERO_TCP_FLAGS
) {
698 odp_flow
->stats
.error
= ENOENT
;
703 dpif_netdev_flow_get(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
705 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
708 for (i
= 0; i
< n
; i
++) {
709 struct odp_flow
*odp_flow
= &flows
[i
];
710 answer_flow_query(dp_netdev_lookup_flow(dp
, &odp_flow
->key
),
711 odp_flow
->flags
, odp_flow
);
717 dpif_netdev_validate_actions(const union odp_action
*actions
, int n_actions
,
723 for (i
= 0; i
< n_actions
; i
++) {
724 const union odp_action
*a
= &actions
[i
];
727 if (a
->output
.port
>= MAX_PORTS
) {
732 case ODPAT_OUTPUT_GROUP
:
734 if (a
->output_group
.group
>= N_GROUPS
) {
739 case ODPAT_CONTROLLER
:
742 case ODPAT_SET_VLAN_VID
:
744 if (a
->vlan_vid
.vlan_vid
& htons(~VLAN_VID_MASK
)) {
749 case ODPAT_SET_VLAN_PCP
:
751 if (a
->vlan_pcp
.vlan_pcp
& ~(VLAN_PCP_MASK
>> VLAN_PCP_SHIFT
)) {
756 case ODPAT_SET_NW_TOS
:
758 if (a
->nw_tos
.nw_tos
& IP_ECN_MASK
) {
763 case ODPAT_STRIP_VLAN
:
764 case ODPAT_SET_DL_SRC
:
765 case ODPAT_SET_DL_DST
:
766 case ODPAT_SET_NW_SRC
:
767 case ODPAT_SET_NW_DST
:
768 case ODPAT_SET_TP_SRC
:
769 case ODPAT_SET_TP_DST
:
781 set_flow_actions(struct dp_netdev_flow
*flow
, struct odp_flow
*odp_flow
)
787 if (odp_flow
->n_actions
>= 4096 / sizeof *odp_flow
->actions
) {
790 error
= dpif_netdev_validate_actions(odp_flow
->actions
,
791 odp_flow
->n_actions
, &mutates
);
796 n_bytes
= odp_flow
->n_actions
* sizeof *flow
->actions
;
797 flow
->actions
= xrealloc(flow
->actions
, n_bytes
);
798 flow
->n_actions
= odp_flow
->n_actions
;
799 memcpy(flow
->actions
, odp_flow
->actions
, n_bytes
);
804 add_flow(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
806 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
807 struct dp_netdev_flow
*flow
;
810 flow
= xzalloc(sizeof *flow
);
811 flow
->key
= odp_flow
->key
;
812 memset(flow
->key
.reserved
, 0, sizeof flow
->key
.reserved
);
814 error
= set_flow_actions(flow
, odp_flow
);
820 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
825 clear_stats(struct dp_netdev_flow
*flow
)
827 flow
->used
.tv_sec
= 0;
828 flow
->used
.tv_nsec
= 0;
829 flow
->packet_count
= 0;
830 flow
->byte_count
= 0;
835 dpif_netdev_flow_put(struct dpif
*dpif
, struct odp_flow_put
*put
)
837 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
838 struct dp_netdev_flow
*flow
;
840 flow
= dp_netdev_lookup_flow(dp
, &put
->flow
.key
);
842 if (put
->flags
& ODPPF_CREATE
) {
843 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
844 return add_flow(dpif
, &put
->flow
);
852 if (put
->flags
& ODPPF_MODIFY
) {
853 int error
= set_flow_actions(flow
, &put
->flow
);
854 if (!error
&& put
->flags
& ODPPF_ZERO_STATS
) {
866 dpif_netdev_flow_del(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
868 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
869 struct dp_netdev_flow
*flow
;
871 flow
= dp_netdev_lookup_flow(dp
, &odp_flow
->key
);
873 answer_flow_query(flow
, 0, odp_flow
);
874 dp_netdev_free_flow(dp
, flow
);
882 dpif_netdev_flow_list(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
884 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
885 struct dp_netdev_flow
*flow
;
889 HMAP_FOR_EACH (flow
, struct dp_netdev_flow
, node
, &dp
->flow_table
) {
893 answer_flow_query(flow
, 0, &flows
[i
++]);
895 return hmap_count(&dp
->flow_table
);
899 dpif_netdev_execute(struct dpif
*dpif
, uint16_t in_port
,
900 const union odp_action actions
[], int n_actions
,
901 const struct ofpbuf
*packet
)
903 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
909 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
913 error
= dpif_netdev_validate_actions(actions
, n_actions
, &mutates
);
919 /* We need a deep copy of 'packet' since we're going to modify its
921 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
922 copy
.data
= (char*)copy
.base
+ DP_NETDEV_HEADROOM
;
923 ofpbuf_put(©
, packet
->data
, packet
->size
);
925 /* We still need a shallow copy of 'packet', even though we won't
926 * modify its data, because flow_extract() modifies packet->l2, etc.
927 * We could probably get away with modifying those but it's more polite
931 flow_extract(©
, 0, in_port
, &flow
);
932 error
= dp_netdev_execute_actions(dp
, ©
, &flow
, actions
, n_actions
);
934 ofpbuf_uninit(©
);
940 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
942 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
943 *listen_mask
= dpif_netdev
->listen_mask
;
948 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
950 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
951 if (!(listen_mask
& ~ODPL_ALL
)) {
952 dpif_netdev
->listen_mask
= listen_mask
;
959 static struct ovs_queue
*
960 find_nonempty_queue(struct dpif
*dpif
)
962 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
963 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
964 int mask
= dpif_netdev
->listen_mask
;
967 for (i
= 0; i
< N_QUEUES
; i
++) {
968 struct ovs_queue
*q
= &dp
->queues
[i
];
969 if (q
->n
&& mask
& (1u << i
)) {
977 dpif_netdev_recv(struct dpif
*dpif
, struct ofpbuf
**bufp
)
979 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
981 *bufp
= queue_pop_head(q
);
989 dpif_netdev_recv_wait(struct dpif
*dpif
)
991 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
993 poll_immediate_wake();
995 /* No messages ready to be received, and dp_wait() will ensure that we
996 * wake up to queue new messages, so there is nothing to do. */
1001 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, const flow_t
*key
,
1002 const struct ofpbuf
*packet
)
1004 time_timespec(&flow
->used
);
1005 flow
->packet_count
++;
1006 flow
->byte_count
+= packet
->size
;
1007 if (key
->dl_type
== htons(ETH_TYPE_IP
) && key
->nw_proto
== IPPROTO_TCP
) {
1008 struct tcp_header
*th
= packet
->l4
;
1009 flow
->tcp_ctl
|= th
->tcp_ctl
;
1014 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1015 struct ofpbuf
*packet
)
1017 struct dp_netdev_flow
*flow
;
1020 if (packet
->size
< ETH_HEADER_LEN
) {
1023 if (flow_extract(packet
, 0, port
->port_no
, &key
) && dp
->drop_frags
) {
1028 flow
= dp_netdev_lookup_flow(dp
, &key
);
1030 dp_netdev_flow_used(flow
, &key
, packet
);
1031 dp_netdev_execute_actions(dp
, packet
, &key
,
1032 flow
->actions
, flow
->n_actions
);
1036 dp_netdev_output_control(dp
, packet
, _ODPL_MISS_NR
, port
->port_no
, 0);
1043 struct ofpbuf packet
;
1044 struct dp_netdev
*dp
;
1046 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ max_mtu
);
1047 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1048 struct dp_netdev_port
*port
;
1050 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1053 /* Reset packet contents. */
1054 packet
.data
= (char*)packet
.base
+ DP_NETDEV_HEADROOM
;
1057 error
= netdev_recv(port
->netdev
, &packet
);
1059 dp_netdev_port_input(dp
, port
, &packet
);
1060 } else if (error
!= EAGAIN
) {
1061 struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1062 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1063 netdev_get_name(port
->netdev
), strerror(error
));
1067 ofpbuf_uninit(&packet
);
1071 dp_netdev_wait(void)
1073 struct dp_netdev
*dp
;
1075 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1076 struct dp_netdev_port
*port
;
1077 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1078 netdev_recv_wait(port
->netdev
);
1084 /* Modify the TCI field of 'packet'. If a VLAN tag is not present, one
1085 * is added with the TCI field set to 'tci'. If a VLAN tag is present,
1086 * then 'mask' bits are cleared before 'tci' is logically OR'd into the
1089 * Note that the function does not ensure that 'tci' does not affect
1090 * bits outside of 'mask'.
1093 dp_netdev_modify_vlan_tci(struct ofpbuf
*packet
, uint16_t tci
, uint16_t mask
)
1095 struct vlan_eth_header
*veh
;
1096 struct eth_header
*eh
;
1099 if (packet
->size
>= sizeof(struct vlan_eth_header
)
1100 && eh
->eth_type
== htons(ETH_TYPE_VLAN
)) {
1101 /* Clear 'mask' bits, but maintain other TCI bits. */
1103 veh
->veth_tci
&= ~htons(mask
);
1104 veh
->veth_tci
|= htons(tci
);
1106 /* Insert new 802.1Q header. */
1107 struct vlan_eth_header tmp
;
1108 memcpy(tmp
.veth_dst
, eh
->eth_dst
, ETH_ADDR_LEN
);
1109 memcpy(tmp
.veth_src
, eh
->eth_src
, ETH_ADDR_LEN
);
1110 tmp
.veth_type
= htons(ETH_TYPE_VLAN
);
1111 tmp
.veth_tci
= htons(tci
);
1112 tmp
.veth_next_type
= eh
->eth_type
;
1114 veh
= ofpbuf_push_uninit(packet
, VLAN_HEADER_LEN
);
1115 memcpy(veh
, &tmp
, sizeof tmp
);
1116 packet
->l2
= (char*)packet
->l2
- VLAN_HEADER_LEN
;
1121 dp_netdev_strip_vlan(struct ofpbuf
*packet
)
1123 struct vlan_eth_header
*veh
= packet
->l2
;
1124 if (packet
->size
>= sizeof *veh
1125 && veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1126 struct eth_header tmp
;
1128 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1129 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1130 tmp
.eth_type
= veh
->veth_next_type
;
1132 packet
->size
-= VLAN_HEADER_LEN
;
1133 packet
->data
= (char*)packet
->data
+ VLAN_HEADER_LEN
;
1134 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1135 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1140 dp_netdev_set_dl_src(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1142 struct eth_header
*eh
= packet
->l2
;
1143 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1147 dp_netdev_set_dl_dst(struct ofpbuf
*packet
, const uint8_t dl_addr
[ETH_ADDR_LEN
])
1149 struct eth_header
*eh
= packet
->l2
;
1150 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1154 is_ip(const struct ofpbuf
*packet
, const flow_t
*key
)
1156 return key
->dl_type
== htons(ETH_TYPE_IP
) && packet
->l4
;
1160 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, const flow_t
*key
,
1161 const struct odp_action_nw_addr
*a
)
1163 if (is_ip(packet
, key
)) {
1164 struct ip_header
*nh
= packet
->l3
;
1167 field
= a
->type
== ODPAT_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1168 if (key
->nw_proto
== IP_TYPE_TCP
&& packet
->l7
) {
1169 struct tcp_header
*th
= packet
->l4
;
1170 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, a
->nw_addr
);
1171 } else if (key
->nw_proto
== IP_TYPE_UDP
&& packet
->l7
) {
1172 struct udp_header
*uh
= packet
->l4
;
1174 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, a
->nw_addr
);
1175 if (!uh
->udp_csum
) {
1176 uh
->udp_csum
= 0xffff;
1180 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, a
->nw_addr
);
1181 *field
= a
->nw_addr
;
1186 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, const flow_t
*key
,
1187 const struct odp_action_nw_tos
*a
)
1189 if (is_ip(packet
, key
)) {
1190 struct ip_header
*nh
= packet
->l3
;
1191 uint8_t *field
= &nh
->ip_tos
;
1193 /* Set the DSCP bits and preserve the ECN bits. */
1194 uint8_t new = a
->nw_tos
| (nh
->ip_tos
& IP_ECN_MASK
);
1196 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1197 htons((uint16_t)a
->nw_tos
));
1203 dp_netdev_set_tp_port(struct ofpbuf
*packet
, const flow_t
*key
,
1204 const struct odp_action_tp_port
*a
)
1206 if (is_ip(packet
, key
)) {
1208 if (key
->nw_proto
== IPPROTO_TCP
&& packet
->l7
) {
1209 struct tcp_header
*th
= packet
->l4
;
1210 field
= a
->type
== ODPAT_SET_TP_SRC
? &th
->tcp_src
: &th
->tcp_dst
;
1211 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, a
->tp_port
);
1212 *field
= a
->tp_port
;
1213 } else if (key
->nw_proto
== IPPROTO_UDP
&& packet
->l7
) {
1214 struct udp_header
*uh
= packet
->l4
;
1215 field
= a
->type
== ODPAT_SET_TP_SRC
? &uh
->udp_src
: &uh
->udp_dst
;
1216 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, a
->tp_port
);
1217 *field
= a
->tp_port
;
1225 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1228 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1230 netdev_send(p
->netdev
, packet
);
1235 dp_netdev_output_group(struct dp_netdev
*dp
, uint16_t group
, uint16_t in_port
,
1236 struct ofpbuf
*packet
)
1238 struct odp_port_group
*g
= &dp
->groups
[group
];
1241 for (i
= 0; i
< g
->n_ports
; i
++) {
1242 uint16_t out_port
= g
->ports
[i
];
1243 if (out_port
!= in_port
) {
1244 dp_netdev_output_port(dp
, packet
, out_port
);
1250 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1251 int queue_no
, int port_no
, uint32_t arg
)
1253 struct ovs_queue
*q
= &dp
->queues
[queue_no
];
1254 struct odp_msg
*header
;
1258 if (q
->n
>= MAX_QUEUE_LEN
) {
1263 msg_size
= sizeof *header
+ packet
->size
;
1264 msg
= ofpbuf_new_with_headroom(msg_size
, DPIF_RECV_MSG_PADDING
);
1265 header
= ofpbuf_put_uninit(msg
, sizeof *header
);
1266 header
->type
= queue_no
;
1267 header
->length
= msg_size
;
1268 header
->port
= port_no
;
1270 ofpbuf_put(msg
, packet
->data
, packet
->size
);
1271 queue_push_tail(q
, msg
);
1276 /* Returns true if 'packet' is an invalid Ethernet+IPv4 ARP packet: one with
1277 * screwy or truncated header fields or one whose inner and outer Ethernet
1278 * address differ. */
1280 dp_netdev_is_spoofed_arp(struct ofpbuf
*packet
, const struct odp_flow_key
*key
)
1282 struct arp_eth_header
*arp
;
1283 struct eth_header
*eth
;
1286 if (key
->dl_type
!= htons(ETH_TYPE_ARP
)) {
1290 l3_size
= (char *) ofpbuf_end(packet
) - (char *) packet
->l3
;
1291 if (l3_size
< sizeof(struct arp_eth_header
)) {
1297 return (arp
->ar_hrd
!= htons(ARP_HRD_ETHERNET
)
1298 || arp
->ar_pro
!= htons(ARP_PRO_IP
)
1299 || arp
->ar_hln
!= ETH_HEADER_LEN
1301 || !eth_addr_equals(arp
->ar_sha
, eth
->eth_src
));
1305 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1306 struct ofpbuf
*packet
, const flow_t
*key
,
1307 const union odp_action
*actions
, int n_actions
)
1310 for (i
= 0; i
< n_actions
; i
++) {
1311 const union odp_action
*a
= &actions
[i
];
1315 dp_netdev_output_port(dp
, packet
, a
->output
.port
);
1318 case ODPAT_OUTPUT_GROUP
:
1319 dp_netdev_output_group(dp
, a
->output_group
.group
, key
->in_port
,
1323 case ODPAT_CONTROLLER
:
1324 dp_netdev_output_control(dp
, packet
, _ODPL_ACTION_NR
,
1325 key
->in_port
, a
->controller
.arg
);
1328 case ODPAT_SET_VLAN_VID
:
1329 dp_netdev_modify_vlan_tci(packet
, ntohs(a
->vlan_vid
.vlan_vid
),
1333 case ODPAT_SET_VLAN_PCP
:
1334 dp_netdev_modify_vlan_tci(packet
,
1335 a
->vlan_pcp
.vlan_pcp
<< VLAN_PCP_SHIFT
,
1339 case ODPAT_STRIP_VLAN
:
1340 dp_netdev_strip_vlan(packet
);
1343 case ODPAT_SET_DL_SRC
:
1344 dp_netdev_set_dl_src(packet
, a
->dl_addr
.dl_addr
);
1347 case ODPAT_SET_DL_DST
:
1348 dp_netdev_set_dl_dst(packet
, a
->dl_addr
.dl_addr
);
1351 case ODPAT_SET_NW_SRC
:
1352 case ODPAT_SET_NW_DST
:
1353 dp_netdev_set_nw_addr(packet
, key
, &a
->nw_addr
);
1356 case ODPAT_SET_NW_TOS
:
1357 dp_netdev_set_nw_tos(packet
, key
, &a
->nw_tos
);
1360 case ODPAT_SET_TP_SRC
:
1361 case ODPAT_SET_TP_DST
:
1362 dp_netdev_set_tp_port(packet
, key
, &a
->tp_port
);
1365 case ODPAT_DROP_SPOOFED_ARP
:
1366 if (dp_netdev_is_spoofed_arp(packet
, key
)) {
1374 const struct dpif_class dpif_netdev_class
= {
1378 NULL
, /* enumerate */
1381 NULL
, /* get_all_names */
1382 dpif_netdev_destroy
,
1383 dpif_netdev_get_stats
,
1384 dpif_netdev_get_drop_frags
,
1385 dpif_netdev_set_drop_frags
,
1386 dpif_netdev_port_add
,
1387 dpif_netdev_port_del
,
1388 dpif_netdev_port_query_by_number
,
1389 dpif_netdev_port_query_by_name
,
1390 dpif_netdev_port_list
,
1391 dpif_netdev_port_poll
,
1392 dpif_netdev_port_poll_wait
,
1393 dpif_netdev_port_group_get
,
1394 dpif_netdev_port_group_set
,
1395 dpif_netdev_flow_get
,
1396 dpif_netdev_flow_put
,
1397 dpif_netdev_flow_del
,
1398 dpif_netdev_flow_flush
,
1399 dpif_netdev_flow_list
,
1400 dpif_netdev_execute
,
1401 dpif_netdev_recv_get_mask
,
1402 dpif_netdev_recv_set_mask
,
1403 NULL
, /* get_sflow_probability */
1404 NULL
, /* set_sflow_probability */
1405 NULL
, /* queue_to_priority */
1407 dpif_netdev_recv_wait
,