/* Copyright (c) 2009 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
26 #include <netinet/in.h>
29 #include <sys/ioctl.h>
34 #include "dpif-provider.h"
40 #include "ofp-print.h"
43 #include "poll-loop.h"
49 #define THIS_MODULE VLM_dpif_netdev
51 /* Configuration parameters. */
52 enum { N_QUEUES
= 2 }; /* Number of queues for dpif_recv(). */
53 enum { MAX_QUEUE_LEN
= 100 }; /* Maximum number of packets per queue. */
54 enum { N_GROUPS
= 16 }; /* Number of port groups. */
55 enum { MAX_PORTS
= 256 }; /* Maximum number of ports. */
56 enum { MAX_FLOWS
= 65536 }; /* Maximum number of flows in flow table. */
58 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
59 * headers to be aligned on a 4-byte boundary. */
60 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
62 /* Datapath based on the network device interface from netdev.h. */
69 bool drop_frags
; /* Drop all IP fragments, if true. */
70 struct ovs_queue queues
[N_QUEUES
]; /* Messages queued for dpif_recv(). */
71 struct hmap flow_table
; /* Flow table. */
72 struct odp_port_group groups
[N_GROUPS
];
75 long long int n_frags
; /* Number of dropped IP fragments. */
76 long long int n_hit
; /* Number of flow table matches. */
77 long long int n_missed
; /* Number of flow table misses. */
78 long long int n_lost
; /* Number of misses not passed to client. */
82 struct dp_netdev_port
*ports
[MAX_PORTS
];
83 struct list port_list
;
87 /* A port in a netdev-based datapath. */
88 struct dp_netdev_port
{
89 int port_no
; /* Index into dp_netdev's 'ports'. */
90 struct list node
; /* Element in dp_netdev's 'port_list'. */
91 struct netdev
*netdev
;
92 bool internal
; /* Internal port (as ODP_PORT_INTERNAL)? */
95 /* A flow in dp_netdev's 'flow_table'. */
96 struct dp_netdev_flow
{
97 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
101 struct timeval used
; /* Last used time, in milliseconds. */
102 long long int packet_count
; /* Number of packets matched. */
103 long long int byte_count
; /* Number of bytes matched. */
104 uint8_t ip_tos
; /* IP TOS value. */
105 uint16_t tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
108 union odp_action
*actions
;
109 unsigned int n_actions
;
112 /* Interface to netdev-based datapath. */
115 struct dp_netdev
*dp
;
117 unsigned int dp_serial
;
120 /* All netdev-based datapaths. */
121 static struct dp_netdev
*dp_netdevs
[256];
122 struct list dp_netdev_list
= LIST_INITIALIZER(&dp_netdev_list
);
123 enum { N_DP_NETDEVS
= ARRAY_SIZE(dp_netdevs
) };
125 /* Maximum port MTU seen so far. */
126 static int max_mtu
= ETH_PAYLOAD_MAX
;
128 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
129 struct dp_netdev_port
**portp
);
130 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
131 struct dp_netdev_port
**portp
);
132 static void dp_netdev_free(struct dp_netdev
*);
133 static void dp_netdev_flow_flush(struct dp_netdev
*);
134 static int do_add_port(struct dp_netdev
*, const char *devname
, uint16_t flags
,
136 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
137 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
138 int queue_no
, int port_no
, uint32_t arg
);
139 static int dp_netdev_execute_actions(struct dp_netdev
*,
140 struct ofpbuf
*, flow_t
*,
141 const union odp_action
*, int n
);
143 static struct dpif_netdev
*
144 dpif_netdev_cast(const struct dpif
*dpif
)
146 dpif_assert_class(dpif
, &dpif_netdev_class
);
147 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
150 static struct dp_netdev
*
151 get_dp_netdev(const struct dpif
*dpif
)
153 return dpif_netdev_cast(dpif
)->dp
;
157 name_to_dp_idx(const char *name
)
159 if (!strncmp(name
, "dp", 2) && isdigit((unsigned char)name
[2])) {
160 int dp_idx
= atoi(name
+ 2);
161 if (dp_idx
>= 0 && dp_idx
< N_DP_NETDEVS
) {
168 static struct dp_netdev
*
169 find_dp_netdev(const char *name
)
174 dp_idx
= name_to_dp_idx(name
);
176 return dp_netdevs
[dp_idx
];
179 for (i
= 0; i
< N_DP_NETDEVS
; i
++) {
180 struct dp_netdev
*dp
= dp_netdevs
[i
];
182 struct dp_netdev_port
*port
;
183 if (!get_port_by_name(dp
, name
, &port
)) {
192 create_dpif_netdev(struct dp_netdev
*dp
)
194 struct dpif_netdev
*dpif
;
199 dpname
= xasprintf("netdev:dp%d", dp
->dp_idx
);
200 dpif
= xmalloc(sizeof *dpif
);
201 dpif_init(&dpif
->dpif
, &dpif_netdev_class
, dpname
, dp
->dp_idx
, dp
->dp_idx
);
203 dpif
->listen_mask
= 0;
204 dpif
->dp_serial
= dp
->serial
;
211 create_dp_netdev(const char *name
, int dp_idx
, struct dpif
**dpifp
)
213 struct dp_netdev
*dp
;
217 if (dp_netdevs
[dp_idx
]) {
221 /* Create datapath. */
222 dp_netdevs
[dp_idx
] = dp
= xcalloc(1, sizeof *dp
);
223 list_push_back(&dp_netdev_list
, &dp
->node
);
226 dp
->drop_frags
= false;
227 for (i
= 0; i
< N_QUEUES
; i
++) {
228 queue_init(&dp
->queues
[i
]);
230 hmap_init(&dp
->flow_table
);
231 for (i
= 0; i
< N_GROUPS
; i
++) {
232 dp
->groups
[i
].ports
= NULL
;
233 dp
->groups
[i
].n_ports
= 0;
234 dp
->groups
[i
].group
= i
;
236 list_init(&dp
->port_list
);
237 error
= do_add_port(dp
, name
, ODP_PORT_INTERNAL
, ODPP_LOCAL
);
243 *dpifp
= create_dpif_netdev(dp
);
248 dpif_netdev_open(const char *name UNUSED
, char *suffix
, bool create
,
252 if (find_dp_netdev(suffix
)) {
255 int dp_idx
= name_to_dp_idx(suffix
);
257 return create_dp_netdev(suffix
, dp_idx
, dpifp
);
259 /* Scan for unused dp_idx number. */
260 for (dp_idx
= 0; dp_idx
< N_DP_NETDEVS
; dp_idx
++) {
261 int error
= create_dp_netdev(suffix
, dp_idx
, dpifp
);
262 if (error
!= EBUSY
) {
267 /* All datapath numbers in use. */
272 struct dp_netdev
*dp
= find_dp_netdev(suffix
);
274 *dpifp
= create_dpif_netdev(dp
);
283 dp_netdev_free(struct dp_netdev
*dp
)
287 dp_netdev_flow_flush(dp
);
288 while (dp
->n_ports
> 0) {
289 struct dp_netdev_port
*port
= CONTAINER_OF(
290 dp
->port_list
.next
, struct dp_netdev_port
, node
);
291 do_del_port(dp
, port
->port_no
);
293 for (i
= 0; i
< N_QUEUES
; i
++) {
294 queue_destroy(&dp
->queues
[i
]);
296 hmap_destroy(&dp
->flow_table
);
297 for (i
= 0; i
< N_GROUPS
; i
++) {
298 free(dp
->groups
[i
].ports
);
300 dp_netdevs
[dp
->dp_idx
] = NULL
;
301 list_remove(&dp
->node
);
306 dpif_netdev_close(struct dpif
*dpif
)
308 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
309 assert(dp
->open_cnt
> 0);
310 if (--dp
->open_cnt
== 0 && dp
->deleted
) {
317 dpif_netdev_delete(struct dpif
*dpif
)
319 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
325 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
327 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
328 memset(stats
, 0, sizeof *stats
);
329 stats
->n_flows
= hmap_count(&dp
->flow_table
);
330 stats
->cur_capacity
= hmap_capacity(&dp
->flow_table
);
331 stats
->max_capacity
= MAX_FLOWS
;
332 stats
->n_ports
= dp
->n_ports
;
333 stats
->max_ports
= MAX_PORTS
;
334 stats
->max_groups
= N_GROUPS
;
335 stats
->n_frags
= dp
->n_frags
;
336 stats
->n_hit
= dp
->n_hit
;
337 stats
->n_missed
= dp
->n_missed
;
338 stats
->n_lost
= dp
->n_lost
;
339 stats
->max_miss_queue
= MAX_QUEUE_LEN
;
340 stats
->max_action_queue
= MAX_QUEUE_LEN
;
345 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
347 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
348 *drop_fragsp
= dp
->drop_frags
;
353 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
355 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
356 dp
->drop_frags
= drop_frags
;
361 do_add_port(struct dp_netdev
*dp
, const char *devname
, uint16_t flags
,
364 bool internal
= (flags
& ODP_PORT_INTERNAL
) != 0;
365 struct dp_netdev_port
*port
;
366 struct netdev
*netdev
;
370 /* XXX reject devices already in some dp_netdev. */
372 /* Open and validate network device. */
374 error
= netdev_open(devname
, NETDEV_ETH_TYPE_ANY
, &netdev
);
376 char *tapname
= xasprintf("tap:%s", devname
);
377 error
= netdev_open(tapname
, NETDEV_ETH_TYPE_ANY
, &netdev
);
383 /* XXX reject loopback devices */
384 /* XXX reject non-Ethernet devices */
386 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
388 netdev_close(netdev
);
392 port
= xmalloc(sizeof *port
);
393 port
->port_no
= port_no
;
394 port
->netdev
= netdev
;
395 port
->internal
= internal
;
397 netdev_get_mtu(netdev
, &mtu
);
402 list_push_back(&dp
->port_list
, &port
->node
);
403 dp
->ports
[port_no
] = port
;
411 dpif_netdev_port_add(struct dpif
*dpif
, const char *devname
, uint16_t flags
,
414 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
417 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
418 if (!dp
->ports
[port_no
]) {
420 return do_add_port(dp
, devname
, flags
, port_no
);
427 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
429 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
430 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
434 is_valid_port_number(uint16_t port_no
)
436 return port_no
< MAX_PORTS
;
440 get_port_by_number(struct dp_netdev
*dp
,
441 uint16_t port_no
, struct dp_netdev_port
**portp
)
443 if (!is_valid_port_number(port_no
)) {
447 *portp
= dp
->ports
[port_no
];
448 return *portp
? 0 : ENOENT
;
453 get_port_by_name(struct dp_netdev
*dp
,
454 const char *devname
, struct dp_netdev_port
**portp
)
456 struct dp_netdev_port
*port
;
458 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
459 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
468 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
470 struct dp_netdev_port
*port
;
473 error
= get_port_by_number(dp
, port_no
, &port
);
478 list_remove(&port
->node
);
479 dp
->ports
[port
->port_no
] = NULL
;
483 netdev_close(port
->netdev
);
490 answer_port_query(const struct dp_netdev_port
*port
, struct odp_port
*odp_port
)
492 memset(odp_port
, 0, sizeof *odp_port
);
493 ovs_strlcpy(odp_port
->devname
, netdev_get_name(port
->netdev
),
494 sizeof odp_port
->devname
);
495 odp_port
->port
= port
->port_no
;
496 odp_port
->flags
= port
->internal
? ODP_PORT_INTERNAL
: 0;
500 dpif_netdev_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
501 struct odp_port
*odp_port
)
503 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
504 struct dp_netdev_port
*port
;
507 error
= get_port_by_number(dp
, port_no
, &port
);
509 answer_port_query(port
, odp_port
);
515 dpif_netdev_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
516 struct odp_port
*odp_port
)
518 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
519 struct dp_netdev_port
*port
;
522 error
= get_port_by_name(dp
, devname
, &port
);
524 answer_port_query(port
, odp_port
);
530 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
532 hmap_remove(&dp
->flow_table
, &flow
->node
);
538 dp_netdev_flow_flush(struct dp_netdev
*dp
)
540 struct dp_netdev_flow
*flow
, *next
;
542 HMAP_FOR_EACH_SAFE (flow
, next
, struct dp_netdev_flow
, node
,
544 dp_netdev_free_flow(dp
, flow
);
/* dpif callback: deletes every flow in the datapath's flow table.
 * Always succeeds, returning 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_flow_flush(dp);
    return 0;
}
557 dpif_netdev_port_list(const struct dpif
*dpif
, struct odp_port
*ports
, int n
)
559 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
560 struct dp_netdev_port
*port
;
564 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
565 struct odp_port
*odp_port
= &ports
[i
];
569 answer_port_query(port
, odp_port
);
576 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep UNUSED
)
578 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
579 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
580 dpif
->dp_serial
= dpif
->dp
->serial
;
588 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
590 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
591 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
592 poll_immediate_wake();
597 get_port_group(const struct dpif
*dpif
, int group_no
,
598 struct odp_port_group
**groupp
)
600 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
602 if (group_no
>= 0 && group_no
< N_GROUPS
) {
603 *groupp
= &dp
->groups
[group_no
];
612 dpif_netdev_port_group_get(const struct dpif
*dpif
, int group_no
,
613 uint16_t ports
[], int n
)
615 struct odp_port_group
*group
;
622 error
= get_port_group(dpif
, group_no
, &group
);
624 memcpy(ports
, group
->ports
, MIN(n
, group
->n_ports
) * sizeof *ports
);
625 return group
->n_ports
;
632 dpif_netdev_port_group_set(struct dpif
*dpif
, int group_no
,
633 const uint16_t ports
[], int n
)
635 struct odp_port_group
*group
;
638 if (n
< 0 || n
> MAX_PORTS
) {
642 error
= get_port_group(dpif
, group_no
, &group
);
645 group
->ports
= xmemdup(ports
, n
* sizeof *group
->ports
);
647 group
->group
= group_no
;
652 static struct dp_netdev_flow
*
653 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const flow_t
*key
)
655 struct dp_netdev_flow
*flow
;
657 assert(key
->reserved
== 0);
658 HMAP_FOR_EACH_WITH_HASH (flow
, struct dp_netdev_flow
, node
,
659 flow_hash(key
, 0), &dp
->flow_table
) {
660 if (flow_equal(&flow
->key
, key
)) {
668 answer_flow_query(struct dp_netdev_flow
*flow
, uint32_t query_flags
,
669 struct odp_flow
*odp_flow
)
672 odp_flow
->key
= flow
->key
;
673 odp_flow
->stats
.n_packets
= flow
->packet_count
;
674 odp_flow
->stats
.n_bytes
= flow
->byte_count
;
675 odp_flow
->stats
.used_sec
= flow
->used
.tv_sec
;
676 odp_flow
->stats
.used_nsec
= flow
->used
.tv_usec
* 1000;
677 odp_flow
->stats
.tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
678 odp_flow
->stats
.ip_tos
= flow
->ip_tos
;
679 odp_flow
->stats
.error
= 0;
680 if (odp_flow
->n_actions
> 0) {
681 unsigned int n
= MIN(odp_flow
->n_actions
, flow
->n_actions
);
682 memcpy(odp_flow
->actions
, flow
->actions
,
683 n
* sizeof *odp_flow
->actions
);
684 odp_flow
->n_actions
= flow
->n_actions
;
687 if (query_flags
& ODPFF_ZERO_TCP_FLAGS
) {
692 odp_flow
->stats
.error
= ENOENT
;
697 dpif_netdev_flow_get(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
699 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
702 for (i
= 0; i
< n
; i
++) {
703 struct odp_flow
*odp_flow
= &flows
[i
];
704 answer_flow_query(dp_netdev_lookup_flow(dp
, &odp_flow
->key
),
705 odp_flow
->flags
, odp_flow
);
711 dpif_netdev_validate_actions(const union odp_action
*actions
, int n_actions
,
717 for (i
= 0; i
< n_actions
; i
++) {
718 const union odp_action
*a
= &actions
[i
];
721 if (a
->output
.port
>= MAX_PORTS
) {
726 case ODPAT_OUTPUT_GROUP
:
728 if (a
->output_group
.group
>= N_GROUPS
) {
733 case ODPAT_CONTROLLER
:
736 case ODPAT_SET_VLAN_VID
:
738 if (a
->vlan_vid
.vlan_vid
& htons(~VLAN_VID_MASK
)) {
743 case ODPAT_SET_VLAN_PCP
:
745 if (a
->vlan_pcp
.vlan_pcp
& ~VLAN_PCP_MASK
) {
750 case ODPAT_STRIP_VLAN
:
751 case ODPAT_SET_DL_SRC
:
752 case ODPAT_SET_DL_DST
:
753 case ODPAT_SET_NW_SRC
:
754 case ODPAT_SET_NW_DST
:
755 case ODPAT_SET_TP_SRC
:
756 case ODPAT_SET_TP_DST
:
768 set_flow_actions(struct dp_netdev_flow
*flow
, struct odp_flow
*odp_flow
)
774 if (odp_flow
->n_actions
>= 4096 / sizeof *odp_flow
->actions
) {
777 error
= dpif_netdev_validate_actions(odp_flow
->actions
,
778 odp_flow
->n_actions
, &mutates
);
783 n_bytes
= odp_flow
->n_actions
* sizeof *flow
->actions
;
784 flow
->actions
= xrealloc(flow
->actions
, n_bytes
);
785 flow
->n_actions
= odp_flow
->n_actions
;
786 memcpy(flow
->actions
, odp_flow
->actions
, n_bytes
);
791 add_flow(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
793 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
794 struct dp_netdev_flow
*flow
;
797 flow
= xcalloc(1, sizeof *flow
);
798 flow
->key
= odp_flow
->key
;
799 flow
->key
.reserved
= 0;
801 error
= set_flow_actions(flow
, odp_flow
);
807 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
812 clear_stats(struct dp_netdev_flow
*flow
)
814 flow
->used
.tv_sec
= 0;
815 flow
->used
.tv_usec
= 0;
816 flow
->packet_count
= 0;
817 flow
->byte_count
= 0;
823 dpif_netdev_flow_put(struct dpif
*dpif
, struct odp_flow_put
*put
)
825 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
826 struct dp_netdev_flow
*flow
;
828 flow
= dp_netdev_lookup_flow(dp
, &put
->flow
.key
);
830 if (put
->flags
& ODPPF_CREATE
) {
831 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
832 return add_flow(dpif
, &put
->flow
);
840 if (put
->flags
& ODPPF_MODIFY
) {
841 int error
= set_flow_actions(flow
, &put
->flow
);
842 if (!error
&& put
->flags
& ODPPF_ZERO_STATS
) {
854 dpif_netdev_flow_del(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
856 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
857 struct dp_netdev_flow
*flow
;
859 flow
= dp_netdev_lookup_flow(dp
, &odp_flow
->key
);
861 answer_flow_query(flow
, 0, odp_flow
);
862 dp_netdev_free_flow(dp
, flow
);
870 dpif_netdev_flow_list(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
872 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
873 struct dp_netdev_flow
*flow
;
877 HMAP_FOR_EACH (flow
, struct dp_netdev_flow
, node
, &dp
->flow_table
) {
881 answer_flow_query(flow
, 0, &flows
[i
++]);
883 return hmap_count(&dp
->flow_table
);
887 dpif_netdev_execute(struct dpif
*dpif
, uint16_t in_port
,
888 const union odp_action actions
[], int n_actions
,
889 const struct ofpbuf
*packet
)
891 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
897 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
901 error
= dpif_netdev_validate_actions(actions
, n_actions
, &mutates
);
907 /* We need a deep copy of 'packet' since we're going to modify its
909 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
910 copy
.data
= (char*)copy
.base
+ DP_NETDEV_HEADROOM
;
911 ofpbuf_put(©
, packet
->data
, packet
->size
);
913 /* We still need a shallow copy of 'packet', even though we won't
914 * modify its data, because flow_extract() modifies packet->l2, etc.
915 * We could probably get away with modifying those but it's more polite
919 flow_extract(©
, in_port
, &flow
);
920 error
= dp_netdev_execute_actions(dp
, ©
, &flow
, actions
, n_actions
);
922 ofpbuf_uninit(©
);
928 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
930 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
931 *listen_mask
= dpif_netdev
->listen_mask
;
936 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
938 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
939 if (!(listen_mask
& ~ODPL_ALL
)) {
940 dpif_netdev
->listen_mask
= listen_mask
;
947 static struct ovs_queue
*
948 find_nonempty_queue(struct dpif
*dpif
)
950 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
951 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
952 int mask
= dpif_netdev
->listen_mask
;
955 for (i
= 0; i
< N_QUEUES
; i
++) {
956 struct ovs_queue
*q
= &dp
->queues
[i
];
957 if (q
->n
&& mask
& (1u << i
)) {
965 dpif_netdev_recv(struct dpif
*dpif
, struct ofpbuf
**bufp
)
967 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
969 *bufp
= queue_pop_head(q
);
977 dpif_netdev_recv_wait(struct dpif
*dpif
)
979 struct ovs_queue
*q
= find_nonempty_queue(dpif
);
981 poll_immediate_wake();
983 /* No messages ready to be received, and dp_wait() will ensure that we
984 * wake up to queue new messages, so there is nothing to do. */
989 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, const flow_t
*key
,
990 const struct ofpbuf
*packet
)
992 time_timeval(&flow
->used
);
993 flow
->packet_count
++;
994 flow
->byte_count
+= packet
->size
;
995 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
996 struct ip_header
*nh
= packet
->l3
;
997 flow
->ip_tos
= nh
->ip_tos
;
999 if (key
->nw_proto
== IPPROTO_TCP
) {
1000 struct tcp_header
*th
= packet
->l4
;
1001 flow
->tcp_ctl
|= th
->tcp_ctl
;
1007 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1008 struct ofpbuf
*packet
)
1010 struct dp_netdev_flow
*flow
;
1013 if (flow_extract(packet
, port
->port_no
, &key
) && dp
->drop_frags
) {
1018 flow
= dp_netdev_lookup_flow(dp
, &key
);
1020 dp_netdev_flow_used(flow
, &key
, packet
);
1021 dp_netdev_execute_actions(dp
, packet
, &key
,
1022 flow
->actions
, flow
->n_actions
);
1026 dp_netdev_output_control(dp
, packet
, _ODPL_MISS_NR
, port
->port_no
, 0);
1033 struct ofpbuf packet
;
1034 struct dp_netdev
*dp
;
1036 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ max_mtu
);
1037 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1038 struct dp_netdev_port
*port
;
1040 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1043 /* Reset packet contents. */
1044 packet
.data
= (char*)packet
.base
+ DP_NETDEV_HEADROOM
;
1047 error
= netdev_recv(port
->netdev
, &packet
);
1049 dp_netdev_port_input(dp
, port
, &packet
);
1050 } else if (error
!= EAGAIN
) {
1051 struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1052 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1053 netdev_get_name(port
->netdev
), strerror(error
));
1057 ofpbuf_uninit(&packet
);
1061 dp_netdev_wait(void)
1063 struct dp_netdev
*dp
;
1065 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1066 struct dp_netdev_port
*port
;
1067 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1068 netdev_recv_wait(port
->netdev
);
1074 dp_netdev_modify_vlan_tci(struct ofpbuf
*packet
, flow_t
*key
,
1075 uint16_t tci
, uint16_t mask
)
1077 struct vlan_eth_header
*veh
;
1079 if (key
->dl_vlan
!= htons(ODP_VLAN_NONE
)) {
1080 /* Modify 'mask' bits, but maintain other TCI bits. */
1082 veh
->veth_tci
&= ~htons(mask
);
1083 veh
->veth_tci
|= htons(tci
);
1085 /* Insert new 802.1Q header. */
1086 struct eth_header
*eh
= packet
->l2
;
1087 struct vlan_eth_header tmp
;
1088 memcpy(tmp
.veth_dst
, eh
->eth_dst
, ETH_ADDR_LEN
);
1089 memcpy(tmp
.veth_src
, eh
->eth_src
, ETH_ADDR_LEN
);
1090 tmp
.veth_type
= htons(ETH_TYPE_VLAN
);
1091 tmp
.veth_tci
= htons(tci
);
1092 tmp
.veth_next_type
= eh
->eth_type
;
1094 veh
= ofpbuf_push_uninit(packet
, VLAN_HEADER_LEN
);
1095 memcpy(veh
, &tmp
, sizeof tmp
);
1096 packet
->l2
= (char*)packet
->l2
- VLAN_HEADER_LEN
;
1099 key
->dl_vlan
= veh
->veth_tci
& htons(VLAN_VID_MASK
);
1103 dp_netdev_strip_vlan(struct ofpbuf
*packet
, flow_t
*key
)
1105 struct vlan_eth_header
*veh
= packet
->l2
;
1106 if (veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1107 struct eth_header tmp
;
1109 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1110 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1111 tmp
.eth_type
= veh
->veth_next_type
;
1113 packet
->size
-= VLAN_HEADER_LEN
;
1114 packet
->data
= (char*)packet
->data
+ VLAN_HEADER_LEN
;
1115 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1116 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1118 key
->dl_vlan
= htons(ODP_VLAN_NONE
);
1123 dp_netdev_set_dl_src(struct ofpbuf
*packet
,
1124 const uint8_t dl_addr
[ETH_ADDR_LEN
])
1126 struct eth_header
*eh
= packet
->l2
;
1127 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1131 dp_netdev_set_dl_dst(struct ofpbuf
*packet
,
1132 const uint8_t dl_addr
[ETH_ADDR_LEN
])
1134 struct eth_header
*eh
= packet
->l2
;
1135 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1139 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, flow_t
*key
,
1140 const struct odp_action_nw_addr
*a
)
1142 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1143 struct ip_header
*nh
= packet
->l3
;
1146 field
= a
->type
== ODPAT_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1147 if (key
->nw_proto
== IP_TYPE_TCP
) {
1148 struct tcp_header
*th
= packet
->l4
;
1149 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, a
->nw_addr
);
1150 } else if (key
->nw_proto
== IP_TYPE_UDP
) {
1151 struct udp_header
*uh
= packet
->l4
;
1153 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, a
->nw_addr
);
1154 if (!uh
->udp_csum
) {
1155 uh
->udp_csum
= 0xffff;
1159 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, a
->nw_addr
);
1160 *field
= a
->nw_addr
;
1165 dp_netdev_set_tp_port(struct ofpbuf
*packet
, flow_t
*key
,
1166 const struct odp_action_tp_port
*a
)
1168 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1170 if (key
->nw_proto
== IPPROTO_TCP
) {
1171 struct tcp_header
*th
= packet
->l4
;
1172 field
= a
->type
== ODPAT_SET_TP_SRC
? &th
->tcp_src
: &th
->tcp_dst
;
1173 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, a
->tp_port
);
1174 *field
= a
->tp_port
;
1175 } else if (key
->nw_proto
== IPPROTO_UDP
) {
1176 struct udp_header
*uh
= packet
->l4
;
1177 field
= a
->type
== ODPAT_SET_TP_SRC
? &uh
->udp_src
: &uh
->udp_dst
;
1178 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, a
->tp_port
);
1179 *field
= a
->tp_port
;
1185 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1188 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1190 netdev_send(p
->netdev
, packet
);
1195 dp_netdev_output_group(struct dp_netdev
*dp
, uint16_t group
, uint16_t in_port
,
1196 struct ofpbuf
*packet
)
1198 struct odp_port_group
*g
= &dp
->groups
[group
];
1201 for (i
= 0; i
< g
->n_ports
; i
++) {
1202 uint16_t out_port
= g
->ports
[i
];
1203 if (out_port
!= in_port
) {
1204 dp_netdev_output_port(dp
, packet
, out_port
);
1210 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1211 int queue_no
, int port_no
, uint32_t arg
)
1213 struct ovs_queue
*q
= &dp
->queues
[queue_no
];
1214 struct odp_msg
*header
;
1218 if (q
->n
>= MAX_QUEUE_LEN
) {
1223 msg_size
= sizeof *header
+ packet
->size
;
1224 msg
= ofpbuf_new(msg_size
);
1225 header
= ofpbuf_put_uninit(msg
, sizeof *header
);
1226 header
->type
= queue_no
;
1227 header
->length
= msg_size
;
1228 header
->port
= port_no
;
1230 ofpbuf_put(msg
, packet
->data
, packet
->size
);
1231 queue_push_tail(q
, msg
);
1237 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1238 struct ofpbuf
*packet
, flow_t
*key
,
1239 const union odp_action
*actions
, int n_actions
)
1242 for (i
= 0; i
< n_actions
; i
++) {
1243 const union odp_action
*a
= &actions
[i
];
1247 dp_netdev_output_port(dp
, packet
, a
->output
.port
);
1250 case ODPAT_OUTPUT_GROUP
:
1251 dp_netdev_output_group(dp
, a
->output_group
.group
, key
->in_port
,
1255 case ODPAT_CONTROLLER
:
1256 dp_netdev_output_control(dp
, packet
, _ODPL_ACTION_NR
,
1257 key
->in_port
, a
->controller
.arg
);
1260 case ODPAT_SET_VLAN_VID
:
1261 dp_netdev_modify_vlan_tci(packet
, key
, ntohs(a
->vlan_vid
.vlan_vid
),
1265 case ODPAT_SET_VLAN_PCP
:
1266 dp_netdev_modify_vlan_tci(packet
, key
, a
->vlan_pcp
.vlan_pcp
<< 13,
1270 case ODPAT_STRIP_VLAN
:
1271 dp_netdev_strip_vlan(packet
, key
);
1274 case ODPAT_SET_DL_SRC
:
1275 dp_netdev_set_dl_src(packet
, a
->dl_addr
.dl_addr
);
1278 case ODPAT_SET_DL_DST
:
1279 dp_netdev_set_dl_dst(packet
, a
->dl_addr
.dl_addr
);
1282 case ODPAT_SET_NW_SRC
:
1283 case ODPAT_SET_NW_DST
:
1284 dp_netdev_set_nw_addr(packet
, key
, &a
->nw_addr
);
1287 case ODPAT_SET_TP_SRC
:
1288 case ODPAT_SET_TP_DST
:
1289 dp_netdev_set_tp_port(packet
, key
, &a
->tp_port
);
1296 const struct dpif_class dpif_netdev_class
= {
1301 NULL
, /* enumerate */
1304 NULL
, /* get_all_names */
1306 dpif_netdev_get_stats
,
1307 dpif_netdev_get_drop_frags
,
1308 dpif_netdev_set_drop_frags
,
1309 dpif_netdev_port_add
,
1310 dpif_netdev_port_del
,
1311 dpif_netdev_port_query_by_number
,
1312 dpif_netdev_port_query_by_name
,
1313 dpif_netdev_port_list
,
1314 dpif_netdev_port_poll
,
1315 dpif_netdev_port_poll_wait
,
1316 dpif_netdev_port_group_get
,
1317 dpif_netdev_port_group_set
,
1318 dpif_netdev_flow_get
,
1319 dpif_netdev_flow_put
,
1320 dpif_netdev_flow_del
,
1321 dpif_netdev_flow_flush
,
1322 dpif_netdev_flow_list
,
1323 dpif_netdev_execute
,
1324 dpif_netdev_recv_get_mask
,
1325 dpif_netdev_recv_set_mask
,
1327 dpif_netdev_recv_wait
,