/*
 * Copyright (c) 2009, 2010 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
26 #include <netinet/in.h>
29 #include <sys/ioctl.h>
34 #include "dpif-provider.h"
40 #include "ofp-print.h"
43 #include "poll-loop.h"
49 #define THIS_MODULE VLM_dpif_netdev
/* Configuration parameters. */
enum { N_QUEUES = 2 };          /* Number of queues for dpif_recv(). */
enum { MAX_QUEUE_LEN = 100 };   /* Maximum number of packets per queue. */
enum { N_GROUPS = 16 };         /* Number of port groups. */
enum { MAX_PORTS = 256 };       /* Maximum number of ports. */
enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
58 /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
59 * headers to be aligned on a 4-byte boundary. */
60 enum { DP_NETDEV_HEADROOM
= 2 + VLAN_HEADER_LEN
};
62 /* Datapath based on the network device interface from netdev.h. */
69 bool drop_frags
; /* Drop all IP fragments, if true. */
70 struct ovs_queue queues
[N_QUEUES
]; /* Messages queued for dpif_recv(). */
71 struct hmap flow_table
; /* Flow table. */
72 struct odp_port_group groups
[N_GROUPS
];
75 long long int n_frags
; /* Number of dropped IP fragments. */
76 long long int n_hit
; /* Number of flow table matches. */
77 long long int n_missed
; /* Number of flow table misses. */
78 long long int n_lost
; /* Number of misses not passed to client. */
82 struct dp_netdev_port
*ports
[MAX_PORTS
];
83 struct list port_list
;
87 /* A port in a netdev-based datapath. */
88 struct dp_netdev_port
{
89 int port_no
; /* Index into dp_netdev's 'ports'. */
90 struct list node
; /* Element in dp_netdev's 'port_list'. */
91 struct netdev
*netdev
;
92 bool internal
; /* Internal port (as ODP_PORT_INTERNAL)? */
95 /* A flow in dp_netdev's 'flow_table'. */
96 struct dp_netdev_flow
{
97 struct hmap_node node
; /* Element in dp_netdev's 'flow_table'. */
101 struct timeval used
; /* Last used time, in milliseconds. */
102 long long int packet_count
; /* Number of packets matched. */
103 long long int byte_count
; /* Number of bytes matched. */
104 uint8_t ip_tos
; /* IP TOS value. */
105 uint16_t tcp_ctl
; /* Bitwise-OR of seen tcp_ctl values. */
108 union odp_action
*actions
;
109 unsigned int n_actions
;
112 /* Interface to netdev-based datapath. */
115 struct dp_netdev
*dp
;
117 unsigned int dp_serial
;
120 /* All netdev-based datapaths. */
121 static struct dp_netdev
*dp_netdevs
[256];
122 struct list dp_netdev_list
= LIST_INITIALIZER(&dp_netdev_list
);
123 enum { N_DP_NETDEVS
= ARRAY_SIZE(dp_netdevs
) };
125 /* Maximum port MTU seen so far. */
126 static int max_mtu
= ETH_PAYLOAD_MAX
;
128 static int get_port_by_number(struct dp_netdev
*, uint16_t port_no
,
129 struct dp_netdev_port
**portp
);
130 static int get_port_by_name(struct dp_netdev
*, const char *devname
,
131 struct dp_netdev_port
**portp
);
132 static void dp_netdev_free(struct dp_netdev
*);
133 static void dp_netdev_flow_flush(struct dp_netdev
*);
134 static int do_add_port(struct dp_netdev
*, const char *devname
, uint16_t flags
,
136 static int do_del_port(struct dp_netdev
*, uint16_t port_no
);
137 static int dp_netdev_output_control(struct dp_netdev
*, const struct ofpbuf
*,
138 int queue_no
, int port_no
, uint32_t arg
);
139 static int dp_netdev_execute_actions(struct dp_netdev
*,
140 struct ofpbuf
*, flow_t
*,
141 const union odp_action
*, int n
);
143 static struct dpif_netdev
*
144 dpif_netdev_cast(const struct dpif
*dpif
)
146 dpif_assert_class(dpif
, &dpif_netdev_class
);
147 return CONTAINER_OF(dpif
, struct dpif_netdev
, dpif
);
150 static struct dp_netdev
*
151 get_dp_netdev(const struct dpif
*dpif
)
153 return dpif_netdev_cast(dpif
)->dp
;
157 name_to_dp_idx(const char *name
)
159 if (!strncmp(name
, "dp", 2) && isdigit((unsigned char)name
[2])) {
160 int dp_idx
= atoi(name
+ 2);
161 if (dp_idx
>= 0 && dp_idx
< N_DP_NETDEVS
) {
168 static struct dp_netdev
*
169 find_dp_netdev(const char *name
)
174 dp_idx
= name_to_dp_idx(name
);
176 return dp_netdevs
[dp_idx
];
179 for (i
= 0; i
< N_DP_NETDEVS
; i
++) {
180 struct dp_netdev
*dp
= dp_netdevs
[i
];
182 struct dp_netdev_port
*port
;
183 if (!get_port_by_name(dp
, name
, &port
)) {
192 create_dpif_netdev(struct dp_netdev
*dp
)
194 struct dpif_netdev
*dpif
;
199 dpname
= xasprintf("dp%d", dp
->dp_idx
);
200 dpif
= xmalloc(sizeof *dpif
);
201 dpif_init(&dpif
->dpif
, &dpif_netdev_class
, dpname
, dp
->dp_idx
, dp
->dp_idx
);
203 dpif
->listen_mask
= 0;
204 dpif
->dp_serial
= dp
->serial
;
211 create_dp_netdev(const char *name
, int dp_idx
, struct dpif
**dpifp
)
213 struct dp_netdev
*dp
;
217 if (dp_netdevs
[dp_idx
]) {
221 /* Create datapath. */
222 dp_netdevs
[dp_idx
] = dp
= xzalloc(sizeof *dp
);
223 list_push_back(&dp_netdev_list
, &dp
->node
);
226 dp
->drop_frags
= false;
227 for (i
= 0; i
< N_QUEUES
; i
++) {
228 queue_init(&dp
->queues
[i
]);
230 hmap_init(&dp
->flow_table
);
231 for (i
= 0; i
< N_GROUPS
; i
++) {
232 dp
->groups
[i
].ports
= NULL
;
233 dp
->groups
[i
].n_ports
= 0;
234 dp
->groups
[i
].group
= i
;
236 list_init(&dp
->port_list
);
237 error
= do_add_port(dp
, name
, ODP_PORT_INTERNAL
, ODPP_LOCAL
);
243 *dpifp
= create_dpif_netdev(dp
);
248 dpif_netdev_open(const char *name
, const char *type OVS_UNUSED
, bool create
,
252 if (find_dp_netdev(name
)) {
255 int dp_idx
= name_to_dp_idx(name
);
257 return create_dp_netdev(name
, dp_idx
, dpifp
);
259 /* Scan for unused dp_idx number. */
260 for (dp_idx
= 0; dp_idx
< N_DP_NETDEVS
; dp_idx
++) {
261 int error
= create_dp_netdev(name
, dp_idx
, dpifp
);
262 if (error
!= EBUSY
) {
267 /* All datapath numbers in use. */
272 struct dp_netdev
*dp
= find_dp_netdev(name
);
274 *dpifp
= create_dpif_netdev(dp
);
283 dp_netdev_free(struct dp_netdev
*dp
)
287 dp_netdev_flow_flush(dp
);
288 while (dp
->n_ports
> 0) {
289 struct dp_netdev_port
*port
= CONTAINER_OF(
290 dp
->port_list
.next
, struct dp_netdev_port
, node
);
291 do_del_port(dp
, port
->port_no
);
293 for (i
= 0; i
< N_QUEUES
; i
++) {
294 queue_destroy(&dp
->queues
[i
]);
296 hmap_destroy(&dp
->flow_table
);
297 for (i
= 0; i
< N_GROUPS
; i
++) {
298 free(dp
->groups
[i
].ports
);
300 dp_netdevs
[dp
->dp_idx
] = NULL
;
301 list_remove(&dp
->node
);
306 dpif_netdev_close(struct dpif
*dpif
)
308 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
309 assert(dp
->open_cnt
> 0);
310 if (--dp
->open_cnt
== 0 && dp
->destroyed
) {
317 dpif_netdev_destroy(struct dpif
*dpif
)
319 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
320 dp
->destroyed
= true;
325 dpif_netdev_get_stats(const struct dpif
*dpif
, struct odp_stats
*stats
)
327 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
328 memset(stats
, 0, sizeof *stats
);
329 stats
->n_flows
= hmap_count(&dp
->flow_table
);
330 stats
->cur_capacity
= hmap_capacity(&dp
->flow_table
);
331 stats
->max_capacity
= MAX_FLOWS
;
332 stats
->n_ports
= dp
->n_ports
;
333 stats
->max_ports
= MAX_PORTS
;
334 stats
->max_groups
= N_GROUPS
;
335 stats
->n_frags
= dp
->n_frags
;
336 stats
->n_hit
= dp
->n_hit
;
337 stats
->n_missed
= dp
->n_missed
;
338 stats
->n_lost
= dp
->n_lost
;
339 stats
->max_miss_queue
= MAX_QUEUE_LEN
;
340 stats
->max_action_queue
= MAX_QUEUE_LEN
;
345 dpif_netdev_get_drop_frags(const struct dpif
*dpif
, bool *drop_fragsp
)
347 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
348 *drop_fragsp
= dp
->drop_frags
;
353 dpif_netdev_set_drop_frags(struct dpif
*dpif
, bool drop_frags
)
355 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
356 dp
->drop_frags
= drop_frags
;
361 do_add_port(struct dp_netdev
*dp
, const char *devname
, uint16_t flags
,
364 bool internal
= (flags
& ODP_PORT_INTERNAL
) != 0;
365 struct dp_netdev_port
*port
;
366 struct netdev_options netdev_options
;
367 struct netdev
*netdev
;
371 /* XXX reject devices already in some dp_netdev. */
373 /* Open and validate network device. */
374 memset(&netdev_options
, 0, sizeof netdev_options
);
375 netdev_options
.name
= devname
;
376 netdev_options
.ethertype
= NETDEV_ETH_TYPE_ANY
;
377 netdev_options
.may_create
= true;
379 netdev_options
.type
= "tap";
381 netdev_options
.may_open
= true;
384 error
= netdev_open(&netdev_options
, &netdev
);
388 /* XXX reject loopback devices */
389 /* XXX reject non-Ethernet devices */
391 error
= netdev_turn_flags_on(netdev
, NETDEV_PROMISC
, false);
393 netdev_close(netdev
);
397 port
= xmalloc(sizeof *port
);
398 port
->port_no
= port_no
;
399 port
->netdev
= netdev
;
400 port
->internal
= internal
;
402 netdev_get_mtu(netdev
, &mtu
);
407 list_push_back(&dp
->port_list
, &port
->node
);
408 dp
->ports
[port_no
] = port
;
416 dpif_netdev_port_add(struct dpif
*dpif
, const char *devname
, uint16_t flags
,
419 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
422 for (port_no
= 0; port_no
< MAX_PORTS
; port_no
++) {
423 if (!dp
->ports
[port_no
]) {
425 return do_add_port(dp
, devname
, flags
, port_no
);
432 dpif_netdev_port_del(struct dpif
*dpif
, uint16_t port_no
)
434 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
435 return port_no
== ODPP_LOCAL
? EINVAL
: do_del_port(dp
, port_no
);
439 is_valid_port_number(uint16_t port_no
)
441 return port_no
< MAX_PORTS
;
445 get_port_by_number(struct dp_netdev
*dp
,
446 uint16_t port_no
, struct dp_netdev_port
**portp
)
448 if (!is_valid_port_number(port_no
)) {
452 *portp
= dp
->ports
[port_no
];
453 return *portp
? 0 : ENOENT
;
458 get_port_by_name(struct dp_netdev
*dp
,
459 const char *devname
, struct dp_netdev_port
**portp
)
461 struct dp_netdev_port
*port
;
463 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
464 if (!strcmp(netdev_get_name(port
->netdev
), devname
)) {
473 do_del_port(struct dp_netdev
*dp
, uint16_t port_no
)
475 struct dp_netdev_port
*port
;
479 error
= get_port_by_number(dp
, port_no
, &port
);
484 list_remove(&port
->node
);
485 dp
->ports
[port
->port_no
] = NULL
;
489 name
= xstrdup(netdev_get_name(port
->netdev
));
490 netdev_close(port
->netdev
);
499 answer_port_query(const struct dp_netdev_port
*port
, struct odp_port
*odp_port
)
501 memset(odp_port
, 0, sizeof *odp_port
);
502 ovs_strlcpy(odp_port
->devname
, netdev_get_name(port
->netdev
),
503 sizeof odp_port
->devname
);
504 odp_port
->port
= port
->port_no
;
505 odp_port
->flags
= port
->internal
? ODP_PORT_INTERNAL
: 0;
/* Describes port 'port_no' of the datapath behind 'dpif' in '*odp_port'.
 * Returns 0 on success, otherwise the error from get_port_by_number(), in
 * which case '*odp_port' is not modified. */
static int
dpif_netdev_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                 struct odp_port *odp_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    error = get_port_by_number(dp, port_no, &port);
    if (!error) {
        answer_port_query(port, odp_port);
    }
    return error;
}
/* Describes the port named 'devname' of the datapath behind 'dpif' in
 * '*odp_port'.  Returns 0 on success, otherwise the error from
 * get_port_by_name(), in which case '*odp_port' is not modified. */
static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct odp_port *odp_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    error = get_port_by_name(dp, devname, &port);
    if (!error) {
        answer_port_query(port, odp_port);
    }
    return error;
}
539 dp_netdev_free_flow(struct dp_netdev
*dp
, struct dp_netdev_flow
*flow
)
541 hmap_remove(&dp
->flow_table
, &flow
->node
);
547 dp_netdev_flow_flush(struct dp_netdev
*dp
)
549 struct dp_netdev_flow
*flow
, *next
;
551 HMAP_FOR_EACH_SAFE (flow
, next
, struct dp_netdev_flow
, node
,
553 dp_netdev_free_flow(dp
, flow
);
/* Deletes all flows from the datapath behind 'dpif'.  Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp_netdev_flow_flush(dp);
    return 0;
}
566 dpif_netdev_port_list(const struct dpif
*dpif
, struct odp_port
*ports
, int n
)
568 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
569 struct dp_netdev_port
*port
;
573 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
574 struct odp_port
*odp_port
= &ports
[i
];
578 answer_port_query(port
, odp_port
);
585 dpif_netdev_port_poll(const struct dpif
*dpif_
, char **devnamep OVS_UNUSED
)
587 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
588 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
589 dpif
->dp_serial
= dpif
->dp
->serial
;
597 dpif_netdev_port_poll_wait(const struct dpif
*dpif_
)
599 struct dpif_netdev
*dpif
= dpif_netdev_cast(dpif_
);
600 if (dpif
->dp_serial
!= dpif
->dp
->serial
) {
601 poll_immediate_wake();
606 get_port_group(const struct dpif
*dpif
, int group_no
,
607 struct odp_port_group
**groupp
)
609 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
611 if (group_no
>= 0 && group_no
< N_GROUPS
) {
612 *groupp
= &dp
->groups
[group_no
];
621 dpif_netdev_port_group_get(const struct dpif
*dpif
, int group_no
,
622 uint16_t ports
[], int n
)
624 struct odp_port_group
*group
;
631 error
= get_port_group(dpif
, group_no
, &group
);
633 memcpy(ports
, group
->ports
, MIN(n
, group
->n_ports
) * sizeof *ports
);
634 return group
->n_ports
;
641 dpif_netdev_port_group_set(struct dpif
*dpif
, int group_no
,
642 const uint16_t ports
[], int n
)
644 struct odp_port_group
*group
;
647 if (n
< 0 || n
> MAX_PORTS
) {
651 error
= get_port_group(dpif
, group_no
, &group
);
654 group
->ports
= xmemdup(ports
, n
* sizeof *group
->ports
);
656 group
->group
= group_no
;
661 static struct dp_netdev_flow
*
662 dp_netdev_lookup_flow(const struct dp_netdev
*dp
, const flow_t
*key
)
664 struct dp_netdev_flow
*flow
;
666 assert(!key
->reserved
[0] && !key
->reserved
[1] && !key
->reserved
[2]);
667 HMAP_FOR_EACH_WITH_HASH (flow
, struct dp_netdev_flow
, node
,
668 flow_hash(key
, 0), &dp
->flow_table
) {
669 if (flow_equal(&flow
->key
, key
)) {
677 answer_flow_query(struct dp_netdev_flow
*flow
, uint32_t query_flags
,
678 struct odp_flow
*odp_flow
)
681 odp_flow
->key
= flow
->key
;
682 odp_flow
->stats
.n_packets
= flow
->packet_count
;
683 odp_flow
->stats
.n_bytes
= flow
->byte_count
;
684 odp_flow
->stats
.used_sec
= flow
->used
.tv_sec
;
685 odp_flow
->stats
.used_nsec
= flow
->used
.tv_usec
* 1000;
686 odp_flow
->stats
.tcp_flags
= TCP_FLAGS(flow
->tcp_ctl
);
687 odp_flow
->stats
.ip_tos
= flow
->ip_tos
;
688 odp_flow
->stats
.error
= 0;
689 if (odp_flow
->n_actions
> 0) {
690 unsigned int n
= MIN(odp_flow
->n_actions
, flow
->n_actions
);
691 memcpy(odp_flow
->actions
, flow
->actions
,
692 n
* sizeof *odp_flow
->actions
);
693 odp_flow
->n_actions
= flow
->n_actions
;
696 if (query_flags
& ODPFF_ZERO_TCP_FLAGS
) {
701 odp_flow
->stats
.error
= ENOENT
;
706 dpif_netdev_flow_get(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
708 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
711 for (i
= 0; i
< n
; i
++) {
712 struct odp_flow
*odp_flow
= &flows
[i
];
713 answer_flow_query(dp_netdev_lookup_flow(dp
, &odp_flow
->key
),
714 odp_flow
->flags
, odp_flow
);
720 dpif_netdev_validate_actions(const union odp_action
*actions
, int n_actions
,
726 for (i
= 0; i
< n_actions
; i
++) {
727 const union odp_action
*a
= &actions
[i
];
730 if (a
->output
.port
>= MAX_PORTS
) {
735 case ODPAT_OUTPUT_GROUP
:
737 if (a
->output_group
.group
>= N_GROUPS
) {
742 case ODPAT_CONTROLLER
:
745 case ODPAT_SET_VLAN_VID
:
747 if (a
->vlan_vid
.vlan_vid
& htons(~VLAN_VID_MASK
)) {
752 case ODPAT_SET_VLAN_PCP
:
754 if (a
->vlan_pcp
.vlan_pcp
& ~VLAN_PCP_MASK
) {
759 case ODPAT_STRIP_VLAN
:
760 case ODPAT_SET_DL_SRC
:
761 case ODPAT_SET_DL_DST
:
762 case ODPAT_SET_NW_SRC
:
763 case ODPAT_SET_NW_DST
:
764 case ODPAT_SET_NW_TOS
:
765 case ODPAT_SET_TP_SRC
:
766 case ODPAT_SET_TP_DST
:
778 set_flow_actions(struct dp_netdev_flow
*flow
, struct odp_flow
*odp_flow
)
784 if (odp_flow
->n_actions
>= 4096 / sizeof *odp_flow
->actions
) {
787 error
= dpif_netdev_validate_actions(odp_flow
->actions
,
788 odp_flow
->n_actions
, &mutates
);
793 n_bytes
= odp_flow
->n_actions
* sizeof *flow
->actions
;
794 flow
->actions
= xrealloc(flow
->actions
, n_bytes
);
795 flow
->n_actions
= odp_flow
->n_actions
;
796 memcpy(flow
->actions
, odp_flow
->actions
, n_bytes
);
801 add_flow(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
803 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
804 struct dp_netdev_flow
*flow
;
807 flow
= xzalloc(sizeof *flow
);
808 flow
->key
= odp_flow
->key
;
809 memset(flow
->key
.reserved
, 0, sizeof flow
->key
.reserved
);
811 error
= set_flow_actions(flow
, odp_flow
);
817 hmap_insert(&dp
->flow_table
, &flow
->node
, flow_hash(&flow
->key
, 0));
822 clear_stats(struct dp_netdev_flow
*flow
)
824 flow
->used
.tv_sec
= 0;
825 flow
->used
.tv_usec
= 0;
826 flow
->packet_count
= 0;
827 flow
->byte_count
= 0;
833 dpif_netdev_flow_put(struct dpif
*dpif
, struct odp_flow_put
*put
)
835 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
836 struct dp_netdev_flow
*flow
;
838 flow
= dp_netdev_lookup_flow(dp
, &put
->flow
.key
);
840 if (put
->flags
& ODPPF_CREATE
) {
841 if (hmap_count(&dp
->flow_table
) < MAX_FLOWS
) {
842 return add_flow(dpif
, &put
->flow
);
850 if (put
->flags
& ODPPF_MODIFY
) {
851 int error
= set_flow_actions(flow
, &put
->flow
);
852 if (!error
&& put
->flags
& ODPPF_ZERO_STATS
) {
864 dpif_netdev_flow_del(struct dpif
*dpif
, struct odp_flow
*odp_flow
)
866 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
867 struct dp_netdev_flow
*flow
;
869 flow
= dp_netdev_lookup_flow(dp
, &odp_flow
->key
);
871 answer_flow_query(flow
, 0, odp_flow
);
872 dp_netdev_free_flow(dp
, flow
);
880 dpif_netdev_flow_list(const struct dpif
*dpif
, struct odp_flow flows
[], int n
)
882 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
883 struct dp_netdev_flow
*flow
;
887 HMAP_FOR_EACH (flow
, struct dp_netdev_flow
, node
, &dp
->flow_table
) {
891 answer_flow_query(flow
, 0, &flows
[i
++]);
893 return hmap_count(&dp
->flow_table
);
897 dpif_netdev_execute(struct dpif
*dpif
, uint16_t in_port
,
898 const union odp_action actions
[], int n_actions
,
899 const struct ofpbuf
*packet
)
901 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
907 if (packet
->size
< ETH_HEADER_LEN
|| packet
->size
> UINT16_MAX
) {
911 error
= dpif_netdev_validate_actions(actions
, n_actions
, &mutates
);
917 /* We need a deep copy of 'packet' since we're going to modify its
919 ofpbuf_init(©
, DP_NETDEV_HEADROOM
+ packet
->size
);
920 copy
.data
= (char*)copy
.base
+ DP_NETDEV_HEADROOM
;
921 ofpbuf_put(©
, packet
->data
, packet
->size
);
923 /* We still need a shallow copy of 'packet', even though we won't
924 * modify its data, because flow_extract() modifies packet->l2, etc.
925 * We could probably get away with modifying those but it's more polite
929 flow_extract(©
, in_port
, &flow
);
930 error
= dp_netdev_execute_actions(dp
, ©
, &flow
, actions
, n_actions
);
932 ofpbuf_uninit(©
);
938 dpif_netdev_recv_get_mask(const struct dpif
*dpif
, int *listen_mask
)
940 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
941 *listen_mask
= dpif_netdev
->listen_mask
;
946 dpif_netdev_recv_set_mask(struct dpif
*dpif
, int listen_mask
)
948 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
949 if (!(listen_mask
& ~ODPL_ALL
)) {
950 dpif_netdev
->listen_mask
= listen_mask
;
957 static struct ovs_queue
*
958 find_nonempty_queue(struct dpif
*dpif
)
960 struct dpif_netdev
*dpif_netdev
= dpif_netdev_cast(dpif
);
961 struct dp_netdev
*dp
= get_dp_netdev(dpif
);
962 int mask
= dpif_netdev
->listen_mask
;
965 for (i
= 0; i
< N_QUEUES
; i
++) {
966 struct ovs_queue
*q
= &dp
->queues
[i
];
967 if (q
->n
&& mask
& (1u << i
)) {
/* Pops the next queued message that 'dpif' is listening for into '*bufp'.
 * Returns 0 on success, or EAGAIN if no such message is queued. */
static int
dpif_netdev_recv(struct dpif *dpif, struct ofpbuf **bufp)
{
    struct ovs_queue *q = find_nonempty_queue(dpif);
    if (q) {
        *bufp = queue_pop_head(q);
        return 0;
    } else {
        return EAGAIN;
    }
}
/* Arranges for the poll loop to wake up when dpif_netdev_recv() would
 * return a message. */
static void
dpif_netdev_recv_wait(struct dpif *dpif)
{
    struct ovs_queue *q = find_nonempty_queue(dpif);
    if (q) {
        poll_immediate_wake();
    } else {
        /* No messages ready to be received, and dp_wait() will ensure that we
         * wake up to queue new messages, so there is nothing to do. */
    }
}
999 dp_netdev_flow_used(struct dp_netdev_flow
*flow
, const flow_t
*key
,
1000 const struct ofpbuf
*packet
)
1002 time_timeval(&flow
->used
);
1003 flow
->packet_count
++;
1004 flow
->byte_count
+= packet
->size
;
1005 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1006 struct ip_header
*nh
= packet
->l3
;
1007 flow
->ip_tos
= nh
->ip_tos
;
1009 if (key
->nw_proto
== IPPROTO_TCP
) {
1010 struct tcp_header
*th
= packet
->l4
;
1011 flow
->tcp_ctl
|= th
->tcp_ctl
;
1017 dp_netdev_port_input(struct dp_netdev
*dp
, struct dp_netdev_port
*port
,
1018 struct ofpbuf
*packet
)
1020 struct dp_netdev_flow
*flow
;
1023 if (flow_extract(packet
, port
->port_no
, &key
) && dp
->drop_frags
) {
1028 flow
= dp_netdev_lookup_flow(dp
, &key
);
1030 dp_netdev_flow_used(flow
, &key
, packet
);
1031 dp_netdev_execute_actions(dp
, packet
, &key
,
1032 flow
->actions
, flow
->n_actions
);
1036 dp_netdev_output_control(dp
, packet
, _ODPL_MISS_NR
, port
->port_no
, 0);
1043 struct ofpbuf packet
;
1044 struct dp_netdev
*dp
;
1046 ofpbuf_init(&packet
, DP_NETDEV_HEADROOM
+ max_mtu
);
1047 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1048 struct dp_netdev_port
*port
;
1050 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1053 /* Reset packet contents. */
1054 packet
.data
= (char*)packet
.base
+ DP_NETDEV_HEADROOM
;
1057 error
= netdev_recv(port
->netdev
, &packet
);
1059 dp_netdev_port_input(dp
, port
, &packet
);
1060 } else if (error
!= EAGAIN
) {
1061 struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1062 VLOG_ERR_RL(&rl
, "error receiving data from %s: %s",
1063 netdev_get_name(port
->netdev
), strerror(error
));
1067 ofpbuf_uninit(&packet
);
1071 dp_netdev_wait(void)
1073 struct dp_netdev
*dp
;
1075 LIST_FOR_EACH (dp
, struct dp_netdev
, node
, &dp_netdev_list
) {
1076 struct dp_netdev_port
*port
;
1077 LIST_FOR_EACH (port
, struct dp_netdev_port
, node
, &dp
->port_list
) {
1078 netdev_recv_wait(port
->netdev
);
1084 dp_netdev_modify_vlan_tci(struct ofpbuf
*packet
, flow_t
*key
,
1085 uint16_t tci
, uint16_t mask
)
1087 struct vlan_eth_header
*veh
;
1089 if (key
->dl_vlan
!= htons(ODP_VLAN_NONE
)) {
1090 /* Modify 'mask' bits, but maintain other TCI bits. */
1092 veh
->veth_tci
&= ~htons(mask
);
1093 veh
->veth_tci
|= htons(tci
);
1095 /* Insert new 802.1Q header. */
1096 struct eth_header
*eh
= packet
->l2
;
1097 struct vlan_eth_header tmp
;
1098 memcpy(tmp
.veth_dst
, eh
->eth_dst
, ETH_ADDR_LEN
);
1099 memcpy(tmp
.veth_src
, eh
->eth_src
, ETH_ADDR_LEN
);
1100 tmp
.veth_type
= htons(ETH_TYPE_VLAN
);
1101 tmp
.veth_tci
= htons(tci
);
1102 tmp
.veth_next_type
= eh
->eth_type
;
1104 veh
= ofpbuf_push_uninit(packet
, VLAN_HEADER_LEN
);
1105 memcpy(veh
, &tmp
, sizeof tmp
);
1106 packet
->l2
= (char*)packet
->l2
- VLAN_HEADER_LEN
;
1109 key
->dl_vlan
= veh
->veth_tci
& htons(VLAN_VID_MASK
);
1113 dp_netdev_strip_vlan(struct ofpbuf
*packet
, flow_t
*key
)
1115 struct vlan_eth_header
*veh
= packet
->l2
;
1116 if (veh
->veth_type
== htons(ETH_TYPE_VLAN
)) {
1117 struct eth_header tmp
;
1119 memcpy(tmp
.eth_dst
, veh
->veth_dst
, ETH_ADDR_LEN
);
1120 memcpy(tmp
.eth_src
, veh
->veth_src
, ETH_ADDR_LEN
);
1121 tmp
.eth_type
= veh
->veth_next_type
;
1123 packet
->size
-= VLAN_HEADER_LEN
;
1124 packet
->data
= (char*)packet
->data
+ VLAN_HEADER_LEN
;
1125 packet
->l2
= (char*)packet
->l2
+ VLAN_HEADER_LEN
;
1126 memcpy(packet
->data
, &tmp
, sizeof tmp
);
1128 key
->dl_vlan
= htons(ODP_VLAN_NONE
);
1133 dp_netdev_set_dl_src(struct ofpbuf
*packet
,
1134 const uint8_t dl_addr
[ETH_ADDR_LEN
])
1136 struct eth_header
*eh
= packet
->l2
;
1137 memcpy(eh
->eth_src
, dl_addr
, sizeof eh
->eth_src
);
1141 dp_netdev_set_dl_dst(struct ofpbuf
*packet
,
1142 const uint8_t dl_addr
[ETH_ADDR_LEN
])
1144 struct eth_header
*eh
= packet
->l2
;
1145 memcpy(eh
->eth_dst
, dl_addr
, sizeof eh
->eth_dst
);
1149 dp_netdev_set_nw_addr(struct ofpbuf
*packet
, flow_t
*key
,
1150 const struct odp_action_nw_addr
*a
)
1152 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1153 struct ip_header
*nh
= packet
->l3
;
1156 field
= a
->type
== ODPAT_SET_NW_SRC
? &nh
->ip_src
: &nh
->ip_dst
;
1157 if (key
->nw_proto
== IP_TYPE_TCP
) {
1158 struct tcp_header
*th
= packet
->l4
;
1159 th
->tcp_csum
= recalc_csum32(th
->tcp_csum
, *field
, a
->nw_addr
);
1160 } else if (key
->nw_proto
== IP_TYPE_UDP
) {
1161 struct udp_header
*uh
= packet
->l4
;
1163 uh
->udp_csum
= recalc_csum32(uh
->udp_csum
, *field
, a
->nw_addr
);
1164 if (!uh
->udp_csum
) {
1165 uh
->udp_csum
= 0xffff;
1169 nh
->ip_csum
= recalc_csum32(nh
->ip_csum
, *field
, a
->nw_addr
);
1170 *field
= a
->nw_addr
;
1175 dp_netdev_set_nw_tos(struct ofpbuf
*packet
, flow_t
*key
,
1176 const struct odp_action_nw_tos
*a
)
1178 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1179 struct ip_header
*nh
= packet
->l3
;
1180 uint8_t *field
= &nh
->ip_tos
;
1182 /* We only set the lower 6 bits. */
1183 uint8_t new = (a
->nw_tos
& 0x3f) | (nh
->ip_tos
& 0xc0);
1185 nh
->ip_csum
= recalc_csum16(nh
->ip_csum
, htons((uint16_t)*field
),
1186 htons((uint16_t)a
->nw_tos
));
1192 dp_netdev_set_tp_port(struct ofpbuf
*packet
, flow_t
*key
,
1193 const struct odp_action_tp_port
*a
)
1195 if (key
->dl_type
== htons(ETH_TYPE_IP
)) {
1197 if (key
->nw_proto
== IPPROTO_TCP
) {
1198 struct tcp_header
*th
= packet
->l4
;
1199 field
= a
->type
== ODPAT_SET_TP_SRC
? &th
->tcp_src
: &th
->tcp_dst
;
1200 th
->tcp_csum
= recalc_csum16(th
->tcp_csum
, *field
, a
->tp_port
);
1201 *field
= a
->tp_port
;
1202 } else if (key
->nw_proto
== IPPROTO_UDP
) {
1203 struct udp_header
*uh
= packet
->l4
;
1204 field
= a
->type
== ODPAT_SET_TP_SRC
? &uh
->udp_src
: &uh
->udp_dst
;
1205 uh
->udp_csum
= recalc_csum16(uh
->udp_csum
, *field
, a
->tp_port
);
1206 *field
= a
->tp_port
;
1212 dp_netdev_output_port(struct dp_netdev
*dp
, struct ofpbuf
*packet
,
1215 struct dp_netdev_port
*p
= dp
->ports
[out_port
];
1217 netdev_send(p
->netdev
, packet
);
1222 dp_netdev_output_group(struct dp_netdev
*dp
, uint16_t group
, uint16_t in_port
,
1223 struct ofpbuf
*packet
)
1225 struct odp_port_group
*g
= &dp
->groups
[group
];
1228 for (i
= 0; i
< g
->n_ports
; i
++) {
1229 uint16_t out_port
= g
->ports
[i
];
1230 if (out_port
!= in_port
) {
1231 dp_netdev_output_port(dp
, packet
, out_port
);
1237 dp_netdev_output_control(struct dp_netdev
*dp
, const struct ofpbuf
*packet
,
1238 int queue_no
, int port_no
, uint32_t arg
)
1240 struct ovs_queue
*q
= &dp
->queues
[queue_no
];
1241 struct odp_msg
*header
;
1245 if (q
->n
>= MAX_QUEUE_LEN
) {
1250 msg_size
= sizeof *header
+ packet
->size
;
1251 msg
= ofpbuf_new(msg_size
);
1252 header
= ofpbuf_put_uninit(msg
, sizeof *header
);
1253 header
->type
= queue_no
;
1254 header
->length
= msg_size
;
1255 header
->port
= port_no
;
1257 ofpbuf_put(msg
, packet
->data
, packet
->size
);
1258 queue_push_tail(q
, msg
);
1264 dp_netdev_execute_actions(struct dp_netdev
*dp
,
1265 struct ofpbuf
*packet
, flow_t
*key
,
1266 const union odp_action
*actions
, int n_actions
)
1269 for (i
= 0; i
< n_actions
; i
++) {
1270 const union odp_action
*a
= &actions
[i
];
1274 dp_netdev_output_port(dp
, packet
, a
->output
.port
);
1277 case ODPAT_OUTPUT_GROUP
:
1278 dp_netdev_output_group(dp
, a
->output_group
.group
, key
->in_port
,
1282 case ODPAT_CONTROLLER
:
1283 dp_netdev_output_control(dp
, packet
, _ODPL_ACTION_NR
,
1284 key
->in_port
, a
->controller
.arg
);
1287 case ODPAT_SET_VLAN_VID
:
1288 dp_netdev_modify_vlan_tci(packet
, key
, ntohs(a
->vlan_vid
.vlan_vid
),
1292 case ODPAT_SET_VLAN_PCP
:
1293 dp_netdev_modify_vlan_tci(packet
, key
, a
->vlan_pcp
.vlan_pcp
<< 13,
1297 case ODPAT_STRIP_VLAN
:
1298 dp_netdev_strip_vlan(packet
, key
);
1301 case ODPAT_SET_DL_SRC
:
1302 dp_netdev_set_dl_src(packet
, a
->dl_addr
.dl_addr
);
1305 case ODPAT_SET_DL_DST
:
1306 dp_netdev_set_dl_dst(packet
, a
->dl_addr
.dl_addr
);
1309 case ODPAT_SET_NW_SRC
:
1310 case ODPAT_SET_NW_DST
:
1311 dp_netdev_set_nw_addr(packet
, key
, &a
->nw_addr
);
1314 case ODPAT_SET_NW_TOS
:
1315 dp_netdev_set_nw_tos(packet
, key
, &a
->nw_tos
);
1318 case ODPAT_SET_TP_SRC
:
1319 case ODPAT_SET_TP_DST
:
1320 dp_netdev_set_tp_port(packet
, key
, &a
->tp_port
);
1327 const struct dpif_class dpif_netdev_class
= {
1331 NULL
, /* enumerate */
1334 NULL
, /* get_all_names */
1335 dpif_netdev_destroy
,
1336 dpif_netdev_get_stats
,
1337 dpif_netdev_get_drop_frags
,
1338 dpif_netdev_set_drop_frags
,
1339 dpif_netdev_port_add
,
1340 dpif_netdev_port_del
,
1341 dpif_netdev_port_query_by_number
,
1342 dpif_netdev_port_query_by_name
,
1343 dpif_netdev_port_list
,
1344 dpif_netdev_port_poll
,
1345 dpif_netdev_port_poll_wait
,
1346 dpif_netdev_port_group_get
,
1347 dpif_netdev_port_group_set
,
1348 dpif_netdev_flow_get
,
1349 dpif_netdev_flow_put
,
1350 dpif_netdev_flow_del
,
1351 dpif_netdev_flow_flush
,
1352 dpif_netdev_flow_list
,
1353 dpif_netdev_execute
,
1354 dpif_netdev_recv_get_mask
,
1355 dpif_netdev_recv_set_mask
,
1356 NULL
, /* get_sflow_probability */
1357 NULL
, /* set_sflow_probability */
1359 dpif_netdev_recv_wait
,