2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-linux.h"
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
28 #include <linux/rtnetlink.h>
29 #include <linux/sockios.h>
33 #include <sys/epoll.h>
38 #include "dpif-provider.h"
39 #include "dynamic-string.h"
42 #include "netdev-linux.h"
43 #include "netdev-vport.h"
44 #include "netlink-notifier.h"
45 #include "netlink-socket.h"
49 #include "openvswitch/datapath-compat.h"
51 #include "poll-loop.h"
56 #include "unaligned.h"
60 VLOG_DEFINE_THIS_MODULE(dpif_linux
);
61 enum { MAX_PORTS
= USHRT_MAX
};
63 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
64 * missing if we have old headers. */
65 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
67 struct dpif_linux_dp
{
68 /* Generic Netlink header. */
71 /* struct ovs_header. */
75 const char *name
; /* OVS_DP_ATTR_NAME. */
76 const uint32_t *upcall_pid
; /* OVS_DP_UPCALL_PID. */
77 struct ovs_dp_stats stats
; /* OVS_DP_ATTR_STATS. */
80 static void dpif_linux_dp_init(struct dpif_linux_dp
*);
81 static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*,
82 const struct ofpbuf
*);
83 static void dpif_linux_dp_dump_start(struct nl_dump
*);
84 static int dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
85 struct dpif_linux_dp
*reply
,
86 struct ofpbuf
**bufp
);
87 static int dpif_linux_dp_get(const struct dpif
*, struct dpif_linux_dp
*reply
,
88 struct ofpbuf
**bufp
);
90 struct dpif_linux_flow
{
91 /* Generic Netlink header. */
94 /* struct ovs_header. */
95 unsigned int nlmsg_flags
;
100 * The 'stats' member points to 64-bit data that might only be aligned on
101 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
104 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
105 * the Netlink version of the command, even if actions_len is zero. */
106 const struct nlattr
*key
; /* OVS_FLOW_ATTR_KEY. */
108 const struct nlattr
*mask
; /* OVS_FLOW_ATTR_MASK. */
110 const struct nlattr
*actions
; /* OVS_FLOW_ATTR_ACTIONS. */
112 const struct ovs_flow_stats
*stats
; /* OVS_FLOW_ATTR_STATS. */
113 const uint8_t *tcp_flags
; /* OVS_FLOW_ATTR_TCP_FLAGS. */
114 const ovs_32aligned_u64
*used
; /* OVS_FLOW_ATTR_USED. */
115 bool clear
; /* OVS_FLOW_ATTR_CLEAR. */
118 static void dpif_linux_flow_init(struct dpif_linux_flow
*);
119 static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*,
120 const struct ofpbuf
*);
121 static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*,
123 static int dpif_linux_flow_transact(struct dpif_linux_flow
*request
,
124 struct dpif_linux_flow
*reply
,
125 struct ofpbuf
**bufp
);
126 static void dpif_linux_flow_get_stats(const struct dpif_linux_flow
*,
127 struct dpif_flow_stats
*);
129 /* One of the dpif channels between the kernel and userspace. */
130 struct dpif_channel
{
131 struct nl_sock
*sock
; /* Netlink socket. */
132 long long int last_poll
; /* Last time this channel was polled. */
135 static void report_loss(struct dpif
*, struct dpif_channel
*);
137 /* Datapath interface for the openvswitch Linux kernel module. */
142 /* Upcall messages. */
143 int uc_array_size
; /* Size of 'channels' and 'epoll_events'. */
144 struct dpif_channel
*channels
;
145 struct epoll_event
*epoll_events
;
146 int epoll_fd
; /* epoll fd that includes channel socks. */
147 int n_events
; /* Num events returned by epoll_wait(). */
148 int event_offset
; /* Offset into 'epoll_events'. */
150 /* Change notification. */
151 struct sset changed_ports
; /* Ports that have changed. */
152 struct nln_notifier
*port_notifier
;
156 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
158 /* Generic Netlink family numbers for OVS. */
159 static int ovs_datapath_family
;
160 static int ovs_vport_family
;
161 static int ovs_flow_family
;
162 static int ovs_packet_family
;
164 /* Generic Netlink socket. */
165 static struct nl_sock
*genl_sock
;
166 static struct nln
*nln
= NULL
;
168 static int dpif_linux_init(void);
169 static void open_dpif(const struct dpif_linux_dp
*, struct dpif
**);
170 static bool dpif_linux_nln_parse(struct ofpbuf
*, void *);
171 static void dpif_linux_port_changed(const void *vport
, void *dpif
);
172 static uint32_t dpif_linux_port_get_pid(const struct dpif
*,
175 static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*,
177 static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*,
178 const struct ofpbuf
*);
180 static struct dpif_linux
*
181 dpif_linux_cast(const struct dpif
*dpif
)
183 dpif_assert_class(dpif
, &dpif_linux_class
);
184 return CONTAINER_OF(dpif
, struct dpif_linux
, dpif
);
188 dpif_linux_enumerate(struct sset
*all_dps
)
194 error
= dpif_linux_init();
199 dpif_linux_dp_dump_start(&dump
);
200 while (nl_dump_next(&dump
, &msg
)) {
201 struct dpif_linux_dp dp
;
203 if (!dpif_linux_dp_from_ofpbuf(&dp
, &msg
)) {
204 sset_add(all_dps
, dp
.name
);
207 return nl_dump_done(&dump
);
211 dpif_linux_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
212 bool create
, struct dpif
**dpifp
)
214 struct dpif_linux_dp dp_request
, dp
;
219 error
= dpif_linux_init();
224 /* Create or look up datapath. */
225 dpif_linux_dp_init(&dp_request
);
227 dp_request
.cmd
= OVS_DP_CMD_NEW
;
229 dp_request
.upcall_pid
= &upcall_pid
;
231 dp_request
.cmd
= OVS_DP_CMD_GET
;
233 dp_request
.name
= name
;
234 error
= dpif_linux_dp_transact(&dp_request
, &dp
, &buf
);
239 open_dpif(&dp
, dpifp
);
245 open_dpif(const struct dpif_linux_dp
*dp
, struct dpif
**dpifp
)
247 struct dpif_linux
*dpif
;
249 dpif
= xzalloc(sizeof *dpif
);
250 dpif
->port_notifier
= nln_notifier_create(nln
, dpif_linux_port_changed
,
254 dpif_init(&dpif
->dpif
, &dpif_linux_class
, dp
->name
,
255 dp
->dp_ifindex
, dp
->dp_ifindex
);
257 dpif
->dp_ifindex
= dp
->dp_ifindex
;
258 sset_init(&dpif
->changed_ports
);
259 *dpifp
= &dpif
->dpif
;
263 destroy_channels(struct dpif_linux
*dpif
)
267 if (dpif
->epoll_fd
< 0) {
271 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
272 struct dpif_linux_vport vport_request
;
273 struct dpif_channel
*ch
= &dpif
->channels
[i
];
274 uint32_t upcall_pid
= 0;
280 /* Turn off upcalls. */
281 dpif_linux_vport_init(&vport_request
);
282 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
283 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
284 vport_request
.port_no
= u32_to_odp(i
);
285 vport_request
.upcall_pid
= &upcall_pid
;
286 dpif_linux_vport_transact(&vport_request
, NULL
, NULL
);
288 nl_sock_destroy(ch
->sock
);
291 free(dpif
->channels
);
292 dpif
->channels
= NULL
;
293 dpif
->uc_array_size
= 0;
295 free(dpif
->epoll_events
);
296 dpif
->epoll_events
= NULL
;
297 dpif
->n_events
= dpif
->event_offset
= 0;
299 close(dpif
->epoll_fd
);
304 add_channel(struct dpif_linux
*dpif
, odp_port_t port_no
, struct nl_sock
*sock
)
306 struct epoll_event event
;
307 uint32_t port_idx
= odp_to_u32(port_no
);
309 if (dpif
->epoll_fd
< 0) {
313 /* We assume that the datapath densely chooses port numbers, which
314 * can therefore be used as an index into an array of channels. */
315 if (port_idx
>= dpif
->uc_array_size
) {
316 uint32_t new_size
= port_idx
+ 1;
319 if (new_size
> MAX_PORTS
) {
320 VLOG_WARN_RL(&error_rl
, "%s: datapath port %"PRIu32
" too big",
321 dpif_name(&dpif
->dpif
), port_no
);
325 dpif
->channels
= xrealloc(dpif
->channels
,
326 new_size
* sizeof *dpif
->channels
);
327 for (i
= dpif
->uc_array_size
; i
< new_size
; i
++) {
328 dpif
->channels
[i
].sock
= NULL
;
331 dpif
->epoll_events
= xrealloc(dpif
->epoll_events
,
332 new_size
* sizeof *dpif
->epoll_events
);
333 dpif
->uc_array_size
= new_size
;
336 memset(&event
, 0, sizeof event
);
337 event
.events
= EPOLLIN
;
338 event
.data
.u32
= port_idx
;
339 if (epoll_ctl(dpif
->epoll_fd
, EPOLL_CTL_ADD
, nl_sock_fd(sock
),
344 nl_sock_destroy(dpif
->channels
[port_idx
].sock
);
345 dpif
->channels
[port_idx
].sock
= sock
;
346 dpif
->channels
[port_idx
].last_poll
= LLONG_MIN
;
352 del_channel(struct dpif_linux
*dpif
, odp_port_t port_no
)
354 struct dpif_channel
*ch
;
355 uint32_t port_idx
= odp_to_u32(port_no
);
357 if (dpif
->epoll_fd
< 0 || port_idx
>= dpif
->uc_array_size
) {
361 ch
= &dpif
->channels
[port_idx
];
366 epoll_ctl(dpif
->epoll_fd
, EPOLL_CTL_DEL
, nl_sock_fd(ch
->sock
), NULL
);
367 dpif
->event_offset
= dpif
->n_events
= 0;
369 nl_sock_destroy(ch
->sock
);
374 dpif_linux_close(struct dpif
*dpif_
)
376 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
378 nln_notifier_destroy(dpif
->port_notifier
);
379 destroy_channels(dpif
);
380 sset_destroy(&dpif
->changed_ports
);
385 dpif_linux_destroy(struct dpif
*dpif_
)
387 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
388 struct dpif_linux_dp dp
;
390 dpif_linux_dp_init(&dp
);
391 dp
.cmd
= OVS_DP_CMD_DEL
;
392 dp
.dp_ifindex
= dpif
->dp_ifindex
;
393 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
397 dpif_linux_run(struct dpif
*dpif_ OVS_UNUSED
)
405 dpif_linux_wait(struct dpif
*dpif OVS_UNUSED
)
413 dpif_linux_get_stats(const struct dpif
*dpif_
, struct dpif_dp_stats
*stats
)
415 struct dpif_linux_dp dp
;
419 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
421 stats
->n_hit
= dp
.stats
.n_hit
;
422 stats
->n_missed
= dp
.stats
.n_missed
;
423 stats
->n_lost
= dp
.stats
.n_lost
;
424 stats
->n_flows
= dp
.stats
.n_flows
;
431 get_vport_type(const struct dpif_linux_vport
*vport
)
433 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
435 switch (vport
->type
) {
436 case OVS_VPORT_TYPE_NETDEV
:
439 case OVS_VPORT_TYPE_INTERNAL
:
442 case OVS_VPORT_TYPE_GRE
:
445 case OVS_VPORT_TYPE_GRE64
:
448 case OVS_VPORT_TYPE_VXLAN
:
451 case OVS_VPORT_TYPE_LISP
:
454 case OVS_VPORT_TYPE_UNSPEC
:
455 case __OVS_VPORT_TYPE_MAX
:
459 VLOG_WARN_RL(&rl
, "dp%d: port `%s' has unsupported type %u",
460 vport
->dp_ifindex
, vport
->name
, (unsigned int) vport
->type
);
464 static enum ovs_vport_type
465 netdev_to_ovs_vport_type(const struct netdev
*netdev
)
467 const char *type
= netdev_get_type(netdev
);
469 if (!strcmp(type
, "tap") || !strcmp(type
, "system")) {
470 return OVS_VPORT_TYPE_NETDEV
;
471 } else if (!strcmp(type
, "internal")) {
472 return OVS_VPORT_TYPE_INTERNAL
;
473 } else if (strstr(type
, "gre64")) {
474 return OVS_VPORT_TYPE_GRE64
;
475 } else if (strstr(type
, "gre")) {
476 return OVS_VPORT_TYPE_GRE
;
477 } else if (!strcmp(type
, "vxlan")) {
478 return OVS_VPORT_TYPE_VXLAN
;
479 } else if (!strcmp(type
, "lisp")) {
480 return OVS_VPORT_TYPE_LISP
;
482 return OVS_VPORT_TYPE_UNSPEC
;
487 dpif_linux_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
488 odp_port_t
*port_nop
)
490 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
491 const struct netdev_tunnel_config
*tnl_cfg
;
492 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
493 const char *name
= netdev_vport_get_dpif_port(netdev
,
494 namebuf
, sizeof namebuf
);
495 const char *type
= netdev_get_type(netdev
);
496 struct dpif_linux_vport request
, reply
;
497 struct nl_sock
*sock
= NULL
;
500 uint64_t options_stub
[64 / 8];
501 struct ofpbuf options
;
504 if (dpif
->epoll_fd
>= 0) {
505 error
= nl_sock_create(NETLINK_GENERIC
, &sock
);
511 dpif_linux_vport_init(&request
);
512 request
.cmd
= OVS_VPORT_CMD_NEW
;
513 request
.dp_ifindex
= dpif
->dp_ifindex
;
514 request
.type
= netdev_to_ovs_vport_type(netdev
);
515 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
516 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
517 "unsupported type `%s'",
518 dpif_name(dpif_
), name
, type
);
519 nl_sock_destroy(sock
);
524 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
525 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
528 tnl_cfg
= netdev_get_tunnel_config(netdev
);
529 if (tnl_cfg
&& tnl_cfg
->dst_port
!= 0) {
530 ofpbuf_use_stack(&options
, options_stub
, sizeof options_stub
);
531 nl_msg_put_u16(&options
, OVS_TUNNEL_ATTR_DST_PORT
,
532 ntohs(tnl_cfg
->dst_port
));
533 request
.options
= options
.data
;
534 request
.options_len
= options
.size
;
537 request
.port_no
= *port_nop
;
538 upcall_pid
= sock
? nl_sock_pid(sock
) : 0;
539 request
.upcall_pid
= &upcall_pid
;
541 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
543 *port_nop
= reply
.port_no
;
544 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink pid %"PRIu32
,
545 dpif_name(dpif_
), reply
.port_no
, upcall_pid
);
547 if (error
== EBUSY
&& *port_nop
!= ODPP_NONE
) {
548 VLOG_INFO("%s: requested port %"PRIu32
" is in use",
549 dpif_name(dpif_
), *port_nop
);
551 nl_sock_destroy(sock
);
558 error
= add_channel(dpif
, *port_nop
, sock
);
560 VLOG_INFO("%s: could not add channel for port %s",
561 dpif_name(dpif_
), name
);
563 /* Delete the port. */
564 dpif_linux_vport_init(&request
);
565 request
.cmd
= OVS_VPORT_CMD_DEL
;
566 request
.dp_ifindex
= dpif
->dp_ifindex
;
567 request
.port_no
= *port_nop
;
568 dpif_linux_vport_transact(&request
, NULL
, NULL
);
570 nl_sock_destroy(sock
);
579 dpif_linux_port_del(struct dpif
*dpif_
, odp_port_t port_no
)
581 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
582 struct dpif_linux_vport vport
;
585 dpif_linux_vport_init(&vport
);
586 vport
.cmd
= OVS_VPORT_CMD_DEL
;
587 vport
.dp_ifindex
= dpif
->dp_ifindex
;
588 vport
.port_no
= port_no
;
589 error
= dpif_linux_vport_transact(&vport
, NULL
, NULL
);
591 del_channel(dpif
, port_no
);
597 dpif_linux_port_query__(const struct dpif
*dpif
, odp_port_t port_no
,
598 const char *port_name
, struct dpif_port
*dpif_port
)
600 struct dpif_linux_vport request
;
601 struct dpif_linux_vport reply
;
605 dpif_linux_vport_init(&request
);
606 request
.cmd
= OVS_VPORT_CMD_GET
;
607 request
.dp_ifindex
= dpif_linux_cast(dpif
)->dp_ifindex
;
608 request
.port_no
= port_no
;
609 request
.name
= port_name
;
611 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
613 if (reply
.dp_ifindex
!= request
.dp_ifindex
) {
614 /* A query by name reported that 'port_name' is in some datapath
615 * other than 'dpif', but the caller wants to know about 'dpif'. */
617 } else if (dpif_port
) {
618 dpif_port
->name
= xstrdup(reply
.name
);
619 dpif_port
->type
= xstrdup(get_vport_type(&reply
));
620 dpif_port
->port_no
= reply
.port_no
;
628 dpif_linux_port_query_by_number(const struct dpif
*dpif
, odp_port_t port_no
,
629 struct dpif_port
*dpif_port
)
631 return dpif_linux_port_query__(dpif
, port_no
, NULL
, dpif_port
);
/* Queries 'dpif' for the port named 'devname'.  Delegates to
 * dpif_linux_port_query__() with port number 0 and returns its result. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    int error = dpif_linux_port_query__(dpif, 0, devname, dpif_port);

    return error;
}
642 dpif_linux_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
644 return u32_to_odp(MAX_PORTS
);
648 dpif_linux_port_get_pid(const struct dpif
*dpif_
, odp_port_t port_no
)
650 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
651 uint32_t port_idx
= odp_to_u32(port_no
);
653 if (dpif
->epoll_fd
< 0) {
656 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
657 * channel, since it is not heavily loaded. */
658 uint32_t idx
= port_idx
>= dpif
->uc_array_size
? 0 : port_idx
;
659 return nl_sock_pid(dpif
->channels
[idx
].sock
);
664 dpif_linux_flow_flush(struct dpif
*dpif_
)
666 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
667 struct dpif_linux_flow flow
;
669 dpif_linux_flow_init(&flow
);
670 flow
.cmd
= OVS_FLOW_CMD_DEL
;
671 flow
.dp_ifindex
= dpif
->dp_ifindex
;
672 return dpif_linux_flow_transact(&flow
, NULL
, NULL
);
675 struct dpif_linux_port_state
{
680 dpif_linux_port_dump_start(const struct dpif
*dpif_
, void **statep
)
682 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
683 struct dpif_linux_port_state
*state
;
684 struct dpif_linux_vport request
;
687 *statep
= state
= xmalloc(sizeof *state
);
689 dpif_linux_vport_init(&request
);
690 request
.cmd
= OVS_DP_CMD_GET
;
691 request
.dp_ifindex
= dpif
->dp_ifindex
;
693 buf
= ofpbuf_new(1024);
694 dpif_linux_vport_to_ofpbuf(&request
, buf
);
695 nl_dump_start(&state
->dump
, genl_sock
, buf
);
702 dpif_linux_port_dump_next(const struct dpif
*dpif OVS_UNUSED
, void *state_
,
703 struct dpif_port
*dpif_port
)
705 struct dpif_linux_port_state
*state
= state_
;
706 struct dpif_linux_vport vport
;
710 if (!nl_dump_next(&state
->dump
, &buf
)) {
714 error
= dpif_linux_vport_from_ofpbuf(&vport
, &buf
);
719 dpif_port
->name
= CONST_CAST(char *, vport
.name
);
720 dpif_port
->type
= CONST_CAST(char *, get_vport_type(&vport
));
721 dpif_port
->port_no
= vport
.port_no
;
726 dpif_linux_port_dump_done(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
)
728 struct dpif_linux_port_state
*state
= state_
;
729 int error
= nl_dump_done(&state
->dump
);
736 dpif_linux_port_poll(const struct dpif
*dpif_
, char **devnamep
)
738 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
740 if (dpif
->change_error
) {
741 dpif
->change_error
= false;
742 sset_clear(&dpif
->changed_ports
);
744 } else if (!sset_is_empty(&dpif
->changed_ports
)) {
745 *devnamep
= sset_pop(&dpif
->changed_ports
);
753 dpif_linux_port_poll_wait(const struct dpif
*dpif_
)
755 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
756 if (!sset_is_empty(&dpif
->changed_ports
) || dpif
->change_error
) {
757 poll_immediate_wake();
762 dpif_linux_flow_get__(const struct dpif
*dpif_
,
763 const struct nlattr
*key
, size_t key_len
,
764 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
766 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
767 struct dpif_linux_flow request
;
769 dpif_linux_flow_init(&request
);
770 request
.cmd
= OVS_FLOW_CMD_GET
;
771 request
.dp_ifindex
= dpif
->dp_ifindex
;
773 request
.key_len
= key_len
;
774 return dpif_linux_flow_transact(&request
, reply
, bufp
);
778 dpif_linux_flow_get(const struct dpif
*dpif_
,
779 const struct nlattr
*key
, size_t key_len
,
780 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
782 struct dpif_linux_flow reply
;
786 error
= dpif_linux_flow_get__(dpif_
, key
, key_len
, &reply
, &buf
);
789 dpif_linux_flow_get_stats(&reply
, stats
);
792 buf
->data
= CONST_CAST(struct nlattr
*, reply
.actions
);
793 buf
->size
= reply
.actions_len
;
803 dpif_linux_init_flow_put(struct dpif
*dpif_
, const struct dpif_flow_put
*put
,
804 struct dpif_linux_flow
*request
)
806 static const struct nlattr dummy_action
;
808 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
810 dpif_linux_flow_init(request
);
811 request
->cmd
= (put
->flags
& DPIF_FP_CREATE
812 ? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
);
813 request
->dp_ifindex
= dpif
->dp_ifindex
;
814 request
->key
= put
->key
;
815 request
->key_len
= put
->key_len
;
816 request
->mask
= put
->mask
;
817 request
->mask_len
= put
->mask_len
;
818 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
819 request
->actions
= (put
->actions
821 : CONST_CAST(struct nlattr
*, &dummy_action
));
822 request
->actions_len
= put
->actions_len
;
823 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
824 request
->clear
= true;
826 request
->nlmsg_flags
= put
->flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
830 dpif_linux_flow_put(struct dpif
*dpif_
, const struct dpif_flow_put
*put
)
832 struct dpif_linux_flow request
, reply
;
836 dpif_linux_init_flow_put(dpif_
, put
, &request
);
837 error
= dpif_linux_flow_transact(&request
,
838 put
->stats
? &reply
: NULL
,
839 put
->stats
? &buf
: NULL
);
840 if (!error
&& put
->stats
) {
841 dpif_linux_flow_get_stats(&reply
, put
->stats
);
848 dpif_linux_init_flow_del(struct dpif
*dpif_
, const struct dpif_flow_del
*del
,
849 struct dpif_linux_flow
*request
)
851 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
853 dpif_linux_flow_init(request
);
854 request
->cmd
= OVS_FLOW_CMD_DEL
;
855 request
->dp_ifindex
= dpif
->dp_ifindex
;
856 request
->key
= del
->key
;
857 request
->key_len
= del
->key_len
;
861 dpif_linux_flow_del(struct dpif
*dpif_
, const struct dpif_flow_del
*del
)
863 struct dpif_linux_flow request
, reply
;
867 dpif_linux_init_flow_del(dpif_
, del
, &request
);
868 error
= dpif_linux_flow_transact(&request
,
869 del
->stats
? &reply
: NULL
,
870 del
->stats
? &buf
: NULL
);
871 if (!error
&& del
->stats
) {
872 dpif_linux_flow_get_stats(&reply
, del
->stats
);
878 struct dpif_linux_flow_state
{
880 struct dpif_linux_flow flow
;
881 struct dpif_flow_stats stats
;
886 dpif_linux_flow_dump_start(const struct dpif
*dpif_
, void **statep
)
888 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
889 struct dpif_linux_flow_state
*state
;
890 struct dpif_linux_flow request
;
893 *statep
= state
= xmalloc(sizeof *state
);
895 dpif_linux_flow_init(&request
);
896 request
.cmd
= OVS_DP_CMD_GET
;
897 request
.dp_ifindex
= dpif
->dp_ifindex
;
899 buf
= ofpbuf_new(1024);
900 dpif_linux_flow_to_ofpbuf(&request
, buf
);
901 nl_dump_start(&state
->dump
, genl_sock
, buf
);
910 dpif_linux_flow_dump_next(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
,
911 const struct nlattr
**key
, size_t *key_len
,
912 const struct nlattr
**mask
, size_t *mask_len
,
913 const struct nlattr
**actions
, size_t *actions_len
,
914 const struct dpif_flow_stats
**stats
)
916 struct dpif_linux_flow_state
*state
= state_
;
921 ofpbuf_delete(state
->buf
);
924 if (!nl_dump_next(&state
->dump
, &buf
)) {
928 error
= dpif_linux_flow_from_ofpbuf(&state
->flow
, &buf
);
933 if (actions
&& !state
->flow
.actions
) {
934 error
= dpif_linux_flow_get__(dpif_
, state
->flow
.key
,
936 &state
->flow
, &state
->buf
);
937 if (error
== ENOENT
) {
938 VLOG_DBG("dumped flow disappeared on get");
940 VLOG_WARN("error fetching dumped flow: %s", strerror(error
));
946 *actions
= state
->flow
.actions
;
947 *actions_len
= state
->flow
.actions_len
;
950 *key
= state
->flow
.key
;
951 *key_len
= state
->flow
.key_len
;
954 *mask
= state
->flow
.mask
;
955 *mask_len
= state
->flow
.mask
? state
->flow
.mask_len
: 0;
958 dpif_linux_flow_get_stats(&state
->flow
, &state
->stats
);
959 *stats
= &state
->stats
;
965 dpif_linux_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
967 struct dpif_linux_flow_state
*state
= state_
;
968 int error
= nl_dump_done(&state
->dump
);
969 ofpbuf_delete(state
->buf
);
975 dpif_linux_encode_execute(int dp_ifindex
, const struct dpif_execute
*d_exec
,
978 struct ovs_header
*k_exec
;
980 ofpbuf_prealloc_tailroom(buf
, (64
981 + d_exec
->packet
->size
983 + d_exec
->actions_len
));
985 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
986 OVS_PACKET_CMD_EXECUTE
, OVS_PACKET_VERSION
);
988 k_exec
= ofpbuf_put_uninit(buf
, sizeof *k_exec
);
989 k_exec
->dp_ifindex
= dp_ifindex
;
991 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
,
992 d_exec
->packet
->data
, d_exec
->packet
->size
);
993 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_KEY
, d_exec
->key
, d_exec
->key_len
);
994 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
,
995 d_exec
->actions
, d_exec
->actions_len
);
999 dpif_linux_execute__(int dp_ifindex
, const struct dpif_execute
*execute
)
1001 uint64_t request_stub
[1024 / 8];
1002 struct ofpbuf request
;
1005 ofpbuf_use_stub(&request
, request_stub
, sizeof request_stub
);
1006 dpif_linux_encode_execute(dp_ifindex
, execute
, &request
);
1007 error
= nl_sock_transact(genl_sock
, &request
, NULL
);
1008 ofpbuf_uninit(&request
);
1014 dpif_linux_execute(struct dpif
*dpif_
, const struct dpif_execute
*execute
)
1016 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1018 return dpif_linux_execute__(dpif
->dp_ifindex
, execute
);
1024 dpif_linux_operate__(struct dpif
*dpif_
, struct dpif_op
**ops
, size_t n_ops
)
1026 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1029 struct nl_transaction txn
;
1031 struct ofpbuf request
;
1032 uint64_t request_stub
[1024 / 8];
1034 struct ofpbuf reply
;
1035 uint64_t reply_stub
[1024 / 8];
1038 struct nl_transaction
*txnsp
[MAX_OPS
];
1041 ovs_assert(n_ops
<= MAX_OPS
);
1042 for (i
= 0; i
< n_ops
; i
++) {
1043 struct op_auxdata
*aux
= &auxes
[i
];
1044 struct dpif_op
*op
= ops
[i
];
1045 struct dpif_flow_put
*put
;
1046 struct dpif_flow_del
*del
;
1047 struct dpif_execute
*execute
;
1048 struct dpif_linux_flow flow
;
1050 ofpbuf_use_stub(&aux
->request
,
1051 aux
->request_stub
, sizeof aux
->request_stub
);
1052 aux
->txn
.request
= &aux
->request
;
1054 ofpbuf_use_stub(&aux
->reply
, aux
->reply_stub
, sizeof aux
->reply_stub
);
1055 aux
->txn
.reply
= NULL
;
1058 case DPIF_OP_FLOW_PUT
:
1059 put
= &op
->u
.flow_put
;
1060 dpif_linux_init_flow_put(dpif_
, put
, &flow
);
1062 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1063 aux
->txn
.reply
= &aux
->reply
;
1065 dpif_linux_flow_to_ofpbuf(&flow
, &aux
->request
);
1068 case DPIF_OP_FLOW_DEL
:
1069 del
= &op
->u
.flow_del
;
1070 dpif_linux_init_flow_del(dpif_
, del
, &flow
);
1072 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1073 aux
->txn
.reply
= &aux
->reply
;
1075 dpif_linux_flow_to_ofpbuf(&flow
, &aux
->request
);
1078 case DPIF_OP_EXECUTE
:
1079 execute
= &op
->u
.execute
;
1080 dpif_linux_encode_execute(dpif
->dp_ifindex
, execute
,
1089 for (i
= 0; i
< n_ops
; i
++) {
1090 txnsp
[i
] = &auxes
[i
].txn
;
1092 nl_sock_transact_multiple(genl_sock
, txnsp
, n_ops
);
1094 for (i
= 0; i
< n_ops
; i
++) {
1095 struct op_auxdata
*aux
= &auxes
[i
];
1096 struct nl_transaction
*txn
= &auxes
[i
].txn
;
1097 struct dpif_op
*op
= ops
[i
];
1098 struct dpif_flow_put
*put
;
1099 struct dpif_flow_del
*del
;
1101 op
->error
= txn
->error
;
1104 case DPIF_OP_FLOW_PUT
:
1105 put
= &op
->u
.flow_put
;
1108 struct dpif_linux_flow reply
;
1110 op
->error
= dpif_linux_flow_from_ofpbuf(&reply
,
1113 dpif_linux_flow_get_stats(&reply
, put
->stats
);
1118 memset(put
->stats
, 0, sizeof *put
->stats
);
1123 case DPIF_OP_FLOW_DEL
:
1124 del
= &op
->u
.flow_del
;
1127 struct dpif_linux_flow reply
;
1129 op
->error
= dpif_linux_flow_from_ofpbuf(&reply
,
1132 dpif_linux_flow_get_stats(&reply
, del
->stats
);
1137 memset(del
->stats
, 0, sizeof *del
->stats
);
1142 case DPIF_OP_EXECUTE
:
1149 ofpbuf_uninit(&aux
->request
);
1150 ofpbuf_uninit(&aux
->reply
);
1155 dpif_linux_operate(struct dpif
*dpif
, struct dpif_op
**ops
, size_t n_ops
)
1158 size_t chunk
= MIN(n_ops
, MAX_OPS
);
1159 dpif_linux_operate__(dpif
, ops
, chunk
);
1166 dpif_linux_recv_set(struct dpif
*dpif_
, bool enable
)
1168 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1170 if ((dpif
->epoll_fd
>= 0) == enable
) {
1175 destroy_channels(dpif
);
1177 struct dpif_port_dump port_dump
;
1178 struct dpif_port port
;
1180 dpif
->epoll_fd
= epoll_create(10);
1181 if (dpif
->epoll_fd
< 0) {
1185 DPIF_PORT_FOR_EACH (&port
, &port_dump
, &dpif
->dpif
) {
1186 struct dpif_linux_vport vport_request
;
1187 struct nl_sock
*sock
;
1188 uint32_t upcall_pid
;
1191 error
= nl_sock_create(NETLINK_GENERIC
, &sock
);
1196 upcall_pid
= nl_sock_pid(sock
);
1198 dpif_linux_vport_init(&vport_request
);
1199 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
1200 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
1201 vport_request
.port_no
= port
.port_no
;
1202 vport_request
.upcall_pid
= &upcall_pid
;
1203 error
= dpif_linux_vport_transact(&vport_request
, NULL
, NULL
);
1205 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink pid %"PRIu32
,
1206 dpif_name(&dpif
->dpif
), vport_request
.port_no
,
1209 VLOG_WARN_RL(&error_rl
,
1210 "%s: failed to set upcall pid on port: %s",
1211 dpif_name(&dpif
->dpif
), strerror(error
));
1212 nl_sock_destroy(sock
);
1214 if (error
== ENODEV
|| error
== ENOENT
) {
1215 /* This device isn't there, but keep trying the others. */
1222 error
= add_channel(dpif
, port
.port_no
, sock
);
1224 VLOG_INFO("%s: could not add channel for port %s",
1225 dpif_name(dpif_
), port
.name
);
1226 nl_sock_destroy(sock
);
1236 dpif_linux_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
1237 uint32_t queue_id
, uint32_t *priority
)
1239 if (queue_id
< 0xf000) {
1240 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
1248 parse_odp_packet(struct ofpbuf
*buf
, struct dpif_upcall
*upcall
,
1251 static const struct nl_policy ovs_packet_policy
[] = {
1252 /* Always present. */
1253 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
1254 .min_len
= ETH_HEADER_LEN
},
1255 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1257 /* OVS_PACKET_CMD_ACTION only. */
1258 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_UNSPEC
, .optional
= true },
1261 struct ovs_header
*ovs_header
;
1262 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
1263 struct nlmsghdr
*nlmsg
;
1264 struct genlmsghdr
*genl
;
1268 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1270 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1271 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1272 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1273 if (!nlmsg
|| !genl
|| !ovs_header
1274 || nlmsg
->nlmsg_type
!= ovs_packet_family
1275 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
1276 ARRAY_SIZE(ovs_packet_policy
))) {
1280 type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
1281 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
1287 memset(upcall
, 0, sizeof *upcall
);
1288 upcall
->type
= type
;
1289 upcall
->packet
= buf
;
1290 upcall
->packet
->data
= CONST_CAST(struct nlattr
*,
1291 nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
]));
1292 upcall
->packet
->size
= nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]);
1293 upcall
->key
= CONST_CAST(struct nlattr
*,
1294 nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]));
1295 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
1296 upcall
->userdata
= a
[OVS_PACKET_ATTR_USERDATA
];
1297 *dp_ifindex
= ovs_header
->dp_ifindex
;
1303 dpif_linux_recv(struct dpif
*dpif_
, struct dpif_upcall
*upcall
,
1306 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1309 if (dpif
->epoll_fd
< 0) {
1313 if (dpif
->event_offset
>= dpif
->n_events
) {
1316 dpif
->event_offset
= dpif
->n_events
= 0;
1319 retval
= epoll_wait(dpif
->epoll_fd
, dpif
->epoll_events
,
1320 dpif
->uc_array_size
, 0);
1321 } while (retval
< 0 && errno
== EINTR
);
1323 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1324 VLOG_WARN_RL(&rl
, "epoll_wait failed (%s)", strerror(errno
));
1325 } else if (retval
> 0) {
1326 dpif
->n_events
= retval
;
1330 while (dpif
->event_offset
< dpif
->n_events
) {
1331 int idx
= dpif
->epoll_events
[dpif
->event_offset
].data
.u32
;
1332 struct dpif_channel
*ch
= &dpif
->channels
[idx
];
1334 dpif
->event_offset
++;
1340 if (++read_tries
> 50) {
1344 error
= nl_sock_recv(ch
->sock
, buf
, false);
1345 if (error
== ENOBUFS
) {
1346 /* ENOBUFS typically means that we've received so many
1347 * packets that the buffer overflowed. Try again
1348 * immediately because there's almost certainly a packet
1349 * waiting for us. */
1350 report_loss(dpif_
, ch
);
1354 ch
->last_poll
= time_msec();
1356 if (error
== EAGAIN
) {
1362 error
= parse_odp_packet(buf
, upcall
, &dp_ifindex
);
1363 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
1375 dpif_linux_recv_wait(struct dpif
*dpif_
)
1377 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1379 if (dpif
->epoll_fd
< 0) {
1383 poll_fd_wait(dpif
->epoll_fd
, POLLIN
);
1387 dpif_linux_recv_purge(struct dpif
*dpif_
)
1389 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1390 struct dpif_channel
*ch
;
1392 if (dpif
->epoll_fd
< 0) {
1396 for (ch
= dpif
->channels
; ch
< &dpif
->channels
[dpif
->uc_array_size
]; ch
++) {
1398 nl_sock_drain(ch
->sock
);
/* Provider class object for this dpif implementation: a positional
 * initializer of 'struct dpif_class' that plugs in the dpif_linux_*()
 * functions listed below.
 *
 * Because the initializer is positional, the order of the entries must match
 * the member order of 'struct dpif_class' (presumably declared in
 * "dpif-provider.h" -- TODO confirm).
 *
 * NOTE(review): the numbering embedded in this listing skips 1404, 1406-1411,
 * 1431-1432 and 1435, and the closing brace is not shown, so some slots are
 * not visible here.  Verify the complete table before changing any entry. */
1403 const struct dpif_class dpif_linux_class
= {
1405 dpif_linux_enumerate
,
1412 dpif_linux_get_stats
,
1413 dpif_linux_port_add
,
1414 dpif_linux_port_del
,
1415 dpif_linux_port_query_by_number
,
1416 dpif_linux_port_query_by_name
,
1417 dpif_linux_get_max_ports
,
1418 dpif_linux_port_get_pid
,
1419 dpif_linux_port_dump_start
,
1420 dpif_linux_port_dump_next
,
1421 dpif_linux_port_dump_done
,
1422 dpif_linux_port_poll
,
1423 dpif_linux_port_poll_wait
,
1424 dpif_linux_flow_get
,
1425 dpif_linux_flow_put
,
1426 dpif_linux_flow_del
,
1427 dpif_linux_flow_flush
,
1428 dpif_linux_flow_dump_start
,
1429 dpif_linux_flow_dump_next
,
1430 dpif_linux_flow_dump_done
,
1433 dpif_linux_recv_set
,
1434 dpif_linux_queue_to_priority
,
1436 dpif_linux_recv_wait
,
1437 dpif_linux_recv_purge
,
1441 dpif_linux_init(void)
1443 static int error
= -1;
1446 unsigned int ovs_vport_mcgroup
;
1448 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
1449 &ovs_datapath_family
);
1451 VLOG_ERR("Generic Netlink family '%s' does not exist. "
1452 "The Open vSwitch kernel module is probably not loaded.",
1453 OVS_DATAPATH_FAMILY
);
1456 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
1459 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
1462 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
1463 &ovs_packet_family
);
1466 error
= nl_sock_create(NETLINK_GENERIC
, &genl_sock
);
1469 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
1471 OVS_VPORT_MCGROUP_FALLBACK_ID
);
1474 static struct dpif_linux_vport vport
;
1475 nln
= nln_create(NETLINK_GENERIC
, ovs_vport_mcgroup
,
1476 dpif_linux_nln_parse
, &vport
);
1484 dpif_linux_is_internal_device(const char *name
)
1486 struct dpif_linux_vport reply
;
1490 error
= dpif_linux_vport_get(name
, &reply
, &buf
);
1493 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
1494 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
1495 name
, strerror(error
));
1498 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
/* Parse callback registered with nln_create(): decodes the Netlink message in
 * 'buf' into the struct dpif_linux_vport that 'vport_' points to.  Returns
 * true if the message parsed successfully, false otherwise. */
static bool
dpif_linux_nln_parse(struct ofpbuf *buf, void *vport_)
{
    struct dpif_linux_vport *vport = vport_;
    return dpif_linux_vport_from_ofpbuf(vport, buf) == 0;
}

1509 dpif_linux_port_changed(const void *vport_
, void *dpif_
)
1511 const struct dpif_linux_vport
*vport
= vport_
;
1512 struct dpif_linux
*dpif
= dpif_
;
1515 if (vport
->dp_ifindex
== dpif
->dp_ifindex
1516 && (vport
->cmd
== OVS_VPORT_CMD_NEW
1517 || vport
->cmd
== OVS_VPORT_CMD_DEL
1518 || vport
->cmd
== OVS_VPORT_CMD_SET
)) {
1519 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1520 dpif
->dpif
.full_name
, vport
->name
, vport
->cmd
);
1521 sset_add(&dpif
->changed_ports
, vport
->name
);
1524 dpif
->change_error
= true;
1528 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1529 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1530 * positive errno value.
1532 * 'vport' will contain pointers into 'buf', so the caller should not free
1533 * 'buf' while 'vport' is still in use. */
1535 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*vport
,
1536 const struct ofpbuf
*buf
)
1538 static const struct nl_policy ovs_vport_policy
[] = {
1539 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
1540 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
1541 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1542 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_U32
},
1543 [OVS_VPORT_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_vport_stats
),
1545 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1548 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
1549 struct ovs_header
*ovs_header
;
1550 struct nlmsghdr
*nlmsg
;
1551 struct genlmsghdr
*genl
;
1554 dpif_linux_vport_init(vport
);
1556 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1557 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1558 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1559 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1560 if (!nlmsg
|| !genl
|| !ovs_header
1561 || nlmsg
->nlmsg_type
!= ovs_vport_family
1562 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
1563 ARRAY_SIZE(ovs_vport_policy
))) {
1567 vport
->cmd
= genl
->cmd
;
1568 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
1569 vport
->port_no
= nl_attr_get_odp_port(a
[OVS_VPORT_ATTR_PORT_NO
]);
1570 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1571 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
1572 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
1573 vport
->upcall_pid
= nl_attr_get(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1575 if (a
[OVS_VPORT_ATTR_STATS
]) {
1576 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
1578 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
1579 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
1580 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
1585 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1586 * followed by Netlink attributes corresponding to 'vport'. */
1588 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*vport
,
1591 struct ovs_header
*ovs_header
;
1593 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
1594 vport
->cmd
, OVS_VPORT_VERSION
);
1596 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1597 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
1599 if (vport
->port_no
!= ODPP_NONE
) {
1600 nl_msg_put_odp_port(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
1603 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
1604 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
1608 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
1611 if (vport
->upcall_pid
) {
1612 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_UPCALL_PID
, *vport
->upcall_pid
);
1616 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
1617 vport
->stats
, sizeof *vport
->stats
);
1620 if (vport
->options
) {
1621 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
1622 vport
->options
, vport
->options_len
);
1626 /* Clears 'vport' to "empty" values. */
1628 dpif_linux_vport_init(struct dpif_linux_vport
*vport
)
1630 memset(vport
, 0, sizeof *vport
);
1631 vport
->port_no
= ODPP_NONE
;
1634 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1635 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1636 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1637 * result of the command is expected to be an ovs_vport also, which is decoded
1638 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1639 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1641 dpif_linux_vport_transact(const struct dpif_linux_vport
*request
,
1642 struct dpif_linux_vport
*reply
,
1643 struct ofpbuf
**bufp
)
1645 struct ofpbuf
*request_buf
;
1648 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
1650 error
= dpif_linux_init();
1654 dpif_linux_vport_init(reply
);
1659 request_buf
= ofpbuf_new(1024);
1660 dpif_linux_vport_to_ofpbuf(request
, request_buf
);
1661 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1662 ofpbuf_delete(request_buf
);
1666 error
= dpif_linux_vport_from_ofpbuf(reply
, *bufp
);
1669 dpif_linux_vport_init(reply
);
1670 ofpbuf_delete(*bufp
);
1677 /* Obtains information about the kernel vport named 'name' and stores it into
1678 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1679 * longer needed ('reply' will contain pointers into '*bufp'). */
1681 dpif_linux_vport_get(const char *name
, struct dpif_linux_vport
*reply
,
1682 struct ofpbuf
**bufp
)
1684 struct dpif_linux_vport request
;
1686 dpif_linux_vport_init(&request
);
1687 request
.cmd
= OVS_VPORT_CMD_GET
;
1688 request
.name
= name
;
1690 return dpif_linux_vport_transact(&request
, reply
, bufp
);
1693 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1694 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
1695 * positive errno value.
1697 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
1698 * while 'dp' is still in use. */
1700 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*dp
, const struct ofpbuf
*buf
)
1702 static const struct nl_policy ovs_datapath_policy
[] = {
1703 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1704 [OVS_DP_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_dp_stats
),
1708 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
1709 struct ovs_header
*ovs_header
;
1710 struct nlmsghdr
*nlmsg
;
1711 struct genlmsghdr
*genl
;
1714 dpif_linux_dp_init(dp
);
1716 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1717 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1718 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1719 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1720 if (!nlmsg
|| !genl
|| !ovs_header
1721 || nlmsg
->nlmsg_type
!= ovs_datapath_family
1722 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
1723 ARRAY_SIZE(ovs_datapath_policy
))) {
1727 dp
->cmd
= genl
->cmd
;
1728 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
1729 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
1730 if (a
[OVS_DP_ATTR_STATS
]) {
1731 /* Can't use structure assignment because Netlink doesn't ensure
1732 * sufficient alignment for 64-bit members. */
1733 memcpy(&dp
->stats
, nl_attr_get(a
[OVS_DP_ATTR_STATS
]),
1740 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
1742 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp
*dp
, struct ofpbuf
*buf
)
1744 struct ovs_header
*ovs_header
;
1746 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
1747 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
,
1748 OVS_DATAPATH_VERSION
);
1750 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1751 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
1754 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
1757 if (dp
->upcall_pid
) {
1758 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, *dp
->upcall_pid
);
1761 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
1764 /* Clears 'dp' to "empty" values. */
1766 dpif_linux_dp_init(struct dpif_linux_dp
*dp
)
1768 memset(dp
, 0, sizeof *dp
);
1772 dpif_linux_dp_dump_start(struct nl_dump
*dump
)
1774 struct dpif_linux_dp request
;
1777 dpif_linux_dp_init(&request
);
1778 request
.cmd
= OVS_DP_CMD_GET
;
1780 buf
= ofpbuf_new(1024);
1781 dpif_linux_dp_to_ofpbuf(&request
, buf
);
1782 nl_dump_start(dump
, genl_sock
, buf
);
1786 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1787 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1788 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1789 * result of the command is expected to be of the same form, which is decoded
1790 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1791 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1793 dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
1794 struct dpif_linux_dp
*reply
, struct ofpbuf
**bufp
)
1796 struct ofpbuf
*request_buf
;
1799 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
1801 request_buf
= ofpbuf_new(1024);
1802 dpif_linux_dp_to_ofpbuf(request
, request_buf
);
1803 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1804 ofpbuf_delete(request_buf
);
1808 error
= dpif_linux_dp_from_ofpbuf(reply
, *bufp
);
1811 dpif_linux_dp_init(reply
);
1812 ofpbuf_delete(*bufp
);
1819 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
1820 * The caller must free '*bufp' when the reply is no longer needed ('reply'
1821 * will contain pointers into '*bufp'). */
1823 dpif_linux_dp_get(const struct dpif
*dpif_
, struct dpif_linux_dp
*reply
,
1824 struct ofpbuf
**bufp
)
1826 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1827 struct dpif_linux_dp request
;
1829 dpif_linux_dp_init(&request
);
1830 request
.cmd
= OVS_DP_CMD_GET
;
1831 request
.dp_ifindex
= dpif
->dp_ifindex
;
1833 return dpif_linux_dp_transact(&request
, reply
, bufp
);
1836 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1837 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1838 * positive errno value.
1840 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1841 * while 'flow' is still in use. */
1843 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*flow
,
1844 const struct ofpbuf
*buf
)
1846 static const struct nl_policy ovs_flow_policy
[] = {
1847 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1848 [OVS_FLOW_ATTR_MASK
] = { .type
= NL_A_NESTED
, .optional
= true },
1849 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1850 [OVS_FLOW_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_flow_stats
),
1852 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
1853 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
1854 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
1857 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
1858 struct ovs_header
*ovs_header
;
1859 struct nlmsghdr
*nlmsg
;
1860 struct genlmsghdr
*genl
;
1863 dpif_linux_flow_init(flow
);
1865 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1866 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1867 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1868 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1869 if (!nlmsg
|| !genl
|| !ovs_header
1870 || nlmsg
->nlmsg_type
!= ovs_flow_family
1871 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
1872 ARRAY_SIZE(ovs_flow_policy
))) {
1876 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
1877 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
1878 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
1879 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
1881 if (a
[OVS_FLOW_ATTR_MASK
]) {
1882 flow
->mask
= nl_attr_get(a
[OVS_FLOW_ATTR_MASK
]);
1883 flow
->mask_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_MASK
]);
1885 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1886 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
1887 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
1889 if (a
[OVS_FLOW_ATTR_STATS
]) {
1890 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
1892 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
1893 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
1895 if (a
[OVS_FLOW_ATTR_USED
]) {
1896 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
1901 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1902 * followed by Netlink attributes corresponding to 'flow'. */
1904 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*flow
,
1907 struct ovs_header
*ovs_header
;
1909 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
1910 NLM_F_REQUEST
| flow
->nlmsg_flags
,
1911 flow
->cmd
, OVS_FLOW_VERSION
);
1913 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1914 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
1916 if (flow
->key_len
) {
1917 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
, flow
->key
, flow
->key_len
);
1920 if (flow
->mask_len
) {
1921 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_MASK
, flow
->mask
, flow
->mask_len
);
1924 if (flow
->actions
|| flow
->actions_len
) {
1925 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
1926 flow
->actions
, flow
->actions_len
);
1929 /* We never need to send these to the kernel. */
1930 ovs_assert(!flow
->stats
);
1931 ovs_assert(!flow
->tcp_flags
);
1932 ovs_assert(!flow
->used
);
1935 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
1939 /* Clears 'flow' to "empty" values. */
1941 dpif_linux_flow_init(struct dpif_linux_flow
*flow
)
1943 memset(flow
, 0, sizeof *flow
);
1946 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1947 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1948 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1949 * result of the command is expected to be a flow also, which is decoded and
1950 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
1951 * is no longer needed ('reply' will contain pointers into '*bufp'). */
1953 dpif_linux_flow_transact(struct dpif_linux_flow
*request
,
1954 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
1956 struct ofpbuf
*request_buf
;
1959 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
1962 request
->nlmsg_flags
|= NLM_F_ECHO
;
1965 request_buf
= ofpbuf_new(1024);
1966 dpif_linux_flow_to_ofpbuf(request
, request_buf
);
1967 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1968 ofpbuf_delete(request_buf
);
1972 error
= dpif_linux_flow_from_ofpbuf(reply
, *bufp
);
1975 dpif_linux_flow_init(reply
);
1976 ofpbuf_delete(*bufp
);
1984 dpif_linux_flow_get_stats(const struct dpif_linux_flow
*flow
,
1985 struct dpif_flow_stats
*stats
)
1988 stats
->n_packets
= get_unaligned_u64(&flow
->stats
->n_packets
);
1989 stats
->n_bytes
= get_unaligned_u64(&flow
->stats
->n_bytes
);
1991 stats
->n_packets
= 0;
1994 stats
->used
= flow
->used
? get_32aligned_u64(flow
->used
) : 0;
1995 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;
1998 /* Logs information about a packet that was recently lost in 'ch' (in
2001 report_loss(struct dpif
*dpif_
, struct dpif_channel
*ch
)
2003 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
2004 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 5);
2007 if (VLOG_DROP_WARN(&rl
)) {
2012 if (ch
->last_poll
!= LLONG_MIN
) {
2013 ds_put_format(&s
, " (last polled %lld ms ago)",
2014 time_msec() - ch
->last_poll
);
2017 VLOG_WARN("%s: lost packet on channel %td%s",
2018 dpif_name(dpif_
), ch
- dpif
->channels
, ds_cstr(&s
));