2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-linux.h"
27 #include <linux/types.h>
28 #include <linux/pkt_sched.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/sockios.h>
34 #include <sys/epoll.h>
39 #include "dpif-provider.h"
40 #include "dynamic-string.h"
43 #include "netdev-linux.h"
44 #include "netdev-vport.h"
45 #include "netlink-notifier.h"
46 #include "netlink-socket.h"
50 #include "openvswitch/datapath-compat.h"
51 #include "openvswitch/tunnel.h"
53 #include "poll-loop.h"
57 #include "unaligned.h"
61 VLOG_DEFINE_THIS_MODULE(dpif_linux
);
62 enum { MAX_PORTS
= USHRT_MAX
};
64 enum { N_UPCALL_SOCKS
= 16 };
65 BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS
));
66 BUILD_ASSERT_DECL(N_UPCALL_SOCKS
<= 32); /* We use a 32-bit word as a mask. */
68 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
69 * missing if we have old headers. */
70 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
72 struct dpif_linux_dp
{
73 /* Generic Netlink header. */
76 /* struct ovs_header. */
80 const char *name
; /* OVS_DP_ATTR_NAME. */
81 const uint32_t *upcall_pid
; /* OVS_DP_UPCALL_PID. */
82 struct ovs_dp_stats stats
; /* OVS_DP_ATTR_STATS. */
85 static void dpif_linux_dp_init(struct dpif_linux_dp
*);
86 static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*,
87 const struct ofpbuf
*);
88 static void dpif_linux_dp_dump_start(struct nl_dump
*);
89 static int dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
90 struct dpif_linux_dp
*reply
,
91 struct ofpbuf
**bufp
);
92 static int dpif_linux_dp_get(const struct dpif
*, struct dpif_linux_dp
*reply
,
93 struct ofpbuf
**bufp
);
95 struct dpif_linux_flow
{
96 /* Generic Netlink header. */
99 /* struct ovs_header. */
100 unsigned int nlmsg_flags
;
105 * The 'stats' member points to 64-bit data that might only be aligned on
106 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
109 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
110 * the Netlink version of the command, even if actions_len is zero. */
111 const struct nlattr
*key
; /* OVS_FLOW_ATTR_KEY. */
113 const struct nlattr
*actions
; /* OVS_FLOW_ATTR_ACTIONS. */
115 const struct ovs_flow_stats
*stats
; /* OVS_FLOW_ATTR_STATS. */
116 const uint8_t *tcp_flags
; /* OVS_FLOW_ATTR_TCP_FLAGS. */
117 const ovs_32aligned_u64
*used
; /* OVS_FLOW_ATTR_USED. */
118 bool clear
; /* OVS_FLOW_ATTR_CLEAR. */
121 static void dpif_linux_flow_init(struct dpif_linux_flow
*);
122 static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*,
123 const struct ofpbuf
*);
124 static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*,
126 static int dpif_linux_flow_transact(struct dpif_linux_flow
*request
,
127 struct dpif_linux_flow
*reply
,
128 struct ofpbuf
**bufp
);
129 static void dpif_linux_flow_get_stats(const struct dpif_linux_flow
*,
130 struct dpif_flow_stats
*);
132 /* Datapath interface for the openvswitch Linux kernel module. */
137 /* Upcall messages. */
138 struct nl_sock
*upcall_socks
[N_UPCALL_SOCKS
];
139 uint32_t ready_mask
; /* 1-bit for each sock with unread messages. */
140 int epoll_fd
; /* epoll fd that includes the upcall socks. */
142 /* Change notification. */
143 struct sset changed_ports
; /* Ports that have changed. */
144 struct nln_notifier
*port_notifier
;
147 /* Port number allocation. */
148 uint16_t alloc_port_no
;
151 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
153 /* Generic Netlink family numbers for OVS. */
154 static int ovs_datapath_family
;
155 static int ovs_vport_family
;
156 static int ovs_flow_family
;
157 static int ovs_packet_family
;
159 /* Generic Netlink socket. */
160 static struct nl_sock
*genl_sock
;
161 static struct nln
*nln
= NULL
;
163 static int dpif_linux_init(void);
164 static void open_dpif(const struct dpif_linux_dp
*, struct dpif
**);
165 static bool dpif_linux_nln_parse(struct ofpbuf
*, void *);
166 static void dpif_linux_port_changed(const void *vport
, void *dpif
);
167 static uint32_t dpif_linux_port_get_pid(const struct dpif
*, uint16_t port_no
);
169 static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*,
171 static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*,
172 const struct ofpbuf
*);
174 static struct dpif_linux
*
175 dpif_linux_cast(const struct dpif
*dpif
)
177 dpif_assert_class(dpif
, &dpif_linux_class
);
178 return CONTAINER_OF(dpif
, struct dpif_linux
, dpif
);
182 dpif_linux_enumerate(struct sset
*all_dps
)
188 error
= dpif_linux_init();
193 dpif_linux_dp_dump_start(&dump
);
194 while (nl_dump_next(&dump
, &msg
)) {
195 struct dpif_linux_dp dp
;
197 if (!dpif_linux_dp_from_ofpbuf(&dp
, &msg
)) {
198 sset_add(all_dps
, dp
.name
);
201 return nl_dump_done(&dump
);
205 dpif_linux_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
206 bool create
, struct dpif
**dpifp
)
208 struct dpif_linux_dp dp_request
, dp
;
213 error
= dpif_linux_init();
218 /* Create or look up datapath. */
219 dpif_linux_dp_init(&dp_request
);
221 dp_request
.cmd
= OVS_DP_CMD_NEW
;
223 dp_request
.upcall_pid
= &upcall_pid
;
225 dp_request
.cmd
= OVS_DP_CMD_GET
;
227 dp_request
.name
= name
;
228 error
= dpif_linux_dp_transact(&dp_request
, &dp
, &buf
);
233 open_dpif(&dp
, dpifp
);
239 open_dpif(const struct dpif_linux_dp
*dp
, struct dpif
**dpifp
)
241 struct dpif_linux
*dpif
;
243 dpif
= xzalloc(sizeof *dpif
);
244 dpif
->port_notifier
= nln_notifier_create(nln
, dpif_linux_port_changed
,
248 dpif_init(&dpif
->dpif
, &dpif_linux_class
, dp
->name
,
249 dp
->dp_ifindex
, dp
->dp_ifindex
);
251 dpif
->dp_ifindex
= dp
->dp_ifindex
;
252 sset_init(&dpif
->changed_ports
);
253 *dpifp
= &dpif
->dpif
;
257 destroy_upcall_socks(struct dpif_linux
*dpif
)
261 if (dpif
->epoll_fd
>= 0) {
262 close(dpif
->epoll_fd
);
265 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
266 nl_sock_destroy(dpif
->upcall_socks
[i
]);
267 dpif
->upcall_socks
[i
] = NULL
;
272 dpif_linux_close(struct dpif
*dpif_
)
274 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
276 nln_notifier_destroy(dpif
->port_notifier
);
277 destroy_upcall_socks(dpif
);
278 sset_destroy(&dpif
->changed_ports
);
283 dpif_linux_destroy(struct dpif
*dpif_
)
285 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
286 struct dpif_linux_dp dp
;
288 dpif_linux_dp_init(&dp
);
289 dp
.cmd
= OVS_DP_CMD_DEL
;
290 dp
.dp_ifindex
= dpif
->dp_ifindex
;
291 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
295 dpif_linux_run(struct dpif
*dpif OVS_UNUSED
)
303 dpif_linux_wait(struct dpif
*dpif OVS_UNUSED
)
311 dpif_linux_get_stats(const struct dpif
*dpif_
, struct dpif_dp_stats
*stats
)
313 struct dpif_linux_dp dp
;
317 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
319 stats
->n_hit
= dp
.stats
.n_hit
;
320 stats
->n_missed
= dp
.stats
.n_missed
;
321 stats
->n_lost
= dp
.stats
.n_lost
;
322 stats
->n_flows
= dp
.stats
.n_flows
;
329 dpif_linux_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
332 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
333 const char *name
= netdev_get_name(netdev
);
334 const char *type
= netdev_get_type(netdev
);
335 struct dpif_linux_vport request
, reply
;
336 const struct ofpbuf
*options
;
338 int error
, i
= 0, max_ports
= MAX_PORTS
;
340 dpif_linux_vport_init(&request
);
341 request
.cmd
= OVS_VPORT_CMD_NEW
;
342 request
.dp_ifindex
= dpif
->dp_ifindex
;
343 request
.type
= netdev_vport_get_vport_type(netdev
);
344 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
345 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
346 "unsupported type `%s'",
347 dpif_name(dpif_
), name
, type
);
352 options
= netdev_vport_get_options(netdev
);
353 if (options
&& options
->size
) {
354 request
.options
= options
->data
;
355 request
.options_len
= options
->size
;
358 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
359 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
362 /* Loop until we find a port that isn't used. */
366 request
.port_no
= ++dpif
->alloc_port_no
;
367 upcall_pid
= dpif_linux_port_get_pid(dpif_
, request
.port_no
);
368 request
.upcall_pid
= &upcall_pid
;
369 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
372 *port_nop
= reply
.port_no
;
373 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink pid %"PRIu32
,
374 dpif_name(dpif_
), request
.port_no
, upcall_pid
);
375 } else if (error
== EFBIG
) {
376 /* Older datapath has lower limit. */
377 max_ports
= dpif
->alloc_port_no
;
378 dpif
->alloc_port_no
= 0;
382 } while ((i
++ < max_ports
)
383 && (error
== EBUSY
|| error
== EFBIG
));
389 dpif_linux_port_del(struct dpif
*dpif_
, uint16_t port_no
)
391 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
392 struct dpif_linux_vport vport
;
395 dpif_linux_vport_init(&vport
);
396 vport
.cmd
= OVS_VPORT_CMD_DEL
;
397 vport
.dp_ifindex
= dpif
->dp_ifindex
;
398 vport
.port_no
= port_no
;
399 error
= dpif_linux_vport_transact(&vport
, NULL
, NULL
);
405 dpif_linux_port_query__(const struct dpif
*dpif
, uint32_t port_no
,
406 const char *port_name
, struct dpif_port
*dpif_port
)
408 struct dpif_linux_vport request
;
409 struct dpif_linux_vport reply
;
413 dpif_linux_vport_init(&request
);
414 request
.cmd
= OVS_VPORT_CMD_GET
;
415 request
.dp_ifindex
= dpif_linux_cast(dpif
)->dp_ifindex
;
416 request
.port_no
= port_no
;
417 request
.name
= port_name
;
419 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
421 dpif_port
->name
= xstrdup(reply
.name
);
422 dpif_port
->type
= xstrdup(netdev_vport_get_netdev_type(&reply
));
423 dpif_port
->port_no
= reply
.port_no
;
430 dpif_linux_port_query_by_number(const struct dpif
*dpif
, uint16_t port_no
,
431 struct dpif_port
*dpif_port
)
433 return dpif_linux_port_query__(dpif
, port_no
, NULL
, dpif_port
);
437 dpif_linux_port_query_by_name(const struct dpif
*dpif
, const char *devname
,
438 struct dpif_port
*dpif_port
)
440 return dpif_linux_port_query__(dpif
, 0, devname
, dpif_port
);
444 dpif_linux_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
450 dpif_linux_port_get_pid(const struct dpif
*dpif_
, uint16_t port_no
)
452 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
454 if (dpif
->epoll_fd
< 0) {
457 int idx
= port_no
& (N_UPCALL_SOCKS
- 1);
458 return nl_sock_pid(dpif
->upcall_socks
[idx
]);
463 dpif_linux_flow_flush(struct dpif
*dpif_
)
465 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
466 struct dpif_linux_flow flow
;
468 dpif_linux_flow_init(&flow
);
469 flow
.cmd
= OVS_FLOW_CMD_DEL
;
470 flow
.dp_ifindex
= dpif
->dp_ifindex
;
471 return dpif_linux_flow_transact(&flow
, NULL
, NULL
);
474 struct dpif_linux_port_state
{
479 dpif_linux_port_dump_start(const struct dpif
*dpif_
, void **statep
)
481 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
482 struct dpif_linux_port_state
*state
;
483 struct dpif_linux_vport request
;
486 *statep
= state
= xmalloc(sizeof *state
);
488 dpif_linux_vport_init(&request
);
489 request
.cmd
= OVS_DP_CMD_GET
;
490 request
.dp_ifindex
= dpif
->dp_ifindex
;
492 buf
= ofpbuf_new(1024);
493 dpif_linux_vport_to_ofpbuf(&request
, buf
);
494 nl_dump_start(&state
->dump
, genl_sock
, buf
);
501 dpif_linux_port_dump_next(const struct dpif
*dpif OVS_UNUSED
, void *state_
,
502 struct dpif_port
*dpif_port
)
504 struct dpif_linux_port_state
*state
= state_
;
505 struct dpif_linux_vport vport
;
509 if (!nl_dump_next(&state
->dump
, &buf
)) {
513 error
= dpif_linux_vport_from_ofpbuf(&vport
, &buf
);
518 dpif_port
->name
= (char *) vport
.name
;
519 dpif_port
->type
= (char *) netdev_vport_get_netdev_type(&vport
);
520 dpif_port
->port_no
= vport
.port_no
;
525 dpif_linux_port_dump_done(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
)
527 struct dpif_linux_port_state
*state
= state_
;
528 int error
= nl_dump_done(&state
->dump
);
535 dpif_linux_port_poll(const struct dpif
*dpif_
, char **devnamep
)
537 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
539 if (dpif
->change_error
) {
540 dpif
->change_error
= false;
541 sset_clear(&dpif
->changed_ports
);
543 } else if (!sset_is_empty(&dpif
->changed_ports
)) {
544 *devnamep
= sset_pop(&dpif
->changed_ports
);
552 dpif_linux_port_poll_wait(const struct dpif
*dpif_
)
554 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
555 if (!sset_is_empty(&dpif
->changed_ports
) || dpif
->change_error
) {
556 poll_immediate_wake();
561 dpif_linux_flow_get__(const struct dpif
*dpif_
,
562 const struct nlattr
*key
, size_t key_len
,
563 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
565 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
566 struct dpif_linux_flow request
;
568 dpif_linux_flow_init(&request
);
569 request
.cmd
= OVS_FLOW_CMD_GET
;
570 request
.dp_ifindex
= dpif
->dp_ifindex
;
572 request
.key_len
= key_len
;
573 return dpif_linux_flow_transact(&request
, reply
, bufp
);
577 dpif_linux_flow_get(const struct dpif
*dpif_
,
578 const struct nlattr
*key
, size_t key_len
,
579 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
581 struct dpif_linux_flow reply
;
585 error
= dpif_linux_flow_get__(dpif_
, key
, key_len
, &reply
, &buf
);
588 dpif_linux_flow_get_stats(&reply
, stats
);
591 buf
->data
= (void *) reply
.actions
;
592 buf
->size
= reply
.actions_len
;
602 dpif_linux_init_flow_put(struct dpif
*dpif_
, const struct dpif_flow_put
*put
,
603 struct dpif_linux_flow
*request
)
605 static struct nlattr dummy_action
;
607 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
609 dpif_linux_flow_init(request
);
610 request
->cmd
= (put
->flags
& DPIF_FP_CREATE
611 ? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
);
612 request
->dp_ifindex
= dpif
->dp_ifindex
;
613 request
->key
= put
->key
;
614 request
->key_len
= put
->key_len
;
615 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
616 request
->actions
= put
->actions
? put
->actions
: &dummy_action
;
617 request
->actions_len
= put
->actions_len
;
618 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
619 request
->clear
= true;
621 request
->nlmsg_flags
= put
->flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
625 dpif_linux_flow_put(struct dpif
*dpif_
, const struct dpif_flow_put
*put
)
627 struct dpif_linux_flow request
, reply
;
631 dpif_linux_init_flow_put(dpif_
, put
, &request
);
632 error
= dpif_linux_flow_transact(&request
,
633 put
->stats
? &reply
: NULL
,
634 put
->stats
? &buf
: NULL
);
635 if (!error
&& put
->stats
) {
636 dpif_linux_flow_get_stats(&reply
, put
->stats
);
643 dpif_linux_flow_del(struct dpif
*dpif_
,
644 const struct nlattr
*key
, size_t key_len
,
645 struct dpif_flow_stats
*stats
)
647 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
648 struct dpif_linux_flow request
, reply
;
652 dpif_linux_flow_init(&request
);
653 request
.cmd
= OVS_FLOW_CMD_DEL
;
654 request
.dp_ifindex
= dpif
->dp_ifindex
;
656 request
.key_len
= key_len
;
657 error
= dpif_linux_flow_transact(&request
,
658 stats
? &reply
: NULL
,
659 stats
? &buf
: NULL
);
660 if (!error
&& stats
) {
661 dpif_linux_flow_get_stats(&reply
, stats
);
667 struct dpif_linux_flow_state
{
669 struct dpif_linux_flow flow
;
670 struct dpif_flow_stats stats
;
675 dpif_linux_flow_dump_start(const struct dpif
*dpif_
, void **statep
)
677 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
678 struct dpif_linux_flow_state
*state
;
679 struct dpif_linux_flow request
;
682 *statep
= state
= xmalloc(sizeof *state
);
684 dpif_linux_flow_init(&request
);
685 request
.cmd
= OVS_DP_CMD_GET
;
686 request
.dp_ifindex
= dpif
->dp_ifindex
;
688 buf
= ofpbuf_new(1024);
689 dpif_linux_flow_to_ofpbuf(&request
, buf
);
690 nl_dump_start(&state
->dump
, genl_sock
, buf
);
699 dpif_linux_flow_dump_next(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
,
700 const struct nlattr
**key
, size_t *key_len
,
701 const struct nlattr
**actions
, size_t *actions_len
,
702 const struct dpif_flow_stats
**stats
)
704 struct dpif_linux_flow_state
*state
= state_
;
709 ofpbuf_delete(state
->buf
);
712 if (!nl_dump_next(&state
->dump
, &buf
)) {
716 error
= dpif_linux_flow_from_ofpbuf(&state
->flow
, &buf
);
721 if (actions
&& !state
->flow
.actions
) {
722 error
= dpif_linux_flow_get__(dpif_
, state
->flow
.key
,
724 &state
->flow
, &state
->buf
);
725 if (error
== ENOENT
) {
726 VLOG_DBG("dumped flow disappeared on get");
728 VLOG_WARN("error fetching dumped flow: %s", strerror(error
));
734 *actions
= state
->flow
.actions
;
735 *actions_len
= state
->flow
.actions_len
;
738 *key
= state
->flow
.key
;
739 *key_len
= state
->flow
.key_len
;
742 dpif_linux_flow_get_stats(&state
->flow
, &state
->stats
);
743 *stats
= &state
->stats
;
749 dpif_linux_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
751 struct dpif_linux_flow_state
*state
= state_
;
752 int error
= nl_dump_done(&state
->dump
);
753 ofpbuf_delete(state
->buf
);
758 static struct ofpbuf
*
759 dpif_linux_encode_execute(int dp_ifindex
,
760 const struct dpif_execute
*d_exec
)
762 struct ovs_header
*k_exec
;
765 buf
= ofpbuf_new(128 + d_exec
->actions_len
+ d_exec
->packet
->size
);
767 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
768 OVS_PACKET_CMD_EXECUTE
, OVS_PACKET_VERSION
);
770 k_exec
= ofpbuf_put_uninit(buf
, sizeof *k_exec
);
771 k_exec
->dp_ifindex
= dp_ifindex
;
773 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
,
774 d_exec
->packet
->data
, d_exec
->packet
->size
);
775 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_KEY
, d_exec
->key
, d_exec
->key_len
);
776 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
,
777 d_exec
->actions
, d_exec
->actions_len
);
783 dpif_linux_execute__(int dp_ifindex
, const struct dpif_execute
*execute
)
785 struct ofpbuf
*request
;
788 request
= dpif_linux_encode_execute(dp_ifindex
, execute
);
789 error
= nl_sock_transact(genl_sock
, request
, NULL
);
790 ofpbuf_delete(request
);
796 dpif_linux_execute(struct dpif
*dpif_
, const struct dpif_execute
*execute
)
798 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
800 return dpif_linux_execute__(dpif
->dp_ifindex
, execute
);
804 dpif_linux_operate(struct dpif
*dpif_
, struct dpif_op
**ops
, size_t n_ops
)
806 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
807 struct nl_transaction
**txnsp
;
808 struct nl_transaction
*txns
;
811 txns
= xmalloc(n_ops
* sizeof *txns
);
812 for (i
= 0; i
< n_ops
; i
++) {
813 struct nl_transaction
*txn
= &txns
[i
];
814 struct dpif_op
*op
= ops
[i
];
816 if (op
->type
== DPIF_OP_FLOW_PUT
) {
817 struct dpif_flow_put
*put
= &op
->u
.flow_put
;
818 struct dpif_linux_flow request
;
820 dpif_linux_init_flow_put(dpif_
, put
, &request
);
822 request
.nlmsg_flags
|= NLM_F_ECHO
;
824 txn
->request
= ofpbuf_new(1024);
825 dpif_linux_flow_to_ofpbuf(&request
, txn
->request
);
826 } else if (op
->type
== DPIF_OP_EXECUTE
) {
827 struct dpif_execute
*execute
= &op
->u
.execute
;
829 txn
->request
= dpif_linux_encode_execute(dpif
->dp_ifindex
,
836 txnsp
= xmalloc(n_ops
* sizeof *txnsp
);
837 for (i
= 0; i
< n_ops
; i
++) {
841 nl_sock_transact_multiple(genl_sock
, txnsp
, n_ops
);
845 for (i
= 0; i
< n_ops
; i
++) {
846 struct nl_transaction
*txn
= &txns
[i
];
847 struct dpif_op
*op
= ops
[i
];
849 if (op
->type
== DPIF_OP_FLOW_PUT
) {
850 struct dpif_flow_put
*put
= &op
->u
.flow_put
;
851 int error
= txn
->error
;
853 if (!error
&& put
->stats
) {
854 struct dpif_linux_flow reply
;
856 error
= dpif_linux_flow_from_ofpbuf(&reply
, txn
->reply
);
858 dpif_linux_flow_get_stats(&reply
, put
->stats
);
862 } else if (op
->type
== DPIF_OP_EXECUTE
) {
863 op
->error
= txn
->error
;
868 ofpbuf_delete(txn
->request
);
869 ofpbuf_delete(txn
->reply
);
875 set_upcall_pids(struct dpif
*dpif_
)
877 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
878 struct dpif_port_dump port_dump
;
879 struct dpif_port port
;
882 DPIF_PORT_FOR_EACH (&port
, &port_dump
, &dpif
->dpif
) {
883 uint32_t upcall_pid
= dpif_linux_port_get_pid(dpif_
, port
.port_no
);
884 struct dpif_linux_vport vport_request
;
886 dpif_linux_vport_init(&vport_request
);
887 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
888 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
889 vport_request
.port_no
= port
.port_no
;
890 vport_request
.upcall_pid
= &upcall_pid
;
891 error
= dpif_linux_vport_transact(&vport_request
, NULL
, NULL
);
893 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink pid %"PRIu32
,
894 dpif_name(&dpif
->dpif
), vport_request
.port_no
,
897 VLOG_WARN_RL(&error_rl
, "%s: failed to set upcall pid on port: %s",
898 dpif_name(&dpif
->dpif
), strerror(error
));
904 dpif_linux_recv_set(struct dpif
*dpif_
, bool enable
)
906 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
908 if ((dpif
->epoll_fd
>= 0) == enable
) {
913 destroy_upcall_socks(dpif
);
918 dpif
->epoll_fd
= epoll_create(N_UPCALL_SOCKS
);
919 if (dpif
->epoll_fd
< 0) {
923 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
924 struct epoll_event event
;
926 error
= nl_sock_create(NETLINK_GENERIC
, &dpif
->upcall_socks
[i
]);
928 destroy_upcall_socks(dpif
);
932 memset(&event
, 0, sizeof event
);
933 event
.events
= EPOLLIN
;
935 if (epoll_ctl(dpif
->epoll_fd
, EPOLL_CTL_ADD
,
936 nl_sock_fd(dpif
->upcall_socks
[i
]), &event
) < 0) {
938 destroy_upcall_socks(dpif
);
943 dpif
->ready_mask
= 0;
946 set_upcall_pids(dpif_
);
952 dpif_linux_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
953 uint32_t queue_id
, uint32_t *priority
)
955 if (queue_id
< 0xf000) {
956 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
964 parse_odp_packet(struct ofpbuf
*buf
, struct dpif_upcall
*upcall
,
967 static const struct nl_policy ovs_packet_policy
[] = {
968 /* Always present. */
969 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
970 .min_len
= ETH_HEADER_LEN
},
971 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
973 /* OVS_PACKET_CMD_ACTION only. */
974 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_U64
, .optional
= true },
977 struct ovs_header
*ovs_header
;
978 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
979 struct nlmsghdr
*nlmsg
;
980 struct genlmsghdr
*genl
;
984 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
986 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
987 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
988 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
989 if (!nlmsg
|| !genl
|| !ovs_header
990 || nlmsg
->nlmsg_type
!= ovs_packet_family
991 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
992 ARRAY_SIZE(ovs_packet_policy
))) {
996 type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
997 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
1003 memset(upcall
, 0, sizeof *upcall
);
1004 upcall
->type
= type
;
1005 upcall
->packet
= buf
;
1006 upcall
->packet
->data
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
]);
1007 upcall
->packet
->size
= nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]);
1008 upcall
->key
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]);
1009 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
1010 upcall
->userdata
= (a
[OVS_PACKET_ATTR_USERDATA
]
1011 ? nl_attr_get_u64(a
[OVS_PACKET_ATTR_USERDATA
])
1013 *dp_ifindex
= ovs_header
->dp_ifindex
;
1019 dpif_linux_recv(struct dpif
*dpif_
, struct dpif_upcall
*upcall
)
1021 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1024 if (dpif
->epoll_fd
< 0) {
1028 if (!dpif
->ready_mask
) {
1029 struct epoll_event events
[N_UPCALL_SOCKS
];
1034 retval
= epoll_wait(dpif
->epoll_fd
, events
, N_UPCALL_SOCKS
, 0);
1035 } while (retval
< 0 && errno
== EINTR
);
1037 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1038 VLOG_WARN_RL(&rl
, "epoll_wait failed (%s)", strerror(errno
));
1041 for (i
= 0; i
< retval
; i
++) {
1042 dpif
->ready_mask
|= 1u << events
[i
].data
.u32
;
1046 while (dpif
->ready_mask
) {
1047 int indx
= ffs(dpif
->ready_mask
) - 1;
1048 struct nl_sock
*upcall_sock
= dpif
->upcall_socks
[indx
];
1050 dpif
->ready_mask
&= ~(1u << indx
);
1057 if (++read_tries
> 50) {
1061 error
= nl_sock_recv(upcall_sock
, &buf
, false);
1062 if (error
== EAGAIN
) {
1068 error
= parse_odp_packet(buf
, upcall
, &dp_ifindex
);
1069 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
1084 dpif_linux_recv_wait(struct dpif
*dpif_
)
1086 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1088 if (dpif
->epoll_fd
< 0) {
1092 poll_fd_wait(dpif
->epoll_fd
, POLLIN
);
1096 dpif_linux_recv_purge(struct dpif
*dpif_
)
1098 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1101 if (dpif
->epoll_fd
< 0) {
1105 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
1106 nl_sock_drain(dpif
->upcall_socks
[i
]);
1110 const struct dpif_class dpif_linux_class
= {
1112 dpif_linux_enumerate
,
1118 dpif_linux_get_stats
,
1119 dpif_linux_port_add
,
1120 dpif_linux_port_del
,
1121 dpif_linux_port_query_by_number
,
1122 dpif_linux_port_query_by_name
,
1123 dpif_linux_get_max_ports
,
1124 dpif_linux_port_get_pid
,
1125 dpif_linux_port_dump_start
,
1126 dpif_linux_port_dump_next
,
1127 dpif_linux_port_dump_done
,
1128 dpif_linux_port_poll
,
1129 dpif_linux_port_poll_wait
,
1130 dpif_linux_flow_get
,
1131 dpif_linux_flow_put
,
1132 dpif_linux_flow_del
,
1133 dpif_linux_flow_flush
,
1134 dpif_linux_flow_dump_start
,
1135 dpif_linux_flow_dump_next
,
1136 dpif_linux_flow_dump_done
,
1139 dpif_linux_recv_set
,
1140 dpif_linux_queue_to_priority
,
1142 dpif_linux_recv_wait
,
1143 dpif_linux_recv_purge
,
1147 dpif_linux_init(void)
1149 static int error
= -1;
1152 unsigned int ovs_vport_mcgroup
;
1154 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
1155 &ovs_datapath_family
);
1157 VLOG_ERR("Generic Netlink family '%s' does not exist. "
1158 "The Open vSwitch kernel module is probably not loaded.",
1159 OVS_DATAPATH_FAMILY
);
1162 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
1165 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
1168 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
1169 &ovs_packet_family
);
1172 error
= nl_sock_create(NETLINK_GENERIC
, &genl_sock
);
1175 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
1177 OVS_VPORT_MCGROUP_FALLBACK_ID
);
1180 static struct dpif_linux_vport vport
;
1181 nln
= nln_create(NETLINK_GENERIC
, ovs_vport_mcgroup
,
1182 dpif_linux_nln_parse
, &vport
);
1190 dpif_linux_is_internal_device(const char *name
)
1192 struct dpif_linux_vport reply
;
1196 error
= dpif_linux_vport_get(name
, &reply
, &buf
);
1199 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
1200 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
1201 name
, strerror(error
));
1204 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
1208 dpif_linux_vport_send(int dp_ifindex
, uint32_t port_no
,
1209 const void *data
, size_t size
)
1211 struct ofpbuf actions
, key
, packet
;
1212 struct odputil_keybuf keybuf
;
1213 struct dpif_execute execute
;
1217 ofpbuf_use_const(&packet
, data
, size
);
1218 flow_extract(&packet
, 0, htonll(0), 0, &flow
);
1220 ofpbuf_use_stack(&key
, &keybuf
, sizeof keybuf
);
1221 odp_flow_key_from_flow(&key
, &flow
);
1223 ofpbuf_use_stack(&actions
, &action
, sizeof action
);
1224 nl_msg_put_u32(&actions
, OVS_ACTION_ATTR_OUTPUT
, port_no
);
1226 execute
.key
= key
.data
;
1227 execute
.key_len
= key
.size
;
1228 execute
.actions
= actions
.data
;
1229 execute
.actions_len
= actions
.size
;
1230 execute
.packet
= &packet
;
1231 return dpif_linux_execute__(dp_ifindex
, &execute
);
1235 dpif_linux_nln_parse(struct ofpbuf
*buf
, void *vport_
)
1237 struct dpif_linux_vport
*vport
= vport_
;
1238 return dpif_linux_vport_from_ofpbuf(vport
, buf
) == 0;
1242 dpif_linux_port_changed(const void *vport_
, void *dpif_
)
1244 const struct dpif_linux_vport
*vport
= vport_
;
1245 struct dpif_linux
*dpif
= dpif_
;
1248 if (vport
->dp_ifindex
== dpif
->dp_ifindex
1249 && (vport
->cmd
== OVS_VPORT_CMD_NEW
1250 || vport
->cmd
== OVS_VPORT_CMD_DEL
1251 || vport
->cmd
== OVS_VPORT_CMD_SET
)) {
1252 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1253 dpif
->dpif
.full_name
, vport
->name
, vport
->cmd
);
1254 sset_add(&dpif
->changed_ports
, vport
->name
);
1257 dpif
->change_error
= true;
1261 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1262 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1263 * positive errno value.
1265 * 'vport' will contain pointers into 'buf', so the caller should not free
1266 * 'buf' while 'vport' is still in use. */
1268 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*vport
,
1269 const struct ofpbuf
*buf
)
1271 static const struct nl_policy ovs_vport_policy
[] = {
1272 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
1273 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
1274 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1275 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_U32
},
1276 [OVS_VPORT_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_vport_stats
),
1278 [OVS_VPORT_ATTR_ADDRESS
] = { .type
= NL_A_UNSPEC
,
1279 .min_len
= ETH_ADDR_LEN
,
1280 .max_len
= ETH_ADDR_LEN
,
1282 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1285 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
1286 struct ovs_header
*ovs_header
;
1287 struct nlmsghdr
*nlmsg
;
1288 struct genlmsghdr
*genl
;
1291 dpif_linux_vport_init(vport
);
1293 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1294 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1295 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1296 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1297 if (!nlmsg
|| !genl
|| !ovs_header
1298 || nlmsg
->nlmsg_type
!= ovs_vport_family
1299 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
1300 ARRAY_SIZE(ovs_vport_policy
))) {
1304 vport
->cmd
= genl
->cmd
;
1305 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
1306 vport
->port_no
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1307 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1308 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
1309 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
1310 vport
->upcall_pid
= nl_attr_get(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1312 if (a
[OVS_VPORT_ATTR_STATS
]) {
1313 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
1315 if (a
[OVS_VPORT_ATTR_ADDRESS
]) {
1316 vport
->address
= nl_attr_get(a
[OVS_VPORT_ATTR_ADDRESS
]);
1318 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
1319 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
1320 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
1325 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1326 * followed by Netlink attributes corresponding to 'vport'. */
1328 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*vport
,
1331 struct ovs_header
*ovs_header
;
1333 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
1334 vport
->cmd
, OVS_VPORT_VERSION
);
1336 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1337 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
1339 if (vport
->port_no
!= UINT32_MAX
) {
1340 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
1343 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
1344 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
1348 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
1351 if (vport
->upcall_pid
) {
1352 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_UPCALL_PID
, *vport
->upcall_pid
);
1356 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
1357 vport
->stats
, sizeof *vport
->stats
);
1360 if (vport
->address
) {
1361 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_ADDRESS
,
1362 vport
->address
, ETH_ADDR_LEN
);
1365 if (vport
->options
) {
1366 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
1367 vport
->options
, vport
->options_len
);
1371 /* Clears 'vport' to "empty" values. */
1373 dpif_linux_vport_init(struct dpif_linux_vport
*vport
)
1375 memset(vport
, 0, sizeof *vport
);
1376 vport
->port_no
= UINT32_MAX
;
1379 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1380 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1381 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1382 * result of the command is expected to be an ovs_vport also, which is decoded
1383 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1384 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1386 dpif_linux_vport_transact(const struct dpif_linux_vport
*request
,
1387 struct dpif_linux_vport
*reply
,
1388 struct ofpbuf
**bufp
)
1390 struct ofpbuf
*request_buf
;
1393 assert((reply
!= NULL
) == (bufp
!= NULL
));
1395 error
= dpif_linux_init();
1399 dpif_linux_vport_init(reply
);
1404 request_buf
= ofpbuf_new(1024);
1405 dpif_linux_vport_to_ofpbuf(request
, request_buf
);
1406 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1407 ofpbuf_delete(request_buf
);
1411 error
= dpif_linux_vport_from_ofpbuf(reply
, *bufp
);
1414 dpif_linux_vport_init(reply
);
1415 ofpbuf_delete(*bufp
);
1422 /* Obtains information about the kernel vport named 'name' and stores it into
1423 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1424 * longer needed ('reply' will contain pointers into '*bufp'). */
1426 dpif_linux_vport_get(const char *name
, struct dpif_linux_vport
*reply
,
1427 struct ofpbuf
**bufp
)
1429 struct dpif_linux_vport request
;
1431 dpif_linux_vport_init(&request
);
1432 request
.cmd
= OVS_VPORT_CMD_GET
;
1433 request
.name
= name
;
1435 return dpif_linux_vport_transact(&request
, reply
, bufp
);
1438 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1439 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
1440 * positive errno value.
1442 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
1443 * while 'dp' is still in use. */
1445 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*dp
, const struct ofpbuf
*buf
)
1447 static const struct nl_policy ovs_datapath_policy
[] = {
1448 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1449 [OVS_DP_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_dp_stats
),
1453 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
1454 struct ovs_header
*ovs_header
;
1455 struct nlmsghdr
*nlmsg
;
1456 struct genlmsghdr
*genl
;
1459 dpif_linux_dp_init(dp
);
1461 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1462 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1463 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1464 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1465 if (!nlmsg
|| !genl
|| !ovs_header
1466 || nlmsg
->nlmsg_type
!= ovs_datapath_family
1467 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
1468 ARRAY_SIZE(ovs_datapath_policy
))) {
1472 dp
->cmd
= genl
->cmd
;
1473 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
1474 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
1475 if (a
[OVS_DP_ATTR_STATS
]) {
1476 /* Can't use structure assignment because Netlink doesn't ensure
1477 * sufficient alignment for 64-bit members. */
1478 memcpy(&dp
->stats
, nl_attr_get(a
[OVS_DP_ATTR_STATS
]),
1485 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
1487 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp
*dp
, struct ofpbuf
*buf
)
1489 struct ovs_header
*ovs_header
;
1491 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
1492 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
,
1493 OVS_DATAPATH_VERSION
);
1495 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1496 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
1499 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
1502 if (dp
->upcall_pid
) {
1503 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, *dp
->upcall_pid
);
1506 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
1509 /* Clears 'dp' to "empty" values. */
1511 dpif_linux_dp_init(struct dpif_linux_dp
*dp
)
1513 memset(dp
, 0, sizeof *dp
);
1517 dpif_linux_dp_dump_start(struct nl_dump
*dump
)
1519 struct dpif_linux_dp request
;
1522 dpif_linux_dp_init(&request
);
1523 request
.cmd
= OVS_DP_CMD_GET
;
1525 buf
= ofpbuf_new(1024);
1526 dpif_linux_dp_to_ofpbuf(&request
, buf
);
1527 nl_dump_start(dump
, genl_sock
, buf
);
1531 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1532 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1533 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1534 * result of the command is expected to be of the same form, which is decoded
1535 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1536 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1538 dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
1539 struct dpif_linux_dp
*reply
, struct ofpbuf
**bufp
)
1541 struct ofpbuf
*request_buf
;
1544 assert((reply
!= NULL
) == (bufp
!= NULL
));
1546 request_buf
= ofpbuf_new(1024);
1547 dpif_linux_dp_to_ofpbuf(request
, request_buf
);
1548 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1549 ofpbuf_delete(request_buf
);
1553 error
= dpif_linux_dp_from_ofpbuf(reply
, *bufp
);
1556 dpif_linux_dp_init(reply
);
1557 ofpbuf_delete(*bufp
);
1564 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
1565 * The caller must free '*bufp' when the reply is no longer needed ('reply'
1566 * will contain pointers into '*bufp'). */
1568 dpif_linux_dp_get(const struct dpif
*dpif_
, struct dpif_linux_dp
*reply
,
1569 struct ofpbuf
**bufp
)
1571 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1572 struct dpif_linux_dp request
;
1574 dpif_linux_dp_init(&request
);
1575 request
.cmd
= OVS_DP_CMD_GET
;
1576 request
.dp_ifindex
= dpif
->dp_ifindex
;
1578 return dpif_linux_dp_transact(&request
, reply
, bufp
);
1581 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1582 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1583 * positive errno value.
1585 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1586 * while 'flow' is still in use. */
1588 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*flow
,
1589 const struct ofpbuf
*buf
)
1591 static const struct nl_policy ovs_flow_policy
[] = {
1592 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1593 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1594 [OVS_FLOW_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_flow_stats
),
1596 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
1597 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
1598 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
1601 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
1602 struct ovs_header
*ovs_header
;
1603 struct nlmsghdr
*nlmsg
;
1604 struct genlmsghdr
*genl
;
1607 dpif_linux_flow_init(flow
);
1609 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1610 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1611 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1612 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1613 if (!nlmsg
|| !genl
|| !ovs_header
1614 || nlmsg
->nlmsg_type
!= ovs_flow_family
1615 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
1616 ARRAY_SIZE(ovs_flow_policy
))) {
1620 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
1621 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
1622 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
1623 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
1624 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1625 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
1626 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
1628 if (a
[OVS_FLOW_ATTR_STATS
]) {
1629 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
1631 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
1632 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
1634 if (a
[OVS_FLOW_ATTR_USED
]) {
1635 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
1640 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1641 * followed by Netlink attributes corresponding to 'flow'. */
1643 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*flow
,
1646 struct ovs_header
*ovs_header
;
1648 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
1649 NLM_F_REQUEST
| flow
->nlmsg_flags
,
1650 flow
->cmd
, OVS_FLOW_VERSION
);
1652 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1653 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
1655 if (flow
->key_len
) {
1656 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
, flow
->key
, flow
->key_len
);
1659 if (flow
->actions
|| flow
->actions_len
) {
1660 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
1661 flow
->actions
, flow
->actions_len
);
1664 /* We never need to send these to the kernel. */
1665 assert(!flow
->stats
);
1666 assert(!flow
->tcp_flags
);
1667 assert(!flow
->used
);
1670 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
1674 /* Clears 'flow' to "empty" values. */
1676 dpif_linux_flow_init(struct dpif_linux_flow
*flow
)
1678 memset(flow
, 0, sizeof *flow
);
1681 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1682 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1683 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1684 * result of the command is expected to be a flow also, which is decoded and
1685 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
1686 * is no longer needed ('reply' will contain pointers into '*bufp'). */
1688 dpif_linux_flow_transact(struct dpif_linux_flow
*request
,
1689 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
1691 struct ofpbuf
*request_buf
;
1694 assert((reply
!= NULL
) == (bufp
!= NULL
));
1697 request
->nlmsg_flags
|= NLM_F_ECHO
;
1700 request_buf
= ofpbuf_new(1024);
1701 dpif_linux_flow_to_ofpbuf(request
, request_buf
);
1702 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1703 ofpbuf_delete(request_buf
);
1707 error
= dpif_linux_flow_from_ofpbuf(reply
, *bufp
);
1710 dpif_linux_flow_init(reply
);
1711 ofpbuf_delete(*bufp
);
1719 dpif_linux_flow_get_stats(const struct dpif_linux_flow
*flow
,
1720 struct dpif_flow_stats
*stats
)
1723 stats
->n_packets
= get_unaligned_u64(&flow
->stats
->n_packets
);
1724 stats
->n_bytes
= get_unaligned_u64(&flow
->stats
->n_bytes
);
1726 stats
->n_packets
= 0;
1729 stats
->used
= flow
->used
? get_32aligned_u64(flow
->used
) : 0;
1730 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;