/*
 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include "dpif-linux.h"
27 #include <linux/types.h>
28 #include <linux/pkt_sched.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/sockios.h>
36 #include "dpif-provider.h"
37 #include "dynamic-string.h"
40 #include "netdev-linux.h"
41 #include "netdev-vport.h"
42 #include "netlink-notifier.h"
43 #include "netlink-socket.h"
47 #include "openvswitch/datapath-compat.h"
48 #include "openvswitch/tunnel.h"
50 #include "poll-loop.h"
54 #include "unaligned.h"
58 VLOG_DEFINE_THIS_MODULE(dpif_linux
);
60 enum { LRU_MAX_PORTS
= 1024 };
61 enum { LRU_MASK
= LRU_MAX_PORTS
- 1};
62 BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS
));
64 enum { N_UPCALL_SOCKS
= 16 };
65 BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS
));
67 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
68 * missing if we have old headers. */
69 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
71 struct dpif_linux_dp
{
72 /* Generic Netlink header. */
75 /* struct ovs_header. */
79 const char *name
; /* OVS_DP_ATTR_NAME. */
80 uint32_t upcall_pid
; /* OVS_DP_UPCALL_PID. */
81 struct ovs_dp_stats stats
; /* OVS_DP_ATTR_STATS. */
82 enum ovs_frag_handling ipv4_frags
; /* OVS_DP_ATTR_IPV4_FRAGS. */
83 const uint32_t *sampling
; /* OVS_DP_ATTR_SAMPLING. */
/* Helpers for datapath messages; declared here, used throughout the file. */
static void dpif_linux_dp_init(struct dpif_linux_dp *);
static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *,
                                     const struct ofpbuf *);
static void dpif_linux_dp_dump_start(struct nl_dump *);
static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
                                  struct dpif_linux_dp *reply,
                                  struct ofpbuf **bufp);
static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
                             struct ofpbuf **bufp);
/* Userspace representation of one OVS flow Netlink message. */
struct dpif_linux_flow {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    unsigned int nlmsg_flags;
    int dp_ifindex;

    /* Attributes.
     *
     * The 'stats' and 'used' members point to 64-bit data that might only be
     * aligned on 32-bit boundaries, so get_unaligned_u64() should be used to
     * access their values.
     *
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
     * the Netlink version of the command, even if actions_len is zero. */
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    uint32_t upcall_pid;                /* OVS_FLOW_ATTR_UPCALL_PID. */
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
    const uint64_t *used;               /* OVS_FLOW_ATTR_USED. */
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
};
/* Helpers for flow messages; declared here, used throughout the file. */
static void dpif_linux_flow_init(struct dpif_linux_flow *);
static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *,
                                       const struct ofpbuf *);
static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *,
                                      struct ofpbuf *);
static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
                                    struct dpif_linux_flow *reply,
                                    struct ofpbuf **bufp);
static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
                                      struct dpif_flow_stats *);
134 /* Datapath interface for the openvswitch Linux kernel module. */
139 /* Upcall messages. */
140 struct nl_sock
*upcall_socks
[N_UPCALL_SOCKS
];
141 int last_read_upcall
;
142 unsigned int listen_mask
;
144 /* Change notification. */
145 struct sset changed_ports
; /* Ports that have changed. */
146 struct nln_notifier
*port_notifier
;
149 /* Queue of unused ports. */
150 unsigned long *lru_bitmap
;
151 uint16_t lru_ports
[LRU_MAX_PORTS
];
156 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
158 /* Generic Netlink family numbers for OVS. */
159 static int ovs_datapath_family
;
160 static int ovs_vport_family
;
161 static int ovs_flow_family
;
162 static int ovs_packet_family
;
164 /* Generic Netlink socket. */
165 static struct nl_sock
*genl_sock
;
166 static struct nln
*nln
= NULL
;
168 static int dpif_linux_init(void);
169 static void open_dpif(const struct dpif_linux_dp
*, struct dpif
**);
170 static bool dpif_linux_nln_parse(struct ofpbuf
*, void *);
171 static void dpif_linux_port_changed(const void *vport
, void *dpif
);
172 static uint32_t get_upcall_pid_port(struct dpif_linux
*, uint32_t port
);
173 static uint32_t get_upcall_pid_flow(struct dpif_linux
*,
174 const struct nlattr
*key
, size_t key_len
);
176 static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*,
178 static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*,
179 const struct ofpbuf
*);
181 static struct dpif_linux
*
182 dpif_linux_cast(const struct dpif
*dpif
)
184 dpif_assert_class(dpif
, &dpif_linux_class
);
185 return CONTAINER_OF(dpif
, struct dpif_linux
, dpif
);
189 dpif_linux_push_port(struct dpif_linux
*dp
, uint16_t port
)
191 if (port
< LRU_MAX_PORTS
&& !bitmap_is_set(dp
->lru_bitmap
, port
)) {
192 bitmap_set1(dp
->lru_bitmap
, port
);
193 dp
->lru_ports
[dp
->lru_head
++ & LRU_MASK
] = port
;
198 dpif_linux_pop_port(struct dpif_linux
*dp
)
202 if (dp
->lru_head
== dp
->lru_tail
) {
206 port
= dp
->lru_ports
[dp
->lru_tail
++ & LRU_MASK
];
207 bitmap_set0(dp
->lru_bitmap
, port
);
212 dpif_linux_enumerate(struct sset
*all_dps
)
218 error
= dpif_linux_init();
223 dpif_linux_dp_dump_start(&dump
);
224 while (nl_dump_next(&dump
, &msg
)) {
225 struct dpif_linux_dp dp
;
227 if (!dpif_linux_dp_from_ofpbuf(&dp
, &msg
)) {
228 sset_add(all_dps
, dp
.name
);
231 return nl_dump_done(&dump
);
235 dpif_linux_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
236 bool create
, struct dpif
**dpifp
)
238 struct dpif_linux_dp dp_request
, dp
;
242 error
= dpif_linux_init();
247 /* Create or look up datapath. */
248 dpif_linux_dp_init(&dp_request
);
249 dp_request
.cmd
= create
? OVS_DP_CMD_NEW
: OVS_DP_CMD_GET
;
250 dp_request
.name
= name
;
251 error
= dpif_linux_dp_transact(&dp_request
, &dp
, &buf
);
256 open_dpif(&dp
, dpifp
);
262 open_dpif(const struct dpif_linux_dp
*dp
, struct dpif
**dpifp
)
264 struct dpif_linux
*dpif
;
267 dpif
= xzalloc(sizeof *dpif
);
268 dpif
->port_notifier
= nln_notifier_create(nln
, dpif_linux_port_changed
,
271 dpif_init(&dpif
->dpif
, &dpif_linux_class
, dp
->name
,
272 dp
->dp_ifindex
, dp
->dp_ifindex
);
274 dpif
->dp_ifindex
= dp
->dp_ifindex
;
275 sset_init(&dpif
->changed_ports
);
276 *dpifp
= &dpif
->dpif
;
278 dpif
->lru_bitmap
= bitmap_allocate(LRU_MAX_PORTS
);
279 bitmap_set1(dpif
->lru_bitmap
, OVSP_LOCAL
);
280 for (i
= 1; i
< LRU_MAX_PORTS
; i
++) {
281 dpif_linux_push_port(dpif
, i
);
286 destroy_upcall_socks(struct dpif_linux
*dpif
)
290 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
291 nl_sock_destroy(dpif
->upcall_socks
[i
]);
292 dpif
->upcall_socks
[i
] = NULL
;
297 dpif_linux_close(struct dpif
*dpif_
)
299 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
301 nln_notifier_destroy(dpif
->port_notifier
);
302 destroy_upcall_socks(dpif
);
303 sset_destroy(&dpif
->changed_ports
);
304 free(dpif
->lru_bitmap
);
309 dpif_linux_destroy(struct dpif
*dpif_
)
311 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
312 struct dpif_linux_dp dp
;
314 dpif_linux_dp_init(&dp
);
315 dp
.cmd
= OVS_DP_CMD_DEL
;
316 dp
.dp_ifindex
= dpif
->dp_ifindex
;
317 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
/* NOTE(review): only the signature of dpif_linux_run() survives in this copy
 * of the file; its return-type line and whole body are missing.  Restore them
 * from the upstream source before building. */
321 dpif_linux_run(struct dpif
*dpif OVS_UNUSED
)
/* NOTE(review): only the signature of dpif_linux_wait() survives in this copy
 * of the file; its return-type line and whole body are missing.  Restore them
 * from the upstream source before building. */
329 dpif_linux_wait(struct dpif
*dpif OVS_UNUSED
)
337 dpif_linux_get_stats(const struct dpif
*dpif_
, struct ovs_dp_stats
*stats
)
339 struct dpif_linux_dp dp
;
343 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
352 dpif_linux_get_drop_frags(const struct dpif
*dpif_
, bool *drop_fragsp
)
354 struct dpif_linux_dp dp
;
358 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
360 *drop_fragsp
= dp
.ipv4_frags
== OVS_DP_FRAG_DROP
;
367 dpif_linux_set_drop_frags(struct dpif
*dpif_
, bool drop_frags
)
369 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
370 struct dpif_linux_dp dp
;
372 dpif_linux_dp_init(&dp
);
373 dp
.cmd
= OVS_DP_CMD_SET
;
374 dp
.dp_ifindex
= dpif
->dp_ifindex
;
375 dp
.ipv4_frags
= drop_frags
? OVS_DP_FRAG_DROP
: OVS_DP_FRAG_ZERO
;
376 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
380 dpif_linux_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
383 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
384 const char *name
= netdev_get_name(netdev
);
385 const char *type
= netdev_get_type(netdev
);
386 struct dpif_linux_vport request
, reply
;
387 const struct ofpbuf
*options
;
391 dpif_linux_vport_init(&request
);
392 request
.cmd
= OVS_VPORT_CMD_NEW
;
393 request
.dp_ifindex
= dpif
->dp_ifindex
;
394 request
.type
= netdev_vport_get_vport_type(netdev
);
395 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
396 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
397 "unsupported type `%s'",
398 dpif_name(dpif_
), name
, type
);
403 options
= netdev_vport_get_options(netdev
);
404 if (options
&& options
->size
) {
405 request
.options
= options
->data
;
406 request
.options_len
= options
->size
;
409 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
410 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
413 /* Loop until we find a port that isn't used. */
415 request
.port_no
= dpif_linux_pop_port(dpif
);
416 request
.upcall_pid
= get_upcall_pid_port(dpif
, request
.port_no
);
417 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
420 *port_nop
= reply
.port_no
;
421 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink "
423 dpif_name(dpif_
), request
.port_no
,
427 } while (request
.port_no
!= UINT32_MAX
428 && (error
== EBUSY
|| error
== EFBIG
));
434 dpif_linux_port_del(struct dpif
*dpif_
, uint16_t port_no
)
436 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
437 struct dpif_linux_vport vport
;
440 dpif_linux_vport_init(&vport
);
441 vport
.cmd
= OVS_VPORT_CMD_DEL
;
442 vport
.dp_ifindex
= dpif
->dp_ifindex
;
443 vport
.port_no
= port_no
;
444 error
= dpif_linux_vport_transact(&vport
, NULL
, NULL
);
447 dpif_linux_push_port(dpif
, port_no
);
453 dpif_linux_port_query__(const struct dpif
*dpif
, uint32_t port_no
,
454 const char *port_name
, struct dpif_port
*dpif_port
)
456 struct dpif_linux_vport request
;
457 struct dpif_linux_vport reply
;
461 dpif_linux_vport_init(&request
);
462 request
.cmd
= OVS_VPORT_CMD_GET
;
463 request
.dp_ifindex
= dpif_linux_cast(dpif
)->dp_ifindex
;
464 request
.port_no
= port_no
;
465 request
.name
= port_name
;
467 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
469 dpif_port
->name
= xstrdup(reply
.name
);
470 dpif_port
->type
= xstrdup(netdev_vport_get_netdev_type(&reply
));
471 dpif_port
->port_no
= reply
.port_no
;
/* Looks up port 'port_no' via dpif_linux_port_query__() with no name. */
static int
dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                struct dpif_port *dpif_port)
{
    const char *no_name = NULL;

    return dpif_linux_port_query__(dpif, port_no, no_name, dpif_port);
}
/* Looks up the port named 'devname' via dpif_linux_port_query__(), passing 0
 * as the port number. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    const uint32_t port_no = 0;

    return dpif_linux_port_query__(dpif, port_no, devname, dpif_port);
}
492 dpif_linux_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
494 /* If the datapath increases its range of supported ports, then it should
495 * start reporting that. */
500 dpif_linux_flow_flush(struct dpif
*dpif_
)
502 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
503 struct dpif_linux_flow flow
;
505 dpif_linux_flow_init(&flow
);
506 flow
.cmd
= OVS_FLOW_CMD_DEL
;
507 flow
.dp_ifindex
= dpif
->dp_ifindex
;
508 return dpif_linux_flow_transact(&flow
, NULL
, NULL
);
511 struct dpif_linux_port_state
{
513 unsigned long *port_bitmap
; /* Ports in the datapath. */
514 bool complete
; /* Dump completed without error. */
518 dpif_linux_port_dump_start(const struct dpif
*dpif_
, void **statep
)
520 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
521 struct dpif_linux_port_state
*state
;
522 struct dpif_linux_vport request
;
525 *statep
= state
= xmalloc(sizeof *state
);
526 state
->port_bitmap
= bitmap_allocate(LRU_MAX_PORTS
);
527 state
->complete
= false;
529 dpif_linux_vport_init(&request
);
530 request
.cmd
= OVS_DP_CMD_GET
;
531 request
.dp_ifindex
= dpif
->dp_ifindex
;
533 buf
= ofpbuf_new(1024);
534 dpif_linux_vport_to_ofpbuf(&request
, buf
);
535 nl_dump_start(&state
->dump
, genl_sock
, buf
);
542 dpif_linux_port_dump_next(const struct dpif
*dpif OVS_UNUSED
, void *state_
,
543 struct dpif_port
*dpif_port
)
545 struct dpif_linux_port_state
*state
= state_
;
546 struct dpif_linux_vport vport
;
550 if (!nl_dump_next(&state
->dump
, &buf
)) {
551 state
->complete
= true;
555 error
= dpif_linux_vport_from_ofpbuf(&vport
, &buf
);
560 if (vport
.port_no
< LRU_MAX_PORTS
) {
561 bitmap_set1(state
->port_bitmap
, vport
.port_no
);
564 dpif_port
->name
= (char *) vport
.name
;
565 dpif_port
->type
= (char *) netdev_vport_get_netdev_type(&vport
);
566 dpif_port
->port_no
= vport
.port_no
;
571 dpif_linux_port_dump_done(const struct dpif
*dpif_
, void *state_
)
573 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
574 struct dpif_linux_port_state
*state
= state_
;
575 int error
= nl_dump_done(&state
->dump
);
577 if (state
->complete
) {
580 for (i
= 0; i
< LRU_MAX_PORTS
; i
++) {
581 if (!bitmap_is_set(state
->port_bitmap
, i
)) {
582 dpif_linux_push_port(dpif
, i
);
587 free(state
->port_bitmap
);
593 dpif_linux_port_poll(const struct dpif
*dpif_
, char **devnamep
)
595 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
597 if (dpif
->change_error
) {
598 dpif
->change_error
= false;
599 sset_clear(&dpif
->changed_ports
);
601 } else if (!sset_is_empty(&dpif
->changed_ports
)) {
602 *devnamep
= sset_pop(&dpif
->changed_ports
);
610 dpif_linux_port_poll_wait(const struct dpif
*dpif_
)
612 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
613 if (!sset_is_empty(&dpif
->changed_ports
) || dpif
->change_error
) {
614 poll_immediate_wake();
619 dpif_linux_flow_get__(const struct dpif
*dpif_
,
620 const struct nlattr
*key
, size_t key_len
,
621 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
623 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
624 struct dpif_linux_flow request
;
626 dpif_linux_flow_init(&request
);
627 request
.cmd
= OVS_FLOW_CMD_GET
;
628 request
.dp_ifindex
= dpif
->dp_ifindex
;
630 request
.key_len
= key_len
;
631 return dpif_linux_flow_transact(&request
, reply
, bufp
);
635 dpif_linux_flow_get(const struct dpif
*dpif_
,
636 const struct nlattr
*key
, size_t key_len
,
637 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
639 struct dpif_linux_flow reply
;
643 error
= dpif_linux_flow_get__(dpif_
, key
, key_len
, &reply
, &buf
);
646 dpif_linux_flow_get_stats(&reply
, stats
);
649 buf
->data
= (void *) reply
.actions
;
650 buf
->size
= reply
.actions_len
;
660 dpif_linux_flow_put(struct dpif
*dpif_
, enum dpif_flow_put_flags flags
,
661 const struct nlattr
*key
, size_t key_len
,
662 const struct nlattr
*actions
, size_t actions_len
,
663 struct dpif_flow_stats
*stats
)
665 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
666 struct dpif_linux_flow request
, reply
;
667 struct nlattr dummy_action
;
671 dpif_linux_flow_init(&request
);
672 request
.cmd
= flags
& DPIF_FP_CREATE
? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
;
673 request
.dp_ifindex
= dpif
->dp_ifindex
;
675 request
.key_len
= key_len
;
676 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
677 request
.actions
= actions
? actions
: &dummy_action
;
678 request
.actions_len
= actions_len
;
679 request
.upcall_pid
= get_upcall_pid_flow(dpif
, key
, key_len
);
680 if (flags
& DPIF_FP_ZERO_STATS
) {
681 request
.clear
= true;
683 request
.nlmsg_flags
= flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
684 error
= dpif_linux_flow_transact(&request
,
685 stats
? &reply
: NULL
,
686 stats
? &buf
: NULL
);
687 if (!error
&& stats
) {
688 dpif_linux_flow_get_stats(&reply
, stats
);
695 dpif_linux_flow_del(struct dpif
*dpif_
,
696 const struct nlattr
*key
, size_t key_len
,
697 struct dpif_flow_stats
*stats
)
699 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
700 struct dpif_linux_flow request
, reply
;
704 dpif_linux_flow_init(&request
);
705 request
.cmd
= OVS_FLOW_CMD_DEL
;
706 request
.dp_ifindex
= dpif
->dp_ifindex
;
708 request
.key_len
= key_len
;
709 error
= dpif_linux_flow_transact(&request
,
710 stats
? &reply
: NULL
,
711 stats
? &buf
: NULL
);
712 if (!error
&& stats
) {
713 dpif_linux_flow_get_stats(&reply
, stats
);
719 struct dpif_linux_flow_state
{
721 struct dpif_linux_flow flow
;
722 struct dpif_flow_stats stats
;
727 dpif_linux_flow_dump_start(const struct dpif
*dpif_
, void **statep
)
729 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
730 struct dpif_linux_flow_state
*state
;
731 struct dpif_linux_flow request
;
734 *statep
= state
= xmalloc(sizeof *state
);
736 dpif_linux_flow_init(&request
);
737 request
.cmd
= OVS_DP_CMD_GET
;
738 request
.dp_ifindex
= dpif
->dp_ifindex
;
740 buf
= ofpbuf_new(1024);
741 dpif_linux_flow_to_ofpbuf(&request
, buf
);
742 nl_dump_start(&state
->dump
, genl_sock
, buf
);
751 dpif_linux_flow_dump_next(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
,
752 const struct nlattr
**key
, size_t *key_len
,
753 const struct nlattr
**actions
, size_t *actions_len
,
754 const struct dpif_flow_stats
**stats
)
756 struct dpif_linux_flow_state
*state
= state_
;
761 ofpbuf_delete(state
->buf
);
764 if (!nl_dump_next(&state
->dump
, &buf
)) {
768 error
= dpif_linux_flow_from_ofpbuf(&state
->flow
, &buf
);
773 if (actions
&& !state
->flow
.actions
) {
774 error
= dpif_linux_flow_get__(dpif_
, state
->flow
.key
,
776 &state
->flow
, &state
->buf
);
777 if (error
== ENOENT
) {
778 VLOG_DBG("dumped flow disappeared on get");
780 VLOG_WARN("error fetching dumped flow: %s", strerror(error
));
786 *actions
= state
->flow
.actions
;
787 *actions_len
= state
->flow
.actions_len
;
790 *key
= state
->flow
.key
;
791 *key_len
= state
->flow
.key_len
;
794 dpif_linux_flow_get_stats(&state
->flow
, &state
->stats
);
795 *stats
= &state
->stats
;
801 dpif_linux_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
803 struct dpif_linux_flow_state
*state
= state_
;
804 int error
= nl_dump_done(&state
->dump
);
805 ofpbuf_delete(state
->buf
);
811 dpif_linux_execute__(int dp_ifindex
, uint32_t upcall_pid
,
812 const struct nlattr
*key
, size_t key_len
,
813 const struct nlattr
*actions
, size_t actions_len
,
814 const struct ofpbuf
*packet
)
816 struct ovs_header
*execute
;
820 buf
= ofpbuf_new(128 + actions_len
+ packet
->size
);
822 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
823 OVS_PACKET_CMD_EXECUTE
, 1);
825 execute
= ofpbuf_put_uninit(buf
, sizeof *execute
);
826 execute
->dp_ifindex
= dp_ifindex
;
828 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
, packet
->data
, packet
->size
);
829 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_KEY
, key
, key_len
);
830 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
, actions
, actions_len
);
831 nl_msg_put_u32(buf
, OVS_PACKET_ATTR_UPCALL_PID
, upcall_pid
);
833 error
= nl_sock_transact(genl_sock
, buf
, NULL
);
839 dpif_linux_execute(struct dpif
*dpif_
,
840 const struct nlattr
*key
, size_t key_len
,
841 const struct nlattr
*actions
, size_t actions_len
,
842 const struct ofpbuf
*packet
)
844 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
845 uint32_t upcall_pid
= get_upcall_pid_flow(dpif
, key
, key_len
);
847 return dpif_linux_execute__(dpif
->dp_ifindex
, upcall_pid
, key
, key_len
,
848 actions
, actions_len
, packet
);
852 dpif_linux_recv_get_mask(const struct dpif
*dpif_
, int *listen_mask
)
854 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
855 *listen_mask
= dpif
->listen_mask
;
860 get_upcall_pid_port__(struct dpif_linux
*dpif
, uint32_t port
)
862 int idx
= port
& (N_UPCALL_SOCKS
- 1);
863 return nl_sock_pid(dpif
->upcall_socks
[idx
]);
867 get_upcall_pid_port(struct dpif_linux
*dpif
, uint32_t port
)
869 if (!(dpif
->listen_mask
& (1u << DPIF_UC_MISS
))) {
873 return get_upcall_pid_port__(dpif
, port
);
877 get_upcall_pid_flow(struct dpif_linux
*dpif
,
878 const struct nlattr
*key
, size_t key_len
)
880 const struct nlattr
*nla
;
883 if (!(dpif
->listen_mask
&
884 ((1u << DPIF_UC_ACTION
) | (1u << DPIF_UC_SAMPLE
)))) {
888 nla
= nl_attr_find__(key
, key_len
, OVS_KEY_ATTR_IN_PORT
);
890 port
= nl_attr_get_u32(nla
);
892 port
= random_uint32();
895 return get_upcall_pid_port__(dpif
, port
);
899 set_upcall_pids(struct dpif_linux
*dpif
)
901 struct dpif_port port
;
902 struct dpif_port_dump port_dump
;
903 struct dpif_flow_dump flow_dump
;
904 const struct nlattr
*key
;
908 DPIF_PORT_FOR_EACH (&port
, &port_dump
, &dpif
->dpif
) {
909 struct dpif_linux_vport vport_request
;
911 dpif_linux_vport_init(&vport_request
);
912 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
913 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
914 vport_request
.port_no
= port
.port_no
;
915 vport_request
.upcall_pid
= get_upcall_pid_port(dpif
,
916 vport_request
.port_no
);
917 error
= dpif_linux_vport_transact(&vport_request
, NULL
, NULL
);
919 VLOG_DBG("%s: assigning port %"PRIu32
" to netlink "
921 dpif_name(&dpif
->dpif
), vport_request
.port_no
,
922 vport_request
.upcall_pid
);
924 VLOG_WARN_RL(&error_rl
, "%s: failed to set upcall pid on port: %s",
925 dpif_name(&dpif
->dpif
), strerror(error
));
929 dpif_flow_dump_start(&flow_dump
, &dpif
->dpif
);
930 while (dpif_flow_dump_next(&flow_dump
, &key
, &key_len
,
932 struct dpif_linux_flow flow_request
;
934 dpif_linux_flow_init(&flow_request
);
935 flow_request
.cmd
= OVS_FLOW_CMD_SET
;
936 flow_request
.dp_ifindex
= dpif
->dp_ifindex
;
937 flow_request
.key
= key
;
938 flow_request
.key_len
= key_len
;
939 flow_request
.upcall_pid
= get_upcall_pid_flow(dpif
, key
, key_len
);
940 error
= dpif_linux_flow_transact(&flow_request
, NULL
, NULL
);
942 VLOG_WARN_RL(&error_rl
, "%s: failed to set upcall pid on flow: %s",
943 dpif_name(&dpif
->dpif
), strerror(error
));
946 dpif_flow_dump_done(&flow_dump
);
950 dpif_linux_recv_set_mask(struct dpif
*dpif_
, int listen_mask
)
952 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
954 if (listen_mask
== dpif
->listen_mask
) {
959 destroy_upcall_socks(dpif
);
960 } else if (!dpif
->listen_mask
) {
964 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
965 error
= nl_sock_create(NETLINK_GENERIC
, &dpif
->upcall_socks
[i
]);
967 destroy_upcall_socks(dpif
);
973 dpif
->listen_mask
= listen_mask
;
974 set_upcall_pids(dpif
);
980 dpif_linux_get_sflow_probability(const struct dpif
*dpif_
,
981 uint32_t *probability
)
983 struct dpif_linux_dp dp
;
987 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
989 *probability
= dp
.sampling
? *dp
.sampling
: 0;
996 dpif_linux_set_sflow_probability(struct dpif
*dpif_
, uint32_t probability
)
998 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
999 struct dpif_linux_dp dp
;
1001 dpif_linux_dp_init(&dp
);
1002 dp
.cmd
= OVS_DP_CMD_SET
;
1003 dp
.dp_ifindex
= dpif
->dp_ifindex
;
1004 dp
.sampling
= &probability
;
1005 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
1009 dpif_linux_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
1010 uint32_t queue_id
, uint32_t *priority
)
1012 if (queue_id
< 0xf000) {
1013 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
1021 parse_odp_packet(struct ofpbuf
*buf
, struct dpif_upcall
*upcall
,
1024 static const struct nl_policy ovs_packet_policy
[] = {
1025 /* Always present. */
1026 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
1027 .min_len
= ETH_HEADER_LEN
},
1028 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1030 /* OVS_PACKET_CMD_ACTION only. */
1031 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_U64
, .optional
= true },
1033 /* OVS_PACKET_CMD_SAMPLE only. */
1034 [OVS_PACKET_ATTR_SAMPLE_POOL
] = { .type
= NL_A_U32
, .optional
= true },
1035 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1038 struct ovs_header
*ovs_header
;
1039 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
1040 struct nlmsghdr
*nlmsg
;
1041 struct genlmsghdr
*genl
;
1045 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1047 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1048 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1049 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1050 if (!nlmsg
|| !genl
|| !ovs_header
1051 || nlmsg
->nlmsg_type
!= ovs_packet_family
1052 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
1053 ARRAY_SIZE(ovs_packet_policy
))) {
1057 type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
1058 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
1059 : genl
->cmd
== OVS_PACKET_CMD_SAMPLE
? DPIF_UC_SAMPLE
1065 memset(upcall
, 0, sizeof *upcall
);
1066 upcall
->type
= type
;
1067 upcall
->packet
= buf
;
1068 upcall
->packet
->data
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
]);
1069 upcall
->packet
->size
= nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]);
1070 upcall
->key
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]);
1071 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
1072 upcall
->userdata
= (a
[OVS_PACKET_ATTR_USERDATA
]
1073 ? nl_attr_get_u64(a
[OVS_PACKET_ATTR_USERDATA
])
1075 upcall
->sample_pool
= (a
[OVS_PACKET_ATTR_SAMPLE_POOL
]
1076 ? nl_attr_get_u32(a
[OVS_PACKET_ATTR_SAMPLE_POOL
])
1078 if (a
[OVS_PACKET_ATTR_ACTIONS
]) {
1079 upcall
->actions
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_ACTIONS
]);
1080 upcall
->actions_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_ACTIONS
]);
1083 *dp_ifindex
= ovs_header
->dp_ifindex
;
1089 dpif_linux_recv(struct dpif
*dpif_
, struct dpif_upcall
*upcall
)
1091 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1095 if (!dpif
->listen_mask
) {
1099 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
1100 struct nl_sock
*upcall_sock
;
1101 dpif
->last_read_upcall
= (dpif
->last_read_upcall
+ 1) &
1102 (N_UPCALL_SOCKS
- 1);
1103 upcall_sock
= dpif
->upcall_socks
[dpif
->last_read_upcall
];
1105 if (nl_sock_woke(upcall_sock
)) {
1112 if (++read_tries
> 50) {
1116 error
= nl_sock_recv(upcall_sock
, &buf
, false);
1117 if (error
== EAGAIN
) {
1123 error
= parse_odp_packet(buf
, upcall
, &dp_ifindex
);
1125 && dp_ifindex
== dpif
->dp_ifindex
1126 && dpif
->listen_mask
& (1u << upcall
->type
)) {
1142 dpif_linux_recv_wait(struct dpif
*dpif_
)
1144 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1147 if (!dpif
->listen_mask
) {
1151 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
1152 nl_sock_wait(dpif
->upcall_socks
[i
], POLLIN
);
1157 dpif_linux_recv_purge(struct dpif
*dpif_
)
1159 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1162 if (!dpif
->listen_mask
) {
1166 for (i
= 0; i
< N_UPCALL_SOCKS
; i
++) {
1167 nl_sock_drain(dpif
->upcall_socks
[i
]);
1171 const struct dpif_class dpif_linux_class
= {
1173 dpif_linux_enumerate
,
1179 dpif_linux_get_stats
,
1180 dpif_linux_get_drop_frags
,
1181 dpif_linux_set_drop_frags
,
1182 dpif_linux_port_add
,
1183 dpif_linux_port_del
,
1184 dpif_linux_port_query_by_number
,
1185 dpif_linux_port_query_by_name
,
1186 dpif_linux_get_max_ports
,
1187 dpif_linux_port_dump_start
,
1188 dpif_linux_port_dump_next
,
1189 dpif_linux_port_dump_done
,
1190 dpif_linux_port_poll
,
1191 dpif_linux_port_poll_wait
,
1192 dpif_linux_flow_get
,
1193 dpif_linux_flow_put
,
1194 dpif_linux_flow_del
,
1195 dpif_linux_flow_flush
,
1196 dpif_linux_flow_dump_start
,
1197 dpif_linux_flow_dump_next
,
1198 dpif_linux_flow_dump_done
,
1200 dpif_linux_recv_get_mask
,
1201 dpif_linux_recv_set_mask
,
1202 dpif_linux_get_sflow_probability
,
1203 dpif_linux_set_sflow_probability
,
1204 dpif_linux_queue_to_priority
,
1206 dpif_linux_recv_wait
,
1207 dpif_linux_recv_purge
,
1211 dpif_linux_init(void)
1213 static int error
= -1;
1216 unsigned int ovs_vport_mcgroup
;
1218 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
1219 &ovs_datapath_family
);
1221 VLOG_ERR("Generic Netlink family '%s' does not exist. "
1222 "The Open vSwitch kernel module is probably not loaded.",
1223 OVS_DATAPATH_FAMILY
);
1226 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
1229 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
1232 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
1233 &ovs_packet_family
);
1236 error
= nl_sock_create(NETLINK_GENERIC
, &genl_sock
);
1239 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
1241 OVS_VPORT_MCGROUP_FALLBACK_ID
);
1244 static struct dpif_linux_vport vport
;
1245 nln
= nln_create(NETLINK_GENERIC
, ovs_vport_mcgroup
,
1246 dpif_linux_nln_parse
, &vport
);
1254 dpif_linux_is_internal_device(const char *name
)
1256 struct dpif_linux_vport reply
;
1260 error
= dpif_linux_vport_get(name
, &reply
, &buf
);
1263 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
1264 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
1265 name
, strerror(error
));
1268 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
1272 dpif_linux_vport_send(int dp_ifindex
, uint32_t port_no
,
1273 const void *data
, size_t size
)
1275 struct ofpbuf actions
, key
, packet
;
1276 struct odputil_keybuf keybuf
;
1280 ofpbuf_use_const(&packet
, data
, size
);
1281 flow_extract(&packet
, htonll(0), 0, &flow
);
1283 ofpbuf_use_stack(&key
, &keybuf
, sizeof keybuf
);
1284 odp_flow_key_from_flow(&key
, &flow
);
1286 ofpbuf_use_stack(&actions
, &action
, sizeof action
);
1287 nl_msg_put_u32(&actions
, OVS_ACTION_ATTR_OUTPUT
, port_no
);
1289 return dpif_linux_execute__(dp_ifindex
, 0, key
.data
, key
.size
,
1290 actions
.data
, actions
.size
, &packet
);
/* Notifier parse callback: parses 'buf' into the vport that 'vport_' points
 * to.  Returns true if parsing succeeded, false otherwise. */
static bool
dpif_linux_nln_parse(struct ofpbuf *buf, void *vport_)
{
    struct dpif_linux_vport *vport = vport_;
    int error = dpif_linux_vport_from_ofpbuf(vport, buf);

    return !error;
}
1301 dpif_linux_port_changed(const void *vport_
, void *dpif_
)
1303 const struct dpif_linux_vport
*vport
= vport_
;
1304 struct dpif_linux
*dpif
= dpif_
;
1307 if (vport
->dp_ifindex
== dpif
->dp_ifindex
1308 && (vport
->cmd
== OVS_VPORT_CMD_NEW
1309 || vport
->cmd
== OVS_VPORT_CMD_DEL
1310 || vport
->cmd
== OVS_VPORT_CMD_SET
)) {
1311 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1312 dpif
->dpif
.full_name
, vport
->name
, vport
->cmd
);
1313 sset_add(&dpif
->changed_ports
, vport
->name
);
1316 dpif
->change_error
= true;
1320 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1321 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1322 * positive errno value.
1324 * 'vport' will contain pointers into 'buf', so the caller should not free
1325 * 'buf' while 'vport' is still in use. */
1327 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*vport
,
1328 const struct ofpbuf
*buf
)
1330 static const struct nl_policy ovs_vport_policy
[] = {
1331 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
1332 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
1333 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1334 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_U32
},
1335 [OVS_VPORT_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1336 .min_len
= sizeof(struct ovs_vport_stats
),
1337 .max_len
= sizeof(struct ovs_vport_stats
),
1339 [OVS_VPORT_ATTR_ADDRESS
] = { .type
= NL_A_UNSPEC
,
1340 .min_len
= ETH_ADDR_LEN
,
1341 .max_len
= ETH_ADDR_LEN
,
1343 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1344 [OVS_VPORT_ATTR_IFINDEX
] = { .type
= NL_A_U32
, .optional
= true },
1347 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
1348 struct ovs_header
*ovs_header
;
1349 struct nlmsghdr
*nlmsg
;
1350 struct genlmsghdr
*genl
;
1353 dpif_linux_vport_init(vport
);
1355 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1356 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1357 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1358 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1359 if (!nlmsg
|| !genl
|| !ovs_header
1360 || nlmsg
->nlmsg_type
!= ovs_vport_family
1361 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
1362 ARRAY_SIZE(ovs_vport_policy
))) {
1366 vport
->cmd
= genl
->cmd
;
1367 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
1368 vport
->port_no
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1369 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1370 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
1371 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
1372 vport
->upcall_pid
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
1374 if (a
[OVS_VPORT_ATTR_STATS
]) {
1375 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
1377 if (a
[OVS_VPORT_ATTR_ADDRESS
]) {
1378 vport
->address
= nl_attr_get(a
[OVS_VPORT_ATTR_ADDRESS
]);
1380 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
1381 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
1382 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
1384 if (a
[OVS_VPORT_ATTR_IFINDEX
]) {
1385 vport
->ifindex
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_IFINDEX
]);
1390 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1391 * followed by Netlink attributes corresponding to 'vport'. */
1393 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*vport
,
1396 struct ovs_header
*ovs_header
;
1398 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
1401 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1402 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
1404 if (vport
->port_no
!= UINT32_MAX
) {
1405 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
1408 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
1409 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
1413 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
1416 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_UPCALL_PID
, vport
->upcall_pid
);
1419 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
1420 vport
->stats
, sizeof *vport
->stats
);
1423 if (vport
->address
) {
1424 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_ADDRESS
,
1425 vport
->address
, ETH_ADDR_LEN
);
1428 if (vport
->options
) {
1429 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
1430 vport
->options
, vport
->options_len
);
1433 if (vport
->ifindex
) {
1434 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_IFINDEX
, vport
->ifindex
);
1438 /* Clears 'vport' to "empty" values. */
1440 dpif_linux_vport_init(struct dpif_linux_vport
*vport
)
1442 memset(vport
, 0, sizeof *vport
);
1443 vport
->port_no
= UINT32_MAX
;
1446 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1447 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1448 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1449 * result of the command is expected to be an ovs_vport also, which is decoded
1450 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1451 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1453 dpif_linux_vport_transact(const struct dpif_linux_vport
*request
,
1454 struct dpif_linux_vport
*reply
,
1455 struct ofpbuf
**bufp
)
1457 struct ofpbuf
*request_buf
;
1460 assert((reply
!= NULL
) == (bufp
!= NULL
));
1462 error
= dpif_linux_init();
1466 dpif_linux_vport_init(reply
);
1471 request_buf
= ofpbuf_new(1024);
1472 dpif_linux_vport_to_ofpbuf(request
, request_buf
);
1473 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1474 ofpbuf_delete(request_buf
);
1478 error
= dpif_linux_vport_from_ofpbuf(reply
, *bufp
);
1481 dpif_linux_vport_init(reply
);
1482 ofpbuf_delete(*bufp
);
1489 /* Obtains information about the kernel vport named 'name' and stores it into
1490 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1491 * longer needed ('reply' will contain pointers into '*bufp'). */
1493 dpif_linux_vport_get(const char *name
, struct dpif_linux_vport
*reply
,
1494 struct ofpbuf
**bufp
)
1496 struct dpif_linux_vport request
;
1498 dpif_linux_vport_init(&request
);
1499 request
.cmd
= OVS_VPORT_CMD_GET
;
1500 request
.name
= name
;
1502 return dpif_linux_vport_transact(&request
, reply
, bufp
);
1505 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1506 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
1507 * positive errno value.
1509 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
1510 * while 'dp' is still in use. */
1512 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*dp
, const struct ofpbuf
*buf
)
1514 static const struct nl_policy ovs_datapath_policy
[] = {
1515 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1516 [OVS_DP_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1517 .min_len
= sizeof(struct ovs_dp_stats
),
1518 .max_len
= sizeof(struct ovs_dp_stats
),
1520 [OVS_DP_ATTR_IPV4_FRAGS
] = { .type
= NL_A_U32
, .optional
= true },
1521 [OVS_DP_ATTR_SAMPLING
] = { .type
= NL_A_U32
, .optional
= true },
1524 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
1525 struct ovs_header
*ovs_header
;
1526 struct nlmsghdr
*nlmsg
;
1527 struct genlmsghdr
*genl
;
1530 dpif_linux_dp_init(dp
);
1532 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1533 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1534 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1535 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1536 if (!nlmsg
|| !genl
|| !ovs_header
1537 || nlmsg
->nlmsg_type
!= ovs_datapath_family
1538 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
1539 ARRAY_SIZE(ovs_datapath_policy
))) {
1543 dp
->cmd
= genl
->cmd
;
1544 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
1545 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
1546 if (a
[OVS_DP_ATTR_STATS
]) {
1547 /* Can't use structure assignment because Netlink doesn't ensure
1548 * sufficient alignment for 64-bit members. */
1549 memcpy(&dp
->stats
, nl_attr_get(a
[OVS_DP_ATTR_STATS
]),
1552 if (a
[OVS_DP_ATTR_IPV4_FRAGS
]) {
1553 dp
->ipv4_frags
= nl_attr_get_u32(a
[OVS_DP_ATTR_IPV4_FRAGS
]);
1555 if (a
[OVS_DP_ATTR_SAMPLING
]) {
1556 dp
->sampling
= nl_attr_get(a
[OVS_DP_ATTR_SAMPLING
]);
1562 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
1564 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp
*dp
, struct ofpbuf
*buf
)
1566 struct ovs_header
*ovs_header
;
1568 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
1569 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
, 1);
1571 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1572 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
1575 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
1578 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, dp
->upcall_pid
);
1580 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
1582 if (dp
->ipv4_frags
) {
1583 nl_msg_put_u32(buf
, OVS_DP_ATTR_IPV4_FRAGS
, dp
->ipv4_frags
);
1587 nl_msg_put_u32(buf
, OVS_DP_ATTR_SAMPLING
, *dp
->sampling
);
1591 /* Clears 'dp' to "empty" values. */
1593 dpif_linux_dp_init(struct dpif_linux_dp
*dp
)
1595 memset(dp
, 0, sizeof *dp
);
1599 dpif_linux_dp_dump_start(struct nl_dump
*dump
)
1601 struct dpif_linux_dp request
;
1604 dpif_linux_dp_init(&request
);
1605 request
.cmd
= OVS_DP_CMD_GET
;
1607 buf
= ofpbuf_new(1024);
1608 dpif_linux_dp_to_ofpbuf(&request
, buf
);
1609 nl_dump_start(dump
, genl_sock
, buf
);
1613 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1614 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1615 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1616 * result of the command is expected to be of the same form, which is decoded
1617 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1618 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1620 dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
1621 struct dpif_linux_dp
*reply
, struct ofpbuf
**bufp
)
1623 struct ofpbuf
*request_buf
;
1626 assert((reply
!= NULL
) == (bufp
!= NULL
));
1628 request_buf
= ofpbuf_new(1024);
1629 dpif_linux_dp_to_ofpbuf(request
, request_buf
);
1630 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1631 ofpbuf_delete(request_buf
);
1635 error
= dpif_linux_dp_from_ofpbuf(reply
, *bufp
);
1638 dpif_linux_dp_init(reply
);
1639 ofpbuf_delete(*bufp
);
1646 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
1647 * The caller must free '*bufp' when the reply is no longer needed ('reply'
1648 * will contain pointers into '*bufp'). */
1650 dpif_linux_dp_get(const struct dpif
*dpif_
, struct dpif_linux_dp
*reply
,
1651 struct ofpbuf
**bufp
)
1653 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1654 struct dpif_linux_dp request
;
1656 dpif_linux_dp_init(&request
);
1657 request
.cmd
= OVS_DP_CMD_GET
;
1658 request
.dp_ifindex
= dpif
->dp_ifindex
;
1660 return dpif_linux_dp_transact(&request
, reply
, bufp
);
1663 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1664 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1665 * positive errno value.
1667 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1668 * while 'flow' is still in use. */
1670 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*flow
,
1671 const struct ofpbuf
*buf
)
1673 static const struct nl_policy ovs_flow_policy
[] = {
1674 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1675 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1676 [OVS_FLOW_ATTR_UPCALL_PID
] = { .type
= NL_A_U32
},
1677 [OVS_FLOW_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1678 .min_len
= sizeof(struct ovs_flow_stats
),
1679 .max_len
= sizeof(struct ovs_flow_stats
),
1681 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
1682 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
1683 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
1686 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
1687 struct ovs_header
*ovs_header
;
1688 struct nlmsghdr
*nlmsg
;
1689 struct genlmsghdr
*genl
;
1692 dpif_linux_flow_init(flow
);
1694 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1695 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1696 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1697 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1698 if (!nlmsg
|| !genl
|| !ovs_header
1699 || nlmsg
->nlmsg_type
!= ovs_flow_family
1700 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
1701 ARRAY_SIZE(ovs_flow_policy
))) {
1705 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
1706 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
1707 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
1708 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
1709 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1710 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
1711 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
1713 if (a
[OVS_FLOW_ATTR_UPCALL_PID
]) {
1714 flow
->upcall_pid
= nl_attr_get_u32(a
[OVS_FLOW_ATTR_UPCALL_PID
]);
1716 if (a
[OVS_FLOW_ATTR_STATS
]) {
1717 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
1719 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
1720 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
1722 if (a
[OVS_FLOW_ATTR_USED
]) {
1723 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
1728 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1729 * followed by Netlink attributes corresponding to 'flow'. */
1731 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*flow
,
1734 struct ovs_header
*ovs_header
;
1736 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
1737 NLM_F_REQUEST
| NLM_F_ECHO
| flow
->nlmsg_flags
,
1740 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1741 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
1743 if (flow
->key_len
) {
1744 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
, flow
->key
, flow
->key_len
);
1747 if (flow
->actions
|| flow
->actions_len
) {
1748 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
1749 flow
->actions
, flow
->actions_len
);
1752 nl_msg_put_u32(buf
, OVS_FLOW_ATTR_UPCALL_PID
, flow
->upcall_pid
);
1754 /* We never need to send these to the kernel. */
1755 assert(!flow
->stats
);
1756 assert(!flow
->tcp_flags
);
1757 assert(!flow
->used
);
1760 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
1764 /* Clears 'flow' to "empty" values. */
1766 dpif_linux_flow_init(struct dpif_linux_flow
*flow
)
1768 memset(flow
, 0, sizeof *flow
);
1771 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1772 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1773 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1774 * result of the command is expected to be a flow also, which is decoded and
1775 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
1776 * is no longer needed ('reply' will contain pointers into '*bufp'). */
1778 dpif_linux_flow_transact(const struct dpif_linux_flow
*request
,
1779 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
1781 struct ofpbuf
*request_buf
;
1784 assert((reply
!= NULL
) == (bufp
!= NULL
));
1786 request_buf
= ofpbuf_new(1024);
1787 dpif_linux_flow_to_ofpbuf(request
, request_buf
);
1788 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1789 ofpbuf_delete(request_buf
);
1793 error
= dpif_linux_flow_from_ofpbuf(reply
, *bufp
);
1796 dpif_linux_flow_init(reply
);
1797 ofpbuf_delete(*bufp
);
1805 dpif_linux_flow_get_stats(const struct dpif_linux_flow
*flow
,
1806 struct dpif_flow_stats
*stats
)
1809 stats
->n_packets
= get_unaligned_u64(&flow
->stats
->n_packets
);
1810 stats
->n_bytes
= get_unaligned_u64(&flow
->stats
->n_bytes
);
1812 stats
->n_packets
= 0;
1815 stats
->used
= flow
->used
? get_unaligned_u64(flow
->used
) : 0;
1816 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;