/*
 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include "dpif-linux.h"
27 #include <linux/types.h>
28 #include <linux/pkt_sched.h>
29 #include <linux/rtnetlink.h>
30 #include <linux/sockios.h>
36 #include "dpif-provider.h"
37 #include "dynamic-string.h"
40 #include "netdev-linux.h"
41 #include "netdev-vport.h"
42 #include "netlink-notifier.h"
43 #include "netlink-socket.h"
47 #include "openvswitch/tunnel.h"
49 #include "poll-loop.h"
50 #include "rtnetlink-link.h"
53 #include "unaligned.h"
57 VLOG_DEFINE_THIS_MODULE(dpif_linux
);
59 enum { LRU_MAX_PORTS
= 1024 };
60 enum { LRU_MASK
= LRU_MAX_PORTS
- 1};
61 BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS
));
63 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
64 * missing if we have old headers. */
65 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
67 struct dpif_linux_dp
{
68 /* Generic Netlink header. */
71 /* struct ovs_header. */
75 const char *name
; /* OVS_DP_ATTR_NAME. */
76 struct ovs_dp_stats stats
; /* OVS_DP_ATTR_STATS. */
77 enum ovs_frag_handling ipv4_frags
; /* OVS_DP_ATTR_IPV4_FRAGS. */
78 const uint32_t *sampling
; /* OVS_DP_ATTR_SAMPLING. */
79 uint32_t mcgroups
[DPIF_N_UC_TYPES
]; /* OVS_DP_ATTR_MCGROUPS. */
/* Helpers for building, sending, and parsing datapath messages. */
static void dpif_linux_dp_init(struct dpif_linux_dp *);
static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *,
                                     const struct ofpbuf *);
static void dpif_linux_dp_dump_start(struct nl_dump *);
static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
                                  struct dpif_linux_dp *reply,
                                  struct ofpbuf **bufp);
static int dpif_linux_dp_get(const struct dpif *,
                             struct dpif_linux_dp *reply,
                             struct ofpbuf **bufp);
/* Userspace representation of a flow Generic Netlink message. */
struct dpif_linux_flow {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    unsigned int nlmsg_flags;
    int dp_ifindex;

    /* Attributes.
     *
     * The 'stats' and 'used' members point to 64-bit data that might only be
     * aligned on 32-bit boundaries, so get_unaligned_u64() should be used to
     * access their values.
     *
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
     * the Netlink version of the command, even if actions_len is zero. */
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
    const uint64_t *used;               /* OVS_FLOW_ATTR_USED. */
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
};
/* Helpers for building, sending, and parsing flow messages. */
static void dpif_linux_flow_init(struct dpif_linux_flow *);
static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *,
                                       const struct ofpbuf *);
static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *,
                                      struct ofpbuf *);
static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
                                    struct dpif_linux_flow *reply,
                                    struct ofpbuf **bufp);
static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
                                      struct dpif_flow_stats *);
129 /* Datapath interface for the openvswitch Linux kernel module. */
134 /* Multicast group messages. */
135 struct nl_sock
*mc_sock
;
136 uint32_t mcgroups
[DPIF_N_UC_TYPES
];
137 unsigned int listen_mask
;
139 /* Change notification. */
140 struct sset changed_ports
; /* Ports that have changed. */
141 struct rtnetlink_notifier port_notifier
;
144 /* Queue of unused ports. */
145 unsigned long *lru_bitmap
;
146 uint16_t lru_ports
[LRU_MAX_PORTS
];
151 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
153 /* Generic Netlink family numbers for OVS. */
154 static int ovs_datapath_family
;
155 static int ovs_vport_family
;
156 static int ovs_flow_family
;
157 static int ovs_packet_family
;
159 /* Generic Netlink socket. */
160 static struct nl_sock
*genl_sock
;
static int dpif_linux_init(void);
static int open_dpif(const struct dpif_linux_dp *, struct dpif **);
static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
                                    void *dpif);

static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
                                       struct ofpbuf *);
static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *,
                                        const struct ofpbuf *);
172 static struct dpif_linux
*
173 dpif_linux_cast(const struct dpif
*dpif
)
175 dpif_assert_class(dpif
, &dpif_linux_class
);
176 return CONTAINER_OF(dpif
, struct dpif_linux
, dpif
);
180 dpif_linux_push_port(struct dpif_linux
*dp
, uint16_t port
)
182 if (port
< LRU_MAX_PORTS
&& !bitmap_is_set(dp
->lru_bitmap
, port
)) {
183 bitmap_set1(dp
->lru_bitmap
, port
);
184 dp
->lru_ports
[dp
->lru_head
++ & LRU_MASK
] = port
;
189 dpif_linux_pop_port(struct dpif_linux
*dp
)
193 if (dp
->lru_head
== dp
->lru_tail
) {
197 port
= dp
->lru_ports
[dp
->lru_tail
++ & LRU_MASK
];
198 bitmap_set0(dp
->lru_bitmap
, port
);
203 dpif_linux_enumerate(struct sset
*all_dps
)
209 error
= dpif_linux_init();
214 dpif_linux_dp_dump_start(&dump
);
215 while (nl_dump_next(&dump
, &msg
)) {
216 struct dpif_linux_dp dp
;
218 if (!dpif_linux_dp_from_ofpbuf(&dp
, &msg
)) {
219 sset_add(all_dps
, dp
.name
);
222 return nl_dump_done(&dump
);
226 dpif_linux_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
227 bool create
, struct dpif
**dpifp
)
229 struct dpif_linux_dp dp_request
, dp
;
233 error
= dpif_linux_init();
238 /* Create or look up datapath. */
239 dpif_linux_dp_init(&dp_request
);
240 dp_request
.cmd
= create
? OVS_DP_CMD_NEW
: OVS_DP_CMD_GET
;
241 dp_request
.name
= name
;
242 error
= dpif_linux_dp_transact(&dp_request
, &dp
, &buf
);
246 error
= open_dpif(&dp
, dpifp
);
253 open_dpif(const struct dpif_linux_dp
*dp
, struct dpif
**dpifp
)
255 struct dpif_linux
*dpif
;
259 dpif
= xmalloc(sizeof *dpif
);
260 error
= rtnetlink_link_notifier_register(&dpif
->port_notifier
,
261 dpif_linux_port_changed
, dpif
);
266 dpif_init(&dpif
->dpif
, &dpif_linux_class
, dp
->name
,
267 dp
->dp_ifindex
, dp
->dp_ifindex
);
269 dpif
->mc_sock
= NULL
;
270 for (i
= 0; i
< DPIF_N_UC_TYPES
; i
++) {
271 dpif
->mcgroups
[i
] = dp
->mcgroups
[i
];
273 dpif
->listen_mask
= 0;
274 dpif
->dp_ifindex
= dp
->dp_ifindex
;
275 sset_init(&dpif
->changed_ports
);
276 dpif
->change_error
= false;
277 *dpifp
= &dpif
->dpif
;
279 dpif
->lru_head
= dpif
->lru_tail
= 0;
280 dpif
->lru_bitmap
= bitmap_allocate(LRU_MAX_PORTS
);
281 bitmap_set1(dpif
->lru_bitmap
, OVSP_LOCAL
);
282 for (i
= 1; i
< LRU_MAX_PORTS
; i
++) {
283 dpif_linux_push_port(dpif
, i
);
293 dpif_linux_close(struct dpif
*dpif_
)
295 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
296 nl_sock_destroy(dpif
->mc_sock
);
297 rtnetlink_link_notifier_unregister(&dpif
->port_notifier
);
298 sset_destroy(&dpif
->changed_ports
);
299 free(dpif
->lru_bitmap
);
304 dpif_linux_destroy(struct dpif
*dpif_
)
306 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
307 struct dpif_linux_dp dp
;
309 dpif_linux_dp_init(&dp
);
310 dp
.cmd
= OVS_DP_CMD_DEL
;
311 dp
.dp_ifindex
= dpif
->dp_ifindex
;
312 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
316 dpif_linux_run(struct dpif
*dpif OVS_UNUSED
)
318 rtnetlink_link_notifier_run();
322 dpif_linux_wait(struct dpif
*dpif OVS_UNUSED
)
324 rtnetlink_link_notifier_wait();
328 dpif_linux_get_stats(const struct dpif
*dpif_
, struct ovs_dp_stats
*stats
)
330 struct dpif_linux_dp dp
;
334 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
343 dpif_linux_get_drop_frags(const struct dpif
*dpif_
, bool *drop_fragsp
)
345 struct dpif_linux_dp dp
;
349 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
351 *drop_fragsp
= dp
.ipv4_frags
== OVS_DP_FRAG_DROP
;
358 dpif_linux_set_drop_frags(struct dpif
*dpif_
, bool drop_frags
)
360 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
361 struct dpif_linux_dp dp
;
363 dpif_linux_dp_init(&dp
);
364 dp
.cmd
= OVS_DP_CMD_SET
;
365 dp
.dp_ifindex
= dpif
->dp_ifindex
;
366 dp
.ipv4_frags
= drop_frags
? OVS_DP_FRAG_DROP
: OVS_DP_FRAG_ZERO
;
367 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
371 dpif_linux_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
374 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
375 const char *name
= netdev_get_name(netdev
);
376 const char *type
= netdev_get_type(netdev
);
377 struct dpif_linux_vport request
, reply
;
378 const struct ofpbuf
*options
;
382 dpif_linux_vport_init(&request
);
383 request
.cmd
= OVS_VPORT_CMD_NEW
;
384 request
.dp_ifindex
= dpif
->dp_ifindex
;
385 request
.type
= netdev_vport_get_vport_type(netdev
);
386 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
387 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
388 "unsupported type `%s'",
389 dpif_name(dpif_
), name
, type
);
394 options
= netdev_vport_get_options(netdev
);
395 if (options
&& options
->size
) {
396 request
.options
= options
->data
;
397 request
.options_len
= options
->size
;
400 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
401 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
404 /* Loop until we find a port that isn't used. */
406 request
.port_no
= dpif_linux_pop_port(dpif
);
407 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
410 *port_nop
= reply
.port_no
;
413 } while (request
.port_no
!= UINT32_MAX
414 && (error
== EBUSY
|| error
== EFBIG
));
420 dpif_linux_port_del(struct dpif
*dpif_
, uint16_t port_no
)
422 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
423 struct dpif_linux_vport vport
;
426 dpif_linux_vport_init(&vport
);
427 vport
.cmd
= OVS_VPORT_CMD_DEL
;
428 vport
.dp_ifindex
= dpif
->dp_ifindex
;
429 vport
.port_no
= port_no
;
430 error
= dpif_linux_vport_transact(&vport
, NULL
, NULL
);
433 dpif_linux_push_port(dpif
, port_no
);
439 dpif_linux_port_query__(const struct dpif
*dpif
, uint32_t port_no
,
440 const char *port_name
, struct dpif_port
*dpif_port
)
442 struct dpif_linux_vport request
;
443 struct dpif_linux_vport reply
;
447 dpif_linux_vport_init(&request
);
448 request
.cmd
= OVS_VPORT_CMD_GET
;
449 request
.dp_ifindex
= dpif_linux_cast(dpif
)->dp_ifindex
;
450 request
.port_no
= port_no
;
451 request
.name
= port_name
;
453 error
= dpif_linux_vport_transact(&request
, &reply
, &buf
);
455 dpif_port
->name
= xstrdup(reply
.name
);
456 dpif_port
->type
= xstrdup(netdev_vport_get_netdev_type(&reply
));
457 dpif_port
->port_no
= reply
.port_no
;
459 netdev_stats_from_rtnl_link_stats64(&dpif_port
->stats
,
462 memset(&dpif_port
->stats
, 0xff, sizeof dpif_port
->stats
);
/* Looks up the port numbered 'port_no', passing no name.  See
 * dpif_linux_port_query__() for the result. */
static int
dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                struct dpif_port *dpif_port)
{
    const char *no_name = NULL;

    return dpif_linux_port_query__(dpif, port_no, no_name, dpif_port);
}
/* Looks up the port named 'devname', passing 0 as the port number.  See
 * dpif_linux_port_query__() for the result. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    const uint32_t port_no = 0;

    return dpif_linux_port_query__(dpif, port_no, devname, dpif_port);
}
484 dpif_linux_get_max_ports(const struct dpif
*dpif OVS_UNUSED
)
486 /* If the datapath increases its range of supported ports, then it should
487 * start reporting that. */
492 dpif_linux_flow_flush(struct dpif
*dpif_
)
494 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
495 struct dpif_linux_flow flow
;
497 dpif_linux_flow_init(&flow
);
498 flow
.cmd
= OVS_FLOW_CMD_DEL
;
499 flow
.dp_ifindex
= dpif
->dp_ifindex
;
500 return dpif_linux_flow_transact(&flow
, NULL
, NULL
);
503 struct dpif_linux_port_state
{
505 unsigned long *port_bitmap
; /* Ports in the datapath. */
506 bool complete
; /* Dump completed without error. */
510 dpif_linux_port_dump_start(const struct dpif
*dpif_
, void **statep
)
512 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
513 struct dpif_linux_port_state
*state
;
514 struct dpif_linux_vport request
;
517 *statep
= state
= xmalloc(sizeof *state
);
518 state
->port_bitmap
= bitmap_allocate(LRU_MAX_PORTS
);
519 state
->complete
= false;
521 dpif_linux_vport_init(&request
);
522 request
.cmd
= OVS_DP_CMD_GET
;
523 request
.dp_ifindex
= dpif
->dp_ifindex
;
525 buf
= ofpbuf_new(1024);
526 dpif_linux_vport_to_ofpbuf(&request
, buf
);
527 nl_dump_start(&state
->dump
, genl_sock
, buf
);
534 dpif_linux_port_dump_next(const struct dpif
*dpif OVS_UNUSED
, void *state_
,
535 struct dpif_port
*dpif_port
)
537 struct dpif_linux_port_state
*state
= state_
;
538 struct dpif_linux_vport vport
;
542 if (!nl_dump_next(&state
->dump
, &buf
)) {
543 state
->complete
= true;
547 error
= dpif_linux_vport_from_ofpbuf(&vport
, &buf
);
552 if (vport
.port_no
< LRU_MAX_PORTS
) {
553 bitmap_set1(state
->port_bitmap
, vport
.port_no
);
556 dpif_port
->name
= (char *) vport
.name
;
557 dpif_port
->type
= (char *) netdev_vport_get_netdev_type(&vport
);
558 dpif_port
->port_no
= vport
.port_no
;
560 netdev_stats_from_rtnl_link_stats64(&dpif_port
->stats
, vport
.stats
);
562 memset(&dpif_port
->stats
, 0xff, sizeof dpif_port
->stats
);
568 dpif_linux_port_dump_done(const struct dpif
*dpif_
, void *state_
)
570 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
571 struct dpif_linux_port_state
*state
= state_
;
572 int error
= nl_dump_done(&state
->dump
);
574 if (state
->complete
) {
577 for (i
= 0; i
< LRU_MAX_PORTS
; i
++) {
578 if (!bitmap_is_set(state
->port_bitmap
, i
)) {
579 dpif_linux_push_port(dpif
, i
);
584 free(state
->port_bitmap
);
590 dpif_linux_port_poll(const struct dpif
*dpif_
, char **devnamep
)
592 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
594 if (dpif
->change_error
) {
595 dpif
->change_error
= false;
596 sset_clear(&dpif
->changed_ports
);
598 } else if (!sset_is_empty(&dpif
->changed_ports
)) {
599 *devnamep
= sset_pop(&dpif
->changed_ports
);
607 dpif_linux_port_poll_wait(const struct dpif
*dpif_
)
609 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
610 if (!sset_is_empty(&dpif
->changed_ports
) || dpif
->change_error
) {
611 poll_immediate_wake();
616 dpif_linux_flow_get__(const struct dpif
*dpif_
,
617 const struct nlattr
*key
, size_t key_len
,
618 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
620 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
621 struct dpif_linux_flow request
;
623 dpif_linux_flow_init(&request
);
624 request
.cmd
= OVS_FLOW_CMD_GET
;
625 request
.dp_ifindex
= dpif
->dp_ifindex
;
627 request
.key_len
= key_len
;
628 return dpif_linux_flow_transact(&request
, reply
, bufp
);
632 dpif_linux_flow_get(const struct dpif
*dpif_
,
633 const struct nlattr
*key
, size_t key_len
,
634 struct ofpbuf
**actionsp
, struct dpif_flow_stats
*stats
)
636 struct dpif_linux_flow reply
;
640 error
= dpif_linux_flow_get__(dpif_
, key
, key_len
, &reply
, &buf
);
643 dpif_linux_flow_get_stats(&reply
, stats
);
646 buf
->data
= (void *) reply
.actions
;
647 buf
->size
= reply
.actions_len
;
657 dpif_linux_flow_put(struct dpif
*dpif_
, enum dpif_flow_put_flags flags
,
658 const struct nlattr
*key
, size_t key_len
,
659 const struct nlattr
*actions
, size_t actions_len
,
660 struct dpif_flow_stats
*stats
)
662 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
663 struct dpif_linux_flow request
, reply
;
664 struct nlattr dummy_action
;
668 dpif_linux_flow_init(&request
);
669 request
.cmd
= flags
& DPIF_FP_CREATE
? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
;
670 request
.dp_ifindex
= dpif
->dp_ifindex
;
672 request
.key_len
= key_len
;
673 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
674 request
.actions
= actions
? actions
: &dummy_action
;
675 request
.actions_len
= actions_len
;
676 if (flags
& DPIF_FP_ZERO_STATS
) {
677 request
.clear
= true;
679 request
.nlmsg_flags
= flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
680 error
= dpif_linux_flow_transact(&request
,
681 stats
? &reply
: NULL
,
682 stats
? &buf
: NULL
);
683 if (!error
&& stats
) {
684 dpif_linux_flow_get_stats(&reply
, stats
);
691 dpif_linux_flow_del(struct dpif
*dpif_
,
692 const struct nlattr
*key
, size_t key_len
,
693 struct dpif_flow_stats
*stats
)
695 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
696 struct dpif_linux_flow request
, reply
;
700 dpif_linux_flow_init(&request
);
701 request
.cmd
= OVS_FLOW_CMD_DEL
;
702 request
.dp_ifindex
= dpif
->dp_ifindex
;
704 request
.key_len
= key_len
;
705 error
= dpif_linux_flow_transact(&request
,
706 stats
? &reply
: NULL
,
707 stats
? &buf
: NULL
);
708 if (!error
&& stats
) {
709 dpif_linux_flow_get_stats(&reply
, stats
);
715 struct dpif_linux_flow_state
{
717 struct dpif_linux_flow flow
;
718 struct dpif_flow_stats stats
;
723 dpif_linux_flow_dump_start(const struct dpif
*dpif_
, void **statep
)
725 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
726 struct dpif_linux_flow_state
*state
;
727 struct dpif_linux_flow request
;
730 *statep
= state
= xmalloc(sizeof *state
);
732 dpif_linux_flow_init(&request
);
733 request
.cmd
= OVS_DP_CMD_GET
;
734 request
.dp_ifindex
= dpif
->dp_ifindex
;
736 buf
= ofpbuf_new(1024);
737 dpif_linux_flow_to_ofpbuf(&request
, buf
);
738 nl_dump_start(&state
->dump
, genl_sock
, buf
);
747 dpif_linux_flow_dump_next(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
,
748 const struct nlattr
**key
, size_t *key_len
,
749 const struct nlattr
**actions
, size_t *actions_len
,
750 const struct dpif_flow_stats
**stats
)
752 struct dpif_linux_flow_state
*state
= state_
;
757 ofpbuf_delete(state
->buf
);
760 if (!nl_dump_next(&state
->dump
, &buf
)) {
764 error
= dpif_linux_flow_from_ofpbuf(&state
->flow
, &buf
);
769 if (actions
&& !state
->flow
.actions
) {
770 error
= dpif_linux_flow_get__(dpif_
, state
->flow
.key
,
772 &state
->flow
, &state
->buf
);
773 if (error
== ENOENT
) {
774 VLOG_DBG("dumped flow disappeared on get");
776 VLOG_WARN("error fetching dumped flow: %s", strerror(error
));
782 *actions
= state
->flow
.actions
;
783 *actions_len
= state
->flow
.actions_len
;
786 *key
= state
->flow
.key
;
787 *key_len
= state
->flow
.key_len
;
790 dpif_linux_flow_get_stats(&state
->flow
, &state
->stats
);
791 *stats
= &state
->stats
;
797 dpif_linux_flow_dump_done(const struct dpif
*dpif OVS_UNUSED
, void *state_
)
799 struct dpif_linux_flow_state
*state
= state_
;
800 int error
= nl_dump_done(&state
->dump
);
801 ofpbuf_delete(state
->buf
);
807 dpif_linux_execute__(int dp_ifindex
,
808 const struct nlattr
*key
, size_t key_len
,
809 const struct nlattr
*actions
, size_t actions_len
,
810 const struct ofpbuf
*packet
)
812 struct ovs_header
*execute
;
816 buf
= ofpbuf_new(128 + actions_len
+ packet
->size
);
818 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
819 OVS_PACKET_CMD_EXECUTE
, 1);
821 execute
= ofpbuf_put_uninit(buf
, sizeof *execute
);
822 execute
->dp_ifindex
= dp_ifindex
;
824 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
, packet
->data
, packet
->size
);
825 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_KEY
, key
, key_len
);
826 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
, actions
, actions_len
);
828 error
= nl_sock_transact(genl_sock
, buf
, NULL
);
834 dpif_linux_execute(struct dpif
*dpif_
,
835 const struct nlattr
*key
, size_t key_len
,
836 const struct nlattr
*actions
, size_t actions_len
,
837 const struct ofpbuf
*packet
)
839 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
841 return dpif_linux_execute__(dpif
->dp_ifindex
, key
, key_len
,
842 actions
, actions_len
, packet
);
846 dpif_linux_recv_get_mask(const struct dpif
*dpif_
, int *listen_mask
)
848 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
849 *listen_mask
= dpif
->listen_mask
;
854 dpif_linux_recv_set_mask(struct dpif
*dpif_
, int listen_mask
)
856 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
860 if (listen_mask
== dpif
->listen_mask
) {
862 } else if (!listen_mask
) {
863 nl_sock_destroy(dpif
->mc_sock
);
864 dpif
->mc_sock
= NULL
;
865 dpif
->listen_mask
= 0;
867 } else if (!dpif
->mc_sock
) {
868 error
= nl_sock_create(NETLINK_GENERIC
, &dpif
->mc_sock
);
874 /* Unsubscribe from old groups. */
875 for (i
= 0; i
< DPIF_N_UC_TYPES
; i
++) {
876 if (dpif
->listen_mask
& (1u << i
)) {
877 nl_sock_leave_mcgroup(dpif
->mc_sock
, dpif
->mcgroups
[i
]);
881 /* Update listen_mask. */
882 dpif
->listen_mask
= listen_mask
;
884 /* Subscribe to new groups. */
886 for (i
= 0; i
< DPIF_N_UC_TYPES
; i
++) {
887 if (dpif
->listen_mask
& (1u << i
)) {
890 retval
= nl_sock_join_mcgroup(dpif
->mc_sock
, dpif
->mcgroups
[i
]);
900 dpif_linux_get_sflow_probability(const struct dpif
*dpif_
,
901 uint32_t *probability
)
903 struct dpif_linux_dp dp
;
907 error
= dpif_linux_dp_get(dpif_
, &dp
, &buf
);
909 *probability
= dp
.sampling
? *dp
.sampling
: 0;
916 dpif_linux_set_sflow_probability(struct dpif
*dpif_
, uint32_t probability
)
918 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
919 struct dpif_linux_dp dp
;
921 dpif_linux_dp_init(&dp
);
922 dp
.cmd
= OVS_DP_CMD_SET
;
923 dp
.dp_ifindex
= dpif
->dp_ifindex
;
924 dp
.sampling
= &probability
;
925 return dpif_linux_dp_transact(&dp
, NULL
, NULL
);
929 dpif_linux_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
930 uint32_t queue_id
, uint32_t *priority
)
932 if (queue_id
< 0xf000) {
933 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
941 parse_odp_packet(struct ofpbuf
*buf
, struct dpif_upcall
*upcall
,
944 static const struct nl_policy ovs_packet_policy
[] = {
945 /* Always present. */
946 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
947 .min_len
= ETH_HEADER_LEN
},
948 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
950 /* OVS_PACKET_CMD_ACTION only. */
951 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_U64
, .optional
= true },
953 /* OVS_PACKET_CMD_SAMPLE only. */
954 [OVS_PACKET_ATTR_SAMPLE_POOL
] = { .type
= NL_A_U32
, .optional
= true },
955 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
958 struct ovs_header
*ovs_header
;
959 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
960 struct nlmsghdr
*nlmsg
;
961 struct genlmsghdr
*genl
;
965 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
967 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
968 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
969 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
970 if (!nlmsg
|| !genl
|| !ovs_header
971 || nlmsg
->nlmsg_type
!= ovs_packet_family
972 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
973 ARRAY_SIZE(ovs_packet_policy
))) {
977 type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
978 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
979 : genl
->cmd
== OVS_PACKET_CMD_SAMPLE
? DPIF_UC_SAMPLE
985 memset(upcall
, 0, sizeof *upcall
);
987 upcall
->packet
= buf
;
988 upcall
->packet
->data
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
]);
989 upcall
->packet
->size
= nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]);
990 upcall
->key
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]);
991 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
992 upcall
->userdata
= (a
[OVS_PACKET_ATTR_USERDATA
]
993 ? nl_attr_get_u64(a
[OVS_PACKET_ATTR_USERDATA
])
995 upcall
->sample_pool
= (a
[OVS_PACKET_ATTR_SAMPLE_POOL
]
996 ? nl_attr_get_u32(a
[OVS_PACKET_ATTR_SAMPLE_POOL
])
998 if (a
[OVS_PACKET_ATTR_ACTIONS
]) {
999 upcall
->actions
= (void *) nl_attr_get(a
[OVS_PACKET_ATTR_ACTIONS
]);
1000 upcall
->actions_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_ACTIONS
]);
1003 *dp_ifindex
= ovs_header
->dp_ifindex
;
1009 dpif_linux_recv(struct dpif
*dpif_
, struct dpif_upcall
*upcall
)
1011 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1016 if (!dpif
->mc_sock
) {
1020 for (i
= 0; i
< 50; i
++) {
1023 error
= nl_sock_recv(dpif
->mc_sock
, &buf
, false);
1028 error
= parse_odp_packet(buf
, upcall
, &dp_ifindex
);
1030 && dp_ifindex
== dpif
->dp_ifindex
1031 && dpif
->listen_mask
& (1u << upcall
->type
)) {
1045 dpif_linux_recv_wait(struct dpif
*dpif_
)
1047 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1048 if (dpif
->mc_sock
) {
1049 nl_sock_wait(dpif
->mc_sock
, POLLIN
);
1054 dpif_linux_recv_purge(struct dpif
*dpif_
)
1056 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1058 if (dpif
->mc_sock
) {
1059 nl_sock_drain(dpif
->mc_sock
);
1063 const struct dpif_class dpif_linux_class
= {
1065 dpif_linux_enumerate
,
1071 dpif_linux_get_stats
,
1072 dpif_linux_get_drop_frags
,
1073 dpif_linux_set_drop_frags
,
1074 dpif_linux_port_add
,
1075 dpif_linux_port_del
,
1076 dpif_linux_port_query_by_number
,
1077 dpif_linux_port_query_by_name
,
1078 dpif_linux_get_max_ports
,
1079 dpif_linux_port_dump_start
,
1080 dpif_linux_port_dump_next
,
1081 dpif_linux_port_dump_done
,
1082 dpif_linux_port_poll
,
1083 dpif_linux_port_poll_wait
,
1084 dpif_linux_flow_get
,
1085 dpif_linux_flow_put
,
1086 dpif_linux_flow_del
,
1087 dpif_linux_flow_flush
,
1088 dpif_linux_flow_dump_start
,
1089 dpif_linux_flow_dump_next
,
1090 dpif_linux_flow_dump_done
,
1092 dpif_linux_recv_get_mask
,
1093 dpif_linux_recv_set_mask
,
1094 dpif_linux_get_sflow_probability
,
1095 dpif_linux_set_sflow_probability
,
1096 dpif_linux_queue_to_priority
,
1098 dpif_linux_recv_wait
,
1099 dpif_linux_recv_purge
,
1103 dpif_linux_init(void)
1105 static int error
= -1;
1108 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
1109 &ovs_datapath_family
);
1111 VLOG_ERR("Generic Netlink family '%s' does not exist. "
1112 "The Open vSwitch kernel module is probably not loaded.",
1113 OVS_DATAPATH_FAMILY
);
1116 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
1119 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
1122 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
1123 &ovs_packet_family
);
1126 error
= nl_sock_create(NETLINK_GENERIC
, &genl_sock
);
1134 dpif_linux_is_internal_device(const char *name
)
1136 struct dpif_linux_vport reply
;
1140 error
= dpif_linux_vport_get(name
, &reply
, &buf
);
1143 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
1144 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
1145 name
, strerror(error
));
1148 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
1152 dpif_linux_vport_send(int dp_ifindex
, uint32_t port_no
,
1153 const void *data
, size_t size
)
1155 struct ofpbuf actions
, key
, packet
;
1156 struct odputil_keybuf keybuf
;
1160 ofpbuf_use_const(&packet
, data
, size
);
1161 flow_extract(&packet
, htonll(0), 0, &flow
);
1163 ofpbuf_use_stack(&key
, &keybuf
, sizeof keybuf
);
1164 odp_flow_key_from_flow(&key
, &flow
);
1166 ofpbuf_use_stack(&actions
, &action
, sizeof action
);
1167 nl_msg_put_u32(&actions
, OVS_ACTION_ATTR_OUTPUT
, port_no
);
1169 return dpif_linux_execute__(dp_ifindex
, key
.data
, key
.size
,
1170 actions
.data
, actions
.size
, &packet
);
1174 dpif_linux_port_changed(const struct rtnetlink_link_change
*change
,
1177 struct dpif_linux
*dpif
= dpif_
;
1180 if (change
->master_ifindex
== dpif
->dp_ifindex
1181 && (change
->nlmsg_type
== RTM_NEWLINK
1182 || change
->nlmsg_type
== RTM_DELLINK
))
1184 /* Our datapath changed, either adding a new port or deleting an
1186 sset_add(&dpif
->changed_ports
, change
->ifname
);
1189 dpif
->change_error
= true;
1193 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1194 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1195 * positive errno value.
1197 * 'vport' will contain pointers into 'buf', so the caller should not free
1198 * 'buf' while 'vport' is still in use. */
1200 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport
*vport
,
1201 const struct ofpbuf
*buf
)
1203 static const struct nl_policy ovs_vport_policy
[] = {
1204 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
1205 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
1206 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1207 [OVS_VPORT_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1208 .min_len
= sizeof(struct rtnl_link_stats64
),
1209 .max_len
= sizeof(struct rtnl_link_stats64
),
1211 [OVS_VPORT_ATTR_ADDRESS
] = { .type
= NL_A_UNSPEC
,
1212 .min_len
= ETH_ADDR_LEN
,
1213 .max_len
= ETH_ADDR_LEN
,
1215 [OVS_VPORT_ATTR_MTU
] = { .type
= NL_A_U32
, .optional
= true },
1216 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1217 [OVS_VPORT_ATTR_IFINDEX
] = { .type
= NL_A_U32
, .optional
= true },
1218 [OVS_VPORT_ATTR_IFLINK
] = { .type
= NL_A_U32
, .optional
= true },
1221 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
1222 struct ovs_header
*ovs_header
;
1223 struct nlmsghdr
*nlmsg
;
1224 struct genlmsghdr
*genl
;
1227 dpif_linux_vport_init(vport
);
1229 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1230 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1231 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1232 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1233 if (!nlmsg
|| !genl
|| !ovs_header
1234 || nlmsg
->nlmsg_type
!= ovs_vport_family
1235 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
1236 ARRAY_SIZE(ovs_vport_policy
))) {
1240 vport
->cmd
= genl
->cmd
;
1241 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
1242 vport
->port_no
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_PORT_NO
]);
1243 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
1244 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
1245 if (a
[OVS_VPORT_ATTR_STATS
]) {
1246 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
1248 if (a
[OVS_VPORT_ATTR_ADDRESS
]) {
1249 vport
->address
= nl_attr_get(a
[OVS_VPORT_ATTR_ADDRESS
]);
1251 if (a
[OVS_VPORT_ATTR_MTU
]) {
1252 vport
->mtu
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_MTU
]);
1254 vport
->mtu
= INT_MAX
;
1256 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
1257 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
1258 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
1260 if (a
[OVS_VPORT_ATTR_IFINDEX
]) {
1261 vport
->ifindex
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_IFINDEX
]);
1263 if (a
[OVS_VPORT_ATTR_IFLINK
]) {
1264 vport
->iflink
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_IFLINK
]);
1269 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1270 * followed by Netlink attributes corresponding to 'vport'. */
1272 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport
*vport
,
1275 struct ovs_header
*ovs_header
;
1277 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
1280 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1281 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
1283 if (vport
->port_no
!= UINT32_MAX
) {
1284 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
1287 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
1288 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
1292 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
1296 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
1297 vport
->stats
, sizeof *vport
->stats
);
1300 if (vport
->address
) {
1301 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_ADDRESS
,
1302 vport
->address
, ETH_ADDR_LEN
);
1305 if (vport
->mtu
&& vport
->mtu
!= INT_MAX
) {
1306 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_MTU
, vport
->mtu
);
1309 if (vport
->options
) {
1310 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
1311 vport
->options
, vport
->options_len
);
1314 if (vport
->ifindex
) {
1315 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_IFINDEX
, vport
->ifindex
);
1318 if (vport
->iflink
) {
1319 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_IFLINK
, vport
->iflink
);
1323 /* Clears 'vport' to "empty" values. */
1325 dpif_linux_vport_init(struct dpif_linux_vport
*vport
)
1327 memset(vport
, 0, sizeof *vport
);
1328 vport
->port_no
= UINT32_MAX
;
1331 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1332 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1333 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1334 * result of the command is expected to be an ovs_vport also, which is decoded
1335 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1336 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1338 dpif_linux_vport_transact(const struct dpif_linux_vport
*request
,
1339 struct dpif_linux_vport
*reply
,
1340 struct ofpbuf
**bufp
)
1342 struct ofpbuf
*request_buf
;
1345 assert((reply
!= NULL
) == (bufp
!= NULL
));
1347 error
= dpif_linux_init();
1351 dpif_linux_vport_init(reply
);
1356 request_buf
= ofpbuf_new(1024);
1357 dpif_linux_vport_to_ofpbuf(request
, request_buf
);
1358 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1359 ofpbuf_delete(request_buf
);
1363 error
= dpif_linux_vport_from_ofpbuf(reply
, *bufp
);
1366 dpif_linux_vport_init(reply
);
1367 ofpbuf_delete(*bufp
);
1374 /* Obtains information about the kernel vport named 'name' and stores it into
1375 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1376 * longer needed ('reply' will contain pointers into '*bufp'). */
1378 dpif_linux_vport_get(const char *name
, struct dpif_linux_vport
*reply
,
1379 struct ofpbuf
**bufp
)
1381 struct dpif_linux_vport request
;
1383 dpif_linux_vport_init(&request
);
1384 request
.cmd
= OVS_VPORT_CMD_GET
;
1385 request
.name
= name
;
1387 return dpif_linux_vport_transact(&request
, reply
, bufp
);
1390 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1391 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
1392 * positive errno value.
1394 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
1395 * while 'dp' is still in use. */
1397 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp
*dp
, const struct ofpbuf
*buf
)
1399 static const struct nl_policy ovs_datapath_policy
[] = {
1400 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
1401 [OVS_DP_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1402 .min_len
= sizeof(struct ovs_dp_stats
),
1403 .max_len
= sizeof(struct ovs_dp_stats
),
1405 [OVS_DP_ATTR_IPV4_FRAGS
] = { .type
= NL_A_U32
, .optional
= true },
1406 [OVS_DP_ATTR_SAMPLING
] = { .type
= NL_A_U32
, .optional
= true },
1407 [OVS_DP_ATTR_MCGROUPS
] = { .type
= NL_A_NESTED
, .optional
= true },
1410 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
1411 struct ovs_header
*ovs_header
;
1412 struct nlmsghdr
*nlmsg
;
1413 struct genlmsghdr
*genl
;
1416 dpif_linux_dp_init(dp
);
1418 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1419 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1420 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1421 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1422 if (!nlmsg
|| !genl
|| !ovs_header
1423 || nlmsg
->nlmsg_type
!= ovs_datapath_family
1424 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
1425 ARRAY_SIZE(ovs_datapath_policy
))) {
1429 dp
->cmd
= genl
->cmd
;
1430 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
1431 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
1432 if (a
[OVS_DP_ATTR_STATS
]) {
1433 /* Can't use structure assignment because Netlink doesn't ensure
1434 * sufficient alignment for 64-bit members. */
1435 memcpy(&dp
->stats
, nl_attr_get(a
[OVS_DP_ATTR_STATS
]),
1438 if (a
[OVS_DP_ATTR_IPV4_FRAGS
]) {
1439 dp
->ipv4_frags
= nl_attr_get_u32(a
[OVS_DP_ATTR_IPV4_FRAGS
]);
1441 if (a
[OVS_DP_ATTR_SAMPLING
]) {
1442 dp
->sampling
= nl_attr_get(a
[OVS_DP_ATTR_SAMPLING
]);
1445 if (a
[OVS_DP_ATTR_MCGROUPS
]) {
1446 static const struct nl_policy ovs_mcgroup_policy
[] = {
1447 [OVS_PACKET_CMD_MISS
] = { .type
= NL_A_U32
, .optional
= true },
1448 [OVS_PACKET_CMD_ACTION
] = { .type
= NL_A_U32
, .optional
= true },
1449 [OVS_PACKET_CMD_SAMPLE
] = { .type
= NL_A_U32
, .optional
= true },
1452 struct nlattr
*mcgroups
[ARRAY_SIZE(ovs_mcgroup_policy
)];
1454 if (!nl_parse_nested(a
[OVS_DP_ATTR_MCGROUPS
], ovs_mcgroup_policy
,
1455 mcgroups
, ARRAY_SIZE(ovs_mcgroup_policy
))) {
1459 if (mcgroups
[OVS_PACKET_CMD_MISS
]) {
1460 dp
->mcgroups
[DPIF_UC_MISS
]
1461 = nl_attr_get_u32(mcgroups
[OVS_PACKET_CMD_MISS
]);
1463 if (mcgroups
[OVS_PACKET_CMD_ACTION
]) {
1464 dp
->mcgroups
[DPIF_UC_ACTION
]
1465 = nl_attr_get_u32(mcgroups
[OVS_PACKET_CMD_ACTION
]);
1467 if (mcgroups
[OVS_PACKET_CMD_SAMPLE
]) {
1468 dp
->mcgroups
[DPIF_UC_SAMPLE
]
1469 = nl_attr_get_u32(mcgroups
[OVS_PACKET_CMD_SAMPLE
]);
1476 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
1478 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp
*dp
, struct ofpbuf
*buf
)
1480 struct ovs_header
*ovs_header
;
1482 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
1483 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
, 1);
1485 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1486 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
1489 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
1492 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
1494 if (dp
->ipv4_frags
) {
1495 nl_msg_put_u32(buf
, OVS_DP_ATTR_IPV4_FRAGS
, dp
->ipv4_frags
);
1499 nl_msg_put_u32(buf
, OVS_DP_ATTR_SAMPLING
, *dp
->sampling
);
1503 /* Clears 'dp' to "empty" values. */
1505 dpif_linux_dp_init(struct dpif_linux_dp
*dp
)
1507 memset(dp
, 0, sizeof *dp
);
1511 dpif_linux_dp_dump_start(struct nl_dump
*dump
)
1513 struct dpif_linux_dp request
;
1516 dpif_linux_dp_init(&request
);
1517 request
.cmd
= OVS_DP_CMD_GET
;
1519 buf
= ofpbuf_new(1024);
1520 dpif_linux_dp_to_ofpbuf(&request
, buf
);
1521 nl_dump_start(dump
, genl_sock
, buf
);
1525 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1526 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1527 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1528 * result of the command is expected to be of the same form, which is decoded
1529 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1530 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1532 dpif_linux_dp_transact(const struct dpif_linux_dp
*request
,
1533 struct dpif_linux_dp
*reply
, struct ofpbuf
**bufp
)
1535 struct ofpbuf
*request_buf
;
1538 assert((reply
!= NULL
) == (bufp
!= NULL
));
1540 request_buf
= ofpbuf_new(1024);
1541 dpif_linux_dp_to_ofpbuf(request
, request_buf
);
1542 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1543 ofpbuf_delete(request_buf
);
1547 error
= dpif_linux_dp_from_ofpbuf(reply
, *bufp
);
1550 dpif_linux_dp_init(reply
);
1551 ofpbuf_delete(*bufp
);
1558 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
1559 * The caller must free '*bufp' when the reply is no longer needed ('reply'
1560 * will contain pointers into '*bufp'). */
1562 dpif_linux_dp_get(const struct dpif
*dpif_
, struct dpif_linux_dp
*reply
,
1563 struct ofpbuf
**bufp
)
1565 struct dpif_linux
*dpif
= dpif_linux_cast(dpif_
);
1566 struct dpif_linux_dp request
;
1568 dpif_linux_dp_init(&request
);
1569 request
.cmd
= OVS_DP_CMD_GET
;
1570 request
.dp_ifindex
= dpif
->dp_ifindex
;
1572 return dpif_linux_dp_transact(&request
, reply
, bufp
);
1575 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
1576 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1577 * positive errno value.
1579 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1580 * while 'flow' is still in use. */
1582 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow
*flow
,
1583 const struct ofpbuf
*buf
)
1585 static const struct nl_policy ovs_flow_policy
[] = {
1586 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1587 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1588 [OVS_FLOW_ATTR_STATS
] = { .type
= NL_A_UNSPEC
,
1589 .min_len
= sizeof(struct ovs_flow_stats
),
1590 .max_len
= sizeof(struct ovs_flow_stats
),
1592 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
1593 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
1594 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
1597 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
1598 struct ovs_header
*ovs_header
;
1599 struct nlmsghdr
*nlmsg
;
1600 struct genlmsghdr
*genl
;
1603 dpif_linux_flow_init(flow
);
1605 ofpbuf_use_const(&b
, buf
->data
, buf
->size
);
1606 nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1607 genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1608 ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1609 if (!nlmsg
|| !genl
|| !ovs_header
1610 || nlmsg
->nlmsg_type
!= ovs_flow_family
1611 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
1612 ARRAY_SIZE(ovs_flow_policy
))) {
1616 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
1617 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
1618 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
1619 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
1620 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
1621 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
1622 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
1624 if (a
[OVS_FLOW_ATTR_STATS
]) {
1625 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
1627 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
1628 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
1630 if (a
[OVS_FLOW_ATTR_USED
]) {
1631 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
1636 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
1637 * followed by Netlink attributes corresponding to 'flow'. */
1639 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow
*flow
,
1642 struct ovs_header
*ovs_header
;
1644 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
1645 NLM_F_REQUEST
| NLM_F_ECHO
| flow
->nlmsg_flags
,
1648 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
1649 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
1651 if (flow
->key_len
) {
1652 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
, flow
->key
, flow
->key_len
);
1655 if (flow
->actions
|| flow
->actions_len
) {
1656 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
1657 flow
->actions
, flow
->actions_len
);
1660 /* We never need to send these to the kernel. */
1661 assert(!flow
->stats
);
1662 assert(!flow
->tcp_flags
);
1663 assert(!flow
->used
);
1666 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
1670 /* Clears 'flow' to "empty" values. */
1672 dpif_linux_flow_init(struct dpif_linux_flow
*flow
)
1674 memset(flow
, 0, sizeof *flow
);
1677 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1678 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1679 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1680 * result of the command is expected to be a flow also, which is decoded and
1681 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
1682 * is no longer needed ('reply' will contain pointers into '*bufp'). */
1684 dpif_linux_flow_transact(const struct dpif_linux_flow
*request
,
1685 struct dpif_linux_flow
*reply
, struct ofpbuf
**bufp
)
1687 struct ofpbuf
*request_buf
;
1690 assert((reply
!= NULL
) == (bufp
!= NULL
));
1692 request_buf
= ofpbuf_new(1024);
1693 dpif_linux_flow_to_ofpbuf(request
, request_buf
);
1694 error
= nl_sock_transact(genl_sock
, request_buf
, bufp
);
1695 ofpbuf_delete(request_buf
);
1699 error
= dpif_linux_flow_from_ofpbuf(reply
, *bufp
);
1702 dpif_linux_flow_init(reply
);
1703 ofpbuf_delete(*bufp
);
1711 dpif_linux_flow_get_stats(const struct dpif_linux_flow
*flow
,
1712 struct dpif_flow_stats
*stats
)
1715 stats
->n_packets
= get_unaligned_u64(&flow
->stats
->n_packets
);
1716 stats
->n_bytes
= get_unaligned_u64(&flow
->stats
->n_bytes
);
1718 stats
->n_packets
= 0;
1721 stats
->used
= flow
->used
? get_unaligned_u64(flow
->used
) : 0;
1722 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;