2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-netlink.h"
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
31 #include <sys/epoll.h>
36 #include "dpif-provider.h"
37 #include "dpif-netlink-rtnl.h"
38 #include "openvswitch/dynamic-string.h"
40 #include "fat-rwlock.h"
42 #include "netdev-linux.h"
43 #include "netdev-vport.h"
44 #include "netlink-conntrack.h"
45 #include "netlink-notifier.h"
46 #include "netlink-socket.h"
49 #include "openvswitch/ofpbuf.h"
51 #include "poll-loop.h"
53 #include "openvswitch/shash.h"
56 #include "unaligned.h"
58 #include "openvswitch/vlog.h"
59 #include "openvswitch/flow.h"
61 VLOG_DEFINE_THIS_MODULE(dpif_netlink
);
68 enum { MAX_PORTS
= USHRT_MAX
};
70 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
71 * missing if we have old headers. */
72 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
74 struct dpif_netlink_dp
{
75 /* Generic Netlink header. */
78 /* struct ovs_header. */
82 const char *name
; /* OVS_DP_ATTR_NAME. */
83 const uint32_t *upcall_pid
; /* OVS_DP_ATTR_UPCALL_PID. */
84 uint32_t user_features
; /* OVS_DP_ATTR_USER_FEATURES */
85 const struct ovs_dp_stats
*stats
; /* OVS_DP_ATTR_STATS. */
86 const struct ovs_dp_megaflow_stats
*megaflow_stats
;
87 /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
90 static void dpif_netlink_dp_init(struct dpif_netlink_dp
*);
91 static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp
*,
92 const struct ofpbuf
*);
93 static void dpif_netlink_dp_dump_start(struct nl_dump
*);
94 static int dpif_netlink_dp_transact(const struct dpif_netlink_dp
*request
,
95 struct dpif_netlink_dp
*reply
,
96 struct ofpbuf
**bufp
);
97 static int dpif_netlink_dp_get(const struct dpif
*,
98 struct dpif_netlink_dp
*reply
,
99 struct ofpbuf
**bufp
);
101 struct dpif_netlink_flow
{
102 /* Generic Netlink header. */
105 /* struct ovs_header. */
106 unsigned int nlmsg_flags
;
111 * The 'stats' member points to 64-bit data that might only be aligned on
112 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
115 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
116 * the Netlink version of the command, even if actions_len is zero. */
117 const struct nlattr
*key
; /* OVS_FLOW_ATTR_KEY. */
119 const struct nlattr
*mask
; /* OVS_FLOW_ATTR_MASK. */
121 const struct nlattr
*actions
; /* OVS_FLOW_ATTR_ACTIONS. */
123 ovs_u128 ufid
; /* OVS_FLOW_ATTR_FLOW_ID. */
124 bool ufid_present
; /* Is there a UFID? */
125 bool ufid_terse
; /* Skip serializing key/mask/acts? */
126 const struct ovs_flow_stats
*stats
; /* OVS_FLOW_ATTR_STATS. */
127 const uint8_t *tcp_flags
; /* OVS_FLOW_ATTR_TCP_FLAGS. */
128 const ovs_32aligned_u64
*used
; /* OVS_FLOW_ATTR_USED. */
129 bool clear
; /* OVS_FLOW_ATTR_CLEAR. */
130 bool probe
; /* OVS_FLOW_ATTR_PROBE. */
133 static void dpif_netlink_flow_init(struct dpif_netlink_flow
*);
134 static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*,
135 const struct ofpbuf
*);
136 static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*,
138 static int dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
139 struct dpif_netlink_flow
*reply
,
140 struct ofpbuf
**bufp
);
141 static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*,
142 struct dpif_flow_stats
*);
143 static void dpif_netlink_flow_to_dpif_flow(struct dpif
*, struct dpif_flow
*,
144 const struct dpif_netlink_flow
*);
146 /* One of the dpif channels between the kernel and userspace. */
147 struct dpif_channel
{
148 struct nl_sock
*sock
; /* Netlink socket. */
149 long long int last_poll
; /* Last time this channel was polled. */
153 #define VPORT_SOCK_POOL_SIZE 1
154 /* On Windows, there is no native support for epoll. There are equivalent
155 * interfaces though, that are not used currently. For simplicity, a pool of
156 * netlink sockets is used. Each socket is represented by 'struct
157 * dpif_windows_vport_sock'. Since it is a pool, multiple OVS ports may be
158 * sharing the same socket. In the future, we can add a reference count and
160 struct dpif_windows_vport_sock
{
161 struct nl_sock
*nl_sock
; /* netlink socket. */
165 struct dpif_handler
{
166 struct dpif_channel
*channels
;/* Array of channels for each handler. */
167 struct epoll_event
*epoll_events
;
168 int epoll_fd
; /* epoll fd that includes channel socks. */
169 int n_events
; /* Num events returned by epoll_wait(). */
170 int event_offset
; /* Offset into 'epoll_events'. */
173 /* Pool of sockets. */
174 struct dpif_windows_vport_sock
*vport_sock_pool
;
175 size_t last_used_pool_idx
; /* Index to aid in allocating a
176 socket in the pool to a port. */
180 /* Datapath interface for the openvswitch Linux kernel module. */
181 struct dpif_netlink
{
185 /* Upcall messages. */
186 struct fat_rwlock upcall_lock
;
187 struct dpif_handler
*handlers
;
188 uint32_t n_handlers
; /* Num of upcall handlers. */
189 int uc_array_size
; /* Size of 'handler->channels' and */
190 /* 'handler->epoll_events'. */
192 /* Change notification. */
193 struct nl_sock
*port_notifier
; /* vport multicast group subscriber. */
194 bool refresh_channels
;
197 static void report_loss(struct dpif_netlink
*, struct dpif_channel
*,
198 uint32_t ch_idx
, uint32_t handler_id
);
200 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
202 /* Generic Netlink family numbers for OVS.
204 * Initialized by dpif_netlink_init(). */
205 static int ovs_datapath_family
;
206 static int ovs_vport_family
;
207 static int ovs_flow_family
;
208 static int ovs_packet_family
;
210 /* Generic Netlink multicast groups for OVS.
212 * Initialized by dpif_netlink_init(). */
213 static unsigned int ovs_vport_mcgroup
;
215 /* If true, tunnel devices are created using OVS compat/genetlink.
216 * If false, tunnel devices are created with rtnetlink and using light weight
217 * tunnels. If we fail to create the tunnel with rtnetlink+LWT, then we fall
218 * back to using the compat interface. */
219 static bool ovs_tunnels_out_of_tree
= true;
221 static int dpif_netlink_init(void);
222 static int open_dpif(const struct dpif_netlink_dp
*, struct dpif
**);
223 static uint32_t dpif_netlink_port_get_pid(const struct dpif
*,
224 odp_port_t port_no
, uint32_t hash
);
225 static void dpif_netlink_handler_uninit(struct dpif_handler
*handler
);
226 static int dpif_netlink_refresh_channels(struct dpif_netlink
*,
227 uint32_t n_handlers
);
228 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*,
230 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*,
231 const struct ofpbuf
*);
232 static int dpif_netlink_port_query__(const struct dpif_netlink
*dpif
,
233 odp_port_t port_no
, const char *port_name
,
234 struct dpif_port
*dpif_port
);
236 static struct dpif_netlink
*
237 dpif_netlink_cast(const struct dpif
*dpif
)
239 dpif_assert_class(dpif
, &dpif_netlink_class
);
240 return CONTAINER_OF(dpif
, struct dpif_netlink
, dpif
);
244 dpif_netlink_enumerate(struct sset
*all_dps
,
245 const struct dpif_class
*dpif_class OVS_UNUSED
)
248 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
249 struct ofpbuf msg
, buf
;
252 error
= dpif_netlink_init();
257 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
258 dpif_netlink_dp_dump_start(&dump
);
259 while (nl_dump_next(&dump
, &msg
, &buf
)) {
260 struct dpif_netlink_dp dp
;
262 if (!dpif_netlink_dp_from_ofpbuf(&dp
, &msg
)) {
263 sset_add(all_dps
, dp
.name
);
267 return nl_dump_done(&dump
);
271 dpif_netlink_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
272 bool create
, struct dpif
**dpifp
)
274 struct dpif_netlink_dp dp_request
, dp
;
279 error
= dpif_netlink_init();
284 /* Create or look up datapath. */
285 dpif_netlink_dp_init(&dp_request
);
287 dp_request
.cmd
= OVS_DP_CMD_NEW
;
289 dp_request
.upcall_pid
= &upcall_pid
;
291 /* Use OVS_DP_CMD_SET to report user features */
292 dp_request
.cmd
= OVS_DP_CMD_SET
;
294 dp_request
.name
= name
;
295 dp_request
.user_features
|= OVS_DP_F_UNALIGNED
;
296 dp_request
.user_features
|= OVS_DP_F_VPORT_PIDS
;
297 error
= dpif_netlink_dp_transact(&dp_request
, &dp
, &buf
);
302 error
= open_dpif(&dp
, dpifp
);
308 open_dpif(const struct dpif_netlink_dp
*dp
, struct dpif
**dpifp
)
310 struct dpif_netlink
*dpif
;
312 dpif
= xzalloc(sizeof *dpif
);
313 dpif
->port_notifier
= NULL
;
314 fat_rwlock_init(&dpif
->upcall_lock
);
316 dpif_init(&dpif
->dpif
, &dpif_netlink_class
, dp
->name
,
317 dp
->dp_ifindex
, dp
->dp_ifindex
);
319 dpif
->dp_ifindex
= dp
->dp_ifindex
;
320 *dpifp
= &dpif
->dpif
;
325 /* Destroys the netlink sockets pointed to by the elements in 'socksp'
326 * and frees the 'socksp'. */
328 vport_del_socksp__(struct nl_sock
**socksp
, uint32_t n_socks
)
332 for (i
= 0; i
< n_socks
; i
++) {
333 nl_sock_destroy(socksp
[i
]);
339 /* Creates an array of netlink sockets. Returns an array of the
340 * corresponding pointers. Records the error in 'error'. */
341 static struct nl_sock
**
342 vport_create_socksp__(uint32_t n_socks
, int *error
)
344 struct nl_sock
**socksp
= xzalloc(n_socks
* sizeof *socksp
);
347 for (i
= 0; i
< n_socks
; i
++) {
348 *error
= nl_sock_create(NETLINK_GENERIC
, &socksp
[i
]);
357 vport_del_socksp__(socksp
, n_socks
);
364 vport_delete_sock_pool(struct dpif_handler
*handler
)
365 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
367 if (handler
->vport_sock_pool
) {
369 struct dpif_windows_vport_sock
*sock_pool
=
370 handler
->vport_sock_pool
;
372 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
373 if (sock_pool
[i
].nl_sock
) {
374 nl_sock_unsubscribe_packets(sock_pool
[i
].nl_sock
);
375 nl_sock_destroy(sock_pool
[i
].nl_sock
);
376 sock_pool
[i
].nl_sock
= NULL
;
380 free(handler
->vport_sock_pool
);
381 handler
->vport_sock_pool
= NULL
;
386 vport_create_sock_pool(struct dpif_handler
*handler
)
387 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
389 struct dpif_windows_vport_sock
*sock_pool
;
393 sock_pool
= xzalloc(VPORT_SOCK_POOL_SIZE
* sizeof *sock_pool
);
394 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
395 error
= nl_sock_create(NETLINK_GENERIC
, &sock_pool
[i
].nl_sock
);
400 /* Enable the netlink socket to receive packets. This is equivalent to
401 * calling nl_sock_join_mcgroup() to receive events. */
402 error
= nl_sock_subscribe_packets(sock_pool
[i
].nl_sock
);
408 handler
->vport_sock_pool
= sock_pool
;
409 handler
->last_used_pool_idx
= 0;
413 vport_delete_sock_pool(handler
);
417 /* Returns an array of pointers to netlink sockets. The sockets are picked
418 * from a pool. Records the error in 'error'. */
419 static struct nl_sock
**
420 vport_create_socksp_windows(struct dpif_netlink
*dpif
, int *error
)
421 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
423 uint32_t n_socks
= dpif
->n_handlers
;
424 struct nl_sock
**socksp
;
427 ovs_assert(n_socks
<= 1);
428 socksp
= xzalloc(n_socks
* sizeof *socksp
);
430 /* Pick netlink sockets to use in a round-robin fashion from each
431 * handler's pool of sockets. */
432 for (i
= 0; i
< n_socks
; i
++) {
433 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
434 struct dpif_windows_vport_sock
*sock_pool
= handler
->vport_sock_pool
;
435 size_t index
= handler
->last_used_pool_idx
;
437 /* A pool of sockets is allocated when the handler is initialized. */
438 if (sock_pool
== NULL
) {
444 ovs_assert(index
< VPORT_SOCK_POOL_SIZE
);
445 socksp
[i
] = sock_pool
[index
].nl_sock
;
446 socksp
[i
] = sock_pool
[index
].nl_sock
;
447 ovs_assert(socksp
[i
]);
448 index
= (index
== VPORT_SOCK_POOL_SIZE
- 1) ? 0 : index
+ 1;
449 handler
->last_used_pool_idx
= index
;
456 vport_del_socksp_windows(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
462 static struct nl_sock
**
463 vport_create_socksp(struct dpif_netlink
*dpif
, int *error
)
466 return vport_create_socksp_windows(dpif
, error
);
468 return vport_create_socksp__(dpif
->n_handlers
, error
);
473 vport_del_socksp(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
476 vport_del_socksp_windows(dpif
, socksp
);
478 vport_del_socksp__(socksp
, dpif
->n_handlers
);
482 /* Given the array of pointers to netlink sockets 'socksp', returns
483 * the array of corresponding pids. If the 'socksp' is NULL, returns
484 * a single-element array of value 0. */
486 vport_socksp_to_pids(struct nl_sock
**socksp
, uint32_t n_socks
)
491 pids
= xzalloc(sizeof *pids
);
495 pids
= xzalloc(n_socks
* sizeof *pids
);
496 for (i
= 0; i
< n_socks
; i
++) {
497 pids
[i
] = nl_sock_pid(socksp
[i
]);
504 /* Given the port number 'port_idx', extracts the pids of netlink sockets
505 * associated with the port and assigns them to 'upcall_pids'. */
507 vport_get_pids(struct dpif_netlink
*dpif
, uint32_t port_idx
,
508 uint32_t **upcall_pids
)
513 /* Since the nl_sock can only be assigned in either all
514 * or none "dpif->handlers" channels, the following check
516 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
519 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
521 pids
= xzalloc(dpif
->n_handlers
* sizeof *pids
);
523 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
524 pids
[i
] = nl_sock_pid(dpif
->handlers
[i
].channels
[port_idx
].sock
);
533 vport_add_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
,
534 struct nl_sock
**socksp
)
536 struct epoll_event event
;
537 uint32_t port_idx
= odp_to_u32(port_no
);
541 if (dpif
->handlers
== NULL
) {
545 /* We assume that the datapath densely chooses port numbers, which can
546 * therefore be used as an index into 'channels' and 'epoll_events' of
547 * 'dpif->handler'. */
548 if (port_idx
>= dpif
->uc_array_size
) {
549 uint32_t new_size
= port_idx
+ 1;
551 if (new_size
> MAX_PORTS
) {
552 VLOG_WARN_RL(&error_rl
, "%s: datapath port %"PRIu32
" too big",
553 dpif_name(&dpif
->dpif
), port_no
);
557 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
558 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
560 handler
->channels
= xrealloc(handler
->channels
,
561 new_size
* sizeof *handler
->channels
);
563 for (j
= dpif
->uc_array_size
; j
< new_size
; j
++) {
564 handler
->channels
[j
].sock
= NULL
;
567 handler
->epoll_events
= xrealloc(handler
->epoll_events
,
568 new_size
* sizeof *handler
->epoll_events
);
571 dpif
->uc_array_size
= new_size
;
574 memset(&event
, 0, sizeof event
);
575 event
.events
= EPOLLIN
;
576 event
.data
.u32
= port_idx
;
578 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
579 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
582 if (epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_ADD
, nl_sock_fd(socksp
[i
]),
588 dpif
->handlers
[i
].channels
[port_idx
].sock
= socksp
[i
];
589 dpif
->handlers
[i
].channels
[port_idx
].last_poll
= LLONG_MIN
;
595 for (j
= 0; j
< i
; j
++) {
597 epoll_ctl(dpif
->handlers
[j
].epoll_fd
, EPOLL_CTL_DEL
,
598 nl_sock_fd(socksp
[j
]), NULL
);
600 dpif
->handlers
[j
].channels
[port_idx
].sock
= NULL
;
607 vport_del_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
)
609 uint32_t port_idx
= odp_to_u32(port_no
);
612 if (!dpif
->handlers
|| port_idx
>= dpif
->uc_array_size
) {
616 /* Since the sock can only be assigned in either all or none
617 * of "dpif->handlers" channels, the following check would
619 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
623 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
624 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
626 epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_DEL
,
627 nl_sock_fd(handler
->channels
[port_idx
].sock
), NULL
);
628 nl_sock_destroy(handler
->channels
[port_idx
].sock
);
630 handler
->channels
[port_idx
].sock
= NULL
;
631 handler
->event_offset
= handler
->n_events
= 0;
636 destroy_all_channels(struct dpif_netlink
*dpif
)
637 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
641 if (!dpif
->handlers
) {
645 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
646 struct dpif_netlink_vport vport_request
;
647 uint32_t upcall_pids
= 0;
649 /* Since the sock can only be assigned in either all or none
650 * of "dpif->handlers" channels, the following check would
652 if (!dpif
->handlers
[0].channels
[i
].sock
) {
656 /* Turn off upcalls. */
657 dpif_netlink_vport_init(&vport_request
);
658 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
659 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
660 vport_request
.port_no
= u32_to_odp(i
);
661 vport_request
.n_upcall_pids
= 1;
662 vport_request
.upcall_pids
= &upcall_pids
;
663 dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
665 vport_del_channels(dpif
, u32_to_odp(i
));
668 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
669 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
671 dpif_netlink_handler_uninit(handler
);
672 free(handler
->epoll_events
);
673 free(handler
->channels
);
676 free(dpif
->handlers
);
677 dpif
->handlers
= NULL
;
678 dpif
->n_handlers
= 0;
679 dpif
->uc_array_size
= 0;
683 dpif_netlink_close(struct dpif
*dpif_
)
685 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
687 nl_sock_destroy(dpif
->port_notifier
);
689 fat_rwlock_wrlock(&dpif
->upcall_lock
);
690 destroy_all_channels(dpif
);
691 fat_rwlock_unlock(&dpif
->upcall_lock
);
693 fat_rwlock_destroy(&dpif
->upcall_lock
);
698 dpif_netlink_destroy(struct dpif
*dpif_
)
700 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
701 struct dpif_netlink_dp dp
;
703 dpif_netlink_dp_init(&dp
);
704 dp
.cmd
= OVS_DP_CMD_DEL
;
705 dp
.dp_ifindex
= dpif
->dp_ifindex
;
706 return dpif_netlink_dp_transact(&dp
, NULL
, NULL
);
710 dpif_netlink_run(struct dpif
*dpif_
)
712 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
714 if (dpif
->refresh_channels
) {
715 dpif
->refresh_channels
= false;
716 fat_rwlock_wrlock(&dpif
->upcall_lock
);
717 dpif_netlink_refresh_channels(dpif
, dpif
->n_handlers
);
718 fat_rwlock_unlock(&dpif
->upcall_lock
);
724 dpif_netlink_get_stats(const struct dpif
*dpif_
, struct dpif_dp_stats
*stats
)
726 struct dpif_netlink_dp dp
;
730 error
= dpif_netlink_dp_get(dpif_
, &dp
, &buf
);
732 memset(stats
, 0, sizeof *stats
);
735 stats
->n_hit
= get_32aligned_u64(&dp
.stats
->n_hit
);
736 stats
->n_missed
= get_32aligned_u64(&dp
.stats
->n_missed
);
737 stats
->n_lost
= get_32aligned_u64(&dp
.stats
->n_lost
);
738 stats
->n_flows
= get_32aligned_u64(&dp
.stats
->n_flows
);
741 if (dp
.megaflow_stats
) {
742 stats
->n_masks
= dp
.megaflow_stats
->n_masks
;
743 stats
->n_mask_hit
= get_32aligned_u64(
744 &dp
.megaflow_stats
->n_mask_hit
);
746 stats
->n_masks
= UINT32_MAX
;
747 stats
->n_mask_hit
= UINT64_MAX
;
755 get_vport_type(const struct dpif_netlink_vport
*vport
)
757 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
759 switch (vport
->type
) {
760 case OVS_VPORT_TYPE_NETDEV
: {
761 const char *type
= netdev_get_type_from_name(vport
->name
);
763 return type
? type
: "system";
766 case OVS_VPORT_TYPE_INTERNAL
:
769 case OVS_VPORT_TYPE_GENEVE
:
772 case OVS_VPORT_TYPE_GRE
:
775 case OVS_VPORT_TYPE_VXLAN
:
778 case OVS_VPORT_TYPE_LISP
:
781 case OVS_VPORT_TYPE_STT
:
784 case OVS_VPORT_TYPE_UNSPEC
:
785 case __OVS_VPORT_TYPE_MAX
:
789 VLOG_WARN_RL(&rl
, "dp%d: port `%s' has unsupported type %u",
790 vport
->dp_ifindex
, vport
->name
, (unsigned int) vport
->type
);
795 netdev_to_ovs_vport_type(const char *type
)
797 if (!strcmp(type
, "tap") || !strcmp(type
, "system")) {
798 return OVS_VPORT_TYPE_NETDEV
;
799 } else if (!strcmp(type
, "internal")) {
800 return OVS_VPORT_TYPE_INTERNAL
;
801 } else if (strstr(type
, "stt")) {
802 return OVS_VPORT_TYPE_STT
;
803 } else if (!strcmp(type
, "geneve")) {
804 return OVS_VPORT_TYPE_GENEVE
;
805 } else if (strstr(type
, "gre")) {
806 return OVS_VPORT_TYPE_GRE
;
807 } else if (!strcmp(type
, "vxlan")) {
808 return OVS_VPORT_TYPE_VXLAN
;
809 } else if (!strcmp(type
, "lisp")) {
810 return OVS_VPORT_TYPE_LISP
;
812 return OVS_VPORT_TYPE_UNSPEC
;
817 dpif_netlink_port_add__(struct dpif_netlink
*dpif
, const char *name
,
818 enum ovs_vport_type type
,
819 struct ofpbuf
*options
,
820 odp_port_t
*port_nop
)
821 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
823 struct dpif_netlink_vport request
, reply
;
825 struct nl_sock
**socksp
= NULL
;
826 uint32_t *upcall_pids
;
829 if (dpif
->handlers
) {
830 socksp
= vport_create_socksp(dpif
, &error
);
836 dpif_netlink_vport_init(&request
);
837 request
.cmd
= OVS_VPORT_CMD_NEW
;
838 request
.dp_ifindex
= dpif
->dp_ifindex
;
842 request
.port_no
= *port_nop
;
843 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
844 request
.n_upcall_pids
= socksp
? dpif
->n_handlers
: 1;
845 request
.upcall_pids
= upcall_pids
;
848 request
.options
= options
->data
;
849 request
.options_len
= options
->size
;
852 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
854 *port_nop
= reply
.port_no
;
856 if (error
== EBUSY
&& *port_nop
!= ODPP_NONE
) {
857 VLOG_INFO("%s: requested port %"PRIu32
" is in use",
858 dpif_name(&dpif
->dpif
), *port_nop
);
861 vport_del_socksp(dpif
, socksp
);
866 error
= vport_add_channels(dpif
, *port_nop
, socksp
);
868 VLOG_INFO("%s: could not add channel for port %s",
869 dpif_name(&dpif
->dpif
), name
);
871 /* Delete the port. */
872 dpif_netlink_vport_init(&request
);
873 request
.cmd
= OVS_VPORT_CMD_DEL
;
874 request
.dp_ifindex
= dpif
->dp_ifindex
;
875 request
.port_no
= *port_nop
;
876 dpif_netlink_vport_transact(&request
, NULL
, NULL
);
877 vport_del_socksp(dpif
, socksp
);
891 dpif_netlink_port_add_compat(struct dpif_netlink
*dpif
, struct netdev
*netdev
,
892 odp_port_t
*port_nop
)
893 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
895 const struct netdev_tunnel_config
*tnl_cfg
;
896 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
897 const char *type
= netdev_get_type(netdev
);
898 uint64_t options_stub
[64 / 8];
899 enum ovs_vport_type ovs_type
;
900 struct ofpbuf options
;
903 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
905 ovs_type
= netdev_to_ovs_vport_type(netdev_get_type(netdev
));
906 if (ovs_type
== OVS_VPORT_TYPE_UNSPEC
) {
907 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
908 "unsupported type `%s'",
909 dpif_name(&dpif
->dpif
), name
, type
);
913 if (ovs_type
== OVS_VPORT_TYPE_NETDEV
) {
915 /* XXX : Map appropriate Windows handle */
917 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
922 if (ovs_type
== OVS_VPORT_TYPE_INTERNAL
) {
923 if (!create_wmi_port(name
)){
924 VLOG_ERR("Could not create wmi internal port with name:%s", name
);
930 tnl_cfg
= netdev_get_tunnel_config(netdev
);
931 if (tnl_cfg
&& (tnl_cfg
->dst_port
!= 0 || tnl_cfg
->exts
)) {
932 ofpbuf_use_stack(&options
, options_stub
, sizeof options_stub
);
933 if (tnl_cfg
->dst_port
) {
934 nl_msg_put_u16(&options
, OVS_TUNNEL_ATTR_DST_PORT
,
935 ntohs(tnl_cfg
->dst_port
));
941 ext_ofs
= nl_msg_start_nested(&options
, OVS_TUNNEL_ATTR_EXTENSION
);
942 for (i
= 0; i
< 32; i
++) {
943 if (tnl_cfg
->exts
& (1 << i
)) {
944 nl_msg_put_flag(&options
, i
);
947 nl_msg_end_nested(&options
, ext_ofs
);
949 return dpif_netlink_port_add__(dpif
, name
, ovs_type
, &options
,
952 return dpif_netlink_port_add__(dpif
, name
, ovs_type
, NULL
, port_nop
);
958 dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink
*dpif
,
959 struct netdev
*netdev
,
960 odp_port_t
*port_nop
)
961 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
963 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
964 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
968 error
= dpif_netlink_rtnl_port_create(netdev
);
970 if (error
!= EOPNOTSUPP
) {
971 VLOG_INFO_RL(&rl
, "Failed to create %s with rtnetlink: %s",
972 netdev_get_name(netdev
), ovs_strerror(error
));
977 name
= netdev_vport_get_dpif_port(netdev
, namebuf
, sizeof namebuf
);
978 error
= dpif_netlink_port_add__(dpif
, name
, OVS_VPORT_TYPE_NETDEV
, NULL
,
981 dpif_netlink_rtnl_port_destroy(name
, netdev_get_type(netdev
));
987 dpif_netlink_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
988 odp_port_t
*port_nop
)
990 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
991 int error
= EOPNOTSUPP
;
993 fat_rwlock_wrlock(&dpif
->upcall_lock
);
994 if (!ovs_tunnels_out_of_tree
) {
995 error
= dpif_netlink_rtnl_port_create_and_add(dpif
, netdev
, port_nop
);
998 error
= dpif_netlink_port_add_compat(dpif
, netdev
, port_nop
);
1000 fat_rwlock_unlock(&dpif
->upcall_lock
);
1006 dpif_netlink_port_del__(struct dpif_netlink
*dpif
, odp_port_t port_no
)
1007 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1009 struct dpif_netlink_vport vport
;
1010 struct dpif_port dpif_port
;
1013 error
= dpif_netlink_port_query__(dpif
, port_no
, NULL
, &dpif_port
);
1018 dpif_netlink_vport_init(&vport
);
1019 vport
.cmd
= OVS_VPORT_CMD_DEL
;
1020 vport
.dp_ifindex
= dpif
->dp_ifindex
;
1021 vport
.port_no
= port_no
;
1023 if (!strcmp(dpif_port
.type
, "internal")) {
1024 if (!delete_wmi_port(dpif_port
.name
)) {
1025 VLOG_ERR("Could not delete wmi port with name: %s",
1030 error
= dpif_netlink_vport_transact(&vport
, NULL
, NULL
);
1032 vport_del_channels(dpif
, port_no
);
1034 if (!error
&& !ovs_tunnels_out_of_tree
) {
1035 error
= dpif_netlink_rtnl_port_destroy(dpif_port
.name
, dpif_port
.type
);
1036 if (error
== EOPNOTSUPP
) {
1041 dpif_port_destroy(&dpif_port
);
1047 dpif_netlink_port_del(struct dpif
*dpif_
, odp_port_t port_no
)
1049 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1052 fat_rwlock_wrlock(&dpif
->upcall_lock
);
1053 error
= dpif_netlink_port_del__(dpif
, port_no
);
1054 fat_rwlock_unlock(&dpif
->upcall_lock
);
1060 dpif_netlink_port_query__(const struct dpif_netlink
*dpif
, odp_port_t port_no
,
1061 const char *port_name
, struct dpif_port
*dpif_port
)
1063 struct dpif_netlink_vport request
;
1064 struct dpif_netlink_vport reply
;
1068 dpif_netlink_vport_init(&request
);
1069 request
.cmd
= OVS_VPORT_CMD_GET
;
1070 request
.dp_ifindex
= dpif
->dp_ifindex
;
1071 request
.port_no
= port_no
;
1072 request
.name
= port_name
;
1074 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
1076 if (reply
.dp_ifindex
!= request
.dp_ifindex
) {
1077 /* A query by name reported that 'port_name' is in some datapath
1078 * other than 'dpif', but the caller wants to know about 'dpif'. */
1080 } else if (dpif_port
) {
1081 dpif_port
->name
= xstrdup(reply
.name
);
1082 dpif_port
->type
= xstrdup(get_vport_type(&reply
));
1083 dpif_port
->port_no
= reply
.port_no
;
1091 dpif_netlink_port_query_by_number(const struct dpif
*dpif_
, odp_port_t port_no
,
1092 struct dpif_port
*dpif_port
)
1094 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1096 return dpif_netlink_port_query__(dpif
, port_no
, NULL
, dpif_port
);
1100 dpif_netlink_port_query_by_name(const struct dpif
*dpif_
, const char *devname
,
1101 struct dpif_port
*dpif_port
)
1103 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1105 return dpif_netlink_port_query__(dpif
, 0, devname
, dpif_port
);
1109 dpif_netlink_port_get_pid__(const struct dpif_netlink
*dpif
,
1110 odp_port_t port_no
, uint32_t hash
)
1111 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
1113 uint32_t port_idx
= odp_to_u32(port_no
);
1116 if (dpif
->handlers
&& dpif
->uc_array_size
> 0) {
1117 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1118 * channel, since it is not heavily loaded. */
1119 uint32_t idx
= port_idx
>= dpif
->uc_array_size
? 0 : port_idx
;
1120 struct dpif_handler
*h
= &dpif
->handlers
[hash
% dpif
->n_handlers
];
1122 /* Needs to check in case the socket pointer is changed in between
1123 * the holding of upcall_lock. A known case happens when the main
1124 * thread deletes the vport while the handler thread is handling
1125 * the upcall from that port. */
1126 if (h
->channels
[idx
].sock
) {
1127 pid
= nl_sock_pid(h
->channels
[idx
].sock
);
1135 dpif_netlink_port_get_pid(const struct dpif
*dpif_
, odp_port_t port_no
,
1138 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1141 fat_rwlock_rdlock(&dpif
->upcall_lock
);
1142 ret
= dpif_netlink_port_get_pid__(dpif
, port_no
, hash
);
1143 fat_rwlock_unlock(&dpif
->upcall_lock
);
1149 dpif_netlink_flow_flush(struct dpif
*dpif_
)
1151 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1152 struct dpif_netlink_flow flow
;
1154 dpif_netlink_flow_init(&flow
);
1155 flow
.cmd
= OVS_FLOW_CMD_DEL
;
1156 flow
.dp_ifindex
= dpif
->dp_ifindex
;
1157 return dpif_netlink_flow_transact(&flow
, NULL
, NULL
);
1160 struct dpif_netlink_port_state
{
1161 struct nl_dump dump
;
1166 dpif_netlink_port_dump_start__(const struct dpif_netlink
*dpif
,
1167 struct nl_dump
*dump
)
1169 struct dpif_netlink_vport request
;
1172 dpif_netlink_vport_init(&request
);
1173 request
.cmd
= OVS_VPORT_CMD_GET
;
1174 request
.dp_ifindex
= dpif
->dp_ifindex
;
1176 buf
= ofpbuf_new(1024);
1177 dpif_netlink_vport_to_ofpbuf(&request
, buf
);
1178 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
1183 dpif_netlink_port_dump_start(const struct dpif
*dpif_
, void **statep
)
1185 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1186 struct dpif_netlink_port_state
*state
;
1188 *statep
= state
= xmalloc(sizeof *state
);
1189 dpif_netlink_port_dump_start__(dpif
, &state
->dump
);
1191 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
1196 dpif_netlink_port_dump_next__(const struct dpif_netlink
*dpif
,
1197 struct nl_dump
*dump
,
1198 struct dpif_netlink_vport
*vport
,
1199 struct ofpbuf
*buffer
)
1204 if (!nl_dump_next(dump
, &buf
, buffer
)) {
1208 error
= dpif_netlink_vport_from_ofpbuf(vport
, &buf
);
1210 VLOG_WARN_RL(&error_rl
, "%s: failed to parse vport record (%s)",
1211 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1217 dpif_netlink_port_dump_next(const struct dpif
*dpif_
, void *state_
,
1218 struct dpif_port
*dpif_port
)
1220 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1221 struct dpif_netlink_port_state
*state
= state_
;
1222 struct dpif_netlink_vport vport
;
1225 error
= dpif_netlink_port_dump_next__(dpif
, &state
->dump
, &vport
,
1230 dpif_port
->name
= CONST_CAST(char *, vport
.name
);
1231 dpif_port
->type
= CONST_CAST(char *, get_vport_type(&vport
));
1232 dpif_port
->port_no
= vport
.port_no
;
1237 dpif_netlink_port_dump_done(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
)
1239 struct dpif_netlink_port_state
*state
= state_
;
1240 int error
= nl_dump_done(&state
->dump
);
1242 ofpbuf_uninit(&state
->buf
);
1248 dpif_netlink_port_poll(const struct dpif
*dpif_
, char **devnamep
)
1250 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1252 /* Lazily create the Netlink socket to listen for notifications. */
1253 if (!dpif
->port_notifier
) {
1254 struct nl_sock
*sock
;
1257 error
= nl_sock_create(NETLINK_GENERIC
, &sock
);
1262 error
= nl_sock_join_mcgroup(sock
, ovs_vport_mcgroup
);
1264 nl_sock_destroy(sock
);
1267 dpif
->port_notifier
= sock
;
1269 /* We have no idea of the current state so report that everything
1275 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1276 uint64_t buf_stub
[4096 / 8];
1280 ofpbuf_use_stub(&buf
, buf_stub
, sizeof buf_stub
);
1281 error
= nl_sock_recv(dpif
->port_notifier
, &buf
, false);
1283 struct dpif_netlink_vport vport
;
1285 error
= dpif_netlink_vport_from_ofpbuf(&vport
, &buf
);
1287 if (vport
.dp_ifindex
== dpif
->dp_ifindex
1288 && (vport
.cmd
== OVS_VPORT_CMD_NEW
1289 || vport
.cmd
== OVS_VPORT_CMD_DEL
1290 || vport
.cmd
== OVS_VPORT_CMD_SET
)) {
1291 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1292 dpif
->dpif
.full_name
, vport
.name
, vport
.cmd
);
1293 if (vport
.cmd
== OVS_VPORT_CMD_DEL
&& dpif
->handlers
) {
1294 dpif
->refresh_channels
= true;
1296 *devnamep
= xstrdup(vport
.name
);
1297 ofpbuf_uninit(&buf
);
1301 } else if (error
!= EAGAIN
) {
1302 VLOG_WARN_RL(&rl
, "error reading or parsing netlink (%s)",
1303 ovs_strerror(error
));
1304 nl_sock_drain(dpif
->port_notifier
);
1308 ofpbuf_uninit(&buf
);
1316 dpif_netlink_port_poll_wait(const struct dpif
*dpif_
)
1318 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1320 if (dpif
->port_notifier
) {
1321 nl_sock_wait(dpif
->port_notifier
, POLLIN
);
1323 poll_immediate_wake();
1328 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow
*request
,
1329 const ovs_u128
*ufid
, bool terse
)
1332 request
->ufid
= *ufid
;
1333 request
->ufid_present
= true;
1335 request
->ufid_present
= false;
1337 request
->ufid_terse
= terse
;
1341 dpif_netlink_init_flow_get__(const struct dpif_netlink
*dpif
,
1342 const struct nlattr
*key
, size_t key_len
,
1343 const ovs_u128
*ufid
, bool terse
,
1344 struct dpif_netlink_flow
*request
)
1346 dpif_netlink_flow_init(request
);
1347 request
->cmd
= OVS_FLOW_CMD_GET
;
1348 request
->dp_ifindex
= dpif
->dp_ifindex
;
1350 request
->key_len
= key_len
;
1351 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1355 dpif_netlink_init_flow_get(const struct dpif_netlink
*dpif
,
1356 const struct dpif_flow_get
*get
,
1357 struct dpif_netlink_flow
*request
)
1359 dpif_netlink_init_flow_get__(dpif
, get
->key
, get
->key_len
, get
->ufid
,
1364 dpif_netlink_flow_get__(const struct dpif_netlink
*dpif
,
1365 const struct nlattr
*key
, size_t key_len
,
1366 const ovs_u128
*ufid
, bool terse
,
1367 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1369 struct dpif_netlink_flow request
;
1371 dpif_netlink_init_flow_get__(dpif
, key
, key_len
, ufid
, terse
, &request
);
1372 return dpif_netlink_flow_transact(&request
, reply
, bufp
);
1376 dpif_netlink_flow_get(const struct dpif_netlink
*dpif
,
1377 const struct dpif_netlink_flow
*flow
,
1378 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1380 return dpif_netlink_flow_get__(dpif
, flow
->key
, flow
->key_len
,
1381 flow
->ufid_present
? &flow
->ufid
: NULL
,
1382 false, reply
, bufp
);
1386 dpif_netlink_init_flow_put(struct dpif_netlink
*dpif
,
1387 const struct dpif_flow_put
*put
,
1388 struct dpif_netlink_flow
*request
)
1390 static const struct nlattr dummy_action
;
1392 dpif_netlink_flow_init(request
);
1393 request
->cmd
= (put
->flags
& DPIF_FP_CREATE
1394 ? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
);
1395 request
->dp_ifindex
= dpif
->dp_ifindex
;
1396 request
->key
= put
->key
;
1397 request
->key_len
= put
->key_len
;
1398 request
->mask
= put
->mask
;
1399 request
->mask_len
= put
->mask_len
;
1400 dpif_netlink_flow_init_ufid(request
, put
->ufid
, false);
1402 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1403 request
->actions
= (put
->actions
1405 : CONST_CAST(struct nlattr
*, &dummy_action
));
1406 request
->actions_len
= put
->actions_len
;
1407 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
1408 request
->clear
= true;
1410 if (put
->flags
& DPIF_FP_PROBE
) {
1411 request
->probe
= true;
1413 request
->nlmsg_flags
= put
->flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
1417 dpif_netlink_init_flow_del__(struct dpif_netlink
*dpif
,
1418 const struct nlattr
*key
, size_t key_len
,
1419 const ovs_u128
*ufid
, bool terse
,
1420 struct dpif_netlink_flow
*request
)
1422 dpif_netlink_flow_init(request
);
1423 request
->cmd
= OVS_FLOW_CMD_DEL
;
1424 request
->dp_ifindex
= dpif
->dp_ifindex
;
1426 request
->key_len
= key_len
;
1427 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1431 dpif_netlink_init_flow_del(struct dpif_netlink
*dpif
,
1432 const struct dpif_flow_del
*del
,
1433 struct dpif_netlink_flow
*request
)
1435 dpif_netlink_init_flow_del__(dpif
, del
->key
, del
->key_len
,
1436 del
->ufid
, del
->terse
, request
);
1439 struct dpif_netlink_flow_dump
{
1440 struct dpif_flow_dump up
;
1441 struct nl_dump nl_dump
;
1445 static struct dpif_netlink_flow_dump
*
1446 dpif_netlink_flow_dump_cast(struct dpif_flow_dump
*dump
)
1448 return CONTAINER_OF(dump
, struct dpif_netlink_flow_dump
, up
);
1451 static struct dpif_flow_dump
*
1452 dpif_netlink_flow_dump_create(const struct dpif
*dpif_
, bool terse
)
1454 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1455 struct dpif_netlink_flow_dump
*dump
;
1456 struct dpif_netlink_flow request
;
1459 dump
= xmalloc(sizeof *dump
);
1460 dpif_flow_dump_init(&dump
->up
, dpif_
);
1462 dpif_netlink_flow_init(&request
);
1463 request
.cmd
= OVS_FLOW_CMD_GET
;
1464 request
.dp_ifindex
= dpif
->dp_ifindex
;
1465 request
.ufid_present
= false;
1466 request
.ufid_terse
= terse
;
1468 buf
= ofpbuf_new(1024);
1469 dpif_netlink_flow_to_ofpbuf(&request
, buf
);
1470 nl_dump_start(&dump
->nl_dump
, NETLINK_GENERIC
, buf
);
1472 atomic_init(&dump
->status
, 0);
1473 dump
->up
.terse
= terse
;
1479 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump
*dump_
)
1481 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1482 unsigned int nl_status
= nl_dump_done(&dump
->nl_dump
);
1485 /* No other thread has access to 'dump' at this point. */
1486 atomic_read_relaxed(&dump
->status
, &dump_status
);
1488 return dump_status
? dump_status
: nl_status
;
1491 struct dpif_netlink_flow_dump_thread
{
1492 struct dpif_flow_dump_thread up
;
1493 struct dpif_netlink_flow_dump
*dump
;
1494 struct dpif_netlink_flow flow
;
1495 struct dpif_flow_stats stats
;
1496 struct ofpbuf nl_flows
; /* Always used to store flows. */
1497 struct ofpbuf
*nl_actions
; /* Used if kernel does not supply actions. */
1500 static struct dpif_netlink_flow_dump_thread
*
1501 dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread
*thread
)
1503 return CONTAINER_OF(thread
, struct dpif_netlink_flow_dump_thread
, up
);
1506 static struct dpif_flow_dump_thread
*
1507 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump
*dump_
)
1509 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1510 struct dpif_netlink_flow_dump_thread
*thread
;
1512 thread
= xmalloc(sizeof *thread
);
1513 dpif_flow_dump_thread_init(&thread
->up
, &dump
->up
);
1514 thread
->dump
= dump
;
1515 ofpbuf_init(&thread
->nl_flows
, NL_DUMP_BUFSIZE
);
1516 thread
->nl_actions
= NULL
;
1522 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread
*thread_
)
1524 struct dpif_netlink_flow_dump_thread
*thread
1525 = dpif_netlink_flow_dump_thread_cast(thread_
);
1527 ofpbuf_uninit(&thread
->nl_flows
);
1528 ofpbuf_delete(thread
->nl_actions
);
1533 dpif_netlink_flow_to_dpif_flow(struct dpif
*dpif
, struct dpif_flow
*dpif_flow
,
1534 const struct dpif_netlink_flow
*datapath_flow
)
1536 dpif_flow
->key
= datapath_flow
->key
;
1537 dpif_flow
->key_len
= datapath_flow
->key_len
;
1538 dpif_flow
->mask
= datapath_flow
->mask
;
1539 dpif_flow
->mask_len
= datapath_flow
->mask_len
;
1540 dpif_flow
->actions
= datapath_flow
->actions
;
1541 dpif_flow
->actions_len
= datapath_flow
->actions_len
;
1542 dpif_flow
->ufid_present
= datapath_flow
->ufid_present
;
1543 dpif_flow
->pmd_id
= PMD_ID_NULL
;
1544 if (datapath_flow
->ufid_present
) {
1545 dpif_flow
->ufid
= datapath_flow
->ufid
;
1547 ovs_assert(datapath_flow
->key
&& datapath_flow
->key_len
);
1548 dpif_flow_hash(dpif
, datapath_flow
->key
, datapath_flow
->key_len
,
1551 dpif_netlink_flow_get_stats(datapath_flow
, &dpif_flow
->stats
);
1555 dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread
*thread_
,
1556 struct dpif_flow
*flows
, int max_flows
)
1558 struct dpif_netlink_flow_dump_thread
*thread
1559 = dpif_netlink_flow_dump_thread_cast(thread_
);
1560 struct dpif_netlink_flow_dump
*dump
= thread
->dump
;
1561 struct dpif_netlink
*dpif
= dpif_netlink_cast(thread
->up
.dpif
);
1564 ofpbuf_delete(thread
->nl_actions
);
1565 thread
->nl_actions
= NULL
;
1569 || (n_flows
< max_flows
&& thread
->nl_flows
.size
)) {
1570 struct dpif_netlink_flow datapath_flow
;
1571 struct ofpbuf nl_flow
;
1574 /* Try to grab another flow. */
1575 if (!nl_dump_next(&dump
->nl_dump
, &nl_flow
, &thread
->nl_flows
)) {
1579 /* Convert the flow to our output format. */
1580 error
= dpif_netlink_flow_from_ofpbuf(&datapath_flow
, &nl_flow
);
1582 atomic_store_relaxed(&dump
->status
, error
);
1586 if (dump
->up
.terse
|| datapath_flow
.actions
) {
1587 /* Common case: we don't want actions, or the flow includes
1589 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1592 /* Rare case: the flow does not include actions. Retrieve this
1593 * individual flow again to get the actions. */
1594 error
= dpif_netlink_flow_get(dpif
, &datapath_flow
,
1595 &datapath_flow
, &thread
->nl_actions
);
1596 if (error
== ENOENT
) {
1597 VLOG_DBG("dumped flow disappeared on get");
1600 VLOG_WARN("error fetching dumped flow: %s",
1601 ovs_strerror(error
));
1602 atomic_store_relaxed(&dump
->status
, error
);
1606 /* Save this flow. Then exit, because we only have one buffer to
1607 * handle this case. */
1608 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1617 dpif_netlink_encode_execute(int dp_ifindex
, const struct dpif_execute
*d_exec
,
1620 struct ovs_header
*k_exec
;
1623 ofpbuf_prealloc_tailroom(buf
, (64
1624 + dp_packet_size(d_exec
->packet
)
1625 + ODP_KEY_METADATA_SIZE
1626 + d_exec
->actions_len
));
1628 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
1629 OVS_PACKET_CMD_EXECUTE
, OVS_PACKET_VERSION
);
1631 k_exec
= ofpbuf_put_uninit(buf
, sizeof *k_exec
);
1632 k_exec
->dp_ifindex
= dp_ifindex
;
1634 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
,
1635 dp_packet_data(d_exec
->packet
),
1636 dp_packet_size(d_exec
->packet
));
1638 key_ofs
= nl_msg_start_nested(buf
, OVS_PACKET_ATTR_KEY
);
1639 odp_key_from_pkt_metadata(buf
, &d_exec
->packet
->md
);
1640 nl_msg_end_nested(buf
, key_ofs
);
1642 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
,
1643 d_exec
->actions
, d_exec
->actions_len
);
1644 if (d_exec
->probe
) {
1645 nl_msg_put_flag(buf
, OVS_PACKET_ATTR_PROBE
);
1648 nl_msg_put_u16(buf
, OVS_PACKET_ATTR_MRU
, d_exec
->mtu
);
1652 /* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1653 * Returns the number actually executed (at least 1, if 'n_ops' is
1656 dpif_netlink_operate__(struct dpif_netlink
*dpif
,
1657 struct dpif_op
**ops
, size_t n_ops
)
1659 enum { MAX_OPS
= 50 };
1662 struct nl_transaction txn
;
1664 struct ofpbuf request
;
1665 uint64_t request_stub
[1024 / 8];
1667 struct ofpbuf reply
;
1668 uint64_t reply_stub
[1024 / 8];
1671 struct nl_transaction
*txnsp
[MAX_OPS
];
1674 n_ops
= MIN(n_ops
, MAX_OPS
);
1675 for (i
= 0; i
< n_ops
; i
++) {
1676 struct op_auxdata
*aux
= &auxes
[i
];
1677 struct dpif_op
*op
= ops
[i
];
1678 struct dpif_flow_put
*put
;
1679 struct dpif_flow_del
*del
;
1680 struct dpif_flow_get
*get
;
1681 struct dpif_netlink_flow flow
;
1683 ofpbuf_use_stub(&aux
->request
,
1684 aux
->request_stub
, sizeof aux
->request_stub
);
1685 aux
->txn
.request
= &aux
->request
;
1687 ofpbuf_use_stub(&aux
->reply
, aux
->reply_stub
, sizeof aux
->reply_stub
);
1688 aux
->txn
.reply
= NULL
;
1691 case DPIF_OP_FLOW_PUT
:
1692 put
= &op
->u
.flow_put
;
1693 dpif_netlink_init_flow_put(dpif
, put
, &flow
);
1695 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1696 aux
->txn
.reply
= &aux
->reply
;
1698 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1701 case DPIF_OP_FLOW_DEL
:
1702 del
= &op
->u
.flow_del
;
1703 dpif_netlink_init_flow_del(dpif
, del
, &flow
);
1705 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1706 aux
->txn
.reply
= &aux
->reply
;
1708 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1711 case DPIF_OP_EXECUTE
:
1712 /* Can't execute a packet that won't fit in a Netlink attribute. */
1713 if (OVS_UNLIKELY(nl_attr_oversized(
1714 dp_packet_size(op
->u
.execute
.packet
)))) {
1715 /* Report an error immediately if this is the first operation.
1716 * Otherwise the easiest thing to do is to postpone to the next
1717 * call (when this will be the first operation). */
1719 VLOG_ERR_RL(&error_rl
,
1720 "dropping oversized %"PRIu32
"-byte packet",
1721 dp_packet_size(op
->u
.execute
.packet
));
1722 op
->error
= ENOBUFS
;
1727 dpif_netlink_encode_execute(dpif
->dp_ifindex
, &op
->u
.execute
,
1732 case DPIF_OP_FLOW_GET
:
1733 get
= &op
->u
.flow_get
;
1734 dpif_netlink_init_flow_get(dpif
, get
, &flow
);
1735 aux
->txn
.reply
= get
->buffer
;
1736 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1744 for (i
= 0; i
< n_ops
; i
++) {
1745 txnsp
[i
] = &auxes
[i
].txn
;
1747 nl_transact_multiple(NETLINK_GENERIC
, txnsp
, n_ops
);
1749 for (i
= 0; i
< n_ops
; i
++) {
1750 struct op_auxdata
*aux
= &auxes
[i
];
1751 struct nl_transaction
*txn
= &auxes
[i
].txn
;
1752 struct dpif_op
*op
= ops
[i
];
1753 struct dpif_flow_put
*put
;
1754 struct dpif_flow_del
*del
;
1755 struct dpif_flow_get
*get
;
1757 op
->error
= txn
->error
;
1760 case DPIF_OP_FLOW_PUT
:
1761 put
= &op
->u
.flow_put
;
1764 struct dpif_netlink_flow reply
;
1766 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1769 dpif_netlink_flow_get_stats(&reply
, put
->stats
);
1775 case DPIF_OP_FLOW_DEL
:
1776 del
= &op
->u
.flow_del
;
1779 struct dpif_netlink_flow reply
;
1781 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1784 dpif_netlink_flow_get_stats(&reply
, del
->stats
);
1790 case DPIF_OP_EXECUTE
:
1793 case DPIF_OP_FLOW_GET
:
1794 get
= &op
->u
.flow_get
;
1796 struct dpif_netlink_flow reply
;
1798 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
, txn
->reply
);
1800 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, get
->flow
,
1810 ofpbuf_uninit(&aux
->request
);
1811 ofpbuf_uninit(&aux
->reply
);
/* Executes all 'n_ops' operations in 'ops' against 'dpif_', in as many
 * batches as dpif_netlink_operate__() needs. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t remaining;

    for (remaining = n_ops; remaining > 0; ) {
        size_t done = dpif_netlink_operate__(dpif, ops, remaining);

        ops += done;
        remaining -= done;
    }
}
1831 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1833 vport_delete_sock_pool(handler
);
1837 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1839 return vport_create_sock_pool(handler
);
1844 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1846 handler
->epoll_fd
= epoll_create(10);
1847 return handler
->epoll_fd
< 0 ? errno
: 0;
1851 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1853 close(handler
->epoll_fd
);
1857 /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
1858 * currently in 'dpif' in the kernel, by adding a new set of channels for
1859 * any kernel vport that lacks one and deleting any channels that have no
1860 * backing kernel vports. */
1862 dpif_netlink_refresh_channels(struct dpif_netlink
*dpif
, uint32_t n_handlers
)
1863 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1865 unsigned long int *keep_channels
;
1866 struct dpif_netlink_vport vport
;
1867 size_t keep_channels_nbits
;
1868 struct nl_dump dump
;
1869 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
1874 ovs_assert(!WINDOWS
|| n_handlers
<= 1);
1875 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
1877 if (dpif
->n_handlers
!= n_handlers
) {
1878 destroy_all_channels(dpif
);
1879 dpif
->handlers
= xzalloc(n_handlers
* sizeof *dpif
->handlers
);
1880 for (i
= 0; i
< n_handlers
; i
++) {
1882 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1884 error
= dpif_netlink_handler_init(handler
);
1887 struct dpif_handler
*tmp
= &dpif
->handlers
[i
];
1890 for (j
= 0; j
< i
; j
++) {
1891 dpif_netlink_handler_uninit(tmp
);
1893 free(dpif
->handlers
);
1894 dpif
->handlers
= NULL
;
1899 dpif
->n_handlers
= n_handlers
;
1902 for (i
= 0; i
< n_handlers
; i
++) {
1903 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1905 handler
->event_offset
= handler
->n_events
= 0;
1908 keep_channels_nbits
= dpif
->uc_array_size
;
1909 keep_channels
= bitmap_allocate(keep_channels_nbits
);
1911 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
1912 dpif_netlink_port_dump_start__(dpif
, &dump
);
1913 while (!dpif_netlink_port_dump_next__(dpif
, &dump
, &vport
, &buf
)) {
1914 uint32_t port_no
= odp_to_u32(vport
.port_no
);
1915 uint32_t *upcall_pids
= NULL
;
1918 if (port_no
>= dpif
->uc_array_size
1919 || !vport_get_pids(dpif
, port_no
, &upcall_pids
)) {
1920 struct nl_sock
**socksp
= vport_create_socksp(dpif
, &error
);
1926 error
= vport_add_channels(dpif
, vport
.port_no
, socksp
);
1928 VLOG_INFO("%s: could not add channels for port %s",
1929 dpif_name(&dpif
->dpif
), vport
.name
);
1930 vport_del_socksp(dpif
, socksp
);
1934 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
1938 /* Configure the vport to deliver misses to 'sock'. */
1939 if (vport
.upcall_pids
[0] == 0
1940 || vport
.n_upcall_pids
!= dpif
->n_handlers
1941 || memcmp(upcall_pids
, vport
.upcall_pids
, n_handlers
* sizeof
1943 struct dpif_netlink_vport vport_request
;
1945 dpif_netlink_vport_init(&vport_request
);
1946 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
1947 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
1948 vport_request
.port_no
= vport
.port_no
;
1949 vport_request
.n_upcall_pids
= dpif
->n_handlers
;
1950 vport_request
.upcall_pids
= upcall_pids
;
1951 error
= dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
1953 VLOG_WARN_RL(&error_rl
,
1954 "%s: failed to set upcall pid on port: %s",
1955 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1957 if (error
!= ENODEV
&& error
!= ENOENT
) {
1960 /* The vport isn't really there, even though the dump says
1961 * it is. Probably we just hit a race after a port
1968 if (port_no
< keep_channels_nbits
) {
1969 bitmap_set1(keep_channels
, port_no
);
1976 vport_del_channels(dpif
, vport
.port_no
);
1978 nl_dump_done(&dump
);
1979 ofpbuf_uninit(&buf
);
1981 /* Discard any saved channels that we didn't reuse. */
1982 for (i
= 0; i
< keep_channels_nbits
; i
++) {
1983 if (!bitmap_is_set(keep_channels
, i
)) {
1984 vport_del_channels(dpif
, u32_to_odp(i
));
1987 free(keep_channels
);
1993 dpif_netlink_recv_set__(struct dpif_netlink
*dpif
, bool enable
)
1994 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1996 if ((dpif
->handlers
!= NULL
) == enable
) {
1998 } else if (!enable
) {
1999 destroy_all_channels(dpif
);
2002 return dpif_netlink_refresh_channels(dpif
, 1);
2007 dpif_netlink_recv_set(struct dpif
*dpif_
, bool enable
)
2009 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2012 fat_rwlock_wrlock(&dpif
->upcall_lock
);
2013 error
= dpif_netlink_recv_set__(dpif
, enable
);
2014 fat_rwlock_unlock(&dpif
->upcall_lock
);
2020 dpif_netlink_handlers_set(struct dpif
*dpif_
, uint32_t n_handlers
)
2022 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2026 /* Multiple upcall handlers will be supported once kernel datapath supports
2028 if (n_handlers
> 1) {
2033 fat_rwlock_wrlock(&dpif
->upcall_lock
);
2034 if (dpif
->handlers
) {
2035 error
= dpif_netlink_refresh_channels(dpif
, n_handlers
);
2037 fat_rwlock_unlock(&dpif
->upcall_lock
);
2043 dpif_netlink_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
2044 uint32_t queue_id
, uint32_t *priority
)
2046 if (queue_id
< 0xf000) {
2047 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
2055 parse_odp_packet(const struct dpif_netlink
*dpif
, struct ofpbuf
*buf
,
2056 struct dpif_upcall
*upcall
, int *dp_ifindex
)
2058 static const struct nl_policy ovs_packet_policy
[] = {
2059 /* Always present. */
2060 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
2061 .min_len
= ETH_HEADER_LEN
},
2062 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
2064 /* OVS_PACKET_CMD_ACTION only. */
2065 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_UNSPEC
, .optional
= true },
2066 [OVS_PACKET_ATTR_EGRESS_TUN_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
2067 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2068 [OVS_PACKET_ATTR_MRU
] = { .type
= NL_A_U16
, .optional
= true }
2071 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2072 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2073 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2074 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2076 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
2077 if (!nlmsg
|| !genl
|| !ovs_header
2078 || nlmsg
->nlmsg_type
!= ovs_packet_family
2079 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
2080 ARRAY_SIZE(ovs_packet_policy
))) {
2084 int type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
2085 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
2091 /* (Re)set ALL fields of '*upcall' on successful return. */
2092 upcall
->type
= type
;
2093 upcall
->key
= CONST_CAST(struct nlattr
*,
2094 nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]));
2095 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
2096 dpif_flow_hash(&dpif
->dpif
, upcall
->key
, upcall
->key_len
, &upcall
->ufid
);
2097 upcall
->userdata
= a
[OVS_PACKET_ATTR_USERDATA
];
2098 upcall
->out_tun_key
= a
[OVS_PACKET_ATTR_EGRESS_TUN_KEY
];
2099 upcall
->actions
= a
[OVS_PACKET_ATTR_ACTIONS
];
2100 upcall
->mru
= a
[OVS_PACKET_ATTR_MRU
];
2102 /* Allow overwriting the netlink attribute header without reallocating. */
2103 dp_packet_use_stub(&upcall
->packet
,
2104 CONST_CAST(struct nlattr
*,
2105 nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
])) - 1,
2106 nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]) +
2107 sizeof(struct nlattr
));
2108 dp_packet_set_data(&upcall
->packet
,
2109 (char *)dp_packet_data(&upcall
->packet
) + sizeof(struct nlattr
));
2110 dp_packet_set_size(&upcall
->packet
, nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]));
2112 if (nl_attr_find__(upcall
->key
, upcall
->key_len
, OVS_KEY_ATTR_ETHERNET
)) {
2113 /* Ethernet frame */
2114 upcall
->packet
.packet_type
= htonl(PT_ETH
);
2116 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2117 ovs_be16 ethertype
= 0;
2118 const struct nlattr
*et_nla
= nl_attr_find__(upcall
->key
,
2120 OVS_KEY_ATTR_ETHERTYPE
);
2122 ethertype
= nl_attr_get_be16(et_nla
);
2124 upcall
->packet
.packet_type
= PACKET_TYPE_BE(OFPHTN_ETHERTYPE
,
2126 dp_packet_set_l3(&upcall
->packet
, dp_packet_data(&upcall
->packet
));
2129 *dp_ifindex
= ovs_header
->dp_ifindex
;
2135 #define PACKET_RECV_BATCH_SIZE 50
2137 dpif_netlink_recv_windows(struct dpif_netlink
*dpif
, uint32_t handler_id
,
2138 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2139 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2141 struct dpif_handler
*handler
;
2143 struct dpif_windows_vport_sock
*sock_pool
;
2146 if (!dpif
->handlers
) {
2150 /* Only one handler is supported currently. */
2151 if (handler_id
>= 1) {
2155 if (handler_id
>= dpif
->n_handlers
) {
2159 handler
= &dpif
->handlers
[handler_id
];
2160 sock_pool
= handler
->vport_sock_pool
;
2162 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
2167 if (++read_tries
> PACKET_RECV_BATCH_SIZE
) {
2171 error
= nl_sock_recv(sock_pool
[i
].nl_sock
, buf
, false);
2172 if (error
== ENOBUFS
) {
2173 /* ENOBUFS typically means that we've received so many
2174 * packets that the buffer overflowed. Try again
2175 * immediately because there's almost certainly a packet
2176 * waiting for us. */
2177 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2181 /* XXX: ch->last_poll = time_msec(); */
2183 if (error
== EAGAIN
) {
2189 error
= parse_odp_packet(dpif
, buf
, upcall
, &dp_ifindex
);
2190 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
2202 dpif_netlink_recv__(struct dpif_netlink
*dpif
, uint32_t handler_id
,
2203 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2204 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2206 struct dpif_handler
*handler
;
2209 if (!dpif
->handlers
|| handler_id
>= dpif
->n_handlers
) {
2213 handler
= &dpif
->handlers
[handler_id
];
2214 if (handler
->event_offset
>= handler
->n_events
) {
2217 handler
->event_offset
= handler
->n_events
= 0;
2220 retval
= epoll_wait(handler
->epoll_fd
, handler
->epoll_events
,
2221 dpif
->uc_array_size
, 0);
2222 } while (retval
< 0 && errno
== EINTR
);
2225 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2226 VLOG_WARN_RL(&rl
, "epoll_wait failed (%s)", ovs_strerror(errno
));
2227 } else if (retval
> 0) {
2228 handler
->n_events
= retval
;
2232 while (handler
->event_offset
< handler
->n_events
) {
2233 int idx
= handler
->epoll_events
[handler
->event_offset
].data
.u32
;
2234 struct dpif_channel
*ch
= &dpif
->handlers
[handler_id
].channels
[idx
];
2236 handler
->event_offset
++;
2242 if (++read_tries
> 50) {
2246 error
= nl_sock_recv(ch
->sock
, buf
, false);
2247 if (error
== ENOBUFS
) {
2248 /* ENOBUFS typically means that we've received so many
2249 * packets that the buffer overflowed. Try again
2250 * immediately because there's almost certainly a packet
2251 * waiting for us. */
2252 report_loss(dpif
, ch
, idx
, handler_id
);
2256 ch
->last_poll
= time_msec();
2258 if (error
== EAGAIN
) {
2264 error
= parse_odp_packet(dpif
, buf
, upcall
, &dp_ifindex
);
2265 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
2278 dpif_netlink_recv(struct dpif
*dpif_
, uint32_t handler_id
,
2279 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2281 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2284 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2286 error
= dpif_netlink_recv_windows(dpif
, handler_id
, upcall
, buf
);
2288 error
= dpif_netlink_recv__(dpif
, handler_id
, upcall
, buf
);
2290 fat_rwlock_unlock(&dpif
->upcall_lock
);
2296 dpif_netlink_recv_wait__(struct dpif_netlink
*dpif
, uint32_t handler_id
)
2297 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2301 struct dpif_windows_vport_sock
*sock_pool
=
2302 dpif
->handlers
[handler_id
].vport_sock_pool
;
2304 /* Only one handler is supported currently. */
2305 if (handler_id
>= 1) {
2309 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
2310 nl_sock_wait(sock_pool
[i
].nl_sock
, POLLIN
);
2313 if (dpif
->handlers
&& handler_id
< dpif
->n_handlers
) {
2314 struct dpif_handler
*handler
= &dpif
->handlers
[handler_id
];
2316 poll_fd_wait(handler
->epoll_fd
, POLLIN
);
2322 dpif_netlink_recv_wait(struct dpif
*dpif_
, uint32_t handler_id
)
2324 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2326 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2327 dpif_netlink_recv_wait__(dpif
, handler_id
);
2328 fat_rwlock_unlock(&dpif
->upcall_lock
);
2332 dpif_netlink_recv_purge__(struct dpif_netlink
*dpif
)
2333 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
2335 if (dpif
->handlers
) {
2338 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
2339 if (!dpif
->handlers
[0].channels
[i
].sock
) {
2343 for (j
= 0; j
< dpif
->n_handlers
; j
++) {
2344 nl_sock_drain(dpif
->handlers
[j
].channels
[i
].sock
);
2351 dpif_netlink_recv_purge(struct dpif
*dpif_
)
2353 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2355 fat_rwlock_wrlock(&dpif
->upcall_lock
);
2356 dpif_netlink_recv_purge__(dpif
);
2357 fat_rwlock_unlock(&dpif
->upcall_lock
);
/* Returns a malloc'd copy of the first line of
 * /sys/module/openvswitch/version (without its newline), or NULL if the file
 * cannot be opened or read, or when not built for Linux.  The caller owns the
 * returned string. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *f;

    f = fopen(LINUX_DATAPATH_VERSION_FILE, "r");
    if (f) {
        char version[MAX_VERSION_STR_SIZE];

        if (fgets(version, MAX_VERSION_STR_SIZE, f)) {
            char *newline = strchr(version, '\n');

            /* fgets() keeps the line terminator; cut it off. */
            if (newline) {
                *newline = '\0';
            }
            version_str = xstrdup(version);
        }
        fclose(f);
    }
#endif

    return version_str;
}
2390 struct dpif_netlink_ct_dump_state
{
2391 struct ct_dpif_dump_state up
;
2392 struct nl_ct_dump_state
*nl_ct_dump
;
2396 dpif_netlink_ct_dump_start(struct dpif
*dpif OVS_UNUSED
,
2397 struct ct_dpif_dump_state
**dump_
,
2398 const uint16_t *zone
)
2400 struct dpif_netlink_ct_dump_state
*dump
;
2403 dump
= xzalloc(sizeof *dump
);
2404 err
= nl_ct_dump_start(&dump
->nl_ct_dump
, zone
);
2416 dpif_netlink_ct_dump_next(struct dpif
*dpif OVS_UNUSED
,
2417 struct ct_dpif_dump_state
*dump_
,
2418 struct ct_dpif_entry
*entry
)
2420 struct dpif_netlink_ct_dump_state
*dump
;
2422 INIT_CONTAINER(dump
, dump_
, up
);
2424 return nl_ct_dump_next(dump
->nl_ct_dump
, entry
);
2428 dpif_netlink_ct_dump_done(struct dpif
*dpif OVS_UNUSED
,
2429 struct ct_dpif_dump_state
*dump_
)
2431 struct dpif_netlink_ct_dump_state
*dump
;
2434 INIT_CONTAINER(dump
, dump_
, up
);
2436 err
= nl_ct_dump_done(dump
->nl_ct_dump
);
2442 dpif_netlink_ct_flush(struct dpif
*dpif OVS_UNUSED
, const uint16_t *zone
)
2445 return nl_ct_flush_zone(*zone
);
2447 return nl_ct_flush();
2454 dpif_netlink_meter_get_features(const struct dpif
* dpif OVS_UNUSED
,
2455 struct ofputil_meter_features
*features
)
2457 features
->max_meters
= 0;
2458 features
->band_types
= 0;
2459 features
->capabilities
= 0;
2460 features
->max_bands
= 0;
2461 features
->max_color
= 0;
2465 dpif_netlink_meter_set(struct dpif
*dpif OVS_UNUSED
,
2466 ofproto_meter_id
*meter_id OVS_UNUSED
,
2467 struct ofputil_meter_config
*config OVS_UNUSED
)
2469 return EFBIG
; /* meter_id out of range */
2473 dpif_netlink_meter_get(const struct dpif
*dpif OVS_UNUSED
,
2474 ofproto_meter_id meter_id OVS_UNUSED
,
2475 struct ofputil_meter_stats
*stats OVS_UNUSED
,
2476 uint16_t n_bands OVS_UNUSED
)
2478 return EFBIG
; /* meter_id out of range */
2482 dpif_netlink_meter_del(struct dpif
*dpif OVS_UNUSED
,
2483 ofproto_meter_id meter_id OVS_UNUSED
,
2484 struct ofputil_meter_stats
*stats OVS_UNUSED
,
2485 uint16_t n_bands OVS_UNUSED
)
2487 return EFBIG
; /* meter_id out of range */
2491 const struct dpif_class dpif_netlink_class
= {
2494 dpif_netlink_enumerate
,
2498 dpif_netlink_destroy
,
2501 dpif_netlink_get_stats
,
2502 dpif_netlink_port_add
,
2503 dpif_netlink_port_del
,
2504 NULL
, /* port_set_config */
2505 dpif_netlink_port_query_by_number
,
2506 dpif_netlink_port_query_by_name
,
2507 dpif_netlink_port_get_pid
,
2508 dpif_netlink_port_dump_start
,
2509 dpif_netlink_port_dump_next
,
2510 dpif_netlink_port_dump_done
,
2511 dpif_netlink_port_poll
,
2512 dpif_netlink_port_poll_wait
,
2513 dpif_netlink_flow_flush
,
2514 dpif_netlink_flow_dump_create
,
2515 dpif_netlink_flow_dump_destroy
,
2516 dpif_netlink_flow_dump_thread_create
,
2517 dpif_netlink_flow_dump_thread_destroy
,
2518 dpif_netlink_flow_dump_next
,
2519 dpif_netlink_operate
,
2520 dpif_netlink_recv_set
,
2521 dpif_netlink_handlers_set
,
2522 NULL
, /* set_config */
2523 dpif_netlink_queue_to_priority
,
2525 dpif_netlink_recv_wait
,
2526 dpif_netlink_recv_purge
,
2527 NULL
, /* register_dp_purge_cb */
2528 NULL
, /* register_upcall_cb */
2529 NULL
, /* enable_upcall */
2530 NULL
, /* disable_upcall */
2531 dpif_netlink_get_datapath_version
, /* get_datapath_version */
2532 dpif_netlink_ct_dump_start
,
2533 dpif_netlink_ct_dump_next
,
2534 dpif_netlink_ct_dump_done
,
2535 dpif_netlink_ct_flush
,
2536 dpif_netlink_meter_get_features
,
2537 dpif_netlink_meter_set
,
2538 dpif_netlink_meter_get
,
2539 dpif_netlink_meter_del
,
2543 dpif_netlink_init(void)
2545 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
2548 if (ovsthread_once_start(&once
)) {
2549 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
2550 &ovs_datapath_family
);
2552 VLOG_WARN("Generic Netlink family '%s' does not exist. "
2553 "The Open vSwitch kernel module is probably not loaded.",
2554 OVS_DATAPATH_FAMILY
);
2557 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
2560 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
2563 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
2564 &ovs_packet_family
);
2567 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
2568 &ovs_vport_mcgroup
);
2571 ovs_tunnels_out_of_tree
= dpif_netlink_rtnl_probe_oot_tunnels();
2573 ovsthread_once_done(&once
);
2580 dpif_netlink_is_internal_device(const char *name
)
2582 struct dpif_netlink_vport reply
;
2586 error
= dpif_netlink_vport_get(name
, &reply
, &buf
);
2589 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
2590 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
2591 name
, ovs_strerror(error
));
2594 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
2597 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2598 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
2599 * positive errno value.
2601 * 'vport' will contain pointers into 'buf', so the caller should not free
2602 * 'buf' while 'vport' is still in use. */
2604 dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*vport
,
2605 const struct ofpbuf
*buf
)
2607 static const struct nl_policy ovs_vport_policy
[] = {
2608 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
2609 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
2610 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2611 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_UNSPEC
},
2612 [OVS_VPORT_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_vport_stats
),
2614 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2617 dpif_netlink_vport_init(vport
);
2619 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2620 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2621 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2622 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2624 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
2625 if (!nlmsg
|| !genl
|| !ovs_header
2626 || nlmsg
->nlmsg_type
!= ovs_vport_family
2627 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
2628 ARRAY_SIZE(ovs_vport_policy
))) {
2632 vport
->cmd
= genl
->cmd
;
2633 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
2634 vport
->port_no
= nl_attr_get_odp_port(a
[OVS_VPORT_ATTR_PORT_NO
]);
2635 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
2636 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
2637 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
2638 vport
->n_upcall_pids
= nl_attr_get_size(a
[OVS_VPORT_ATTR_UPCALL_PID
])
2639 / (sizeof *vport
->upcall_pids
);
2640 vport
->upcall_pids
= nl_attr_get(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
2643 if (a
[OVS_VPORT_ATTR_STATS
]) {
2644 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
2646 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
2647 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
2648 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
2653 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2654 * followed by Netlink attributes corresponding to 'vport'. */
2656 dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*vport
,
2659 struct ovs_header
*ovs_header
;
2661 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
2662 vport
->cmd
, OVS_VPORT_VERSION
);
2664 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2665 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
2667 if (vport
->port_no
!= ODPP_NONE
) {
2668 nl_msg_put_odp_port(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
2671 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
2672 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
2676 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
2679 if (vport
->upcall_pids
) {
2680 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_UPCALL_PID
,
2682 vport
->n_upcall_pids
* sizeof *vport
->upcall_pids
);
2686 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
2687 vport
->stats
, sizeof *vport
->stats
);
2690 if (vport
->options
) {
2691 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
2692 vport
->options
, vport
->options_len
);
2696 /* Clears 'vport' to "empty" values. */
2698 dpif_netlink_vport_init(struct dpif_netlink_vport
*vport
)
2700 memset(vport
, 0, sizeof *vport
);
2701 vport
->port_no
= ODPP_NONE
;
2704 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2705 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2706 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2707 * result of the command is expected to be an ovs_vport also, which is decoded
2708 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2709 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2711 dpif_netlink_vport_transact(const struct dpif_netlink_vport
*request
,
2712 struct dpif_netlink_vport
*reply
,
2713 struct ofpbuf
**bufp
)
2715 struct ofpbuf
*request_buf
;
2718 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2720 error
= dpif_netlink_init();
2724 dpif_netlink_vport_init(reply
);
2729 request_buf
= ofpbuf_new(1024);
2730 dpif_netlink_vport_to_ofpbuf(request
, request_buf
);
2731 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2732 ofpbuf_delete(request_buf
);
2736 error
= dpif_netlink_vport_from_ofpbuf(reply
, *bufp
);
2739 dpif_netlink_vport_init(reply
);
2740 ofpbuf_delete(*bufp
);
2747 /* Obtains information about the kernel vport named 'name' and stores it into
2748 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
2749 * longer needed ('reply' will contain pointers into '*bufp'). */
2751 dpif_netlink_vport_get(const char *name
, struct dpif_netlink_vport
*reply
,
2752 struct ofpbuf
**bufp
)
2754 struct dpif_netlink_vport request
;
2756 dpif_netlink_vport_init(&request
);
2757 request
.cmd
= OVS_VPORT_CMD_GET
;
2758 request
.name
= name
;
2760 return dpif_netlink_vport_transact(&request
, reply
, bufp
);
2763 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2764 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
2765 * positive errno value.
2767 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
2768 * while 'dp' is still in use. */
2770 dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp
*dp
, const struct ofpbuf
*buf
)
2772 static const struct nl_policy ovs_datapath_policy
[] = {
2773 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2774 [OVS_DP_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_dp_stats
),
2776 [OVS_DP_ATTR_MEGAFLOW_STATS
] = {
2777 NL_POLICY_FOR(struct ovs_dp_megaflow_stats
),
2781 dpif_netlink_dp_init(dp
);
2783 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2784 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2785 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2786 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2788 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
2789 if (!nlmsg
|| !genl
|| !ovs_header
2790 || nlmsg
->nlmsg_type
!= ovs_datapath_family
2791 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
2792 ARRAY_SIZE(ovs_datapath_policy
))) {
2796 dp
->cmd
= genl
->cmd
;
2797 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
2798 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
2799 if (a
[OVS_DP_ATTR_STATS
]) {
2800 dp
->stats
= nl_attr_get(a
[OVS_DP_ATTR_STATS
]);
2803 if (a
[OVS_DP_ATTR_MEGAFLOW_STATS
]) {
2804 dp
->megaflow_stats
= nl_attr_get(a
[OVS_DP_ATTR_MEGAFLOW_STATS
]);
2810 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
2812 dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp
*dp
, struct ofpbuf
*buf
)
2814 struct ovs_header
*ovs_header
;
2816 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
2817 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
,
2818 OVS_DATAPATH_VERSION
);
2820 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2821 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
2824 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
2827 if (dp
->upcall_pid
) {
2828 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, *dp
->upcall_pid
);
2831 if (dp
->user_features
) {
2832 nl_msg_put_u32(buf
, OVS_DP_ATTR_USER_FEATURES
, dp
->user_features
);
2835 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
2838 /* Clears 'dp' to "empty" values. */
2840 dpif_netlink_dp_init(struct dpif_netlink_dp
*dp
)
2842 memset(dp
, 0, sizeof *dp
);
2846 dpif_netlink_dp_dump_start(struct nl_dump
*dump
)
2848 struct dpif_netlink_dp request
;
2851 dpif_netlink_dp_init(&request
);
2852 request
.cmd
= OVS_DP_CMD_GET
;
2854 buf
= ofpbuf_new(1024);
2855 dpif_netlink_dp_to_ofpbuf(&request
, buf
);
2856 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
2860 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2861 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2862 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2863 * result of the command is expected to be of the same form, which is decoded
2864 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2865 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2867 dpif_netlink_dp_transact(const struct dpif_netlink_dp
*request
,
2868 struct dpif_netlink_dp
*reply
, struct ofpbuf
**bufp
)
2870 struct ofpbuf
*request_buf
;
2873 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2875 request_buf
= ofpbuf_new(1024);
2876 dpif_netlink_dp_to_ofpbuf(request
, request_buf
);
2877 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2878 ofpbuf_delete(request_buf
);
2881 dpif_netlink_dp_init(reply
);
2883 error
= dpif_netlink_dp_from_ofpbuf(reply
, *bufp
);
2886 ofpbuf_delete(*bufp
);
2893 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
2894 * The caller must free '*bufp' when the reply is no longer needed ('reply'
2895 * will contain pointers into '*bufp'). */
2897 dpif_netlink_dp_get(const struct dpif
*dpif_
, struct dpif_netlink_dp
*reply
,
2898 struct ofpbuf
**bufp
)
2900 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2901 struct dpif_netlink_dp request
;
2903 dpif_netlink_dp_init(&request
);
2904 request
.cmd
= OVS_DP_CMD_GET
;
2905 request
.dp_ifindex
= dpif
->dp_ifindex
;
2907 return dpif_netlink_dp_transact(&request
, reply
, bufp
);
2910 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2911 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
2912 * positive errno value.
2914 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
2915 * while 'flow' is still in use. */
2917 dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*flow
,
2918 const struct ofpbuf
*buf
)
2920 static const struct nl_policy ovs_flow_policy
[__OVS_FLOW_ATTR_MAX
] = {
2921 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
2922 [OVS_FLOW_ATTR_MASK
] = { .type
= NL_A_NESTED
, .optional
= true },
2923 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2924 [OVS_FLOW_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_flow_stats
),
2926 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
2927 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
2928 [OVS_FLOW_ATTR_UFID
] = { .type
= NL_A_UNSPEC
, .optional
= true,
2929 .min_len
= sizeof(ovs_u128
) },
2930 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
2931 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
2932 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
2935 dpif_netlink_flow_init(flow
);
2937 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2938 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2939 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2940 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2942 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
2943 if (!nlmsg
|| !genl
|| !ovs_header
2944 || nlmsg
->nlmsg_type
!= ovs_flow_family
2945 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
2946 ARRAY_SIZE(ovs_flow_policy
))) {
2949 if (!a
[OVS_FLOW_ATTR_KEY
] && !a
[OVS_FLOW_ATTR_UFID
]) {
2953 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
2954 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
2955 if (a
[OVS_FLOW_ATTR_KEY
]) {
2956 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
2957 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
2960 if (a
[OVS_FLOW_ATTR_UFID
]) {
2961 const ovs_u128
*ufid
;
2963 ufid
= nl_attr_get_unspec(a
[OVS_FLOW_ATTR_UFID
],
2964 nl_attr_get_size(a
[OVS_FLOW_ATTR_UFID
]));
2966 flow
->ufid_present
= true;
2968 if (a
[OVS_FLOW_ATTR_MASK
]) {
2969 flow
->mask
= nl_attr_get(a
[OVS_FLOW_ATTR_MASK
]);
2970 flow
->mask_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_MASK
]);
2972 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
2973 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
2974 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
2976 if (a
[OVS_FLOW_ATTR_STATS
]) {
2977 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
2979 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
2980 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
2982 if (a
[OVS_FLOW_ATTR_USED
]) {
2983 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
2988 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2989 * followed by Netlink attributes corresponding to 'flow'. */
2991 dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*flow
,
2994 struct ovs_header
*ovs_header
;
2996 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
2997 NLM_F_REQUEST
| flow
->nlmsg_flags
,
2998 flow
->cmd
, OVS_FLOW_VERSION
);
3000 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
3001 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
3003 if (flow
->ufid_present
) {
3004 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_UFID
, &flow
->ufid
,
3007 if (flow
->ufid_terse
) {
3008 nl_msg_put_u32(buf
, OVS_FLOW_ATTR_UFID_FLAGS
,
3009 OVS_UFID_F_OMIT_KEY
| OVS_UFID_F_OMIT_MASK
3010 | OVS_UFID_F_OMIT_ACTIONS
);
3012 if (!flow
->ufid_terse
|| !flow
->ufid_present
) {
3013 if (flow
->key_len
) {
3014 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
,
3015 flow
->key
, flow
->key_len
);
3018 if (flow
->mask_len
) {
3019 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_MASK
,
3020 flow
->mask
, flow
->mask_len
);
3022 if (flow
->actions
|| flow
->actions_len
) {
3023 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
3024 flow
->actions
, flow
->actions_len
);
3028 /* We never need to send these to the kernel. */
3029 ovs_assert(!flow
->stats
);
3030 ovs_assert(!flow
->tcp_flags
);
3031 ovs_assert(!flow
->used
);
3034 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
3037 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_PROBE
);
3041 /* Clears 'flow' to "empty" values. */
3043 dpif_netlink_flow_init(struct dpif_netlink_flow
*flow
)
3045 memset(flow
, 0, sizeof *flow
);
3048 /* Executes 'request' in the kernel datapath. If the command fails, returns a
3049 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3050 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
3051 * result of the command is expected to be a flow also, which is decoded and
3052 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
3053 * is no longer needed ('reply' will contain pointers into '*bufp'). */
3055 dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
3056 struct dpif_netlink_flow
*reply
,
3057 struct ofpbuf
**bufp
)
3059 struct ofpbuf
*request_buf
;
3062 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
3065 request
->nlmsg_flags
|= NLM_F_ECHO
;
3068 request_buf
= ofpbuf_new(1024);
3069 dpif_netlink_flow_to_ofpbuf(request
, request_buf
);
3070 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
3071 ofpbuf_delete(request_buf
);
3075 error
= dpif_netlink_flow_from_ofpbuf(reply
, *bufp
);
3078 dpif_netlink_flow_init(reply
);
3079 ofpbuf_delete(*bufp
);
3087 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*flow
,
3088 struct dpif_flow_stats
*stats
)
3091 stats
->n_packets
= get_32aligned_u64(&flow
->stats
->n_packets
);
3092 stats
->n_bytes
= get_32aligned_u64(&flow
->stats
->n_bytes
);
3094 stats
->n_packets
= 0;
3097 stats
->used
= flow
->used
? get_32aligned_u64(flow
->used
) : 0;
3098 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;
3101 /* Logs information about a packet that was recently lost in 'ch' (in
3104 report_loss(struct dpif_netlink
*dpif
, struct dpif_channel
*ch
, uint32_t ch_idx
,
3105 uint32_t handler_id
)
3107 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 5);
3110 if (VLOG_DROP_WARN(&rl
)) {
3115 if (ch
->last_poll
!= LLONG_MIN
) {
3116 ds_put_format(&s
, " (last polled %lld ms ago)",
3117 time_msec() - ch
->last_poll
);
3120 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
3121 dpif_name(&dpif
->dpif
), ch_idx
, handler_id
);