2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-netlink.h"
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
31 #include <sys/epoll.h>
36 #include "dpif-provider.h"
37 #include "openvswitch/dynamic-string.h"
39 #include "fat-rwlock.h"
41 #include "netdev-linux.h"
42 #include "netdev-vport.h"
43 #include "netlink-conntrack.h"
44 #include "netlink-notifier.h"
45 #include "netlink-socket.h"
48 #include "openvswitch/ofpbuf.h"
50 #include "poll-loop.h"
52 #include "openvswitch/shash.h"
55 #include "unaligned.h"
57 #include "openvswitch/vlog.h"
59 VLOG_DEFINE_THIS_MODULE(dpif_netlink
);
/* Largest size that a handler's 'channels' and 'epoll_events' arrays may
 * grow to; vport_add_channels() rejects any port whose index would require
 * more entries than this. */
enum { MAX_PORTS = USHRT_MAX };
/* This ethtool flag was introduced in Linux 2.6.24, so it might be
 * missing if we have old headers. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
72 struct dpif_netlink_dp
{
73 /* Generic Netlink header. */
76 /* struct ovs_header. */
80 const char *name
; /* OVS_DP_ATTR_NAME. */
81 const uint32_t *upcall_pid
; /* OVS_DP_ATTR_UPCALL_PID. */
82 uint32_t user_features
; /* OVS_DP_ATTR_USER_FEATURES */
83 const struct ovs_dp_stats
*stats
; /* OVS_DP_ATTR_STATS. */
84 const struct ovs_dp_megaflow_stats
*megaflow_stats
;
85 /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
/* Forward declarations of the helpers that operate on
 * 'struct dpif_netlink_dp' requests and replies. */
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
                                       const struct ofpbuf *);
static void dpif_netlink_dp_dump_start(struct nl_dump *);
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
                                    struct dpif_netlink_dp *reply,
                                    struct ofpbuf **bufp);
static int dpif_netlink_dp_get(const struct dpif *,
                               struct dpif_netlink_dp *reply,
                               struct ofpbuf **bufp);
99 struct dpif_netlink_flow
{
100 /* Generic Netlink header. */
103 /* struct ovs_header. */
104 unsigned int nlmsg_flags
;
109 * The 'stats' member points to 64-bit data that might only be aligned on
110 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
113 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
114 * the Netlink version of the command, even if actions_len is zero. */
115 const struct nlattr
*key
; /* OVS_FLOW_ATTR_KEY. */
117 const struct nlattr
*mask
; /* OVS_FLOW_ATTR_MASK. */
119 const struct nlattr
*actions
; /* OVS_FLOW_ATTR_ACTIONS. */
121 ovs_u128 ufid
; /* OVS_FLOW_ATTR_FLOW_ID. */
122 bool ufid_present
; /* Is there a UFID? */
123 bool ufid_terse
; /* Skip serializing key/mask/acts? */
124 const struct ovs_flow_stats
*stats
; /* OVS_FLOW_ATTR_STATS. */
125 const uint8_t *tcp_flags
; /* OVS_FLOW_ATTR_TCP_FLAGS. */
126 const ovs_32aligned_u64
*used
; /* OVS_FLOW_ATTR_USED. */
127 bool clear
; /* OVS_FLOW_ATTR_CLEAR. */
128 bool probe
; /* OVS_FLOW_ATTR_PROBE. */
131 static void dpif_netlink_flow_init(struct dpif_netlink_flow
*);
132 static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*,
133 const struct ofpbuf
*);
134 static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*,
136 static int dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
137 struct dpif_netlink_flow
*reply
,
138 struct ofpbuf
**bufp
);
139 static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*,
140 struct dpif_flow_stats
*);
141 static void dpif_netlink_flow_to_dpif_flow(struct dpif
*, struct dpif_flow
*,
142 const struct dpif_netlink_flow
*);
/* One of the dpif channels between the kernel and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
151 #define VPORT_SOCK_POOL_SIZE 1
152 /* On Windows, there is no native support for epoll. There are equivalent
153 * interfaces though, that are not used currently. For simplicity, a pool of
154 * netlink sockets is used. Each socket is represented by 'struct
155 * dpif_windows_vport_sock'. Since it is a pool, multiple OVS ports may be
156 * sharing the same socket. In the future, we can add a reference count and
/* One entry in a handler's pool of netlink sockets (see 'vport_sock_pool'
 * in 'struct dpif_handler'). */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* netlink socket. */
};
163 struct dpif_handler
{
164 struct dpif_channel
*channels
;/* Array of channels for each handler. */
165 struct epoll_event
*epoll_events
;
166 int epoll_fd
; /* epoll fd that includes channel socks. */
167 int n_events
; /* Num events returned by epoll_wait(). */
168 int event_offset
; /* Offset into 'epoll_events'. */
171 /* Pool of sockets. */
172 struct dpif_windows_vport_sock
*vport_sock_pool
;
173 size_t last_used_pool_idx
; /* Index to aid in allocating a
174 socket in the pool to a port. */
178 /* Datapath interface for the openvswitch Linux kernel module. */
179 struct dpif_netlink
{
183 /* Upcall messages. */
184 struct fat_rwlock upcall_lock
;
185 struct dpif_handler
*handlers
;
186 uint32_t n_handlers
; /* Num of upcall handlers. */
187 int uc_array_size
; /* Size of 'handler->channels' and */
188 /* 'handler->epoll_events'. */
190 /* Change notification. */
191 struct nl_sock
*port_notifier
; /* vport multicast group subscriber. */
192 bool refresh_channels
;
195 static void report_loss(struct dpif_netlink
*, struct dpif_channel
*,
196 uint32_t ch_idx
, uint32_t handler_id
);
198 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
200 /* Generic Netlink family numbers for OVS.
202 * Initialized by dpif_netlink_init(). */
/* One Generic Netlink family number per OVS family.  As file-scope statics
 * they start out as zero. */
static int ovs_datapath_family;
static int ovs_vport_family;
static int ovs_flow_family;
static int ovs_packet_family;
208 /* Generic Netlink multicast groups for OVS.
210 * Initialized by dpif_netlink_init(). */
211 static unsigned int ovs_vport_mcgroup
;
213 static int dpif_netlink_init(void);
214 static int open_dpif(const struct dpif_netlink_dp
*, struct dpif
**);
215 static uint32_t dpif_netlink_port_get_pid(const struct dpif
*,
216 odp_port_t port_no
, uint32_t hash
);
217 static void dpif_netlink_handler_uninit(struct dpif_handler
*handler
);
218 static int dpif_netlink_refresh_channels(struct dpif_netlink
*,
219 uint32_t n_handlers
);
220 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*,
222 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*,
223 const struct ofpbuf
*);
225 static struct dpif_netlink
*
226 dpif_netlink_cast(const struct dpif
*dpif
)
228 dpif_assert_class(dpif
, &dpif_netlink_class
);
229 return CONTAINER_OF(dpif
, struct dpif_netlink
, dpif
);
233 dpif_netlink_enumerate(struct sset
*all_dps
,
234 const struct dpif_class
*dpif_class OVS_UNUSED
)
237 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
238 struct ofpbuf msg
, buf
;
241 error
= dpif_netlink_init();
246 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
247 dpif_netlink_dp_dump_start(&dump
);
248 while (nl_dump_next(&dump
, &msg
, &buf
)) {
249 struct dpif_netlink_dp dp
;
251 if (!dpif_netlink_dp_from_ofpbuf(&dp
, &msg
)) {
252 sset_add(all_dps
, dp
.name
);
256 return nl_dump_done(&dump
);
260 dpif_netlink_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
261 bool create
, struct dpif
**dpifp
)
263 struct dpif_netlink_dp dp_request
, dp
;
268 error
= dpif_netlink_init();
273 /* Create or look up datapath. */
274 dpif_netlink_dp_init(&dp_request
);
276 dp_request
.cmd
= OVS_DP_CMD_NEW
;
278 dp_request
.upcall_pid
= &upcall_pid
;
280 /* Use OVS_DP_CMD_SET to report user features */
281 dp_request
.cmd
= OVS_DP_CMD_SET
;
283 dp_request
.name
= name
;
284 dp_request
.user_features
|= OVS_DP_F_UNALIGNED
;
285 dp_request
.user_features
|= OVS_DP_F_VPORT_PIDS
;
286 error
= dpif_netlink_dp_transact(&dp_request
, &dp
, &buf
);
291 error
= open_dpif(&dp
, dpifp
);
297 open_dpif(const struct dpif_netlink_dp
*dp
, struct dpif
**dpifp
)
299 struct dpif_netlink
*dpif
;
301 dpif
= xzalloc(sizeof *dpif
);
302 dpif
->port_notifier
= NULL
;
303 fat_rwlock_init(&dpif
->upcall_lock
);
305 dpif_init(&dpif
->dpif
, &dpif_netlink_class
, dp
->name
,
306 dp
->dp_ifindex
, dp
->dp_ifindex
);
308 dpif
->dp_ifindex
= dp
->dp_ifindex
;
309 *dpifp
= &dpif
->dpif
;
314 /* Destroys the netlink sockets pointed to by the elements in 'socksp'
315 * and frees 'socksp' itself. */
317 vport_del_socksp__(struct nl_sock
**socksp
, uint32_t n_socks
)
321 for (i
= 0; i
< n_socks
; i
++) {
322 nl_sock_destroy(socksp
[i
]);
328 /* Creates an array of netlink sockets. Returns an array of the
329 * corresponding pointers. Records the error in 'error'. */
330 static struct nl_sock
**
331 vport_create_socksp__(uint32_t n_socks
, int *error
)
333 struct nl_sock
**socksp
= xzalloc(n_socks
* sizeof *socksp
);
336 for (i
= 0; i
< n_socks
; i
++) {
337 *error
= nl_sock_create(NETLINK_GENERIC
, &socksp
[i
]);
346 vport_del_socksp__(socksp
, n_socks
);
353 vport_delete_sock_pool(struct dpif_handler
*handler
)
354 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
356 if (handler
->vport_sock_pool
) {
358 struct dpif_windows_vport_sock
*sock_pool
=
359 handler
->vport_sock_pool
;
361 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
362 if (sock_pool
[i
].nl_sock
) {
363 nl_sock_unsubscribe_packets(sock_pool
[i
].nl_sock
);
364 nl_sock_destroy(sock_pool
[i
].nl_sock
);
365 sock_pool
[i
].nl_sock
= NULL
;
369 free(handler
->vport_sock_pool
);
370 handler
->vport_sock_pool
= NULL
;
375 vport_create_sock_pool(struct dpif_handler
*handler
)
376 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
378 struct dpif_windows_vport_sock
*sock_pool
;
382 sock_pool
= xzalloc(VPORT_SOCK_POOL_SIZE
* sizeof *sock_pool
);
383 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
384 error
= nl_sock_create(NETLINK_GENERIC
, &sock_pool
[i
].nl_sock
);
389 /* Enable the netlink socket to receive packets. This is equivalent to
390 * calling nl_sock_join_mcgroup() to receive events. */
391 error
= nl_sock_subscribe_packets(sock_pool
[i
].nl_sock
);
397 handler
->vport_sock_pool
= sock_pool
;
398 handler
->last_used_pool_idx
= 0;
402 vport_delete_sock_pool(handler
);
406 /* Returns an array of pointers to netlink sockets. The sockets are picked from a
407 * pool. Records the error in 'error'. */
/* Builds an array of 'dpif->n_handlers' socket pointers, taking for each
 * handler the pool entry at 'last_used_pool_idx' and then advancing that
 * index, wrapping to 0 after the last pool slot.  Asserts that there is at
 * most one handler and that every chosen socket is nonnull.
 *
 * Each socket pointer is stored into 'socksp[i]' exactly once; a second,
 * identical assignment that used to follow it was redundant and has been
 * dropped. */
408 static struct nl_sock
**
409 vport_create_socksp_windows(struct dpif_netlink
*dpif
, int *error
)
410 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
412 uint32_t n_socks
= dpif
->n_handlers
;
413 struct nl_sock
**socksp
;
416 ovs_assert(n_socks
<= 1);
417 socksp
= xzalloc(n_socks
* sizeof *socksp
);
419 /* Pick netlink sockets to use in a round-robin fashion from each
420 * handler's pool of sockets. */
421 for (i
= 0; i
< n_socks
; i
++) {
422 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
423 struct dpif_windows_vport_sock
*sock_pool
= handler
->vport_sock_pool
;
424 size_t index
= handler
->last_used_pool_idx
;
426 /* A pool of sockets is allocated when the handler is initialized. */
427 if (sock_pool
== NULL
) {
433 ovs_assert(index
< VPORT_SOCK_POOL_SIZE
);
434 socksp
[i
] = sock_pool
[index
].nl_sock
;
436 ovs_assert(socksp
[i
]);
437 index
= (index
== VPORT_SOCK_POOL_SIZE
- 1) ? 0 : index
+ 1;
438 handler
->last_used_pool_idx
= index
;
445 vport_del_socksp_windows(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
451 static struct nl_sock
**
452 vport_create_socksp(struct dpif_netlink
*dpif
, int *error
)
455 return vport_create_socksp_windows(dpif
, error
);
457 return vport_create_socksp__(dpif
->n_handlers
, error
);
462 vport_del_socksp(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
465 vport_del_socksp_windows(dpif
, socksp
);
467 vport_del_socksp__(socksp
, dpif
->n_handlers
);
471 /* Given the array of pointers to netlink sockets 'socksp', returns
472 * the array of corresponding pids. If the 'socksp' is NULL, returns
473 * a single-element array of value 0. */
475 vport_socksp_to_pids(struct nl_sock
**socksp
, uint32_t n_socks
)
480 pids
= xzalloc(sizeof *pids
);
484 pids
= xzalloc(n_socks
* sizeof *pids
);
485 for (i
= 0; i
< n_socks
; i
++) {
486 pids
[i
] = nl_sock_pid(socksp
[i
]);
493 /* Given the port number 'port_idx', extracts the pids of netlink sockets
494 * associated to the port and assigns it to 'upcall_pids'. */
496 vport_get_pids(struct dpif_netlink
*dpif
, uint32_t port_idx
,
497 uint32_t **upcall_pids
)
502 /* Since the nl_sock can only be assigned in either all
503 * or none "dpif->handlers" channels, the following check
505 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
508 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
510 pids
= xzalloc(dpif
->n_handlers
* sizeof *pids
);
512 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
513 pids
[i
] = nl_sock_pid(dpif
->handlers
[i
].channels
[port_idx
].sock
);
522 vport_add_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
,
523 struct nl_sock
**socksp
)
525 struct epoll_event event
;
526 uint32_t port_idx
= odp_to_u32(port_no
);
530 if (dpif
->handlers
== NULL
) {
534 /* We assume that the datapath densely chooses port numbers, which can
535 * therefore be used as an index into 'channels' and 'epoll_events' of
536 * 'dpif->handler'. */
537 if (port_idx
>= dpif
->uc_array_size
) {
538 uint32_t new_size
= port_idx
+ 1;
540 if (new_size
> MAX_PORTS
) {
541 VLOG_WARN_RL(&error_rl
, "%s: datapath port %"PRIu32
" too big",
542 dpif_name(&dpif
->dpif
), port_no
);
546 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
547 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
549 handler
->channels
= xrealloc(handler
->channels
,
550 new_size
* sizeof *handler
->channels
);
552 for (j
= dpif
->uc_array_size
; j
< new_size
; j
++) {
553 handler
->channels
[j
].sock
= NULL
;
556 handler
->epoll_events
= xrealloc(handler
->epoll_events
,
557 new_size
* sizeof *handler
->epoll_events
);
560 dpif
->uc_array_size
= new_size
;
563 memset(&event
, 0, sizeof event
);
564 event
.events
= EPOLLIN
;
565 event
.data
.u32
= port_idx
;
567 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
568 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
571 if (epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_ADD
, nl_sock_fd(socksp
[i
]),
577 dpif
->handlers
[i
].channels
[port_idx
].sock
= socksp
[i
];
578 dpif
->handlers
[i
].channels
[port_idx
].last_poll
= LLONG_MIN
;
584 for (j
= 0; j
< i
; j
++) {
586 epoll_ctl(dpif
->handlers
[j
].epoll_fd
, EPOLL_CTL_DEL
,
587 nl_sock_fd(socksp
[j
]), NULL
);
589 dpif
->handlers
[j
].channels
[port_idx
].sock
= NULL
;
596 vport_del_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
)
598 uint32_t port_idx
= odp_to_u32(port_no
);
601 if (!dpif
->handlers
|| port_idx
>= dpif
->uc_array_size
) {
605 /* Since the sock can only be assigned in either all or none
606 * of "dpif->handlers" channels, the following check would
608 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
612 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
613 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
615 epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_DEL
,
616 nl_sock_fd(handler
->channels
[port_idx
].sock
), NULL
);
617 nl_sock_destroy(handler
->channels
[port_idx
].sock
);
619 handler
->channels
[port_idx
].sock
= NULL
;
620 handler
->event_offset
= handler
->n_events
= 0;
625 destroy_all_channels(struct dpif_netlink
*dpif
)
626 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
630 if (!dpif
->handlers
) {
634 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
635 struct dpif_netlink_vport vport_request
;
636 uint32_t upcall_pids
= 0;
638 /* Since the sock can only be assigned in either all or none
639 * of "dpif->handlers" channels, the following check would
641 if (!dpif
->handlers
[0].channels
[i
].sock
) {
645 /* Turn off upcalls. */
646 dpif_netlink_vport_init(&vport_request
);
647 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
648 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
649 vport_request
.port_no
= u32_to_odp(i
);
650 vport_request
.n_upcall_pids
= 1;
651 vport_request
.upcall_pids
= &upcall_pids
;
652 dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
654 vport_del_channels(dpif
, u32_to_odp(i
));
657 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
658 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
660 dpif_netlink_handler_uninit(handler
);
661 free(handler
->epoll_events
);
662 free(handler
->channels
);
665 free(dpif
->handlers
);
666 dpif
->handlers
= NULL
;
667 dpif
->n_handlers
= 0;
668 dpif
->uc_array_size
= 0;
672 dpif_netlink_close(struct dpif
*dpif_
)
674 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
676 nl_sock_destroy(dpif
->port_notifier
);
678 fat_rwlock_wrlock(&dpif
->upcall_lock
);
679 destroy_all_channels(dpif
);
680 fat_rwlock_unlock(&dpif
->upcall_lock
);
682 fat_rwlock_destroy(&dpif
->upcall_lock
);
/* Builds an OVS_DP_CMD_DEL datapath request that identifies this datapath
 * by its 'dp_ifindex', sends it with dpif_netlink_dp_transact() and returns
 * that call's result.  No reply is requested: both the reply and buffer
 * arguments are NULL. */
687 dpif_netlink_destroy(struct dpif
*dpif_
)
689 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
690 struct dpif_netlink_dp dp
;
692 dpif_netlink_dp_init(&dp
);
693 dp
.cmd
= OVS_DP_CMD_DEL
;
694 dp
.dp_ifindex
= dpif
->dp_ifindex
;
695 return dpif_netlink_dp_transact(&dp
, NULL
, NULL
);
699 dpif_netlink_run(struct dpif
*dpif_
)
701 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
703 if (dpif
->refresh_channels
) {
704 dpif
->refresh_channels
= false;
705 fat_rwlock_wrlock(&dpif
->upcall_lock
);
706 dpif_netlink_refresh_channels(dpif
, dpif
->n_handlers
);
707 fat_rwlock_unlock(&dpif
->upcall_lock
);
713 dpif_netlink_get_stats(const struct dpif
*dpif_
, struct dpif_dp_stats
*stats
)
715 struct dpif_netlink_dp dp
;
719 error
= dpif_netlink_dp_get(dpif_
, &dp
, &buf
);
721 memset(stats
, 0, sizeof *stats
);
724 stats
->n_hit
= get_32aligned_u64(&dp
.stats
->n_hit
);
725 stats
->n_missed
= get_32aligned_u64(&dp
.stats
->n_missed
);
726 stats
->n_lost
= get_32aligned_u64(&dp
.stats
->n_lost
);
727 stats
->n_flows
= get_32aligned_u64(&dp
.stats
->n_flows
);
730 if (dp
.megaflow_stats
) {
731 stats
->n_masks
= dp
.megaflow_stats
->n_masks
;
732 stats
->n_mask_hit
= get_32aligned_u64(
733 &dp
.megaflow_stats
->n_mask_hit
);
735 stats
->n_masks
= UINT32_MAX
;
736 stats
->n_mask_hit
= UINT64_MAX
;
744 get_vport_type(const struct dpif_netlink_vport
*vport
)
746 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
748 switch (vport
->type
) {
749 case OVS_VPORT_TYPE_NETDEV
: {
750 const char *type
= netdev_get_type_from_name(vport
->name
);
752 return type
? type
: "system";
755 case OVS_VPORT_TYPE_INTERNAL
:
758 case OVS_VPORT_TYPE_GENEVE
:
761 case OVS_VPORT_TYPE_GRE
:
764 case OVS_VPORT_TYPE_VXLAN
:
767 case OVS_VPORT_TYPE_LISP
:
770 case OVS_VPORT_TYPE_STT
:
773 case OVS_VPORT_TYPE_UNSPEC
:
774 case __OVS_VPORT_TYPE_MAX
:
778 VLOG_WARN_RL(&rl
, "dp%d: port `%s' has unsupported type %u",
779 vport
->dp_ifindex
, vport
->name
, (unsigned int) vport
->type
);
783 static enum ovs_vport_type
784 netdev_to_ovs_vport_type(const struct netdev
*netdev
)
786 const char *type
= netdev_get_type(netdev
);
788 if (!strcmp(type
, "tap") || !strcmp(type
, "system")) {
789 return OVS_VPORT_TYPE_NETDEV
;
790 } else if (!strcmp(type
, "internal")) {
791 return OVS_VPORT_TYPE_INTERNAL
;
792 } else if (strstr(type
, "stt")) {
793 return OVS_VPORT_TYPE_STT
;
794 } else if (!strcmp(type
, "geneve")) {
795 return OVS_VPORT_TYPE_GENEVE
;
796 } else if (strstr(type
, "gre")) {
797 return OVS_VPORT_TYPE_GRE
;
798 } else if (!strcmp(type
, "vxlan")) {
799 return OVS_VPORT_TYPE_VXLAN
;
800 } else if (!strcmp(type
, "lisp")) {
801 return OVS_VPORT_TYPE_LISP
;
803 return OVS_VPORT_TYPE_UNSPEC
;
808 dpif_netlink_port_add__(struct dpif_netlink
*dpif
, struct netdev
*netdev
,
809 odp_port_t
*port_nop
)
810 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
812 const struct netdev_tunnel_config
*tnl_cfg
;
813 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
814 const char *name
= netdev_vport_get_dpif_port(netdev
,
815 namebuf
, sizeof namebuf
);
816 const char *type
= netdev_get_type(netdev
);
817 struct dpif_netlink_vport request
, reply
;
819 uint64_t options_stub
[64 / 8];
820 struct ofpbuf options
;
821 struct nl_sock
**socksp
= NULL
;
822 uint32_t *upcall_pids
;
825 if (dpif
->handlers
) {
826 socksp
= vport_create_socksp(dpif
, &error
);
832 dpif_netlink_vport_init(&request
);
833 request
.cmd
= OVS_VPORT_CMD_NEW
;
834 request
.dp_ifindex
= dpif
->dp_ifindex
;
835 request
.type
= netdev_to_ovs_vport_type(netdev
);
836 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
837 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
838 "unsupported type `%s'",
839 dpif_name(&dpif
->dpif
), name
, type
);
840 vport_del_socksp(dpif
, socksp
);
845 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
847 /* XXX : Map appropiate Windows handle */
849 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
854 if (request
.type
== OVS_VPORT_TYPE_INTERNAL
) {
855 if (!create_wmi_port(name
)){
856 VLOG_ERR("Could not create wmi internal port with name:%s", name
);
857 vport_del_socksp(dpif
, socksp
);
863 tnl_cfg
= netdev_get_tunnel_config(netdev
);
864 if (tnl_cfg
&& (tnl_cfg
->dst_port
!= 0 || tnl_cfg
->exts
)) {
865 ofpbuf_use_stack(&options
, options_stub
, sizeof options_stub
);
866 if (tnl_cfg
->dst_port
) {
867 nl_msg_put_u16(&options
, OVS_TUNNEL_ATTR_DST_PORT
,
868 ntohs(tnl_cfg
->dst_port
));
874 ext_ofs
= nl_msg_start_nested(&options
, OVS_TUNNEL_ATTR_EXTENSION
);
875 for (i
= 0; i
< 32; i
++) {
876 if (tnl_cfg
->exts
& (1 << i
)) {
877 nl_msg_put_flag(&options
, i
);
880 nl_msg_end_nested(&options
, ext_ofs
);
882 request
.options
= options
.data
;
883 request
.options_len
= options
.size
;
886 request
.port_no
= *port_nop
;
887 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
888 request
.n_upcall_pids
= socksp
? dpif
->n_handlers
: 1;
889 request
.upcall_pids
= upcall_pids
;
891 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
893 *port_nop
= reply
.port_no
;
895 if (error
== EBUSY
&& *port_nop
!= ODPP_NONE
) {
896 VLOG_INFO("%s: requested port %"PRIu32
" is in use",
897 dpif_name(&dpif
->dpif
), *port_nop
);
900 vport_del_socksp(dpif
, socksp
);
905 error
= vport_add_channels(dpif
, *port_nop
, socksp
);
907 VLOG_INFO("%s: could not add channel for port %s",
908 dpif_name(&dpif
->dpif
), name
);
910 /* Delete the port. */
911 dpif_netlink_vport_init(&request
);
912 request
.cmd
= OVS_VPORT_CMD_DEL
;
913 request
.dp_ifindex
= dpif
->dp_ifindex
;
914 request
.port_no
= *port_nop
;
915 dpif_netlink_vport_transact(&request
, NULL
, NULL
);
916 vport_del_socksp(dpif
, socksp
);
930 dpif_netlink_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
931 odp_port_t
*port_nop
)
933 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
936 fat_rwlock_wrlock(&dpif
->upcall_lock
);
937 error
= dpif_netlink_port_add__(dpif
, netdev
, port_nop
);
938 fat_rwlock_unlock(&dpif
->upcall_lock
);
944 dpif_netlink_port_del__(struct dpif_netlink
*dpif
, odp_port_t port_no
)
945 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
947 struct dpif_netlink_vport vport
;
950 dpif_netlink_vport_init(&vport
);
951 vport
.cmd
= OVS_VPORT_CMD_DEL
;
952 vport
.dp_ifindex
= dpif
->dp_ifindex
;
953 vport
.port_no
= port_no
;
955 struct dpif_port temp_dpif_port
;
956 dpif_netlink_port_query__(dpif
, port_no
, NULL
, &temp_dpif_port
);
957 if (!strcmp(temp_dpif_port
.type
, "internal")) {
958 if (!delete_wmi_port(temp_dpif_port
.name
)){
959 VLOG_ERR("Could not delete wmi port with name: %s",
960 temp_dpif_port
.name
);
964 error
= dpif_netlink_vport_transact(&vport
, NULL
, NULL
);
966 vport_del_channels(dpif
, port_no
);
972 dpif_netlink_port_del(struct dpif
*dpif_
, odp_port_t port_no
)
974 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
977 fat_rwlock_wrlock(&dpif
->upcall_lock
);
978 error
= dpif_netlink_port_del__(dpif
, port_no
);
979 fat_rwlock_unlock(&dpif
->upcall_lock
);
985 dpif_netlink_port_query__(const struct dpif_netlink
*dpif
, odp_port_t port_no
,
986 const char *port_name
, struct dpif_port
*dpif_port
)
988 struct dpif_netlink_vport request
;
989 struct dpif_netlink_vport reply
;
993 dpif_netlink_vport_init(&request
);
994 request
.cmd
= OVS_VPORT_CMD_GET
;
995 request
.dp_ifindex
= dpif
->dp_ifindex
;
996 request
.port_no
= port_no
;
997 request
.name
= port_name
;
999 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
1001 if (reply
.dp_ifindex
!= request
.dp_ifindex
) {
1002 /* A query by name reported that 'port_name' is in some datapath
1003 * other than 'dpif', but the caller wants to know about 'dpif'. */
1005 } else if (dpif_port
) {
1006 dpif_port
->name
= xstrdup(reply
.name
);
1007 dpif_port
->type
= xstrdup(get_vport_type(&reply
));
1008 dpif_port
->port_no
= reply
.port_no
;
1016 dpif_netlink_port_query_by_number(const struct dpif
*dpif_
, odp_port_t port_no
,
1017 struct dpif_port
*dpif_port
)
1019 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1021 return dpif_netlink_port_query__(dpif
, port_no
, NULL
, dpif_port
);
1025 dpif_netlink_port_query_by_name(const struct dpif
*dpif_
, const char *devname
,
1026 struct dpif_port
*dpif_port
)
1028 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1030 return dpif_netlink_port_query__(dpif
, 0, devname
, dpif_port
);
1034 dpif_netlink_port_get_pid__(const struct dpif_netlink
*dpif
,
1035 odp_port_t port_no
, uint32_t hash
)
1036 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
1038 uint32_t port_idx
= odp_to_u32(port_no
);
1041 if (dpif
->handlers
&& dpif
->uc_array_size
> 0) {
1042 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1043 * channel, since it is not heavily loaded. */
1044 uint32_t idx
= port_idx
>= dpif
->uc_array_size
? 0 : port_idx
;
1045 struct dpif_handler
*h
= &dpif
->handlers
[hash
% dpif
->n_handlers
];
1047 /* Needs to check in case the socket pointer is changed in between
1048 * the holding of upcall_lock. A known case happens when the main
1049 * thread deletes the vport while the handler thread is handling
1050 * the upcall from that port. */
1051 if (h
->channels
[idx
].sock
) {
1052 pid
= nl_sock_pid(h
->channels
[idx
].sock
);
1060 dpif_netlink_port_get_pid(const struct dpif
*dpif_
, odp_port_t port_no
,
1063 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1066 fat_rwlock_rdlock(&dpif
->upcall_lock
);
1067 ret
= dpif_netlink_port_get_pid__(dpif
, port_no
, hash
);
1068 fat_rwlock_unlock(&dpif
->upcall_lock
);
/* Sends an OVS_FLOW_CMD_DEL flow request for this datapath and returns the
 * result of dpif_netlink_flow_transact().  After dpif_netlink_flow_init(),
 * this function sets only the command and 'dp_ifindex'; it fills in no key
 * or UFID.  No reply is requested: both the reply and buffer arguments are
 * NULL. */
1074 dpif_netlink_flow_flush(struct dpif
*dpif_
)
1076 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1077 struct dpif_netlink_flow flow
;
1079 dpif_netlink_flow_init(&flow
);
1080 flow
.cmd
= OVS_FLOW_CMD_DEL
;
1081 flow
.dp_ifindex
= dpif
->dp_ifindex
;
1082 return dpif_netlink_flow_transact(&flow
, NULL
, NULL
);
1085 struct dpif_netlink_port_state
{
1086 struct nl_dump dump
;
1091 dpif_netlink_port_dump_start__(const struct dpif_netlink
*dpif
,
1092 struct nl_dump
*dump
)
1094 struct dpif_netlink_vport request
;
1097 dpif_netlink_vport_init(&request
);
1098 request
.cmd
= OVS_VPORT_CMD_GET
;
1099 request
.dp_ifindex
= dpif
->dp_ifindex
;
1101 buf
= ofpbuf_new(1024);
1102 dpif_netlink_vport_to_ofpbuf(&request
, buf
);
1103 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
1108 dpif_netlink_port_dump_start(const struct dpif
*dpif_
, void **statep
)
1110 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1111 struct dpif_netlink_port_state
*state
;
1113 *statep
= state
= xmalloc(sizeof *state
);
1114 dpif_netlink_port_dump_start__(dpif
, &state
->dump
);
1116 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
1121 dpif_netlink_port_dump_next__(const struct dpif_netlink
*dpif
,
1122 struct nl_dump
*dump
,
1123 struct dpif_netlink_vport
*vport
,
1124 struct ofpbuf
*buffer
)
1129 if (!nl_dump_next(dump
, &buf
, buffer
)) {
1133 error
= dpif_netlink_vport_from_ofpbuf(vport
, &buf
);
1135 VLOG_WARN_RL(&error_rl
, "%s: failed to parse vport record (%s)",
1136 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1142 dpif_netlink_port_dump_next(const struct dpif
*dpif_
, void *state_
,
1143 struct dpif_port
*dpif_port
)
1145 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1146 struct dpif_netlink_port_state
*state
= state_
;
1147 struct dpif_netlink_vport vport
;
1150 error
= dpif_netlink_port_dump_next__(dpif
, &state
->dump
, &vport
,
1155 dpif_port
->name
= CONST_CAST(char *, vport
.name
);
1156 dpif_port
->type
= CONST_CAST(char *, get_vport_type(&vport
));
1157 dpif_port
->port_no
= vport
.port_no
;
1162 dpif_netlink_port_dump_done(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
)
1164 struct dpif_netlink_port_state
*state
= state_
;
1165 int error
= nl_dump_done(&state
->dump
);
1167 ofpbuf_uninit(&state
->buf
);
1173 dpif_netlink_port_poll(const struct dpif
*dpif_
, char **devnamep
)
1175 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1177 /* Lazily create the Netlink socket to listen for notifications. */
1178 if (!dpif
->port_notifier
) {
1179 struct nl_sock
*sock
;
1182 error
= nl_sock_create(NETLINK_GENERIC
, &sock
);
1187 error
= nl_sock_join_mcgroup(sock
, ovs_vport_mcgroup
);
1189 nl_sock_destroy(sock
);
1192 dpif
->port_notifier
= sock
;
1194 /* We have no idea of the current state so report that everything
1200 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1201 uint64_t buf_stub
[4096 / 8];
1205 ofpbuf_use_stub(&buf
, buf_stub
, sizeof buf_stub
);
1206 error
= nl_sock_recv(dpif
->port_notifier
, &buf
, false);
1208 struct dpif_netlink_vport vport
;
1210 error
= dpif_netlink_vport_from_ofpbuf(&vport
, &buf
);
1212 if (vport
.dp_ifindex
== dpif
->dp_ifindex
1213 && (vport
.cmd
== OVS_VPORT_CMD_NEW
1214 || vport
.cmd
== OVS_VPORT_CMD_DEL
1215 || vport
.cmd
== OVS_VPORT_CMD_SET
)) {
1216 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1217 dpif
->dpif
.full_name
, vport
.name
, vport
.cmd
);
1218 if (vport
.cmd
== OVS_VPORT_CMD_DEL
&& dpif
->handlers
) {
1219 dpif
->refresh_channels
= true;
1221 *devnamep
= xstrdup(vport
.name
);
1222 ofpbuf_uninit(&buf
);
1226 } else if (error
!= EAGAIN
) {
1227 VLOG_WARN_RL(&rl
, "error reading or parsing netlink (%s)",
1228 ovs_strerror(error
));
1229 nl_sock_drain(dpif
->port_notifier
);
1233 ofpbuf_uninit(&buf
);
1241 dpif_netlink_port_poll_wait(const struct dpif
*dpif_
)
1243 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1245 if (dpif
->port_notifier
) {
1246 nl_sock_wait(dpif
->port_notifier
, POLLIN
);
1248 poll_immediate_wake();
1253 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow
*request
,
1254 const ovs_u128
*ufid
, bool terse
)
1257 request
->ufid
= *ufid
;
1258 request
->ufid_present
= true;
1260 request
->ufid_present
= false;
1262 request
->ufid_terse
= terse
;
1266 dpif_netlink_init_flow_get__(const struct dpif_netlink
*dpif
,
1267 const struct nlattr
*key
, size_t key_len
,
1268 const ovs_u128
*ufid
, bool terse
,
1269 struct dpif_netlink_flow
*request
)
1271 dpif_netlink_flow_init(request
);
1272 request
->cmd
= OVS_FLOW_CMD_GET
;
1273 request
->dp_ifindex
= dpif
->dp_ifindex
;
1275 request
->key_len
= key_len
;
1276 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1280 dpif_netlink_init_flow_get(const struct dpif_netlink
*dpif
,
1281 const struct dpif_flow_get
*get
,
1282 struct dpif_netlink_flow
*request
)
1284 dpif_netlink_init_flow_get__(dpif
, get
->key
, get
->key_len
, get
->ufid
,
1289 dpif_netlink_flow_get__(const struct dpif_netlink
*dpif
,
1290 const struct nlattr
*key
, size_t key_len
,
1291 const ovs_u128
*ufid
, bool terse
,
1292 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1294 struct dpif_netlink_flow request
;
1296 dpif_netlink_init_flow_get__(dpif
, key
, key_len
, ufid
, terse
, &request
);
1297 return dpif_netlink_flow_transact(&request
, reply
, bufp
);
1301 dpif_netlink_flow_get(const struct dpif_netlink
*dpif
,
1302 const struct dpif_netlink_flow
*flow
,
1303 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1305 return dpif_netlink_flow_get__(dpif
, flow
->key
, flow
->key_len
,
1306 flow
->ufid_present
? &flow
->ufid
: NULL
,
1307 false, reply
, bufp
);
1311 dpif_netlink_init_flow_put(struct dpif_netlink
*dpif
,
1312 const struct dpif_flow_put
*put
,
1313 struct dpif_netlink_flow
*request
)
1315 static const struct nlattr dummy_action
;
1317 dpif_netlink_flow_init(request
);
1318 request
->cmd
= (put
->flags
& DPIF_FP_CREATE
1319 ? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
);
1320 request
->dp_ifindex
= dpif
->dp_ifindex
;
1321 request
->key
= put
->key
;
1322 request
->key_len
= put
->key_len
;
1323 request
->mask
= put
->mask
;
1324 request
->mask_len
= put
->mask_len
;
1325 dpif_netlink_flow_init_ufid(request
, put
->ufid
, false);
1327 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1328 request
->actions
= (put
->actions
1330 : CONST_CAST(struct nlattr
*, &dummy_action
));
1331 request
->actions_len
= put
->actions_len
;
1332 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
1333 request
->clear
= true;
1335 if (put
->flags
& DPIF_FP_PROBE
) {
1336 request
->probe
= true;
1338 request
->nlmsg_flags
= put
->flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
1342 dpif_netlink_init_flow_del__(struct dpif_netlink
*dpif
,
1343 const struct nlattr
*key
, size_t key_len
,
1344 const ovs_u128
*ufid
, bool terse
,
1345 struct dpif_netlink_flow
*request
)
1347 dpif_netlink_flow_init(request
);
1348 request
->cmd
= OVS_FLOW_CMD_DEL
;
1349 request
->dp_ifindex
= dpif
->dp_ifindex
;
1351 request
->key_len
= key_len
;
1352 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1356 dpif_netlink_init_flow_del(struct dpif_netlink
*dpif
,
1357 const struct dpif_flow_del
*del
,
1358 struct dpif_netlink_flow
*request
)
1360 dpif_netlink_init_flow_del__(dpif
, del
->key
, del
->key_len
,
1361 del
->ufid
, del
->terse
, request
);
1364 struct dpif_netlink_flow_dump
{
1365 struct dpif_flow_dump up
;
1366 struct nl_dump nl_dump
;
1370 static struct dpif_netlink_flow_dump
*
1371 dpif_netlink_flow_dump_cast(struct dpif_flow_dump
*dump
)
1373 return CONTAINER_OF(dump
, struct dpif_netlink_flow_dump
, up
);
1376 static struct dpif_flow_dump
*
1377 dpif_netlink_flow_dump_create(const struct dpif
*dpif_
, bool terse
)
1379 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1380 struct dpif_netlink_flow_dump
*dump
;
1381 struct dpif_netlink_flow request
;
1384 dump
= xmalloc(sizeof *dump
);
1385 dpif_flow_dump_init(&dump
->up
, dpif_
);
1387 dpif_netlink_flow_init(&request
);
1388 request
.cmd
= OVS_FLOW_CMD_GET
;
1389 request
.dp_ifindex
= dpif
->dp_ifindex
;
1390 request
.ufid_present
= false;
1391 request
.ufid_terse
= terse
;
1393 buf
= ofpbuf_new(1024);
1394 dpif_netlink_flow_to_ofpbuf(&request
, buf
);
1395 nl_dump_start(&dump
->nl_dump
, NETLINK_GENERIC
, buf
);
1397 atomic_init(&dump
->status
, 0);
1398 dump
->up
.terse
= terse
;
1404 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump
*dump_
)
1406 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1407 unsigned int nl_status
= nl_dump_done(&dump
->nl_dump
);
1410 /* No other thread has access to 'dump' at this point. */
1411 atomic_read_relaxed(&dump
->status
, &dump_status
);
1413 return dump_status
? dump_status
: nl_status
;
1416 struct dpif_netlink_flow_dump_thread
{
1417 struct dpif_flow_dump_thread up
;
1418 struct dpif_netlink_flow_dump
*dump
;
1419 struct dpif_netlink_flow flow
;
1420 struct dpif_flow_stats stats
;
1421 struct ofpbuf nl_flows
; /* Always used to store flows. */
1422 struct ofpbuf
*nl_actions
; /* Used if kernel does not supply actions. */
1425 static struct dpif_netlink_flow_dump_thread
*
1426 dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread
*thread
)
1428 return CONTAINER_OF(thread
, struct dpif_netlink_flow_dump_thread
, up
);
1431 static struct dpif_flow_dump_thread
*
1432 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump
*dump_
)
1434 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1435 struct dpif_netlink_flow_dump_thread
*thread
;
1437 thread
= xmalloc(sizeof *thread
);
1438 dpif_flow_dump_thread_init(&thread
->up
, &dump
->up
);
1439 thread
->dump
= dump
;
1440 ofpbuf_init(&thread
->nl_flows
, NL_DUMP_BUFSIZE
);
1441 thread
->nl_actions
= NULL
;
1447 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread
*thread_
)
1449 struct dpif_netlink_flow_dump_thread
*thread
1450 = dpif_netlink_flow_dump_thread_cast(thread_
);
1452 ofpbuf_uninit(&thread
->nl_flows
);
1453 ofpbuf_delete(thread
->nl_actions
);
1458 dpif_netlink_flow_to_dpif_flow(struct dpif
*dpif
, struct dpif_flow
*dpif_flow
,
1459 const struct dpif_netlink_flow
*datapath_flow
)
1461 dpif_flow
->key
= datapath_flow
->key
;
1462 dpif_flow
->key_len
= datapath_flow
->key_len
;
1463 dpif_flow
->mask
= datapath_flow
->mask
;
1464 dpif_flow
->mask_len
= datapath_flow
->mask_len
;
1465 dpif_flow
->actions
= datapath_flow
->actions
;
1466 dpif_flow
->actions_len
= datapath_flow
->actions_len
;
1467 dpif_flow
->ufid_present
= datapath_flow
->ufid_present
;
1468 dpif_flow
->pmd_id
= PMD_ID_NULL
;
1469 if (datapath_flow
->ufid_present
) {
1470 dpif_flow
->ufid
= datapath_flow
->ufid
;
1472 ovs_assert(datapath_flow
->key
&& datapath_flow
->key_len
);
1473 dpif_flow_hash(dpif
, datapath_flow
->key
, datapath_flow
->key_len
,
1476 dpif_netlink_flow_get_stats(datapath_flow
, &dpif_flow
->stats
);
1480 dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread
*thread_
,
1481 struct dpif_flow
*flows
, int max_flows
)
1483 struct dpif_netlink_flow_dump_thread
*thread
1484 = dpif_netlink_flow_dump_thread_cast(thread_
);
1485 struct dpif_netlink_flow_dump
*dump
= thread
->dump
;
1486 struct dpif_netlink
*dpif
= dpif_netlink_cast(thread
->up
.dpif
);
1489 ofpbuf_delete(thread
->nl_actions
);
1490 thread
->nl_actions
= NULL
;
1494 || (n_flows
< max_flows
&& thread
->nl_flows
.size
)) {
1495 struct dpif_netlink_flow datapath_flow
;
1496 struct ofpbuf nl_flow
;
1499 /* Try to grab another flow. */
1500 if (!nl_dump_next(&dump
->nl_dump
, &nl_flow
, &thread
->nl_flows
)) {
1504 /* Convert the flow to our output format. */
1505 error
= dpif_netlink_flow_from_ofpbuf(&datapath_flow
, &nl_flow
);
1507 atomic_store_relaxed(&dump
->status
, error
);
1511 if (dump
->up
.terse
|| datapath_flow
.actions
) {
1512 /* Common case: we don't want actions, or the flow includes
1514 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1517 /* Rare case: the flow does not include actions. Retrieve this
1518 * individual flow again to get the actions. */
1519 error
= dpif_netlink_flow_get(dpif
, &datapath_flow
,
1520 &datapath_flow
, &thread
->nl_actions
);
1521 if (error
== ENOENT
) {
1522 VLOG_DBG("dumped flow disappeared on get");
1525 VLOG_WARN("error fetching dumped flow: %s",
1526 ovs_strerror(error
));
1527 atomic_store_relaxed(&dump
->status
, error
);
1531 /* Save this flow. Then exit, because we only have one buffer to
1532 * handle this case. */
1533 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1542 dpif_netlink_encode_execute(int dp_ifindex
, const struct dpif_execute
*d_exec
,
1545 struct ovs_header
*k_exec
;
1548 ofpbuf_prealloc_tailroom(buf
, (64
1549 + dp_packet_size(d_exec
->packet
)
1550 + ODP_KEY_METADATA_SIZE
1551 + d_exec
->actions_len
));
1553 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
1554 OVS_PACKET_CMD_EXECUTE
, OVS_PACKET_VERSION
);
1556 k_exec
= ofpbuf_put_uninit(buf
, sizeof *k_exec
);
1557 k_exec
->dp_ifindex
= dp_ifindex
;
1559 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
,
1560 dp_packet_data(d_exec
->packet
),
1561 dp_packet_size(d_exec
->packet
));
1563 key_ofs
= nl_msg_start_nested(buf
, OVS_PACKET_ATTR_KEY
);
1564 odp_key_from_pkt_metadata(buf
, &d_exec
->packet
->md
);
1565 nl_msg_end_nested(buf
, key_ofs
);
1567 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
,
1568 d_exec
->actions
, d_exec
->actions_len
);
1569 if (d_exec
->probe
) {
1570 nl_msg_put_flag(buf
, OVS_PACKET_ATTR_PROBE
);
1573 nl_msg_put_u16(buf
, OVS_PACKET_ATTR_MRU
, d_exec
->mtu
);
1577 /* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1578 * Returns the number actually executed (at least 1, if 'n_ops' is
1581 dpif_netlink_operate__(struct dpif_netlink
*dpif
,
1582 struct dpif_op
**ops
, size_t n_ops
)
1584 enum { MAX_OPS
= 50 };
1587 struct nl_transaction txn
;
1589 struct ofpbuf request
;
1590 uint64_t request_stub
[1024 / 8];
1592 struct ofpbuf reply
;
1593 uint64_t reply_stub
[1024 / 8];
1596 struct nl_transaction
*txnsp
[MAX_OPS
];
1599 n_ops
= MIN(n_ops
, MAX_OPS
);
1600 for (i
= 0; i
< n_ops
; i
++) {
1601 struct op_auxdata
*aux
= &auxes
[i
];
1602 struct dpif_op
*op
= ops
[i
];
1603 struct dpif_flow_put
*put
;
1604 struct dpif_flow_del
*del
;
1605 struct dpif_flow_get
*get
;
1606 struct dpif_netlink_flow flow
;
1608 ofpbuf_use_stub(&aux
->request
,
1609 aux
->request_stub
, sizeof aux
->request_stub
);
1610 aux
->txn
.request
= &aux
->request
;
1612 ofpbuf_use_stub(&aux
->reply
, aux
->reply_stub
, sizeof aux
->reply_stub
);
1613 aux
->txn
.reply
= NULL
;
1616 case DPIF_OP_FLOW_PUT
:
1617 put
= &op
->u
.flow_put
;
1618 dpif_netlink_init_flow_put(dpif
, put
, &flow
);
1620 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1621 aux
->txn
.reply
= &aux
->reply
;
1623 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1626 case DPIF_OP_FLOW_DEL
:
1627 del
= &op
->u
.flow_del
;
1628 dpif_netlink_init_flow_del(dpif
, del
, &flow
);
1630 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1631 aux
->txn
.reply
= &aux
->reply
;
1633 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1636 case DPIF_OP_EXECUTE
:
1637 /* Can't execute a packet that won't fit in a Netlink attribute. */
1638 if (OVS_UNLIKELY(nl_attr_oversized(
1639 dp_packet_size(op
->u
.execute
.packet
)))) {
1640 /* Report an error immediately if this is the first operation.
1641 * Otherwise the easiest thing to do is to postpone to the next
1642 * call (when this will be the first operation). */
1644 VLOG_ERR_RL(&error_rl
,
1645 "dropping oversized %"PRIu32
"-byte packet",
1646 dp_packet_size(op
->u
.execute
.packet
));
1647 op
->error
= ENOBUFS
;
1652 dpif_netlink_encode_execute(dpif
->dp_ifindex
, &op
->u
.execute
,
1657 case DPIF_OP_FLOW_GET
:
1658 get
= &op
->u
.flow_get
;
1659 dpif_netlink_init_flow_get(dpif
, get
, &flow
);
1660 aux
->txn
.reply
= get
->buffer
;
1661 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1669 for (i
= 0; i
< n_ops
; i
++) {
1670 txnsp
[i
] = &auxes
[i
].txn
;
1672 nl_transact_multiple(NETLINK_GENERIC
, txnsp
, n_ops
);
1674 for (i
= 0; i
< n_ops
; i
++) {
1675 struct op_auxdata
*aux
= &auxes
[i
];
1676 struct nl_transaction
*txn
= &auxes
[i
].txn
;
1677 struct dpif_op
*op
= ops
[i
];
1678 struct dpif_flow_put
*put
;
1679 struct dpif_flow_del
*del
;
1680 struct dpif_flow_get
*get
;
1682 op
->error
= txn
->error
;
1685 case DPIF_OP_FLOW_PUT
:
1686 put
= &op
->u
.flow_put
;
1689 struct dpif_netlink_flow reply
;
1691 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1694 dpif_netlink_flow_get_stats(&reply
, put
->stats
);
1700 case DPIF_OP_FLOW_DEL
:
1701 del
= &op
->u
.flow_del
;
1704 struct dpif_netlink_flow reply
;
1706 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1709 dpif_netlink_flow_get_stats(&reply
, del
->stats
);
1715 case DPIF_OP_EXECUTE
:
1718 case DPIF_OP_FLOW_GET
:
1719 get
= &op
->u
.flow_get
;
1721 struct dpif_netlink_flow reply
;
1723 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
, txn
->reply
);
1725 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, get
->flow
,
1735 ofpbuf_uninit(&aux
->request
);
1736 ofpbuf_uninit(&aux
->reply
);
/* Executes all 'n_ops' operations in 'ops' against 'dpif_', in as many
 * batches as dpif_netlink_operate__() needs. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
1756 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1758 vport_delete_sock_pool(handler
);
1762 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1764 return vport_create_sock_pool(handler
);
1769 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1771 handler
->epoll_fd
= epoll_create(10);
1772 return handler
->epoll_fd
< 0 ? errno
: 0;
1776 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1778 close(handler
->epoll_fd
);
1782 /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
1783 * currently in 'dpif' in the kernel, by adding a new set of channels for
1784 * any kernel vport that lacks one and deleting any channels that have no
1785 * backing kernel vports. */
1787 dpif_netlink_refresh_channels(struct dpif_netlink
*dpif
, uint32_t n_handlers
)
1788 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1790 unsigned long int *keep_channels
;
1791 struct dpif_netlink_vport vport
;
1792 size_t keep_channels_nbits
;
1793 struct nl_dump dump
;
1794 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
1799 ovs_assert(!WINDOWS
|| n_handlers
<= 1);
1800 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
1802 if (dpif
->n_handlers
!= n_handlers
) {
1803 destroy_all_channels(dpif
);
1804 dpif
->handlers
= xzalloc(n_handlers
* sizeof *dpif
->handlers
);
1805 for (i
= 0; i
< n_handlers
; i
++) {
1807 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1809 error
= dpif_netlink_handler_init(handler
);
1812 struct dpif_handler
*tmp
= &dpif
->handlers
[i
];
1815 for (j
= 0; j
< i
; j
++) {
1816 dpif_netlink_handler_uninit(tmp
);
1818 free(dpif
->handlers
);
1819 dpif
->handlers
= NULL
;
1824 dpif
->n_handlers
= n_handlers
;
1827 for (i
= 0; i
< n_handlers
; i
++) {
1828 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1830 handler
->event_offset
= handler
->n_events
= 0;
1833 keep_channels_nbits
= dpif
->uc_array_size
;
1834 keep_channels
= bitmap_allocate(keep_channels_nbits
);
1836 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
1837 dpif_netlink_port_dump_start__(dpif
, &dump
);
1838 while (!dpif_netlink_port_dump_next__(dpif
, &dump
, &vport
, &buf
)) {
1839 uint32_t port_no
= odp_to_u32(vport
.port_no
);
1840 uint32_t *upcall_pids
= NULL
;
1843 if (port_no
>= dpif
->uc_array_size
1844 || !vport_get_pids(dpif
, port_no
, &upcall_pids
)) {
1845 struct nl_sock
**socksp
= vport_create_socksp(dpif
, &error
);
1851 error
= vport_add_channels(dpif
, vport
.port_no
, socksp
);
1853 VLOG_INFO("%s: could not add channels for port %s",
1854 dpif_name(&dpif
->dpif
), vport
.name
);
1855 vport_del_socksp(dpif
, socksp
);
1859 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
1863 /* Configure the vport to deliver misses to 'sock'. */
1864 if (vport
.upcall_pids
[0] == 0
1865 || vport
.n_upcall_pids
!= dpif
->n_handlers
1866 || memcmp(upcall_pids
, vport
.upcall_pids
, n_handlers
* sizeof
1868 struct dpif_netlink_vport vport_request
;
1870 dpif_netlink_vport_init(&vport_request
);
1871 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
1872 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
1873 vport_request
.port_no
= vport
.port_no
;
1874 vport_request
.n_upcall_pids
= dpif
->n_handlers
;
1875 vport_request
.upcall_pids
= upcall_pids
;
1876 error
= dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
1878 VLOG_WARN_RL(&error_rl
,
1879 "%s: failed to set upcall pid on port: %s",
1880 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1882 if (error
!= ENODEV
&& error
!= ENOENT
) {
1885 /* The vport isn't really there, even though the dump says
1886 * it is. Probably we just hit a race after a port
1893 if (port_no
< keep_channels_nbits
) {
1894 bitmap_set1(keep_channels
, port_no
);
1901 vport_del_channels(dpif
, vport
.port_no
);
1903 nl_dump_done(&dump
);
1904 ofpbuf_uninit(&buf
);
1906 /* Discard any saved channels that we didn't reuse. */
1907 for (i
= 0; i
< keep_channels_nbits
; i
++) {
1908 if (!bitmap_is_set(keep_channels
, i
)) {
1909 vport_del_channels(dpif
, u32_to_odp(i
));
1912 free(keep_channels
);
1918 dpif_netlink_recv_set__(struct dpif_netlink
*dpif
, bool enable
)
1919 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1921 if ((dpif
->handlers
!= NULL
) == enable
) {
1923 } else if (!enable
) {
1924 destroy_all_channels(dpif
);
1927 return dpif_netlink_refresh_channels(dpif
, 1);
1932 dpif_netlink_recv_set(struct dpif
*dpif_
, bool enable
)
1934 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1937 fat_rwlock_wrlock(&dpif
->upcall_lock
);
1938 error
= dpif_netlink_recv_set__(dpif
, enable
);
1939 fat_rwlock_unlock(&dpif
->upcall_lock
);
1945 dpif_netlink_handlers_set(struct dpif
*dpif_
, uint32_t n_handlers
)
1947 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1951 /* Multiple upcall handlers will be supported once kernel datapath supports
1953 if (n_handlers
> 1) {
1958 fat_rwlock_wrlock(&dpif
->upcall_lock
);
1959 if (dpif
->handlers
) {
1960 error
= dpif_netlink_refresh_channels(dpif
, n_handlers
);
1962 fat_rwlock_unlock(&dpif
->upcall_lock
);
1968 dpif_netlink_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
1969 uint32_t queue_id
, uint32_t *priority
)
1971 if (queue_id
< 0xf000) {
1972 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
1980 parse_odp_packet(const struct dpif_netlink
*dpif
, struct ofpbuf
*buf
,
1981 struct dpif_upcall
*upcall
, int *dp_ifindex
)
1983 static const struct nl_policy ovs_packet_policy
[] = {
1984 /* Always present. */
1985 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
1986 .min_len
= ETH_HEADER_LEN
},
1987 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1989 /* OVS_PACKET_CMD_ACTION only. */
1990 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_UNSPEC
, .optional
= true },
1991 [OVS_PACKET_ATTR_EGRESS_TUN_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
1992 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1993 [OVS_PACKET_ATTR_MRU
] = { .type
= NL_A_U16
, .optional
= true }
1996 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
1997 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1998 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1999 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2001 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
2002 if (!nlmsg
|| !genl
|| !ovs_header
2003 || nlmsg
->nlmsg_type
!= ovs_packet_family
2004 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
2005 ARRAY_SIZE(ovs_packet_policy
))) {
2009 int type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
2010 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
2016 /* (Re)set ALL fields of '*upcall' on successful return. */
2017 upcall
->type
= type
;
2018 upcall
->key
= CONST_CAST(struct nlattr
*,
2019 nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]));
2020 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
2021 dpif_flow_hash(&dpif
->dpif
, upcall
->key
, upcall
->key_len
, &upcall
->ufid
);
2022 upcall
->userdata
= a
[OVS_PACKET_ATTR_USERDATA
];
2023 upcall
->out_tun_key
= a
[OVS_PACKET_ATTR_EGRESS_TUN_KEY
];
2024 upcall
->actions
= a
[OVS_PACKET_ATTR_ACTIONS
];
2025 upcall
->mru
= a
[OVS_PACKET_ATTR_MRU
];
2027 /* Allow overwriting the netlink attribute header without reallocating. */
2028 dp_packet_use_stub(&upcall
->packet
,
2029 CONST_CAST(struct nlattr
*,
2030 nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
])) - 1,
2031 nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]) +
2032 sizeof(struct nlattr
));
2033 dp_packet_set_data(&upcall
->packet
,
2034 (char *)dp_packet_data(&upcall
->packet
) + sizeof(struct nlattr
));
2035 dp_packet_set_size(&upcall
->packet
, nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]));
2037 *dp_ifindex
= ovs_header
->dp_ifindex
;
#ifdef _WIN32
#define PACKET_RECV_BATCH_SIZE 50
/* Windows receive path: tries each socket in the pool of handler
 * 'handler_id' in turn and parses the first upcall that belongs to this
 * datapath into '*upcall', using 'buf' for storage.
 *
 * Returns 0 if an upcall was received, EAGAIN if none is available (or
 * 'handler_id' is not usable, or PACKET_RECV_BATCH_SIZE reads were made
 * without success), otherwise another positive errno value. */
static int
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_windows_vport_sock *sock_pool;
    struct dpif_handler *handler;
    int read_tries = 0;
    uint32_t slot;

    /* Only one handler is supported currently. */
    if (!dpif->handlers
        || handler_id >= 1
        || handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    sock_pool = handler->vport_sock_pool;

    for (slot = 0; slot < VPORT_SOCK_POOL_SIZE; slot++) {
        for (;;) {
            int dp_ifindex;
            int error;

            if (++read_tries > PACKET_RECV_BATCH_SIZE) {
                return EAGAIN;
            }

            error = nl_sock_recv(sock_pool[slot].nl_sock, buf, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
                continue;
            }

            /* XXX: ch->last_poll = time_msec(); */
            if (error == EAGAIN) {
                /* This socket is drained; move on to the next one. */
                break;
            }
            if (error) {
                return error;
            }

            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (error) {
                return error;
            }
            if (dp_ifindex == dpif->dp_ifindex) {
                return 0;
            }
            /* Upcall for some other datapath: ignore it and read again. */
        }
    }

    return EAGAIN;
}
#endif
2110 dpif_netlink_recv__(struct dpif_netlink
*dpif
, uint32_t handler_id
,
2111 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2112 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2114 struct dpif_handler
*handler
;
2117 if (!dpif
->handlers
|| handler_id
>= dpif
->n_handlers
) {
2121 handler
= &dpif
->handlers
[handler_id
];
2122 if (handler
->event_offset
>= handler
->n_events
) {
2125 handler
->event_offset
= handler
->n_events
= 0;
2128 retval
= epoll_wait(handler
->epoll_fd
, handler
->epoll_events
,
2129 dpif
->uc_array_size
, 0);
2130 } while (retval
< 0 && errno
== EINTR
);
2133 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2134 VLOG_WARN_RL(&rl
, "epoll_wait failed (%s)", ovs_strerror(errno
));
2135 } else if (retval
> 0) {
2136 handler
->n_events
= retval
;
2140 while (handler
->event_offset
< handler
->n_events
) {
2141 int idx
= handler
->epoll_events
[handler
->event_offset
].data
.u32
;
2142 struct dpif_channel
*ch
= &dpif
->handlers
[handler_id
].channels
[idx
];
2144 handler
->event_offset
++;
2150 if (++read_tries
> 50) {
2154 error
= nl_sock_recv(ch
->sock
, buf
, false);
2155 if (error
== ENOBUFS
) {
2156 /* ENOBUFS typically means that we've received so many
2157 * packets that the buffer overflowed. Try again
2158 * immediately because there's almost certainly a packet
2159 * waiting for us. */
2160 report_loss(dpif
, ch
, idx
, handler_id
);
2164 ch
->last_poll
= time_msec();
2166 if (error
== EAGAIN
) {
2172 error
= parse_odp_packet(dpif
, buf
, upcall
, &dp_ifindex
);
2173 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
2186 dpif_netlink_recv(struct dpif
*dpif_
, uint32_t handler_id
,
2187 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2189 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2192 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2194 error
= dpif_netlink_recv_windows(dpif
, handler_id
, upcall
, buf
);
2196 error
= dpif_netlink_recv__(dpif
, handler_id
, upcall
, buf
);
2198 fat_rwlock_unlock(&dpif
->upcall_lock
);
2204 dpif_netlink_recv_wait__(struct dpif_netlink
*dpif
, uint32_t handler_id
)
2205 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2209 struct dpif_windows_vport_sock
*sock_pool
=
2210 dpif
->handlers
[handler_id
].vport_sock_pool
;
2212 /* Only one handler is supported currently. */
2213 if (handler_id
>= 1) {
2217 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
2218 nl_sock_wait(sock_pool
[i
].nl_sock
, POLLIN
);
2221 if (dpif
->handlers
&& handler_id
< dpif
->n_handlers
) {
2222 struct dpif_handler
*handler
= &dpif
->handlers
[handler_id
];
2224 poll_fd_wait(handler
->epoll_fd
, POLLIN
);
2230 dpif_netlink_recv_wait(struct dpif
*dpif_
, uint32_t handler_id
)
2232 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2234 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2235 dpif_netlink_recv_wait__(dpif
, handler_id
);
2236 fat_rwlock_unlock(&dpif
->upcall_lock
);
2240 dpif_netlink_recv_purge__(struct dpif_netlink
*dpif
)
2241 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
2243 if (dpif
->handlers
) {
2246 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
2247 if (!dpif
->handlers
[0].channels
[i
].sock
) {
2251 for (j
= 0; j
< dpif
->n_handlers
; j
++) {
2252 nl_sock_drain(dpif
->handlers
[j
].channels
[i
].sock
);
2259 dpif_netlink_recv_purge(struct dpif
*dpif_
)
2261 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2263 fat_rwlock_wrlock(&dpif
->upcall_lock
);
2264 dpif_netlink_recv_purge__(dpif
);
2265 fat_rwlock_unlock(&dpif
->upcall_lock
);
/* Returns a newly allocated string holding the first line of the kernel
 * module's version file, without its trailing newline.  Returns NULL if the
 * file cannot be opened or read, or when not built for Linux.  The string
 * comes from xstrdup(), so the caller is responsible for freeing it. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char version[MAX_VERSION_STR_SIZE];

        if (fgets(version, MAX_VERSION_STR_SIZE, stream)) {
            /* Cut the line at the first newline, if there is one. */
            version[strcspn(version, "\n")] = '\0';
            version_str = xstrdup(version);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2298 struct dpif_netlink_ct_dump_state
{
2299 struct ct_dpif_dump_state up
;
2300 struct nl_ct_dump_state
*nl_ct_dump
;
2304 dpif_netlink_ct_dump_start(struct dpif
*dpif OVS_UNUSED
,
2305 struct ct_dpif_dump_state
**dump_
,
2306 const uint16_t *zone
)
2308 struct dpif_netlink_ct_dump_state
*dump
;
2311 dump
= xzalloc(sizeof *dump
);
2312 err
= nl_ct_dump_start(&dump
->nl_ct_dump
, zone
);
2324 dpif_netlink_ct_dump_next(struct dpif
*dpif OVS_UNUSED
,
2325 struct ct_dpif_dump_state
*dump_
,
2326 struct ct_dpif_entry
*entry
)
2328 struct dpif_netlink_ct_dump_state
*dump
;
2330 INIT_CONTAINER(dump
, dump_
, up
);
2332 return nl_ct_dump_next(dump
->nl_ct_dump
, entry
);
2336 dpif_netlink_ct_dump_done(struct dpif
*dpif OVS_UNUSED
,
2337 struct ct_dpif_dump_state
*dump_
)
2339 struct dpif_netlink_ct_dump_state
*dump
;
2342 INIT_CONTAINER(dump
, dump_
, up
);
2344 err
= nl_ct_dump_done(dump
->nl_ct_dump
);
2350 dpif_netlink_ct_flush(struct dpif
*dpif OVS_UNUSED
, const uint16_t *zone
)
2353 return nl_ct_flush_zone(*zone
);
2355 return nl_ct_flush();
2359 const struct dpif_class dpif_netlink_class
= {
2362 dpif_netlink_enumerate
,
2366 dpif_netlink_destroy
,
2369 dpif_netlink_get_stats
,
2370 dpif_netlink_port_add
,
2371 dpif_netlink_port_del
,
2372 NULL
, /* port_set_config */
2373 dpif_netlink_port_query_by_number
,
2374 dpif_netlink_port_query_by_name
,
2375 dpif_netlink_port_get_pid
,
2376 dpif_netlink_port_dump_start
,
2377 dpif_netlink_port_dump_next
,
2378 dpif_netlink_port_dump_done
,
2379 dpif_netlink_port_poll
,
2380 dpif_netlink_port_poll_wait
,
2381 dpif_netlink_flow_flush
,
2382 dpif_netlink_flow_dump_create
,
2383 dpif_netlink_flow_dump_destroy
,
2384 dpif_netlink_flow_dump_thread_create
,
2385 dpif_netlink_flow_dump_thread_destroy
,
2386 dpif_netlink_flow_dump_next
,
2387 dpif_netlink_operate
,
2388 dpif_netlink_recv_set
,
2389 dpif_netlink_handlers_set
,
2390 NULL
, /* poll_thread_set */
2391 dpif_netlink_queue_to_priority
,
2393 dpif_netlink_recv_wait
,
2394 dpif_netlink_recv_purge
,
2395 NULL
, /* register_dp_purge_cb */
2396 NULL
, /* register_upcall_cb */
2397 NULL
, /* enable_upcall */
2398 NULL
, /* disable_upcall */
2399 dpif_netlink_get_datapath_version
, /* get_datapath_version */
2400 dpif_netlink_ct_dump_start
,
2401 dpif_netlink_ct_dump_next
,
2402 dpif_netlink_ct_dump_done
,
2403 dpif_netlink_ct_flush
2407 dpif_netlink_init(void)
2409 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
2412 if (ovsthread_once_start(&once
)) {
2413 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
2414 &ovs_datapath_family
);
2416 VLOG_WARN("Generic Netlink family '%s' does not exist. "
2417 "The Open vSwitch kernel module is probably not loaded.",
2418 OVS_DATAPATH_FAMILY
);
2421 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
2424 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
2427 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
2428 &ovs_packet_family
);
2431 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
2432 &ovs_vport_mcgroup
);
2435 ovsthread_once_done(&once
);
2442 dpif_netlink_is_internal_device(const char *name
)
2444 struct dpif_netlink_vport reply
;
2448 error
= dpif_netlink_vport_get(name
, &reply
, &buf
);
2451 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
2452 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
2453 name
, ovs_strerror(error
));
2456 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
2459 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2460 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
2461 * positive errno value.
2463 * 'vport' will contain pointers into 'buf', so the caller should not free
2464 * 'buf' while 'vport' is still in use. */
2466 dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*vport
,
2467 const struct ofpbuf
*buf
)
2469 static const struct nl_policy ovs_vport_policy
[] = {
2470 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
2471 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
2472 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2473 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_UNSPEC
},
2474 [OVS_VPORT_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_vport_stats
),
2476 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2479 dpif_netlink_vport_init(vport
);
2481 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2482 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2483 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2484 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2486 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
2487 if (!nlmsg
|| !genl
|| !ovs_header
2488 || nlmsg
->nlmsg_type
!= ovs_vport_family
2489 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
2490 ARRAY_SIZE(ovs_vport_policy
))) {
2494 vport
->cmd
= genl
->cmd
;
2495 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
2496 vport
->port_no
= nl_attr_get_odp_port(a
[OVS_VPORT_ATTR_PORT_NO
]);
2497 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
2498 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
2499 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
2500 vport
->n_upcall_pids
= nl_attr_get_size(a
[OVS_VPORT_ATTR_UPCALL_PID
])
2501 / (sizeof *vport
->upcall_pids
);
2502 vport
->upcall_pids
= nl_attr_get(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
2505 if (a
[OVS_VPORT_ATTR_STATS
]) {
2506 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
2508 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
2509 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
2510 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
2515 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2516 * followed by Netlink attributes corresponding to 'vport'. */
2518 dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*vport
,
2521 struct ovs_header
*ovs_header
;
2523 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
2524 vport
->cmd
, OVS_VPORT_VERSION
);
2526 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2527 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
2529 if (vport
->port_no
!= ODPP_NONE
) {
2530 nl_msg_put_odp_port(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
2533 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
2534 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
2538 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
2541 if (vport
->upcall_pids
) {
2542 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_UPCALL_PID
,
2544 vport
->n_upcall_pids
* sizeof *vport
->upcall_pids
);
2548 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
2549 vport
->stats
, sizeof *vport
->stats
);
2552 if (vport
->options
) {
2553 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
2554 vport
->options
, vport
->options_len
);
2558 /* Clears 'vport' to "empty" values. */
2560 dpif_netlink_vport_init(struct dpif_netlink_vport
*vport
)
2562 memset(vport
, 0, sizeof *vport
);
2563 vport
->port_no
= ODPP_NONE
;
2566 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2567 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2568 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2569 * result of the command is expected to be an ovs_vport also, which is decoded
2570 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2571 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2573 dpif_netlink_vport_transact(const struct dpif_netlink_vport
*request
,
2574 struct dpif_netlink_vport
*reply
,
2575 struct ofpbuf
**bufp
)
2577 struct ofpbuf
*request_buf
;
2580 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2582 error
= dpif_netlink_init();
2586 dpif_netlink_vport_init(reply
);
2591 request_buf
= ofpbuf_new(1024);
2592 dpif_netlink_vport_to_ofpbuf(request
, request_buf
);
2593 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2594 ofpbuf_delete(request_buf
);
2598 error
= dpif_netlink_vport_from_ofpbuf(reply
, *bufp
);
2601 dpif_netlink_vport_init(reply
);
2602 ofpbuf_delete(*bufp
);
2609 /* Obtains information about the kernel vport named 'name' and stores it into
2610 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
2611 * longer needed ('reply' will contain pointers into '*bufp'). */
2613 dpif_netlink_vport_get(const char *name
, struct dpif_netlink_vport
*reply
,
2614 struct ofpbuf
**bufp
)
2616 struct dpif_netlink_vport request
;
2618 dpif_netlink_vport_init(&request
);
2619 request
.cmd
= OVS_VPORT_CMD_GET
;
2620 request
.name
= name
;
2622 return dpif_netlink_vport_transact(&request
, reply
, bufp
);
2625 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2626 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
2627 * positive errno value.
2629 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
2630 * while 'dp' is still in use. */
2632 dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp
*dp
, const struct ofpbuf
*buf
)
2634 static const struct nl_policy ovs_datapath_policy
[] = {
2635 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2636 [OVS_DP_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_dp_stats
),
2638 [OVS_DP_ATTR_MEGAFLOW_STATS
] = {
2639 NL_POLICY_FOR(struct ovs_dp_megaflow_stats
),
2643 dpif_netlink_dp_init(dp
);
2645 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2646 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2647 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2648 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2650 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
2651 if (!nlmsg
|| !genl
|| !ovs_header
2652 || nlmsg
->nlmsg_type
!= ovs_datapath_family
2653 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
2654 ARRAY_SIZE(ovs_datapath_policy
))) {
2658 dp
->cmd
= genl
->cmd
;
2659 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
2660 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
2661 if (a
[OVS_DP_ATTR_STATS
]) {
2662 dp
->stats
= nl_attr_get(a
[OVS_DP_ATTR_STATS
]);
2665 if (a
[OVS_DP_ATTR_MEGAFLOW_STATS
]) {
2666 dp
->megaflow_stats
= nl_attr_get(a
[OVS_DP_ATTR_MEGAFLOW_STATS
]);
2672 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
2674 dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp
*dp
, struct ofpbuf
*buf
)
2676 struct ovs_header
*ovs_header
;
2678 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
2679 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
,
2680 OVS_DATAPATH_VERSION
);
2682 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2683 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
2686 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
2689 if (dp
->upcall_pid
) {
2690 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, *dp
->upcall_pid
);
2693 if (dp
->user_features
) {
2694 nl_msg_put_u32(buf
, OVS_DP_ATTR_USER_FEATURES
, dp
->user_features
);
2697 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
2700 /* Clears 'dp' to "empty" values. */
2702 dpif_netlink_dp_init(struct dpif_netlink_dp
*dp
)
2704 memset(dp
, 0, sizeof *dp
);
2708 dpif_netlink_dp_dump_start(struct nl_dump
*dump
)
2710 struct dpif_netlink_dp request
;
2713 dpif_netlink_dp_init(&request
);
2714 request
.cmd
= OVS_DP_CMD_GET
;
2716 buf
= ofpbuf_new(1024);
2717 dpif_netlink_dp_to_ofpbuf(&request
, buf
);
2718 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
2722 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2723 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2724 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2725 * result of the command is expected to be of the same form, which is decoded
2726 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2727 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2729 dpif_netlink_dp_transact(const struct dpif_netlink_dp
*request
,
2730 struct dpif_netlink_dp
*reply
, struct ofpbuf
**bufp
)
2732 struct ofpbuf
*request_buf
;
2735 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2737 request_buf
= ofpbuf_new(1024);
2738 dpif_netlink_dp_to_ofpbuf(request
, request_buf
);
2739 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2740 ofpbuf_delete(request_buf
);
2743 dpif_netlink_dp_init(reply
);
2745 error
= dpif_netlink_dp_from_ofpbuf(reply
, *bufp
);
2748 ofpbuf_delete(*bufp
);
2755 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
2756 * The caller must free '*bufp' when the reply is no longer needed ('reply'
2757 * will contain pointers into '*bufp'). */
2759 dpif_netlink_dp_get(const struct dpif
*dpif_
, struct dpif_netlink_dp
*reply
,
2760 struct ofpbuf
**bufp
)
2762 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2763 struct dpif_netlink_dp request
;
2765 dpif_netlink_dp_init(&request
);
2766 request
.cmd
= OVS_DP_CMD_GET
;
2767 request
.dp_ifindex
= dpif
->dp_ifindex
;
2769 return dpif_netlink_dp_transact(&request
, reply
, bufp
);
2772 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2773 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
2774 * positive errno value.
2776 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
2777 * while 'flow' is still in use. */
2779 dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*flow
,
2780 const struct ofpbuf
*buf
)
2782 static const struct nl_policy ovs_flow_policy
[__OVS_FLOW_ATTR_MAX
] = {
2783 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
2784 [OVS_FLOW_ATTR_MASK
] = { .type
= NL_A_NESTED
, .optional
= true },
2785 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2786 [OVS_FLOW_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_flow_stats
),
2788 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
2789 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
2790 [OVS_FLOW_ATTR_UFID
] = { .type
= NL_A_UNSPEC
, .optional
= true,
2791 .min_len
= sizeof(ovs_u128
) },
2792 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
2793 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
2794 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
2797 dpif_netlink_flow_init(flow
);
2799 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2800 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2801 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2802 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2804 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
2805 if (!nlmsg
|| !genl
|| !ovs_header
2806 || nlmsg
->nlmsg_type
!= ovs_flow_family
2807 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
2808 ARRAY_SIZE(ovs_flow_policy
))) {
2811 if (!a
[OVS_FLOW_ATTR_KEY
] && !a
[OVS_FLOW_ATTR_UFID
]) {
2815 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
2816 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
2817 if (a
[OVS_FLOW_ATTR_KEY
]) {
2818 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
2819 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
2822 if (a
[OVS_FLOW_ATTR_UFID
]) {
2823 const ovs_u128
*ufid
;
2825 ufid
= nl_attr_get_unspec(a
[OVS_FLOW_ATTR_UFID
],
2826 nl_attr_get_size(a
[OVS_FLOW_ATTR_UFID
]));
2828 flow
->ufid_present
= true;
2830 if (a
[OVS_FLOW_ATTR_MASK
]) {
2831 flow
->mask
= nl_attr_get(a
[OVS_FLOW_ATTR_MASK
]);
2832 flow
->mask_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_MASK
]);
2834 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
2835 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
2836 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
2838 if (a
[OVS_FLOW_ATTR_STATS
]) {
2839 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
2841 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
2842 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
2844 if (a
[OVS_FLOW_ATTR_USED
]) {
2845 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
2850 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2851 * followed by Netlink attributes corresponding to 'flow'. */
2853 dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*flow
,
2856 struct ovs_header
*ovs_header
;
2858 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
2859 NLM_F_REQUEST
| flow
->nlmsg_flags
,
2860 flow
->cmd
, OVS_FLOW_VERSION
);
2862 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2863 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
2865 if (flow
->ufid_present
) {
2866 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_UFID
, &flow
->ufid
,
2869 if (flow
->ufid_terse
) {
2870 nl_msg_put_u32(buf
, OVS_FLOW_ATTR_UFID_FLAGS
,
2871 OVS_UFID_F_OMIT_KEY
| OVS_UFID_F_OMIT_MASK
2872 | OVS_UFID_F_OMIT_ACTIONS
);
2874 if (!flow
->ufid_terse
|| !flow
->ufid_present
) {
2875 if (flow
->key_len
) {
2876 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
,
2877 flow
->key
, flow
->key_len
);
2880 if (flow
->mask_len
) {
2881 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_MASK
,
2882 flow
->mask
, flow
->mask_len
);
2884 if (flow
->actions
|| flow
->actions_len
) {
2885 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
2886 flow
->actions
, flow
->actions_len
);
2890 /* We never need to send these to the kernel. */
2891 ovs_assert(!flow
->stats
);
2892 ovs_assert(!flow
->tcp_flags
);
2893 ovs_assert(!flow
->used
);
2896 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
2899 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_PROBE
);
2903 /* Clears 'flow' to "empty" values. */
2905 dpif_netlink_flow_init(struct dpif_netlink_flow
*flow
)
2907 memset(flow
, 0, sizeof *flow
);
2910 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2911 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2912 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2913 * result of the command is expected to be a flow also, which is decoded and
2914 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
2915 * is no longer needed ('reply' will contain pointers into '*bufp'). */
2917 dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
2918 struct dpif_netlink_flow
*reply
,
2919 struct ofpbuf
**bufp
)
2921 struct ofpbuf
*request_buf
;
2924 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2927 request
->nlmsg_flags
|= NLM_F_ECHO
;
2930 request_buf
= ofpbuf_new(1024);
2931 dpif_netlink_flow_to_ofpbuf(request
, request_buf
);
2932 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2933 ofpbuf_delete(request_buf
);
2937 error
= dpif_netlink_flow_from_ofpbuf(reply
, *bufp
);
2940 dpif_netlink_flow_init(reply
);
2941 ofpbuf_delete(*bufp
);
2949 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*flow
,
2950 struct dpif_flow_stats
*stats
)
2953 stats
->n_packets
= get_32aligned_u64(&flow
->stats
->n_packets
);
2954 stats
->n_bytes
= get_32aligned_u64(&flow
->stats
->n_bytes
);
2956 stats
->n_packets
= 0;
2959 stats
->used
= flow
->used
? get_32aligned_u64(flow
->used
) : 0;
2960 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;
2963 /* Logs information about a packet that was recently lost in 'ch' (in
2966 report_loss(struct dpif_netlink
*dpif
, struct dpif_channel
*ch
, uint32_t ch_idx
,
2967 uint32_t handler_id
)
2969 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 5);
2972 if (VLOG_DROP_WARN(&rl
)) {
2977 if (ch
->last_poll
!= LLONG_MIN
) {
2978 ds_put_format(&s
, " (last polled %lld ms ago)",
2979 time_msec() - ch
->last_poll
);
2982 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
2983 dpif_name(&dpif
->dpif
), ch_idx
, handler_id
);