2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-netlink.h"
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
31 #include <sys/epoll.h>
36 #include "dpif-provider.h"
37 #include "openvswitch/dynamic-string.h"
39 #include "fat-rwlock.h"
41 #include "netdev-linux.h"
42 #include "netdev-vport.h"
43 #include "netlink-conntrack.h"
44 #include "netlink-notifier.h"
45 #include "netlink-socket.h"
48 #include "openvswitch/ofpbuf.h"
50 #include "poll-loop.h"
55 #include "unaligned.h"
57 #include "openvswitch/vlog.h"
59 VLOG_DEFINE_THIS_MODULE(dpif_netlink
);
65 enum { MAX_PORTS
= USHRT_MAX
};
67 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
68 * missing if we have old headers. */
69 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
71 struct dpif_netlink_dp
{
72 /* Generic Netlink header. */
75 /* struct ovs_header. */
79 const char *name
; /* OVS_DP_ATTR_NAME. */
80 const uint32_t *upcall_pid
; /* OVS_DP_ATTR_UPCALL_PID. */
81 uint32_t user_features
; /* OVS_DP_ATTR_USER_FEATURES */
82 const struct ovs_dp_stats
*stats
; /* OVS_DP_ATTR_STATS. */
83 const struct ovs_dp_megaflow_stats
*megaflow_stats
;
84 /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
87 static void dpif_netlink_dp_init(struct dpif_netlink_dp
*);
88 static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp
*,
89 const struct ofpbuf
*);
90 static void dpif_netlink_dp_dump_start(struct nl_dump
*);
91 static int dpif_netlink_dp_transact(const struct dpif_netlink_dp
*request
,
92 struct dpif_netlink_dp
*reply
,
93 struct ofpbuf
**bufp
);
94 static int dpif_netlink_dp_get(const struct dpif
*,
95 struct dpif_netlink_dp
*reply
,
96 struct ofpbuf
**bufp
);
98 struct dpif_netlink_flow
{
99 /* Generic Netlink header. */
102 /* struct ovs_header. */
103 unsigned int nlmsg_flags
;
108 * The 'stats' member points to 64-bit data that might only be aligned on
109 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
112 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
113 * the Netlink version of the command, even if actions_len is zero. */
114 const struct nlattr
*key
; /* OVS_FLOW_ATTR_KEY. */
116 const struct nlattr
*mask
; /* OVS_FLOW_ATTR_MASK. */
118 const struct nlattr
*actions
; /* OVS_FLOW_ATTR_ACTIONS. */
120 ovs_u128 ufid
; /* OVS_FLOW_ATTR_FLOW_ID. */
121 bool ufid_present
; /* Is there a UFID? */
122 bool ufid_terse
; /* Skip serializing key/mask/acts? */
123 const struct ovs_flow_stats
*stats
; /* OVS_FLOW_ATTR_STATS. */
124 const uint8_t *tcp_flags
; /* OVS_FLOW_ATTR_TCP_FLAGS. */
125 const ovs_32aligned_u64
*used
; /* OVS_FLOW_ATTR_USED. */
126 bool clear
; /* OVS_FLOW_ATTR_CLEAR. */
127 bool probe
; /* OVS_FLOW_ATTR_PROBE. */
130 static void dpif_netlink_flow_init(struct dpif_netlink_flow
*);
131 static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*,
132 const struct ofpbuf
*);
133 static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*,
135 static int dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
136 struct dpif_netlink_flow
*reply
,
137 struct ofpbuf
**bufp
);
138 static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*,
139 struct dpif_flow_stats
*);
140 static void dpif_netlink_flow_to_dpif_flow(struct dpif
*, struct dpif_flow
*,
141 const struct dpif_netlink_flow
*);
143 /* One of the dpif channels between the kernel and userspace. */
144 struct dpif_channel
{
145 struct nl_sock
*sock
; /* Netlink socket. */
146 long long int last_poll
; /* Last time this channel was polled. */
150 #define VPORT_SOCK_POOL_SIZE 1
151 /* On Windows, there is no native support for epoll. There are equivalent
152 * interfaces though, that are not used currently. For simplicity, a pool of
153 * netlink sockets is used. Each socket is represented by 'struct
154 * dpif_windows_vport_sock'. Since it is a pool, multiple OVS ports may be
155 * sharing the same socket. In the future, we can add a reference count and
157 struct dpif_windows_vport_sock
{
158 struct nl_sock
*nl_sock
; /* netlink socket. */
162 struct dpif_handler
{
163 struct dpif_channel
*channels
;/* Array of channels for each handler. */
164 struct epoll_event
*epoll_events
;
165 int epoll_fd
; /* epoll fd that includes channel socks. */
166 int n_events
; /* Num events returned by epoll_wait(). */
167 int event_offset
; /* Offset into 'epoll_events'. */
170 /* Pool of sockets. */
171 struct dpif_windows_vport_sock
*vport_sock_pool
;
172 size_t last_used_pool_idx
; /* Index to aid in allocating a
173 socket in the pool to a port. */
177 /* Datapath interface for the openvswitch Linux kernel module. */
178 struct dpif_netlink
{
182 /* Upcall messages. */
183 struct fat_rwlock upcall_lock
;
184 struct dpif_handler
*handlers
;
185 uint32_t n_handlers
; /* Num of upcall handlers. */
186 int uc_array_size
; /* Size of 'handler->channels' and */
187 /* 'handler->epoll_events'. */
189 /* Change notification. */
190 struct nl_sock
*port_notifier
; /* vport multicast group subscriber. */
191 bool refresh_channels
;
194 static void report_loss(struct dpif_netlink
*, struct dpif_channel
*,
195 uint32_t ch_idx
, uint32_t handler_id
);
197 static struct vlog_rate_limit error_rl
= VLOG_RATE_LIMIT_INIT(9999, 5);
199 /* Generic Netlink family numbers for OVS.
201 * Initialized by dpif_netlink_init(). */
202 static int ovs_datapath_family
;
203 static int ovs_vport_family
;
204 static int ovs_flow_family
;
205 static int ovs_packet_family
;
207 /* Generic Netlink multicast groups for OVS.
209 * Initialized by dpif_netlink_init(). */
210 static unsigned int ovs_vport_mcgroup
;
212 static int dpif_netlink_init(void);
213 static int open_dpif(const struct dpif_netlink_dp
*, struct dpif
**);
214 static uint32_t dpif_netlink_port_get_pid(const struct dpif
*,
215 odp_port_t port_no
, uint32_t hash
);
216 static void dpif_netlink_handler_uninit(struct dpif_handler
*handler
);
217 static int dpif_netlink_refresh_channels(struct dpif_netlink
*,
218 uint32_t n_handlers
);
219 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*,
221 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*,
222 const struct ofpbuf
*);
224 static struct dpif_netlink
*
225 dpif_netlink_cast(const struct dpif
*dpif
)
227 dpif_assert_class(dpif
, &dpif_netlink_class
);
228 return CONTAINER_OF(dpif
, struct dpif_netlink
, dpif
);
232 dpif_netlink_enumerate(struct sset
*all_dps
,
233 const struct dpif_class
*dpif_class OVS_UNUSED
)
236 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
237 struct ofpbuf msg
, buf
;
240 error
= dpif_netlink_init();
245 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
246 dpif_netlink_dp_dump_start(&dump
);
247 while (nl_dump_next(&dump
, &msg
, &buf
)) {
248 struct dpif_netlink_dp dp
;
250 if (!dpif_netlink_dp_from_ofpbuf(&dp
, &msg
)) {
251 sset_add(all_dps
, dp
.name
);
255 return nl_dump_done(&dump
);
259 dpif_netlink_open(const struct dpif_class
*class OVS_UNUSED
, const char *name
,
260 bool create
, struct dpif
**dpifp
)
262 struct dpif_netlink_dp dp_request
, dp
;
267 error
= dpif_netlink_init();
272 /* Create or look up datapath. */
273 dpif_netlink_dp_init(&dp_request
);
275 dp_request
.cmd
= OVS_DP_CMD_NEW
;
277 dp_request
.upcall_pid
= &upcall_pid
;
279 /* Use OVS_DP_CMD_SET to report user features */
280 dp_request
.cmd
= OVS_DP_CMD_SET
;
282 dp_request
.name
= name
;
283 dp_request
.user_features
|= OVS_DP_F_UNALIGNED
;
284 dp_request
.user_features
|= OVS_DP_F_VPORT_PIDS
;
285 error
= dpif_netlink_dp_transact(&dp_request
, &dp
, &buf
);
290 error
= open_dpif(&dp
, dpifp
);
296 open_dpif(const struct dpif_netlink_dp
*dp
, struct dpif
**dpifp
)
298 struct dpif_netlink
*dpif
;
300 dpif
= xzalloc(sizeof *dpif
);
301 dpif
->port_notifier
= NULL
;
302 fat_rwlock_init(&dpif
->upcall_lock
);
304 dpif_init(&dpif
->dpif
, &dpif_netlink_class
, dp
->name
,
305 dp
->dp_ifindex
, dp
->dp_ifindex
);
307 dpif
->dp_ifindex
= dp
->dp_ifindex
;
308 *dpifp
= &dpif
->dpif
;
313 /* Destroys the netlink sockets pointed to by the elements in 'socksp'
314 * and frees the 'socksp'. */
316 vport_del_socksp__(struct nl_sock
**socksp
, uint32_t n_socks
)
320 for (i
= 0; i
< n_socks
; i
++) {
321 nl_sock_destroy(socksp
[i
]);
327 /* Creates an array of netlink sockets. Returns an array of the
328 * corresponding pointers. Records the error in 'error'. */
329 static struct nl_sock
**
330 vport_create_socksp__(uint32_t n_socks
, int *error
)
332 struct nl_sock
**socksp
= xzalloc(n_socks
* sizeof *socksp
);
335 for (i
= 0; i
< n_socks
; i
++) {
336 *error
= nl_sock_create(NETLINK_GENERIC
, &socksp
[i
]);
345 vport_del_socksp__(socksp
, n_socks
);
352 vport_delete_sock_pool(struct dpif_handler
*handler
)
353 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
355 if (handler
->vport_sock_pool
) {
357 struct dpif_windows_vport_sock
*sock_pool
=
358 handler
->vport_sock_pool
;
360 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
361 if (sock_pool
[i
].nl_sock
) {
362 nl_sock_unsubscribe_packets(sock_pool
[i
].nl_sock
);
363 nl_sock_destroy(sock_pool
[i
].nl_sock
);
364 sock_pool
[i
].nl_sock
= NULL
;
368 free(handler
->vport_sock_pool
);
369 handler
->vport_sock_pool
= NULL
;
374 vport_create_sock_pool(struct dpif_handler
*handler
)
375 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
377 struct dpif_windows_vport_sock
*sock_pool
;
381 sock_pool
= xzalloc(VPORT_SOCK_POOL_SIZE
* sizeof *sock_pool
);
382 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
383 error
= nl_sock_create(NETLINK_GENERIC
, &sock_pool
[i
].nl_sock
);
388 /* Enable the netlink socket to receive packets. This is equivalent to
389 * calling nl_sock_join_mcgroup() to receive events. */
390 error
= nl_sock_subscribe_packets(sock_pool
[i
].nl_sock
);
396 handler
->vport_sock_pool
= sock_pool
;
397 handler
->last_used_pool_idx
= 0;
401 vport_delete_sock_pool(handler
);
405 /* Returns an array of pointers to netlink sockets. The sockets are picked from a
406 * pool. Records the error in 'error'. */
407 static struct nl_sock
**
408 vport_create_socksp_windows(struct dpif_netlink
*dpif
, int *error
)
409 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
411 uint32_t n_socks
= dpif
->n_handlers
;
412 struct nl_sock
**socksp
;
415 ovs_assert(n_socks
<= 1);
416 socksp
= xzalloc(n_socks
* sizeof *socksp
);
418 /* Pick netlink sockets to use in a round-robin fashion from each
419 * handler's pool of sockets. */
420 for (i
= 0; i
< n_socks
; i
++) {
421 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
422 struct dpif_windows_vport_sock
*sock_pool
= handler
->vport_sock_pool
;
423 size_t index
= handler
->last_used_pool_idx
;
425 /* A pool of sockets is allocated when the handler is initialized. */
426 if (sock_pool
== NULL
) {
432 ovs_assert(index
< VPORT_SOCK_POOL_SIZE
);
433 socksp
[i
] = sock_pool
[index
].nl_sock
;
434 socksp
[i
] = sock_pool
[index
].nl_sock
;
435 ovs_assert(socksp
[i
]);
436 index
= (index
== VPORT_SOCK_POOL_SIZE
- 1) ? 0 : index
+ 1;
437 handler
->last_used_pool_idx
= index
;
444 vport_del_socksp_windows(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
450 static struct nl_sock
**
451 vport_create_socksp(struct dpif_netlink
*dpif
, int *error
)
454 return vport_create_socksp_windows(dpif
, error
);
456 return vport_create_socksp__(dpif
->n_handlers
, error
);
461 vport_del_socksp(struct dpif_netlink
*dpif
, struct nl_sock
**socksp
)
464 vport_del_socksp_windows(dpif
, socksp
);
466 vport_del_socksp__(socksp
, dpif
->n_handlers
);
470 /* Given the array of pointers to netlink sockets 'socksp', returns
471 * the array of corresponding pids. If the 'socksp' is NULL, returns
472 * a single-element array of value 0. */
474 vport_socksp_to_pids(struct nl_sock
**socksp
, uint32_t n_socks
)
479 pids
= xzalloc(sizeof *pids
);
483 pids
= xzalloc(n_socks
* sizeof *pids
);
484 for (i
= 0; i
< n_socks
; i
++) {
485 pids
[i
] = nl_sock_pid(socksp
[i
]);
492 /* Given the port number 'port_idx', extracts the pids of netlink sockets
493 * associated with the port and assigns them to 'upcall_pids'. */
495 vport_get_pids(struct dpif_netlink
*dpif
, uint32_t port_idx
,
496 uint32_t **upcall_pids
)
501 /* Since the nl_sock can only be assigned in either all
502 * or none "dpif->handlers" channels, the following check
504 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
507 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
509 pids
= xzalloc(dpif
->n_handlers
* sizeof *pids
);
511 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
512 pids
[i
] = nl_sock_pid(dpif
->handlers
[i
].channels
[port_idx
].sock
);
521 vport_add_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
,
522 struct nl_sock
**socksp
)
524 struct epoll_event event
;
525 uint32_t port_idx
= odp_to_u32(port_no
);
529 if (dpif
->handlers
== NULL
) {
533 /* We assume that the datapath densely chooses port numbers, which can
534 * therefore be used as an index into 'channels' and 'epoll_events' of
535 * 'dpif->handler'. */
536 if (port_idx
>= dpif
->uc_array_size
) {
537 uint32_t new_size
= port_idx
+ 1;
539 if (new_size
> MAX_PORTS
) {
540 VLOG_WARN_RL(&error_rl
, "%s: datapath port %"PRIu32
" too big",
541 dpif_name(&dpif
->dpif
), port_no
);
545 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
546 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
548 handler
->channels
= xrealloc(handler
->channels
,
549 new_size
* sizeof *handler
->channels
);
551 for (j
= dpif
->uc_array_size
; j
< new_size
; j
++) {
552 handler
->channels
[j
].sock
= NULL
;
555 handler
->epoll_events
= xrealloc(handler
->epoll_events
,
556 new_size
* sizeof *handler
->epoll_events
);
559 dpif
->uc_array_size
= new_size
;
562 memset(&event
, 0, sizeof event
);
563 event
.events
= EPOLLIN
;
564 event
.data
.u32
= port_idx
;
566 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
567 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
570 if (epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_ADD
, nl_sock_fd(socksp
[i
]),
576 dpif
->handlers
[i
].channels
[port_idx
].sock
= socksp
[i
];
577 dpif
->handlers
[i
].channels
[port_idx
].last_poll
= LLONG_MIN
;
583 for (j
= 0; j
< i
; j
++) {
585 epoll_ctl(dpif
->handlers
[j
].epoll_fd
, EPOLL_CTL_DEL
,
586 nl_sock_fd(socksp
[j
]), NULL
);
588 dpif
->handlers
[j
].channels
[port_idx
].sock
= NULL
;
595 vport_del_channels(struct dpif_netlink
*dpif
, odp_port_t port_no
)
597 uint32_t port_idx
= odp_to_u32(port_no
);
600 if (!dpif
->handlers
|| port_idx
>= dpif
->uc_array_size
) {
604 /* Since the sock can only be assigned in either all or none
605 * of "dpif->handlers" channels, the following check would
607 if (!dpif
->handlers
[0].channels
[port_idx
].sock
) {
611 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
612 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
614 epoll_ctl(handler
->epoll_fd
, EPOLL_CTL_DEL
,
615 nl_sock_fd(handler
->channels
[port_idx
].sock
), NULL
);
616 nl_sock_destroy(handler
->channels
[port_idx
].sock
);
618 handler
->channels
[port_idx
].sock
= NULL
;
619 handler
->event_offset
= handler
->n_events
= 0;
624 destroy_all_channels(struct dpif_netlink
*dpif
)
625 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
629 if (!dpif
->handlers
) {
633 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
634 struct dpif_netlink_vport vport_request
;
635 uint32_t upcall_pids
= 0;
637 /* Since the sock can only be assigned in either all or none
638 * of "dpif->handlers" channels, the following check would
640 if (!dpif
->handlers
[0].channels
[i
].sock
) {
644 /* Turn off upcalls. */
645 dpif_netlink_vport_init(&vport_request
);
646 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
647 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
648 vport_request
.port_no
= u32_to_odp(i
);
649 vport_request
.n_upcall_pids
= 1;
650 vport_request
.upcall_pids
= &upcall_pids
;
651 dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
653 vport_del_channels(dpif
, u32_to_odp(i
));
656 for (i
= 0; i
< dpif
->n_handlers
; i
++) {
657 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
659 dpif_netlink_handler_uninit(handler
);
660 free(handler
->epoll_events
);
661 free(handler
->channels
);
664 free(dpif
->handlers
);
665 dpif
->handlers
= NULL
;
666 dpif
->n_handlers
= 0;
667 dpif
->uc_array_size
= 0;
671 dpif_netlink_close(struct dpif
*dpif_
)
673 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
675 nl_sock_destroy(dpif
->port_notifier
);
677 fat_rwlock_wrlock(&dpif
->upcall_lock
);
678 destroy_all_channels(dpif
);
679 fat_rwlock_unlock(&dpif
->upcall_lock
);
681 fat_rwlock_destroy(&dpif
->upcall_lock
);
686 dpif_netlink_destroy(struct dpif
*dpif_
)
688 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
689 struct dpif_netlink_dp dp
;
691 dpif_netlink_dp_init(&dp
);
692 dp
.cmd
= OVS_DP_CMD_DEL
;
693 dp
.dp_ifindex
= dpif
->dp_ifindex
;
694 return dpif_netlink_dp_transact(&dp
, NULL
, NULL
);
698 dpif_netlink_run(struct dpif
*dpif_
)
700 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
702 if (dpif
->refresh_channels
) {
703 dpif
->refresh_channels
= false;
704 fat_rwlock_wrlock(&dpif
->upcall_lock
);
705 dpif_netlink_refresh_channels(dpif
, dpif
->n_handlers
);
706 fat_rwlock_unlock(&dpif
->upcall_lock
);
712 dpif_netlink_get_stats(const struct dpif
*dpif_
, struct dpif_dp_stats
*stats
)
714 struct dpif_netlink_dp dp
;
718 error
= dpif_netlink_dp_get(dpif_
, &dp
, &buf
);
720 memset(stats
, 0, sizeof *stats
);
723 stats
->n_hit
= get_32aligned_u64(&dp
.stats
->n_hit
);
724 stats
->n_missed
= get_32aligned_u64(&dp
.stats
->n_missed
);
725 stats
->n_lost
= get_32aligned_u64(&dp
.stats
->n_lost
);
726 stats
->n_flows
= get_32aligned_u64(&dp
.stats
->n_flows
);
729 if (dp
.megaflow_stats
) {
730 stats
->n_masks
= dp
.megaflow_stats
->n_masks
;
731 stats
->n_mask_hit
= get_32aligned_u64(
732 &dp
.megaflow_stats
->n_mask_hit
);
734 stats
->n_masks
= UINT32_MAX
;
735 stats
->n_mask_hit
= UINT64_MAX
;
743 get_vport_type(const struct dpif_netlink_vport
*vport
)
745 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
747 switch (vport
->type
) {
748 case OVS_VPORT_TYPE_NETDEV
: {
749 const char *type
= netdev_get_type_from_name(vport
->name
);
751 return type
? type
: "system";
754 case OVS_VPORT_TYPE_INTERNAL
:
757 case OVS_VPORT_TYPE_GENEVE
:
760 case OVS_VPORT_TYPE_GRE
:
763 case OVS_VPORT_TYPE_VXLAN
:
766 case OVS_VPORT_TYPE_LISP
:
769 case OVS_VPORT_TYPE_STT
:
772 case OVS_VPORT_TYPE_UNSPEC
:
773 case __OVS_VPORT_TYPE_MAX
:
777 VLOG_WARN_RL(&rl
, "dp%d: port `%s' has unsupported type %u",
778 vport
->dp_ifindex
, vport
->name
, (unsigned int) vport
->type
);
782 static enum ovs_vport_type
783 netdev_to_ovs_vport_type(const struct netdev
*netdev
)
785 const char *type
= netdev_get_type(netdev
);
787 if (!strcmp(type
, "tap") || !strcmp(type
, "system")) {
788 return OVS_VPORT_TYPE_NETDEV
;
789 } else if (!strcmp(type
, "internal")) {
790 return OVS_VPORT_TYPE_INTERNAL
;
791 } else if (strstr(type
, "stt")) {
792 return OVS_VPORT_TYPE_STT
;
793 } else if (!strcmp(type
, "geneve")) {
794 return OVS_VPORT_TYPE_GENEVE
;
795 } else if (strstr(type
, "gre")) {
796 return OVS_VPORT_TYPE_GRE
;
797 } else if (!strcmp(type
, "vxlan")) {
798 return OVS_VPORT_TYPE_VXLAN
;
799 } else if (!strcmp(type
, "lisp")) {
800 return OVS_VPORT_TYPE_LISP
;
802 return OVS_VPORT_TYPE_UNSPEC
;
807 dpif_netlink_port_add__(struct dpif_netlink
*dpif
, struct netdev
*netdev
,
808 odp_port_t
*port_nop
)
809 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
811 const struct netdev_tunnel_config
*tnl_cfg
;
812 char namebuf
[NETDEV_VPORT_NAME_BUFSIZE
];
813 const char *name
= netdev_vport_get_dpif_port(netdev
,
814 namebuf
, sizeof namebuf
);
815 const char *type
= netdev_get_type(netdev
);
816 struct dpif_netlink_vport request
, reply
;
818 uint64_t options_stub
[64 / 8];
819 struct ofpbuf options
;
820 struct nl_sock
**socksp
= NULL
;
821 uint32_t *upcall_pids
;
824 if (dpif
->handlers
) {
825 socksp
= vport_create_socksp(dpif
, &error
);
831 dpif_netlink_vport_init(&request
);
832 request
.cmd
= OVS_VPORT_CMD_NEW
;
833 request
.dp_ifindex
= dpif
->dp_ifindex
;
834 request
.type
= netdev_to_ovs_vport_type(netdev
);
835 if (request
.type
== OVS_VPORT_TYPE_UNSPEC
) {
836 VLOG_WARN_RL(&error_rl
, "%s: cannot create port `%s' because it has "
837 "unsupported type `%s'",
838 dpif_name(&dpif
->dpif
), name
, type
);
839 vport_del_socksp(dpif
, socksp
);
844 if (request
.type
== OVS_VPORT_TYPE_NETDEV
) {
846 /* XXX : Map appropriate Windows handle */
848 netdev_linux_ethtool_set_flag(netdev
, ETH_FLAG_LRO
, "LRO", false);
852 tnl_cfg
= netdev_get_tunnel_config(netdev
);
853 if (tnl_cfg
&& (tnl_cfg
->dst_port
!= 0 || tnl_cfg
->exts
)) {
854 ofpbuf_use_stack(&options
, options_stub
, sizeof options_stub
);
855 if (tnl_cfg
->dst_port
) {
856 nl_msg_put_u16(&options
, OVS_TUNNEL_ATTR_DST_PORT
,
857 ntohs(tnl_cfg
->dst_port
));
863 ext_ofs
= nl_msg_start_nested(&options
, OVS_TUNNEL_ATTR_EXTENSION
);
864 for (i
= 0; i
< 32; i
++) {
865 if (tnl_cfg
->exts
& (1 << i
)) {
866 nl_msg_put_flag(&options
, i
);
869 nl_msg_end_nested(&options
, ext_ofs
);
871 request
.options
= options
.data
;
872 request
.options_len
= options
.size
;
875 request
.port_no
= *port_nop
;
876 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
877 request
.n_upcall_pids
= socksp
? dpif
->n_handlers
: 1;
878 request
.upcall_pids
= upcall_pids
;
880 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
882 *port_nop
= reply
.port_no
;
884 if (error
== EBUSY
&& *port_nop
!= ODPP_NONE
) {
885 VLOG_INFO("%s: requested port %"PRIu32
" is in use",
886 dpif_name(&dpif
->dpif
), *port_nop
);
889 vport_del_socksp(dpif
, socksp
);
894 error
= vport_add_channels(dpif
, *port_nop
, socksp
);
896 VLOG_INFO("%s: could not add channel for port %s",
897 dpif_name(&dpif
->dpif
), name
);
899 /* Delete the port. */
900 dpif_netlink_vport_init(&request
);
901 request
.cmd
= OVS_VPORT_CMD_DEL
;
902 request
.dp_ifindex
= dpif
->dp_ifindex
;
903 request
.port_no
= *port_nop
;
904 dpif_netlink_vport_transact(&request
, NULL
, NULL
);
905 vport_del_socksp(dpif
, socksp
);
919 dpif_netlink_port_add(struct dpif
*dpif_
, struct netdev
*netdev
,
920 odp_port_t
*port_nop
)
922 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
925 fat_rwlock_wrlock(&dpif
->upcall_lock
);
926 error
= dpif_netlink_port_add__(dpif
, netdev
, port_nop
);
927 fat_rwlock_unlock(&dpif
->upcall_lock
);
933 dpif_netlink_port_del__(struct dpif_netlink
*dpif
, odp_port_t port_no
)
934 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
936 struct dpif_netlink_vport vport
;
939 dpif_netlink_vport_init(&vport
);
940 vport
.cmd
= OVS_VPORT_CMD_DEL
;
941 vport
.dp_ifindex
= dpif
->dp_ifindex
;
942 vport
.port_no
= port_no
;
943 error
= dpif_netlink_vport_transact(&vport
, NULL
, NULL
);
945 vport_del_channels(dpif
, port_no
);
951 dpif_netlink_port_del(struct dpif
*dpif_
, odp_port_t port_no
)
953 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
956 fat_rwlock_wrlock(&dpif
->upcall_lock
);
957 error
= dpif_netlink_port_del__(dpif
, port_no
);
958 fat_rwlock_unlock(&dpif
->upcall_lock
);
964 dpif_netlink_port_query__(const struct dpif_netlink
*dpif
, odp_port_t port_no
,
965 const char *port_name
, struct dpif_port
*dpif_port
)
967 struct dpif_netlink_vport request
;
968 struct dpif_netlink_vport reply
;
972 dpif_netlink_vport_init(&request
);
973 request
.cmd
= OVS_VPORT_CMD_GET
;
974 request
.dp_ifindex
= dpif
->dp_ifindex
;
975 request
.port_no
= port_no
;
976 request
.name
= port_name
;
978 error
= dpif_netlink_vport_transact(&request
, &reply
, &buf
);
980 if (reply
.dp_ifindex
!= request
.dp_ifindex
) {
981 /* A query by name reported that 'port_name' is in some datapath
982 * other than 'dpif', but the caller wants to know about 'dpif'. */
984 } else if (dpif_port
) {
985 dpif_port
->name
= xstrdup(reply
.name
);
986 dpif_port
->type
= xstrdup(get_vport_type(&reply
));
987 dpif_port
->port_no
= reply
.port_no
;
995 dpif_netlink_port_query_by_number(const struct dpif
*dpif_
, odp_port_t port_no
,
996 struct dpif_port
*dpif_port
)
998 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1000 return dpif_netlink_port_query__(dpif
, port_no
, NULL
, dpif_port
);
1004 dpif_netlink_port_query_by_name(const struct dpif
*dpif_
, const char *devname
,
1005 struct dpif_port
*dpif_port
)
1007 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1009 return dpif_netlink_port_query__(dpif
, 0, devname
, dpif_port
);
1013 dpif_netlink_port_get_pid__(const struct dpif_netlink
*dpif
,
1014 odp_port_t port_no
, uint32_t hash
)
1015 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
1017 uint32_t port_idx
= odp_to_u32(port_no
);
1020 if (dpif
->handlers
&& dpif
->uc_array_size
> 0) {
1021 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1022 * channel, since it is not heavily loaded. */
1023 uint32_t idx
= port_idx
>= dpif
->uc_array_size
? 0 : port_idx
;
1024 struct dpif_handler
*h
= &dpif
->handlers
[hash
% dpif
->n_handlers
];
1026 /* Needs to check in case the socket pointer is changed in between
1027 * the holding of upcall_lock. A known case happens when the main
1028 * thread deletes the vport while the handler thread is handling
1029 * the upcall from that port. */
1030 if (h
->channels
[idx
].sock
) {
1031 pid
= nl_sock_pid(h
->channels
[idx
].sock
);
1039 dpif_netlink_port_get_pid(const struct dpif
*dpif_
, odp_port_t port_no
,
1042 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1045 fat_rwlock_rdlock(&dpif
->upcall_lock
);
1046 ret
= dpif_netlink_port_get_pid__(dpif
, port_no
, hash
);
1047 fat_rwlock_unlock(&dpif
->upcall_lock
);
1053 dpif_netlink_flow_flush(struct dpif
*dpif_
)
1055 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1056 struct dpif_netlink_flow flow
;
1058 dpif_netlink_flow_init(&flow
);
1059 flow
.cmd
= OVS_FLOW_CMD_DEL
;
1060 flow
.dp_ifindex
= dpif
->dp_ifindex
;
1061 return dpif_netlink_flow_transact(&flow
, NULL
, NULL
);
1064 struct dpif_netlink_port_state
{
1065 struct nl_dump dump
;
1070 dpif_netlink_port_dump_start__(const struct dpif_netlink
*dpif
,
1071 struct nl_dump
*dump
)
1073 struct dpif_netlink_vport request
;
1076 dpif_netlink_vport_init(&request
);
1077 request
.cmd
= OVS_VPORT_CMD_GET
;
1078 request
.dp_ifindex
= dpif
->dp_ifindex
;
1080 buf
= ofpbuf_new(1024);
1081 dpif_netlink_vport_to_ofpbuf(&request
, buf
);
1082 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
1087 dpif_netlink_port_dump_start(const struct dpif
*dpif_
, void **statep
)
1089 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1090 struct dpif_netlink_port_state
*state
;
1092 *statep
= state
= xmalloc(sizeof *state
);
1093 dpif_netlink_port_dump_start__(dpif
, &state
->dump
);
1095 ofpbuf_init(&state
->buf
, NL_DUMP_BUFSIZE
);
1100 dpif_netlink_port_dump_next__(const struct dpif_netlink
*dpif
,
1101 struct nl_dump
*dump
,
1102 struct dpif_netlink_vport
*vport
,
1103 struct ofpbuf
*buffer
)
1108 if (!nl_dump_next(dump
, &buf
, buffer
)) {
1112 error
= dpif_netlink_vport_from_ofpbuf(vport
, &buf
);
1114 VLOG_WARN_RL(&error_rl
, "%s: failed to parse vport record (%s)",
1115 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1121 dpif_netlink_port_dump_next(const struct dpif
*dpif_
, void *state_
,
1122 struct dpif_port
*dpif_port
)
1124 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1125 struct dpif_netlink_port_state
*state
= state_
;
1126 struct dpif_netlink_vport vport
;
1129 error
= dpif_netlink_port_dump_next__(dpif
, &state
->dump
, &vport
,
1134 dpif_port
->name
= CONST_CAST(char *, vport
.name
);
1135 dpif_port
->type
= CONST_CAST(char *, get_vport_type(&vport
));
1136 dpif_port
->port_no
= vport
.port_no
;
1141 dpif_netlink_port_dump_done(const struct dpif
*dpif_ OVS_UNUSED
, void *state_
)
1143 struct dpif_netlink_port_state
*state
= state_
;
1144 int error
= nl_dump_done(&state
->dump
);
1146 ofpbuf_uninit(&state
->buf
);
1152 dpif_netlink_port_poll(const struct dpif
*dpif_
, char **devnamep
)
1154 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1156 /* Lazily create the Netlink socket to listen for notifications. */
1157 if (!dpif
->port_notifier
) {
1158 struct nl_sock
*sock
;
1161 error
= nl_sock_create(NETLINK_GENERIC
, &sock
);
1166 error
= nl_sock_join_mcgroup(sock
, ovs_vport_mcgroup
);
1168 nl_sock_destroy(sock
);
1171 dpif
->port_notifier
= sock
;
1173 /* We have no idea of the current state so report that everything
1179 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1180 uint64_t buf_stub
[4096 / 8];
1184 ofpbuf_use_stub(&buf
, buf_stub
, sizeof buf_stub
);
1185 error
= nl_sock_recv(dpif
->port_notifier
, &buf
, false);
1187 struct dpif_netlink_vport vport
;
1189 error
= dpif_netlink_vport_from_ofpbuf(&vport
, &buf
);
1191 if (vport
.dp_ifindex
== dpif
->dp_ifindex
1192 && (vport
.cmd
== OVS_VPORT_CMD_NEW
1193 || vport
.cmd
== OVS_VPORT_CMD_DEL
1194 || vport
.cmd
== OVS_VPORT_CMD_SET
)) {
1195 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8
,
1196 dpif
->dpif
.full_name
, vport
.name
, vport
.cmd
);
1197 if (vport
.cmd
== OVS_VPORT_CMD_DEL
&& dpif
->handlers
) {
1198 dpif
->refresh_channels
= true;
1200 *devnamep
= xstrdup(vport
.name
);
1201 ofpbuf_uninit(&buf
);
1205 } else if (error
!= EAGAIN
) {
1206 VLOG_WARN_RL(&rl
, "error reading or parsing netlink (%s)",
1207 ovs_strerror(error
));
1208 nl_sock_drain(dpif
->port_notifier
);
1212 ofpbuf_uninit(&buf
);
1220 dpif_netlink_port_poll_wait(const struct dpif
*dpif_
)
1222 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1224 if (dpif
->port_notifier
) {
1225 nl_sock_wait(dpif
->port_notifier
, POLLIN
);
1227 poll_immediate_wake();
1232 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow
*request
,
1233 const ovs_u128
*ufid
, bool terse
)
1236 request
->ufid
= *ufid
;
1237 request
->ufid_present
= true;
1239 request
->ufid_present
= false;
1241 request
->ufid_terse
= terse
;
1245 dpif_netlink_init_flow_get__(const struct dpif_netlink
*dpif
,
1246 const struct nlattr
*key
, size_t key_len
,
1247 const ovs_u128
*ufid
, bool terse
,
1248 struct dpif_netlink_flow
*request
)
1250 dpif_netlink_flow_init(request
);
1251 request
->cmd
= OVS_FLOW_CMD_GET
;
1252 request
->dp_ifindex
= dpif
->dp_ifindex
;
1254 request
->key_len
= key_len
;
1255 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1259 dpif_netlink_init_flow_get(const struct dpif_netlink
*dpif
,
1260 const struct dpif_flow_get
*get
,
1261 struct dpif_netlink_flow
*request
)
1263 dpif_netlink_init_flow_get__(dpif
, get
->key
, get
->key_len
, get
->ufid
,
1268 dpif_netlink_flow_get__(const struct dpif_netlink
*dpif
,
1269 const struct nlattr
*key
, size_t key_len
,
1270 const ovs_u128
*ufid
, bool terse
,
1271 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1273 struct dpif_netlink_flow request
;
1275 dpif_netlink_init_flow_get__(dpif
, key
, key_len
, ufid
, terse
, &request
);
1276 return dpif_netlink_flow_transact(&request
, reply
, bufp
);
1280 dpif_netlink_flow_get(const struct dpif_netlink
*dpif
,
1281 const struct dpif_netlink_flow
*flow
,
1282 struct dpif_netlink_flow
*reply
, struct ofpbuf
**bufp
)
1284 return dpif_netlink_flow_get__(dpif
, flow
->key
, flow
->key_len
,
1285 flow
->ufid_present
? &flow
->ufid
: NULL
,
1286 false, reply
, bufp
);
1290 dpif_netlink_init_flow_put(struct dpif_netlink
*dpif
,
1291 const struct dpif_flow_put
*put
,
1292 struct dpif_netlink_flow
*request
)
1294 static const struct nlattr dummy_action
;
1296 dpif_netlink_flow_init(request
);
1297 request
->cmd
= (put
->flags
& DPIF_FP_CREATE
1298 ? OVS_FLOW_CMD_NEW
: OVS_FLOW_CMD_SET
);
1299 request
->dp_ifindex
= dpif
->dp_ifindex
;
1300 request
->key
= put
->key
;
1301 request
->key_len
= put
->key_len
;
1302 request
->mask
= put
->mask
;
1303 request
->mask_len
= put
->mask_len
;
1304 dpif_netlink_flow_init_ufid(request
, put
->ufid
, false);
1306 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1307 request
->actions
= (put
->actions
1309 : CONST_CAST(struct nlattr
*, &dummy_action
));
1310 request
->actions_len
= put
->actions_len
;
1311 if (put
->flags
& DPIF_FP_ZERO_STATS
) {
1312 request
->clear
= true;
1314 if (put
->flags
& DPIF_FP_PROBE
) {
1315 request
->probe
= true;
1317 request
->nlmsg_flags
= put
->flags
& DPIF_FP_MODIFY
? 0 : NLM_F_CREATE
;
1321 dpif_netlink_init_flow_del__(struct dpif_netlink
*dpif
,
1322 const struct nlattr
*key
, size_t key_len
,
1323 const ovs_u128
*ufid
, bool terse
,
1324 struct dpif_netlink_flow
*request
)
1326 dpif_netlink_flow_init(request
);
1327 request
->cmd
= OVS_FLOW_CMD_DEL
;
1328 request
->dp_ifindex
= dpif
->dp_ifindex
;
1330 request
->key_len
= key_len
;
1331 dpif_netlink_flow_init_ufid(request
, ufid
, terse
);
1335 dpif_netlink_init_flow_del(struct dpif_netlink
*dpif
,
1336 const struct dpif_flow_del
*del
,
1337 struct dpif_netlink_flow
*request
)
1339 dpif_netlink_init_flow_del__(dpif
, del
->key
, del
->key_len
,
1340 del
->ufid
, del
->terse
, request
);
1343 struct dpif_netlink_flow_dump
{
1344 struct dpif_flow_dump up
;
1345 struct nl_dump nl_dump
;
1349 static struct dpif_netlink_flow_dump
*
1350 dpif_netlink_flow_dump_cast(struct dpif_flow_dump
*dump
)
1352 return CONTAINER_OF(dump
, struct dpif_netlink_flow_dump
, up
);
1355 static struct dpif_flow_dump
*
1356 dpif_netlink_flow_dump_create(const struct dpif
*dpif_
, bool terse
)
1358 const struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1359 struct dpif_netlink_flow_dump
*dump
;
1360 struct dpif_netlink_flow request
;
1363 dump
= xmalloc(sizeof *dump
);
1364 dpif_flow_dump_init(&dump
->up
, dpif_
);
1366 dpif_netlink_flow_init(&request
);
1367 request
.cmd
= OVS_FLOW_CMD_GET
;
1368 request
.dp_ifindex
= dpif
->dp_ifindex
;
1369 request
.ufid_present
= false;
1370 request
.ufid_terse
= terse
;
1372 buf
= ofpbuf_new(1024);
1373 dpif_netlink_flow_to_ofpbuf(&request
, buf
);
1374 nl_dump_start(&dump
->nl_dump
, NETLINK_GENERIC
, buf
);
1376 atomic_init(&dump
->status
, 0);
1377 dump
->up
.terse
= terse
;
1383 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump
*dump_
)
1385 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1386 unsigned int nl_status
= nl_dump_done(&dump
->nl_dump
);
1389 /* No other thread has access to 'dump' at this point. */
1390 atomic_read_relaxed(&dump
->status
, &dump_status
);
1392 return dump_status
? dump_status
: nl_status
;
1395 struct dpif_netlink_flow_dump_thread
{
1396 struct dpif_flow_dump_thread up
;
1397 struct dpif_netlink_flow_dump
*dump
;
1398 struct dpif_netlink_flow flow
;
1399 struct dpif_flow_stats stats
;
1400 struct ofpbuf nl_flows
; /* Always used to store flows. */
1401 struct ofpbuf
*nl_actions
; /* Used if kernel does not supply actions. */
1404 static struct dpif_netlink_flow_dump_thread
*
1405 dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread
*thread
)
1407 return CONTAINER_OF(thread
, struct dpif_netlink_flow_dump_thread
, up
);
1410 static struct dpif_flow_dump_thread
*
1411 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump
*dump_
)
1413 struct dpif_netlink_flow_dump
*dump
= dpif_netlink_flow_dump_cast(dump_
);
1414 struct dpif_netlink_flow_dump_thread
*thread
;
1416 thread
= xmalloc(sizeof *thread
);
1417 dpif_flow_dump_thread_init(&thread
->up
, &dump
->up
);
1418 thread
->dump
= dump
;
1419 ofpbuf_init(&thread
->nl_flows
, NL_DUMP_BUFSIZE
);
1420 thread
->nl_actions
= NULL
;
1426 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread
*thread_
)
1428 struct dpif_netlink_flow_dump_thread
*thread
1429 = dpif_netlink_flow_dump_thread_cast(thread_
);
1431 ofpbuf_uninit(&thread
->nl_flows
);
1432 ofpbuf_delete(thread
->nl_actions
);
1437 dpif_netlink_flow_to_dpif_flow(struct dpif
*dpif
, struct dpif_flow
*dpif_flow
,
1438 const struct dpif_netlink_flow
*datapath_flow
)
1440 dpif_flow
->key
= datapath_flow
->key
;
1441 dpif_flow
->key_len
= datapath_flow
->key_len
;
1442 dpif_flow
->mask
= datapath_flow
->mask
;
1443 dpif_flow
->mask_len
= datapath_flow
->mask_len
;
1444 dpif_flow
->actions
= datapath_flow
->actions
;
1445 dpif_flow
->actions_len
= datapath_flow
->actions_len
;
1446 dpif_flow
->ufid_present
= datapath_flow
->ufid_present
;
1447 dpif_flow
->pmd_id
= PMD_ID_NULL
;
1448 if (datapath_flow
->ufid_present
) {
1449 dpif_flow
->ufid
= datapath_flow
->ufid
;
1451 ovs_assert(datapath_flow
->key
&& datapath_flow
->key_len
);
1452 dpif_flow_hash(dpif
, datapath_flow
->key
, datapath_flow
->key_len
,
1455 dpif_netlink_flow_get_stats(datapath_flow
, &dpif_flow
->stats
);
1459 dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread
*thread_
,
1460 struct dpif_flow
*flows
, int max_flows
)
1462 struct dpif_netlink_flow_dump_thread
*thread
1463 = dpif_netlink_flow_dump_thread_cast(thread_
);
1464 struct dpif_netlink_flow_dump
*dump
= thread
->dump
;
1465 struct dpif_netlink
*dpif
= dpif_netlink_cast(thread
->up
.dpif
);
1468 ofpbuf_delete(thread
->nl_actions
);
1469 thread
->nl_actions
= NULL
;
1473 || (n_flows
< max_flows
&& thread
->nl_flows
.size
)) {
1474 struct dpif_netlink_flow datapath_flow
;
1475 struct ofpbuf nl_flow
;
1478 /* Try to grab another flow. */
1479 if (!nl_dump_next(&dump
->nl_dump
, &nl_flow
, &thread
->nl_flows
)) {
1483 /* Convert the flow to our output format. */
1484 error
= dpif_netlink_flow_from_ofpbuf(&datapath_flow
, &nl_flow
);
1486 atomic_store_relaxed(&dump
->status
, error
);
1490 if (dump
->up
.terse
|| datapath_flow
.actions
) {
1491 /* Common case: we don't want actions, or the flow includes
1493 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1496 /* Rare case: the flow does not include actions. Retrieve this
1497 * individual flow again to get the actions. */
1498 error
= dpif_netlink_flow_get(dpif
, &datapath_flow
,
1499 &datapath_flow
, &thread
->nl_actions
);
1500 if (error
== ENOENT
) {
1501 VLOG_DBG("dumped flow disappeared on get");
1504 VLOG_WARN("error fetching dumped flow: %s",
1505 ovs_strerror(error
));
1506 atomic_store_relaxed(&dump
->status
, error
);
1510 /* Save this flow. Then exit, because we only have one buffer to
1511 * handle this case. */
1512 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, &flows
[n_flows
++],
1521 dpif_netlink_encode_execute(int dp_ifindex
, const struct dpif_execute
*d_exec
,
1524 struct ovs_header
*k_exec
;
1527 ofpbuf_prealloc_tailroom(buf
, (64
1528 + dp_packet_size(d_exec
->packet
)
1529 + ODP_KEY_METADATA_SIZE
1530 + d_exec
->actions_len
));
1532 nl_msg_put_genlmsghdr(buf
, 0, ovs_packet_family
, NLM_F_REQUEST
,
1533 OVS_PACKET_CMD_EXECUTE
, OVS_PACKET_VERSION
);
1535 k_exec
= ofpbuf_put_uninit(buf
, sizeof *k_exec
);
1536 k_exec
->dp_ifindex
= dp_ifindex
;
1538 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_PACKET
,
1539 dp_packet_data(d_exec
->packet
),
1540 dp_packet_size(d_exec
->packet
));
1542 key_ofs
= nl_msg_start_nested(buf
, OVS_PACKET_ATTR_KEY
);
1543 odp_key_from_pkt_metadata(buf
, &d_exec
->packet
->md
);
1544 nl_msg_end_nested(buf
, key_ofs
);
1546 nl_msg_put_unspec(buf
, OVS_PACKET_ATTR_ACTIONS
,
1547 d_exec
->actions
, d_exec
->actions_len
);
1548 if (d_exec
->probe
) {
1549 nl_msg_put_flag(buf
, OVS_PACKET_ATTR_PROBE
);
1552 nl_msg_put_u16(buf
, OVS_PACKET_ATTR_MRU
, d_exec
->mtu
);
1556 /* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1557 * Returns the number actually executed (at least 1, if 'n_ops' is
1560 dpif_netlink_operate__(struct dpif_netlink
*dpif
,
1561 struct dpif_op
**ops
, size_t n_ops
)
1563 enum { MAX_OPS
= 50 };
1566 struct nl_transaction txn
;
1568 struct ofpbuf request
;
1569 uint64_t request_stub
[1024 / 8];
1571 struct ofpbuf reply
;
1572 uint64_t reply_stub
[1024 / 8];
1575 struct nl_transaction
*txnsp
[MAX_OPS
];
1578 n_ops
= MIN(n_ops
, MAX_OPS
);
1579 for (i
= 0; i
< n_ops
; i
++) {
1580 struct op_auxdata
*aux
= &auxes
[i
];
1581 struct dpif_op
*op
= ops
[i
];
1582 struct dpif_flow_put
*put
;
1583 struct dpif_flow_del
*del
;
1584 struct dpif_flow_get
*get
;
1585 struct dpif_netlink_flow flow
;
1587 ofpbuf_use_stub(&aux
->request
,
1588 aux
->request_stub
, sizeof aux
->request_stub
);
1589 aux
->txn
.request
= &aux
->request
;
1591 ofpbuf_use_stub(&aux
->reply
, aux
->reply_stub
, sizeof aux
->reply_stub
);
1592 aux
->txn
.reply
= NULL
;
1595 case DPIF_OP_FLOW_PUT
:
1596 put
= &op
->u
.flow_put
;
1597 dpif_netlink_init_flow_put(dpif
, put
, &flow
);
1599 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1600 aux
->txn
.reply
= &aux
->reply
;
1602 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1605 case DPIF_OP_FLOW_DEL
:
1606 del
= &op
->u
.flow_del
;
1607 dpif_netlink_init_flow_del(dpif
, del
, &flow
);
1609 flow
.nlmsg_flags
|= NLM_F_ECHO
;
1610 aux
->txn
.reply
= &aux
->reply
;
1612 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1615 case DPIF_OP_EXECUTE
:
1616 /* Can't execute a packet that won't fit in a Netlink attribute. */
1617 if (OVS_UNLIKELY(nl_attr_oversized(
1618 dp_packet_size(op
->u
.execute
.packet
)))) {
1619 /* Report an error immediately if this is the first operation.
1620 * Otherwise the easiest thing to do is to postpone to the next
1621 * call (when this will be the first operation). */
1623 VLOG_ERR_RL(&error_rl
,
1624 "dropping oversized %"PRIu32
"-byte packet",
1625 dp_packet_size(op
->u
.execute
.packet
));
1626 op
->error
= ENOBUFS
;
1631 dpif_netlink_encode_execute(dpif
->dp_ifindex
, &op
->u
.execute
,
1636 case DPIF_OP_FLOW_GET
:
1637 get
= &op
->u
.flow_get
;
1638 dpif_netlink_init_flow_get(dpif
, get
, &flow
);
1639 aux
->txn
.reply
= get
->buffer
;
1640 dpif_netlink_flow_to_ofpbuf(&flow
, &aux
->request
);
1648 for (i
= 0; i
< n_ops
; i
++) {
1649 txnsp
[i
] = &auxes
[i
].txn
;
1651 nl_transact_multiple(NETLINK_GENERIC
, txnsp
, n_ops
);
1653 for (i
= 0; i
< n_ops
; i
++) {
1654 struct op_auxdata
*aux
= &auxes
[i
];
1655 struct nl_transaction
*txn
= &auxes
[i
].txn
;
1656 struct dpif_op
*op
= ops
[i
];
1657 struct dpif_flow_put
*put
;
1658 struct dpif_flow_del
*del
;
1659 struct dpif_flow_get
*get
;
1661 op
->error
= txn
->error
;
1664 case DPIF_OP_FLOW_PUT
:
1665 put
= &op
->u
.flow_put
;
1668 struct dpif_netlink_flow reply
;
1670 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1673 dpif_netlink_flow_get_stats(&reply
, put
->stats
);
1679 case DPIF_OP_FLOW_DEL
:
1680 del
= &op
->u
.flow_del
;
1683 struct dpif_netlink_flow reply
;
1685 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
,
1688 dpif_netlink_flow_get_stats(&reply
, del
->stats
);
1694 case DPIF_OP_EXECUTE
:
1697 case DPIF_OP_FLOW_GET
:
1698 get
= &op
->u
.flow_get
;
1700 struct dpif_netlink_flow reply
;
1702 op
->error
= dpif_netlink_flow_from_ofpbuf(&reply
, txn
->reply
);
1704 dpif_netlink_flow_to_dpif_flow(&dpif
->dpif
, get
->flow
,
1714 ofpbuf_uninit(&aux
->request
);
1715 ofpbuf_uninit(&aux
->reply
);
/* Executes all 'n_ops' operations in 'ops' against 'dpif_' by calling
 * dpif_netlink_operate__() repeatedly until every operation has been
 * consumed. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
1735 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1737 vport_delete_sock_pool(handler
);
1741 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1743 return vport_create_sock_pool(handler
);
1748 dpif_netlink_handler_init(struct dpif_handler
*handler
)
1750 handler
->epoll_fd
= epoll_create(10);
1751 return handler
->epoll_fd
< 0 ? errno
: 0;
1755 dpif_netlink_handler_uninit(struct dpif_handler
*handler
)
1757 close(handler
->epoll_fd
);
1761 /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
1762 * currently in 'dpif' in the kernel, by adding a new set of channels for
1763 * any kernel vport that lacks one and deleting any channels that have no
1764 * backing kernel vports. */
1766 dpif_netlink_refresh_channels(struct dpif_netlink
*dpif
, uint32_t n_handlers
)
1767 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1769 unsigned long int *keep_channels
;
1770 struct dpif_netlink_vport vport
;
1771 size_t keep_channels_nbits
;
1772 struct nl_dump dump
;
1773 uint64_t reply_stub
[NL_DUMP_BUFSIZE
/ 8];
1778 ovs_assert(!WINDOWS
|| n_handlers
<= 1);
1779 ovs_assert(!WINDOWS
|| dpif
->n_handlers
<= 1);
1781 if (dpif
->n_handlers
!= n_handlers
) {
1782 destroy_all_channels(dpif
);
1783 dpif
->handlers
= xzalloc(n_handlers
* sizeof *dpif
->handlers
);
1784 for (i
= 0; i
< n_handlers
; i
++) {
1786 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1788 error
= dpif_netlink_handler_init(handler
);
1791 struct dpif_handler
*tmp
= &dpif
->handlers
[i
];
1794 for (j
= 0; j
< i
; j
++) {
1795 dpif_netlink_handler_uninit(tmp
);
1797 free(dpif
->handlers
);
1798 dpif
->handlers
= NULL
;
1803 dpif
->n_handlers
= n_handlers
;
1806 for (i
= 0; i
< n_handlers
; i
++) {
1807 struct dpif_handler
*handler
= &dpif
->handlers
[i
];
1809 handler
->event_offset
= handler
->n_events
= 0;
1812 keep_channels_nbits
= dpif
->uc_array_size
;
1813 keep_channels
= bitmap_allocate(keep_channels_nbits
);
1815 ofpbuf_use_stub(&buf
, reply_stub
, sizeof reply_stub
);
1816 dpif_netlink_port_dump_start__(dpif
, &dump
);
1817 while (!dpif_netlink_port_dump_next__(dpif
, &dump
, &vport
, &buf
)) {
1818 uint32_t port_no
= odp_to_u32(vport
.port_no
);
1819 uint32_t *upcall_pids
= NULL
;
1822 if (port_no
>= dpif
->uc_array_size
1823 || !vport_get_pids(dpif
, port_no
, &upcall_pids
)) {
1824 struct nl_sock
**socksp
= vport_create_socksp(dpif
, &error
);
1830 error
= vport_add_channels(dpif
, vport
.port_no
, socksp
);
1832 VLOG_INFO("%s: could not add channels for port %s",
1833 dpif_name(&dpif
->dpif
), vport
.name
);
1834 vport_del_socksp(dpif
, socksp
);
1838 upcall_pids
= vport_socksp_to_pids(socksp
, dpif
->n_handlers
);
1842 /* Configure the vport to deliver misses to 'sock'. */
1843 if (vport
.upcall_pids
[0] == 0
1844 || vport
.n_upcall_pids
!= dpif
->n_handlers
1845 || memcmp(upcall_pids
, vport
.upcall_pids
, n_handlers
* sizeof
1847 struct dpif_netlink_vport vport_request
;
1849 dpif_netlink_vport_init(&vport_request
);
1850 vport_request
.cmd
= OVS_VPORT_CMD_SET
;
1851 vport_request
.dp_ifindex
= dpif
->dp_ifindex
;
1852 vport_request
.port_no
= vport
.port_no
;
1853 vport_request
.n_upcall_pids
= dpif
->n_handlers
;
1854 vport_request
.upcall_pids
= upcall_pids
;
1855 error
= dpif_netlink_vport_transact(&vport_request
, NULL
, NULL
);
1857 VLOG_WARN_RL(&error_rl
,
1858 "%s: failed to set upcall pid on port: %s",
1859 dpif_name(&dpif
->dpif
), ovs_strerror(error
));
1861 if (error
!= ENODEV
&& error
!= ENOENT
) {
1864 /* The vport isn't really there, even though the dump says
1865 * it is. Probably we just hit a race after a port
1872 if (port_no
< keep_channels_nbits
) {
1873 bitmap_set1(keep_channels
, port_no
);
1880 vport_del_channels(dpif
, vport
.port_no
);
1882 nl_dump_done(&dump
);
1883 ofpbuf_uninit(&buf
);
1885 /* Discard any saved channels that we didn't reuse. */
1886 for (i
= 0; i
< keep_channels_nbits
; i
++) {
1887 if (!bitmap_is_set(keep_channels
, i
)) {
1888 vport_del_channels(dpif
, u32_to_odp(i
));
1891 free(keep_channels
);
1897 dpif_netlink_recv_set__(struct dpif_netlink
*dpif
, bool enable
)
1898 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
1900 if ((dpif
->handlers
!= NULL
) == enable
) {
1902 } else if (!enable
) {
1903 destroy_all_channels(dpif
);
1906 return dpif_netlink_refresh_channels(dpif
, 1);
1911 dpif_netlink_recv_set(struct dpif
*dpif_
, bool enable
)
1913 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1916 fat_rwlock_wrlock(&dpif
->upcall_lock
);
1917 error
= dpif_netlink_recv_set__(dpif
, enable
);
1918 fat_rwlock_unlock(&dpif
->upcall_lock
);
1924 dpif_netlink_handlers_set(struct dpif
*dpif_
, uint32_t n_handlers
)
1926 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
1930 /* Multiple upcall handlers will be supported once kernel datapath supports
1932 if (n_handlers
> 1) {
1937 fat_rwlock_wrlock(&dpif
->upcall_lock
);
1938 if (dpif
->handlers
) {
1939 error
= dpif_netlink_refresh_channels(dpif
, n_handlers
);
1941 fat_rwlock_unlock(&dpif
->upcall_lock
);
1947 dpif_netlink_queue_to_priority(const struct dpif
*dpif OVS_UNUSED
,
1948 uint32_t queue_id
, uint32_t *priority
)
1950 if (queue_id
< 0xf000) {
1951 *priority
= TC_H_MAKE(1 << 16, queue_id
+ 1);
1959 parse_odp_packet(const struct dpif_netlink
*dpif
, struct ofpbuf
*buf
,
1960 struct dpif_upcall
*upcall
, int *dp_ifindex
)
1962 static const struct nl_policy ovs_packet_policy
[] = {
1963 /* Always present. */
1964 [OVS_PACKET_ATTR_PACKET
] = { .type
= NL_A_UNSPEC
,
1965 .min_len
= ETH_HEADER_LEN
},
1966 [OVS_PACKET_ATTR_KEY
] = { .type
= NL_A_NESTED
},
1968 /* OVS_PACKET_CMD_ACTION only. */
1969 [OVS_PACKET_ATTR_USERDATA
] = { .type
= NL_A_UNSPEC
, .optional
= true },
1970 [OVS_PACKET_ATTR_EGRESS_TUN_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
1971 [OVS_PACKET_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
1972 [OVS_PACKET_ATTR_MRU
] = { .type
= NL_A_U16
, .optional
= true }
1975 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
1976 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
1977 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
1978 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
1980 struct nlattr
*a
[ARRAY_SIZE(ovs_packet_policy
)];
1981 if (!nlmsg
|| !genl
|| !ovs_header
1982 || nlmsg
->nlmsg_type
!= ovs_packet_family
1983 || !nl_policy_parse(&b
, 0, ovs_packet_policy
, a
,
1984 ARRAY_SIZE(ovs_packet_policy
))) {
1988 int type
= (genl
->cmd
== OVS_PACKET_CMD_MISS
? DPIF_UC_MISS
1989 : genl
->cmd
== OVS_PACKET_CMD_ACTION
? DPIF_UC_ACTION
1995 /* (Re)set ALL fields of '*upcall' on successful return. */
1996 upcall
->type
= type
;
1997 upcall
->key
= CONST_CAST(struct nlattr
*,
1998 nl_attr_get(a
[OVS_PACKET_ATTR_KEY
]));
1999 upcall
->key_len
= nl_attr_get_size(a
[OVS_PACKET_ATTR_KEY
]);
2000 dpif_flow_hash(&dpif
->dpif
, upcall
->key
, upcall
->key_len
, &upcall
->ufid
);
2001 upcall
->userdata
= a
[OVS_PACKET_ATTR_USERDATA
];
2002 upcall
->out_tun_key
= a
[OVS_PACKET_ATTR_EGRESS_TUN_KEY
];
2003 upcall
->actions
= a
[OVS_PACKET_ATTR_ACTIONS
];
2004 upcall
->mru
= a
[OVS_PACKET_ATTR_MRU
];
2006 /* Allow overwriting the netlink attribute header without reallocating. */
2007 dp_packet_use_stub(&upcall
->packet
,
2008 CONST_CAST(struct nlattr
*,
2009 nl_attr_get(a
[OVS_PACKET_ATTR_PACKET
])) - 1,
2010 nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]) +
2011 sizeof(struct nlattr
));
2012 dp_packet_set_data(&upcall
->packet
,
2013 (char *)dp_packet_data(&upcall
->packet
) + sizeof(struct nlattr
));
2014 dp_packet_set_size(&upcall
->packet
, nl_attr_get_size(a
[OVS_PACKET_ATTR_PACKET
]));
2016 *dp_ifindex
= ovs_header
->dp_ifindex
;
#ifdef _WIN32
#define PACKET_RECV_BATCH_SIZE 50
/* Windows: tries to receive one upcall for handler 'handler_id' by reading
 * each socket of the handler's vport socket pool in turn.
 *
 * Returns 0 with '*upcall' filled in when a packet for this datapath was
 * parsed, EAGAIN when nothing is available (including after
 * PACKET_RECV_BATCH_SIZE read attempts), or another positive errno value on
 * failure.  Packets for other datapaths are skipped. */
static int
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_windows_vport_sock *sock_pool;
    struct dpif_handler *handler;
    int read_tries = 0;
    uint32_t pool_idx;

    if (!dpif->handlers) {
        return EAGAIN;
    }

    /* Only one handler is supported currently. */
    if (handler_id >= 1) {
        return EAGAIN;
    }

    if (handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    sock_pool = handler->vport_sock_pool;

    for (pool_idx = 0; pool_idx < VPORT_SOCK_POOL_SIZE; pool_idx++) {
        for (;;) {
            int dp_ifindex;
            int error;

            read_tries++;
            if (read_tries > PACKET_RECV_BATCH_SIZE) {
                return EAGAIN;
            }

            error = nl_sock_recv(sock_pool[pool_idx].nl_sock, buf, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
                continue;
            }

            /* XXX: ch->last_poll = time_msec(); */
            if (error == EAGAIN) {
                /* This socket is drained; move on to the next one. */
                break;
            }
            if (error) {
                return error;
            }

            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (error) {
                return error;
            }
            if (dp_ifindex == dpif->dp_ifindex) {
                return 0;
            }
        }
    }

    return EAGAIN;
}
#endif
2089 dpif_netlink_recv__(struct dpif_netlink
*dpif
, uint32_t handler_id
,
2090 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2091 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2093 struct dpif_handler
*handler
;
2096 if (!dpif
->handlers
|| handler_id
>= dpif
->n_handlers
) {
2100 handler
= &dpif
->handlers
[handler_id
];
2101 if (handler
->event_offset
>= handler
->n_events
) {
2104 handler
->event_offset
= handler
->n_events
= 0;
2107 retval
= epoll_wait(handler
->epoll_fd
, handler
->epoll_events
,
2108 dpif
->uc_array_size
, 0);
2109 } while (retval
< 0 && errno
== EINTR
);
2112 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2113 VLOG_WARN_RL(&rl
, "epoll_wait failed (%s)", ovs_strerror(errno
));
2114 } else if (retval
> 0) {
2115 handler
->n_events
= retval
;
2119 while (handler
->event_offset
< handler
->n_events
) {
2120 int idx
= handler
->epoll_events
[handler
->event_offset
].data
.u32
;
2121 struct dpif_channel
*ch
= &dpif
->handlers
[handler_id
].channels
[idx
];
2123 handler
->event_offset
++;
2129 if (++read_tries
> 50) {
2133 error
= nl_sock_recv(ch
->sock
, buf
, false);
2134 if (error
== ENOBUFS
) {
2135 /* ENOBUFS typically means that we've received so many
2136 * packets that the buffer overflowed. Try again
2137 * immediately because there's almost certainly a packet
2138 * waiting for us. */
2139 report_loss(dpif
, ch
, idx
, handler_id
);
2143 ch
->last_poll
= time_msec();
2145 if (error
== EAGAIN
) {
2151 error
= parse_odp_packet(dpif
, buf
, upcall
, &dp_ifindex
);
2152 if (!error
&& dp_ifindex
== dpif
->dp_ifindex
) {
2165 dpif_netlink_recv(struct dpif
*dpif_
, uint32_t handler_id
,
2166 struct dpif_upcall
*upcall
, struct ofpbuf
*buf
)
2168 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2171 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2173 error
= dpif_netlink_recv_windows(dpif
, handler_id
, upcall
, buf
);
2175 error
= dpif_netlink_recv__(dpif
, handler_id
, upcall
, buf
);
2177 fat_rwlock_unlock(&dpif
->upcall_lock
);
2183 dpif_netlink_recv_wait__(struct dpif_netlink
*dpif
, uint32_t handler_id
)
2184 OVS_REQ_RDLOCK(dpif
->upcall_lock
)
2188 struct dpif_windows_vport_sock
*sock_pool
=
2189 dpif
->handlers
[handler_id
].vport_sock_pool
;
2191 /* Only one handler is supported currently. */
2192 if (handler_id
>= 1) {
2196 for (i
= 0; i
< VPORT_SOCK_POOL_SIZE
; i
++) {
2197 nl_sock_wait(sock_pool
[i
].nl_sock
, POLLIN
);
2200 if (dpif
->handlers
&& handler_id
< dpif
->n_handlers
) {
2201 struct dpif_handler
*handler
= &dpif
->handlers
[handler_id
];
2203 poll_fd_wait(handler
->epoll_fd
, POLLIN
);
2209 dpif_netlink_recv_wait(struct dpif
*dpif_
, uint32_t handler_id
)
2211 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2213 fat_rwlock_rdlock(&dpif
->upcall_lock
);
2214 dpif_netlink_recv_wait__(dpif
, handler_id
);
2215 fat_rwlock_unlock(&dpif
->upcall_lock
);
2219 dpif_netlink_recv_purge__(struct dpif_netlink
*dpif
)
2220 OVS_REQ_WRLOCK(dpif
->upcall_lock
)
2222 if (dpif
->handlers
) {
2225 for (i
= 0; i
< dpif
->uc_array_size
; i
++ ) {
2226 if (!dpif
->handlers
[0].channels
[i
].sock
) {
2230 for (j
= 0; j
< dpif
->n_handlers
; j
++) {
2231 nl_sock_drain(dpif
->handlers
[j
].channels
[i
].sock
);
2238 dpif_netlink_recv_purge(struct dpif
*dpif_
)
2240 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2242 fat_rwlock_wrlock(&dpif
->upcall_lock
);
2243 dpif_netlink_recv_purge__(dpif
);
2244 fat_rwlock_unlock(&dpif
->upcall_lock
);
/* Returns a newly allocated copy of the first line of the kernel module's
 * version file with the trailing newline removed, or NULL if the file cannot
 * be opened or read (always NULL when not built for Linux).  The caller owns
 * the returned string. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            char *eol = strchr(line, '\n');

            if (eol) {
                *eol = '\0';
            }
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2278 struct dpif_netlink_ct_dump_state
{
2279 struct ct_dpif_dump_state up
;
2280 struct nl_ct_dump_state
*nl_ct_dump
;
2284 dpif_netlink_ct_dump_start(struct dpif
*dpif OVS_UNUSED
,
2285 struct ct_dpif_dump_state
**dump_
,
2286 const uint16_t *zone
)
2288 struct dpif_netlink_ct_dump_state
*dump
;
2291 dump
= xzalloc(sizeof *dump
);
2292 err
= nl_ct_dump_start(&dump
->nl_ct_dump
, zone
);
2304 dpif_netlink_ct_dump_next(struct dpif
*dpif OVS_UNUSED
,
2305 struct ct_dpif_dump_state
*dump_
,
2306 struct ct_dpif_entry
*entry
)
2308 struct dpif_netlink_ct_dump_state
*dump
;
2310 INIT_CONTAINER(dump
, dump_
, up
);
2312 return nl_ct_dump_next(dump
->nl_ct_dump
, entry
);
2316 dpif_netlink_ct_dump_done(struct dpif
*dpif OVS_UNUSED
,
2317 struct ct_dpif_dump_state
*dump_
)
2319 struct dpif_netlink_ct_dump_state
*dump
;
2322 INIT_CONTAINER(dump
, dump_
, up
);
2324 err
= nl_ct_dump_done(dump
->nl_ct_dump
);
2330 dpif_netlink_ct_flush(struct dpif
*dpif OVS_UNUSED
, const uint16_t *zone
)
2333 return nl_ct_flush_zone(*zone
);
2335 return nl_ct_flush();
/* Function table through which this provider's implementation functions are
 * registered as a 'struct dpif_class'.
 *
 * The initializer is positional: each entry is matched to a member of
 * 'struct dpif_class' purely by its position, so entries must not be added,
 * removed or reordered without checking the struct declaration (not visible
 * in this excerpt).  NULL entries mark operations this provider does not
 * supply; their trailing comments name the member they stand for.
 *
 * NOTE(review): the four dpif_netlink_ct_* entries followed by four NULL
 * placeholders with the same member names look like the two arms of a
 * preprocessor conditional whose directive lines are not visible here --
 * confirm before editing. */
2340 const struct dpif_class dpif_netlink_class
= {
2343 dpif_netlink_enumerate
,
2347 dpif_netlink_destroy
,
2350 dpif_netlink_get_stats
,
2351 dpif_netlink_port_add
,
2352 dpif_netlink_port_del
,
2353 dpif_netlink_port_query_by_number
,
2354 dpif_netlink_port_query_by_name
,
2355 dpif_netlink_port_get_pid
,
2356 dpif_netlink_port_dump_start
,
2357 dpif_netlink_port_dump_next
,
2358 dpif_netlink_port_dump_done
,
2359 dpif_netlink_port_poll
,
2360 dpif_netlink_port_poll_wait
,
2361 dpif_netlink_flow_flush
,
2362 dpif_netlink_flow_dump_create
,
2363 dpif_netlink_flow_dump_destroy
,
2364 dpif_netlink_flow_dump_thread_create
,
2365 dpif_netlink_flow_dump_thread_destroy
,
2366 dpif_netlink_flow_dump_next
,
2367 dpif_netlink_operate
,
2368 dpif_netlink_recv_set
,
2369 dpif_netlink_handlers_set
,
2370 NULL
, /* poll_thread_set */
2371 dpif_netlink_queue_to_priority
,
2373 dpif_netlink_recv_wait
,
2374 dpif_netlink_recv_purge
,
2375 NULL
, /* register_dp_purge_cb */
2376 NULL
, /* register_upcall_cb */
2377 NULL
, /* enable_upcall */
2378 NULL
, /* disable_upcall */
2379 dpif_netlink_get_datapath_version
, /* get_datapath_version */
2381 dpif_netlink_ct_dump_start
,
2382 dpif_netlink_ct_dump_next
,
2383 dpif_netlink_ct_dump_done
,
2384 dpif_netlink_ct_flush
,
2386 NULL
, /* ct_dump_start */
2387 NULL
, /* ct_dump_next */
2388 NULL
, /* ct_dump_done */
2389 NULL
, /* ct_flush */
2394 dpif_netlink_init(void)
2396 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
2399 if (ovsthread_once_start(&once
)) {
2400 error
= nl_lookup_genl_family(OVS_DATAPATH_FAMILY
,
2401 &ovs_datapath_family
);
2403 VLOG_WARN("Generic Netlink family '%s' does not exist. "
2404 "The Open vSwitch kernel module is probably not loaded.",
2405 OVS_DATAPATH_FAMILY
);
2408 error
= nl_lookup_genl_family(OVS_VPORT_FAMILY
, &ovs_vport_family
);
2411 error
= nl_lookup_genl_family(OVS_FLOW_FAMILY
, &ovs_flow_family
);
2414 error
= nl_lookup_genl_family(OVS_PACKET_FAMILY
,
2415 &ovs_packet_family
);
2418 error
= nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY
, OVS_VPORT_MCGROUP
,
2419 &ovs_vport_mcgroup
);
2422 ovsthread_once_done(&once
);
2429 dpif_netlink_is_internal_device(const char *name
)
2431 struct dpif_netlink_vport reply
;
2435 error
= dpif_netlink_vport_get(name
, &reply
, &buf
);
2438 } else if (error
!= ENODEV
&& error
!= ENOENT
) {
2439 VLOG_WARN_RL(&error_rl
, "%s: vport query failed (%s)",
2440 name
, ovs_strerror(error
));
2443 return reply
.type
== OVS_VPORT_TYPE_INTERNAL
;
2446 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2447 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
2448 * positive errno value.
2450 * 'vport' will contain pointers into 'buf', so the caller should not free
2451 * 'buf' while 'vport' is still in use. */
2453 dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport
*vport
,
2454 const struct ofpbuf
*buf
)
2456 static const struct nl_policy ovs_vport_policy
[] = {
2457 [OVS_VPORT_ATTR_PORT_NO
] = { .type
= NL_A_U32
},
2458 [OVS_VPORT_ATTR_TYPE
] = { .type
= NL_A_U32
},
2459 [OVS_VPORT_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2460 [OVS_VPORT_ATTR_UPCALL_PID
] = { .type
= NL_A_UNSPEC
},
2461 [OVS_VPORT_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_vport_stats
),
2463 [OVS_VPORT_ATTR_OPTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2466 dpif_netlink_vport_init(vport
);
2468 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2469 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2470 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2471 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2473 struct nlattr
*a
[ARRAY_SIZE(ovs_vport_policy
)];
2474 if (!nlmsg
|| !genl
|| !ovs_header
2475 || nlmsg
->nlmsg_type
!= ovs_vport_family
2476 || !nl_policy_parse(&b
, 0, ovs_vport_policy
, a
,
2477 ARRAY_SIZE(ovs_vport_policy
))) {
2481 vport
->cmd
= genl
->cmd
;
2482 vport
->dp_ifindex
= ovs_header
->dp_ifindex
;
2483 vport
->port_no
= nl_attr_get_odp_port(a
[OVS_VPORT_ATTR_PORT_NO
]);
2484 vport
->type
= nl_attr_get_u32(a
[OVS_VPORT_ATTR_TYPE
]);
2485 vport
->name
= nl_attr_get_string(a
[OVS_VPORT_ATTR_NAME
]);
2486 if (a
[OVS_VPORT_ATTR_UPCALL_PID
]) {
2487 vport
->n_upcall_pids
= nl_attr_get_size(a
[OVS_VPORT_ATTR_UPCALL_PID
])
2488 / (sizeof *vport
->upcall_pids
);
2489 vport
->upcall_pids
= nl_attr_get(a
[OVS_VPORT_ATTR_UPCALL_PID
]);
2492 if (a
[OVS_VPORT_ATTR_STATS
]) {
2493 vport
->stats
= nl_attr_get(a
[OVS_VPORT_ATTR_STATS
]);
2495 if (a
[OVS_VPORT_ATTR_OPTIONS
]) {
2496 vport
->options
= nl_attr_get(a
[OVS_VPORT_ATTR_OPTIONS
]);
2497 vport
->options_len
= nl_attr_get_size(a
[OVS_VPORT_ATTR_OPTIONS
]);
2502 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2503 * followed by Netlink attributes corresponding to 'vport'. */
2505 dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport
*vport
,
2508 struct ovs_header
*ovs_header
;
2510 nl_msg_put_genlmsghdr(buf
, 0, ovs_vport_family
, NLM_F_REQUEST
| NLM_F_ECHO
,
2511 vport
->cmd
, OVS_VPORT_VERSION
);
2513 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2514 ovs_header
->dp_ifindex
= vport
->dp_ifindex
;
2516 if (vport
->port_no
!= ODPP_NONE
) {
2517 nl_msg_put_odp_port(buf
, OVS_VPORT_ATTR_PORT_NO
, vport
->port_no
);
2520 if (vport
->type
!= OVS_VPORT_TYPE_UNSPEC
) {
2521 nl_msg_put_u32(buf
, OVS_VPORT_ATTR_TYPE
, vport
->type
);
2525 nl_msg_put_string(buf
, OVS_VPORT_ATTR_NAME
, vport
->name
);
2528 if (vport
->upcall_pids
) {
2529 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_UPCALL_PID
,
2531 vport
->n_upcall_pids
* sizeof *vport
->upcall_pids
);
2535 nl_msg_put_unspec(buf
, OVS_VPORT_ATTR_STATS
,
2536 vport
->stats
, sizeof *vport
->stats
);
2539 if (vport
->options
) {
2540 nl_msg_put_nested(buf
, OVS_VPORT_ATTR_OPTIONS
,
2541 vport
->options
, vport
->options_len
);
2545 /* Clears 'vport' to "empty" values. */
2547 dpif_netlink_vport_init(struct dpif_netlink_vport
*vport
)
2549 memset(vport
, 0, sizeof *vport
);
2550 vport
->port_no
= ODPP_NONE
;
2553 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2554 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2555 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2556 * result of the command is expected to be an ovs_vport also, which is decoded
2557 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2558 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2560 dpif_netlink_vport_transact(const struct dpif_netlink_vport
*request
,
2561 struct dpif_netlink_vport
*reply
,
2562 struct ofpbuf
**bufp
)
2564 struct ofpbuf
*request_buf
;
2567 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2569 error
= dpif_netlink_init();
2573 dpif_netlink_vport_init(reply
);
2578 request_buf
= ofpbuf_new(1024);
2579 dpif_netlink_vport_to_ofpbuf(request
, request_buf
);
2580 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2581 ofpbuf_delete(request_buf
);
2585 error
= dpif_netlink_vport_from_ofpbuf(reply
, *bufp
);
2588 dpif_netlink_vport_init(reply
);
2589 ofpbuf_delete(*bufp
);
2596 /* Obtains information about the kernel vport named 'name' and stores it into
2597 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
2598 * longer needed ('reply' will contain pointers into '*bufp'). */
2600 dpif_netlink_vport_get(const char *name
, struct dpif_netlink_vport
*reply
,
2601 struct ofpbuf
**bufp
)
2603 struct dpif_netlink_vport request
;
2605 dpif_netlink_vport_init(&request
);
2606 request
.cmd
= OVS_VPORT_CMD_GET
;
2607 request
.name
= name
;
2609 return dpif_netlink_vport_transact(&request
, reply
, bufp
);
2612 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2613 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
2614 * positive errno value.
2616 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
2617 * while 'dp' is still in use. */
2619 dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp
*dp
, const struct ofpbuf
*buf
)
2621 static const struct nl_policy ovs_datapath_policy
[] = {
2622 [OVS_DP_ATTR_NAME
] = { .type
= NL_A_STRING
, .max_len
= IFNAMSIZ
},
2623 [OVS_DP_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_dp_stats
),
2625 [OVS_DP_ATTR_MEGAFLOW_STATS
] = {
2626 NL_POLICY_FOR(struct ovs_dp_megaflow_stats
),
2630 dpif_netlink_dp_init(dp
);
2632 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2633 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2634 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2635 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2637 struct nlattr
*a
[ARRAY_SIZE(ovs_datapath_policy
)];
2638 if (!nlmsg
|| !genl
|| !ovs_header
2639 || nlmsg
->nlmsg_type
!= ovs_datapath_family
2640 || !nl_policy_parse(&b
, 0, ovs_datapath_policy
, a
,
2641 ARRAY_SIZE(ovs_datapath_policy
))) {
2645 dp
->cmd
= genl
->cmd
;
2646 dp
->dp_ifindex
= ovs_header
->dp_ifindex
;
2647 dp
->name
= nl_attr_get_string(a
[OVS_DP_ATTR_NAME
]);
2648 if (a
[OVS_DP_ATTR_STATS
]) {
2649 dp
->stats
= nl_attr_get(a
[OVS_DP_ATTR_STATS
]);
2652 if (a
[OVS_DP_ATTR_MEGAFLOW_STATS
]) {
2653 dp
->megaflow_stats
= nl_attr_get(a
[OVS_DP_ATTR_MEGAFLOW_STATS
]);
2659 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
2661 dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp
*dp
, struct ofpbuf
*buf
)
2663 struct ovs_header
*ovs_header
;
2665 nl_msg_put_genlmsghdr(buf
, 0, ovs_datapath_family
,
2666 NLM_F_REQUEST
| NLM_F_ECHO
, dp
->cmd
,
2667 OVS_DATAPATH_VERSION
);
2669 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2670 ovs_header
->dp_ifindex
= dp
->dp_ifindex
;
2673 nl_msg_put_string(buf
, OVS_DP_ATTR_NAME
, dp
->name
);
2676 if (dp
->upcall_pid
) {
2677 nl_msg_put_u32(buf
, OVS_DP_ATTR_UPCALL_PID
, *dp
->upcall_pid
);
2680 if (dp
->user_features
) {
2681 nl_msg_put_u32(buf
, OVS_DP_ATTR_USER_FEATURES
, dp
->user_features
);
2684 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
2687 /* Clears 'dp' to "empty" values. */
2689 dpif_netlink_dp_init(struct dpif_netlink_dp
*dp
)
2691 memset(dp
, 0, sizeof *dp
);
2695 dpif_netlink_dp_dump_start(struct nl_dump
*dump
)
2697 struct dpif_netlink_dp request
;
2700 dpif_netlink_dp_init(&request
);
2701 request
.cmd
= OVS_DP_CMD_GET
;
2703 buf
= ofpbuf_new(1024);
2704 dpif_netlink_dp_to_ofpbuf(&request
, buf
);
2705 nl_dump_start(dump
, NETLINK_GENERIC
, buf
);
2709 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2710 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2711 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2712 * result of the command is expected to be of the same form, which is decoded
2713 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2714 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2716 dpif_netlink_dp_transact(const struct dpif_netlink_dp
*request
,
2717 struct dpif_netlink_dp
*reply
, struct ofpbuf
**bufp
)
2719 struct ofpbuf
*request_buf
;
2722 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2724 request_buf
= ofpbuf_new(1024);
2725 dpif_netlink_dp_to_ofpbuf(request
, request_buf
);
2726 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2727 ofpbuf_delete(request_buf
);
2730 dpif_netlink_dp_init(reply
);
2732 error
= dpif_netlink_dp_from_ofpbuf(reply
, *bufp
);
2735 ofpbuf_delete(*bufp
);
2742 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
2743 * The caller must free '*bufp' when the reply is no longer needed ('reply'
2744 * will contain pointers into '*bufp'). */
2746 dpif_netlink_dp_get(const struct dpif
*dpif_
, struct dpif_netlink_dp
*reply
,
2747 struct ofpbuf
**bufp
)
2749 struct dpif_netlink
*dpif
= dpif_netlink_cast(dpif_
);
2750 struct dpif_netlink_dp request
;
2752 dpif_netlink_dp_init(&request
);
2753 request
.cmd
= OVS_DP_CMD_GET
;
2754 request
.dp_ifindex
= dpif
->dp_ifindex
;
2756 return dpif_netlink_dp_transact(&request
, reply
, bufp
);
2759 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2760 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
2761 * positive errno value.
2763 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
2764 * while 'flow' is still in use. */
2766 dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow
*flow
,
2767 const struct ofpbuf
*buf
)
2769 static const struct nl_policy ovs_flow_policy
[__OVS_FLOW_ATTR_MAX
] = {
2770 [OVS_FLOW_ATTR_KEY
] = { .type
= NL_A_NESTED
, .optional
= true },
2771 [OVS_FLOW_ATTR_MASK
] = { .type
= NL_A_NESTED
, .optional
= true },
2772 [OVS_FLOW_ATTR_ACTIONS
] = { .type
= NL_A_NESTED
, .optional
= true },
2773 [OVS_FLOW_ATTR_STATS
] = { NL_POLICY_FOR(struct ovs_flow_stats
),
2775 [OVS_FLOW_ATTR_TCP_FLAGS
] = { .type
= NL_A_U8
, .optional
= true },
2776 [OVS_FLOW_ATTR_USED
] = { .type
= NL_A_U64
, .optional
= true },
2777 [OVS_FLOW_ATTR_UFID
] = { .type
= NL_A_UNSPEC
, .optional
= true,
2778 .min_len
= sizeof(ovs_u128
) },
2779 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
2780 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
2781 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
2784 dpif_netlink_flow_init(flow
);
2786 struct ofpbuf b
= ofpbuf_const_initializer(buf
->data
, buf
->size
);
2787 struct nlmsghdr
*nlmsg
= ofpbuf_try_pull(&b
, sizeof *nlmsg
);
2788 struct genlmsghdr
*genl
= ofpbuf_try_pull(&b
, sizeof *genl
);
2789 struct ovs_header
*ovs_header
= ofpbuf_try_pull(&b
, sizeof *ovs_header
);
2791 struct nlattr
*a
[ARRAY_SIZE(ovs_flow_policy
)];
2792 if (!nlmsg
|| !genl
|| !ovs_header
2793 || nlmsg
->nlmsg_type
!= ovs_flow_family
2794 || !nl_policy_parse(&b
, 0, ovs_flow_policy
, a
,
2795 ARRAY_SIZE(ovs_flow_policy
))) {
2798 if (!a
[OVS_FLOW_ATTR_KEY
] && !a
[OVS_FLOW_ATTR_UFID
]) {
2802 flow
->nlmsg_flags
= nlmsg
->nlmsg_flags
;
2803 flow
->dp_ifindex
= ovs_header
->dp_ifindex
;
2804 if (a
[OVS_FLOW_ATTR_KEY
]) {
2805 flow
->key
= nl_attr_get(a
[OVS_FLOW_ATTR_KEY
]);
2806 flow
->key_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_KEY
]);
2809 if (a
[OVS_FLOW_ATTR_UFID
]) {
2810 const ovs_u128
*ufid
;
2812 ufid
= nl_attr_get_unspec(a
[OVS_FLOW_ATTR_UFID
],
2813 nl_attr_get_size(a
[OVS_FLOW_ATTR_UFID
]));
2815 flow
->ufid_present
= true;
2817 if (a
[OVS_FLOW_ATTR_MASK
]) {
2818 flow
->mask
= nl_attr_get(a
[OVS_FLOW_ATTR_MASK
]);
2819 flow
->mask_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_MASK
]);
2821 if (a
[OVS_FLOW_ATTR_ACTIONS
]) {
2822 flow
->actions
= nl_attr_get(a
[OVS_FLOW_ATTR_ACTIONS
]);
2823 flow
->actions_len
= nl_attr_get_size(a
[OVS_FLOW_ATTR_ACTIONS
]);
2825 if (a
[OVS_FLOW_ATTR_STATS
]) {
2826 flow
->stats
= nl_attr_get(a
[OVS_FLOW_ATTR_STATS
]);
2828 if (a
[OVS_FLOW_ATTR_TCP_FLAGS
]) {
2829 flow
->tcp_flags
= nl_attr_get(a
[OVS_FLOW_ATTR_TCP_FLAGS
]);
2831 if (a
[OVS_FLOW_ATTR_USED
]) {
2832 flow
->used
= nl_attr_get(a
[OVS_FLOW_ATTR_USED
]);
2837 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2838 * followed by Netlink attributes corresponding to 'flow'. */
2840 dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow
*flow
,
2843 struct ovs_header
*ovs_header
;
2845 nl_msg_put_genlmsghdr(buf
, 0, ovs_flow_family
,
2846 NLM_F_REQUEST
| flow
->nlmsg_flags
,
2847 flow
->cmd
, OVS_FLOW_VERSION
);
2849 ovs_header
= ofpbuf_put_uninit(buf
, sizeof *ovs_header
);
2850 ovs_header
->dp_ifindex
= flow
->dp_ifindex
;
2852 if (flow
->ufid_present
) {
2853 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_UFID
, &flow
->ufid
,
2856 if (flow
->ufid_terse
) {
2857 nl_msg_put_u32(buf
, OVS_FLOW_ATTR_UFID_FLAGS
,
2858 OVS_UFID_F_OMIT_KEY
| OVS_UFID_F_OMIT_MASK
2859 | OVS_UFID_F_OMIT_ACTIONS
);
2861 if (!flow
->ufid_terse
|| !flow
->ufid_present
) {
2862 if (flow
->key_len
) {
2863 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_KEY
,
2864 flow
->key
, flow
->key_len
);
2867 if (flow
->mask_len
) {
2868 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_MASK
,
2869 flow
->mask
, flow
->mask_len
);
2871 if (flow
->actions
|| flow
->actions_len
) {
2872 nl_msg_put_unspec(buf
, OVS_FLOW_ATTR_ACTIONS
,
2873 flow
->actions
, flow
->actions_len
);
2877 /* We never need to send these to the kernel. */
2878 ovs_assert(!flow
->stats
);
2879 ovs_assert(!flow
->tcp_flags
);
2880 ovs_assert(!flow
->used
);
2883 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_CLEAR
);
2886 nl_msg_put_flag(buf
, OVS_FLOW_ATTR_PROBE
);
2890 /* Clears 'flow' to "empty" values. */
2892 dpif_netlink_flow_init(struct dpif_netlink_flow
*flow
)
2894 memset(flow
, 0, sizeof *flow
);
2897 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2898 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2899 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2900 * result of the command is expected to be a flow also, which is decoded and
2901 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
2902 * is no longer needed ('reply' will contain pointers into '*bufp'). */
2904 dpif_netlink_flow_transact(struct dpif_netlink_flow
*request
,
2905 struct dpif_netlink_flow
*reply
,
2906 struct ofpbuf
**bufp
)
2908 struct ofpbuf
*request_buf
;
2911 ovs_assert((reply
!= NULL
) == (bufp
!= NULL
));
2914 request
->nlmsg_flags
|= NLM_F_ECHO
;
2917 request_buf
= ofpbuf_new(1024);
2918 dpif_netlink_flow_to_ofpbuf(request
, request_buf
);
2919 error
= nl_transact(NETLINK_GENERIC
, request_buf
, bufp
);
2920 ofpbuf_delete(request_buf
);
2924 error
= dpif_netlink_flow_from_ofpbuf(reply
, *bufp
);
2927 dpif_netlink_flow_init(reply
);
2928 ofpbuf_delete(*bufp
);
2936 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow
*flow
,
2937 struct dpif_flow_stats
*stats
)
2940 stats
->n_packets
= get_32aligned_u64(&flow
->stats
->n_packets
);
2941 stats
->n_bytes
= get_32aligned_u64(&flow
->stats
->n_bytes
);
2943 stats
->n_packets
= 0;
2946 stats
->used
= flow
->used
? get_32aligned_u64(flow
->used
) : 0;
2947 stats
->tcp_flags
= flow
->tcp_flags
? *flow
->tcp_flags
: 0;
2950 /* Logs information about a packet that was recently lost in 'ch' (in
2953 report_loss(struct dpif_netlink
*dpif
, struct dpif_channel
*ch
, uint32_t ch_idx
,
2954 uint32_t handler_id
)
2956 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 5);
2959 if (VLOG_DROP_WARN(&rl
)) {
2964 if (ch
->last_poll
!= LLONG_MIN
) {
2965 ds_put_format(&s
, " (last polled %lld ms ago)",
2966 time_msec() - ch
->last_poll
);
2969 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
2970 dpif_name(&dpif
->dpif
), ch_idx
, handler_id
);