]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netlink.c
dpif-netlink: Don't destroy and recreate port if it exists
[mirror_ovs.git] / lib / dpif-netlink.c
CommitLineData
96fba48f 1/*
aa5c0216 2 * Copyright (c) 2008-2017 Nicira, Inc.
96fba48f
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
9fe3b9a2 18
93451a0a 19#include "dpif-netlink.h"
96fba48f 20
96fba48f
BP
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <net/if.h>
b90fa799 26#include <linux/types.h>
aae51f53 27#include <linux/pkt_sched.h>
8522ba09 28#include <poll.h>
96fba48f 29#include <stdlib.h>
8522ba09 30#include <strings.h>
50f80534 31#include <sys/epoll.h>
10dcf8de 32#include <sys/stat.h>
96fba48f
BP
33#include <unistd.h>
34
773cd538 35#include "bitmap.h"
c4e08753 36#include "dpif-netlink-rtnl.h"
0d71302e 37#include "dpif-provider.h"
1579cf67 38#include "fat-rwlock.h"
0d71302e 39#include "flow.h"
032aa6a3 40#include "netdev-linux.h"
0d71302e 41#include "netdev-provider.h"
c3827f61 42#include "netdev-vport.h"
0d71302e 43#include "netdev.h"
c11c9f4a 44#include "netlink-conntrack.h"
45c8d3a1 45#include "netlink-notifier.h"
982b8810 46#include "netlink-socket.h"
856081f6 47#include "netlink.h"
bfda5239 48#include "netnsid.h"
feebdea2 49#include "odp-util.h"
0d71302e
BP
50#include "openvswitch/dynamic-string.h"
51#include "openvswitch/flow.h"
52#include "openvswitch/match.h"
64c96779 53#include "openvswitch/ofpbuf.h"
fd016ae3 54#include "openvswitch/poll-loop.h"
ee89ea7b 55#include "openvswitch/shash.h"
92d0d515 56#include "openvswitch/thread.h"
0d71302e
BP
57#include "openvswitch/vlog.h"
58#include "packets.h"
59#include "random.h"
b3c01ed3 60#include "sset.h"
14b4d2f9 61#include "timeval.h"
d6569377 62#include "unaligned.h"
96fba48f 63#include "util.h"
5136ce49 64
VLOG_DEFINE_THIS_MODULE(dpif_netlink);
#ifdef _WIN32
#include "wmi.h"
/* Compile-time flag usable in ordinary C expressions: 1 when built for
 * Windows, 0 otherwise. */
enum { WINDOWS = 1 };
#else
enum { WINDOWS = 0 };
#endif
/* Upper bound applied to the size of the per-port channel array (see
 * vport_add_channel()). */
enum { MAX_PORTS = USHRT_MAX };

/* This ethtool flag was introduced in Linux 2.6.24, so it might be
 * missing if we have old headers. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */

/* Batch-size limits; their users are outside this part of the file. */
#define FLOW_DUMP_MAX_BATCH 50
#define OPERATE_MAX_OPS 50

/* Fallback definition for system headers that do not provide
 * EPOLLEXCLUSIVE. */
#ifndef EPOLLEXCLUSIVE
#define EPOLLEXCLUSIVE (1u << 28)
#endif
84
/* In-memory representation of a datapath (OVS_DP_*) Generic Netlink message.
 *
 * The pointer members are 'const' and are not owned by this structure; they
 * presumably refer to storage supplied by the caller or to the buffer a reply
 * was parsed from -- NOTE(review): confirm against
 * dpif_netlink_dp_from_ofpbuf(). */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
};
100
93451a0a
AS
/* Forward declarations of the helpers that operate on
 * 'struct dpif_netlink_dp'. */
static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
                                       const struct ofpbuf *);
static void dpif_netlink_dp_dump_start(struct nl_dump *);
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
                                    struct dpif_netlink_dp *reply,
                                    struct ofpbuf **bufp);
static int dpif_netlink_dp_get(const struct dpif *,
                               struct dpif_netlink_dp *reply,
                               struct ofpbuf **bufp);
111
/* In-memory representation of a flow (OVS_FLOW_*) Generic Netlink message.
 *
 * Each Netlink attribute is held as a pointer (plus a length where the
 * attribute is variable-sized) or as a plain flag. */
struct dpif_netlink_flow {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    unsigned int nlmsg_flags;
    int dp_ifindex;

    /* Attributes.
     *
     * The 'stats' member points to 64-bit data that might only be aligned on
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
     * values.
     *
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
     * the Netlink version of the command, even if actions_len is zero. */
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
    size_t mask_len;
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
    bool ufid_present;                  /* Is there a UFID? */
    bool ufid_terse;                    /* Skip serializing key/mask/acts? */
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
    bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
};
143
93451a0a
AS
/* Forward declarations of the helpers that operate on
 * 'struct dpif_netlink_flow'. */
static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
                                         const struct ofpbuf *);
static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
                                        struct ofpbuf *);
static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
                                      struct dpif_netlink_flow *reply,
                                      struct ofpbuf **bufp);
static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
                                        struct dpif_flow_stats *);
static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
                                           const struct dpif_netlink_flow *);
d6569377 156
/* One of the dpif channels between the kernel and userspace.
 *
 * Channels are stored in the 'channels' array of 'struct dpif_netlink',
 * indexed by datapath port number.  A null 'sock' marks an unused slot. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
162
09cac43f
NR
#ifdef _WIN32
#define VPORT_SOCK_POOL_SIZE 1
/* On Windows, there is no native support for epoll.  There are equivalent
 * interfaces though, that are not used currently.  For simplicity, a pool of
 * netlink sockets is used.  Each socket is represented by 'struct
 * dpif_windows_vport_sock'.  Since it is a pool, multiple OVS ports may be
 * sharing the same socket.  In the future, we can add a reference count and
 * such fields. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* netlink socket. */
};
#endif
175
/* Per-handler upcall state.  'struct dpif_netlink' holds an array of
 * 'n_handlers' of these. */
struct dpif_handler {
    struct epoll_event *epoll_events; /* Resized together with the dpif's
                                       * channel array. */
    int epoll_fd;                 /* epoll fd that includes channel socks. */
    int n_events;                 /* Num events returned by epoll_wait(). */
    int event_offset;             /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx; /* Index to aid in allocating a
                                  socket in the pool to a port. */
#endif
};
14b4d2f9 189
/* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_netlink {
    struct dpif dpif;
    int dp_ifindex;

    /* Upcall messages. */
    struct fat_rwlock upcall_lock;
    struct dpif_handler *handlers;
    uint32_t n_handlers;           /* Num of upcall handlers. */
    struct dpif_channel *channels; /* Array of channels for each port. */
    int uc_array_size;             /* Size of 'channels' and of each */
                                   /* handler's 'epoll_events'. */

    /* Change notification. */
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
    bool refresh_channels;         /* When set, dpif_netlink_run() refreshes
                                    * the channels and clears the flag. */
};
207
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
                        uint32_t ch_idx, uint32_t handler_id);

/* Rate limit shared by the warning messages logged from this file. */
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);

/* Generic Netlink family numbers for OVS.
 *
 * Initialized by dpif_netlink_init(). */
static int ovs_datapath_family;
static int ovs_vport_family;
static int ovs_flow_family;
static int ovs_packet_family;
static int ovs_meter_family;
static int ovs_ct_limit_family;

/* Generic Netlink multicast groups for OVS.
 *
 * Initialized by dpif_netlink_init(). */
static unsigned int ovs_vport_mcgroup;

/* If true, tunnel devices are created using OVS compat/genetlink.
 * If false, tunnel devices are created with rtnetlink and using light weight
 * tunnels.  If we fail to create the tunnel with rtnetlink+LWT, then we fall
 * back to using the compat interface. */
static bool ovs_tunnels_out_of_tree = true;

static int dpif_netlink_init(void);
static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
                                          odp_port_t port_no);
static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
static int dpif_netlink_refresh_channels(struct dpif_netlink *,
                                         uint32_t n_handlers);
static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
                                         struct ofpbuf *);
static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
                                          const struct ofpbuf *);
static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
                                     odp_port_t port_no, const char *port_name,
                                     struct dpif_port *dpif_port);
f0fef760 248
93451a0a
AS
249static struct dpif_netlink *
250dpif_netlink_cast(const struct dpif *dpif)
96fba48f 251{
93451a0a
AS
252 dpif_assert_class(dpif, &dpif_netlink_class);
253 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
96fba48f
BP
254}
255
d3d22744 256static int
93451a0a
AS
257dpif_netlink_enumerate(struct sset *all_dps,
258 const struct dpif_class *dpif_class OVS_UNUSED)
d3d22744 259{
aaff4b55 260 struct nl_dump dump;
d57695d7
JS
261 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
262 struct ofpbuf msg, buf;
aaff4b55 263 int error;
982b8810 264
93451a0a 265 error = dpif_netlink_init();
aaff4b55
BP
266 if (error) {
267 return error;
982b8810 268 }
d3d22744 269
d57695d7 270 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a 271 dpif_netlink_dp_dump_start(&dump);
d57695d7 272 while (nl_dump_next(&dump, &msg, &buf)) {
93451a0a 273 struct dpif_netlink_dp dp;
d6569377 274
93451a0a 275 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
d0c23a1a 276 sset_add(all_dps, dp.name);
d3d22744
BP
277 }
278 }
d57695d7 279 ofpbuf_uninit(&buf);
aaff4b55 280 return nl_dump_done(&dump);
d3d22744
BP
281}
282
96fba48f 283static int
93451a0a
AS
284dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
285 bool create, struct dpif **dpifp)
96fba48f 286{
93451a0a 287 struct dpif_netlink_dp dp_request, dp;
c19e6535 288 struct ofpbuf *buf;
ea36840f 289 uint32_t upcall_pid;
c19e6535 290 int error;
96fba48f 291
93451a0a 292 error = dpif_netlink_init();
982b8810
BP
293 if (error) {
294 return error;
295 }
296
982b8810 297 /* Create or look up datapath. */
93451a0a 298 dpif_netlink_dp_init(&dp_request);
ea36840f
BP
299 if (create) {
300 dp_request.cmd = OVS_DP_CMD_NEW;
301 upcall_pid = 0;
302 dp_request.upcall_pid = &upcall_pid;
303 } else {
b7fd5e38
TG
304 /* Use OVS_DP_CMD_SET to report user features */
305 dp_request.cmd = OVS_DP_CMD_SET;
ea36840f 306 }
254f2dc8 307 dp_request.name = name;
b7fd5e38 308 dp_request.user_features |= OVS_DP_F_UNALIGNED;
1579cf67 309 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
93451a0a 310 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
982b8810
BP
311 if (error) {
312 return error;
c19e6535 313 }
254f2dc8 314
e4516b20 315 error = open_dpif(&dp, dpifp);
8f4a4df5 316 ofpbuf_delete(buf);
e4516b20 317 return error;
c19e6535
BP
318}
319
e4516b20 320static int
93451a0a 321open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
c19e6535 322{
93451a0a 323 struct dpif_netlink *dpif;
c19e6535 324
17411ecf 325 dpif = xzalloc(sizeof *dpif);
e4516b20 326 dpif->port_notifier = NULL;
1579cf67 327 fat_rwlock_init(&dpif->upcall_lock);
c19e6535 328
93451a0a 329 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
254f2dc8 330 dp->dp_ifindex, dp->dp_ifindex);
c19e6535 331
254f2dc8 332 dpif->dp_ifindex = dp->dp_ifindex;
c19e6535 333 *dpifp = &dpif->dpif;
e4516b20
BP
334
335 return 0;
96fba48f
BP
336}
337
09cac43f
NR
338#ifdef _WIN32
339static void
340vport_delete_sock_pool(struct dpif_handler *handler)
341 OVS_REQ_WRLOCK(dpif->upcall_lock)
342{
343 if (handler->vport_sock_pool) {
344 uint32_t i;
345 struct dpif_windows_vport_sock *sock_pool =
346 handler->vport_sock_pool;
347
348 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
349 if (sock_pool[i].nl_sock) {
350 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
351 nl_sock_destroy(sock_pool[i].nl_sock);
352 sock_pool[i].nl_sock = NULL;
353 }
354 }
355
356 free(handler->vport_sock_pool);
357 handler->vport_sock_pool = NULL;
358 }
359}
360
361static int
362vport_create_sock_pool(struct dpif_handler *handler)
363 OVS_REQ_WRLOCK(dpif->upcall_lock)
364{
365 struct dpif_windows_vport_sock *sock_pool;
366 size_t i;
367 int error = 0;
368
369 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
370 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
371 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
372 if (error) {
373 goto error;
374 }
375
376 /* Enable the netlink socket to receive packets. This is equivalent to
377 * calling nl_sock_join_mcgroup() to receive events. */
378 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
379 if (error) {
380 goto error;
381 }
382 }
383
384 handler->vport_sock_pool = sock_pool;
385 handler->last_used_pool_idx = 0;
386 return 0;
387
388error:
389 vport_delete_sock_pool(handler);
390 return error;
391}
09cac43f
NR
392#endif /* _WIN32 */
393
69c51582
MC
394/* Given the port number 'port_idx', extracts the pid of netlink socket
395 * associated to the port and assigns it to 'upcall_pid'. */
1579cf67 396static bool
69c51582
MC
397vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
398 uint32_t *upcall_pid)
1579cf67 399{
1579cf67 400 /* Since the nl_sock can only be assigned in either all
69c51582 401 * or none "dpif" channels, the following check
1579cf67 402 * would suffice. */
69c51582 403 if (!dpif->channels[port_idx].sock) {
1579cf67
AW
404 return false;
405 }
09cac43f 406 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1579cf67 407
69c51582 408 *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
989fd548 409
1579cf67 410 return true;
989fd548
JP
411}
412
413static int
69c51582
MC
414vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
415 struct nl_sock *socksp)
989fd548
JP
416{
417 struct epoll_event event;
4e022ec0 418 uint32_t port_idx = odp_to_u32(port_no);
69c51582 419 size_t i;
1579cf67 420 int error;
989fd548 421
1579cf67 422 if (dpif->handlers == NULL) {
989fd548
JP
423 return 0;
424 }
425
1579cf67
AW
426 /* We assume that the datapath densely chooses port numbers, which can
427 * therefore be used as an index into 'channels' and 'epoll_events' of
69c51582 428 * 'dpif'. */
4e022ec0
AW
429 if (port_idx >= dpif->uc_array_size) {
430 uint32_t new_size = port_idx + 1;
989fd548 431
12d76859 432 if (new_size > MAX_PORTS) {
989fd548
JP
433 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
434 dpif_name(&dpif->dpif), port_no);
435 return EFBIG;
436 }
437
69c51582
MC
438 dpif->channels = xrealloc(dpif->channels,
439 new_size * sizeof *dpif->channels);
1579cf67 440
69c51582
MC
441 for (i = dpif->uc_array_size; i < new_size; i++) {
442 dpif->channels[i].sock = NULL;
443 }
1579cf67 444
69c51582
MC
445 for (i = 0; i < dpif->n_handlers; i++) {
446 struct dpif_handler *handler = &dpif->handlers[i];
1579cf67
AW
447
448 handler->epoll_events = xrealloc(handler->epoll_events,
449 new_size * sizeof *handler->epoll_events);
989fd548 450
1579cf67 451 }
989fd548
JP
452 dpif->uc_array_size = new_size;
453 }
454
455 memset(&event, 0, sizeof event);
69c51582 456 event.events = EPOLLIN | EPOLLEXCLUSIVE;
4e022ec0 457 event.data.u32 = port_idx;
989fd548 458
1579cf67
AW
459 for (i = 0; i < dpif->n_handlers; i++) {
460 struct dpif_handler *handler = &dpif->handlers[i];
461
09cac43f 462#ifndef _WIN32
69c51582 463 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp),
1579cf67
AW
464 &event) < 0) {
465 error = errno;
466 goto error;
467 }
93451a0a 468#endif
1579cf67 469 }
69c51582
MC
470 dpif->channels[port_idx].sock = socksp;
471 dpif->channels[port_idx].last_poll = LLONG_MIN;
989fd548
JP
472
473 return 0;
1579cf67
AW
474
475error:
09cac43f 476#ifndef _WIN32
69c51582
MC
477 while (i--) {
478 epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
479 nl_sock_fd(socksp), NULL);
1579cf67 480 }
69c51582
MC
481#endif
482 dpif->channels[port_idx].sock = NULL;
1579cf67
AW
483
484 return error;
989fd548
JP
485}
486
487static void
93451a0a 488vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
989fd548 489{
4e022ec0 490 uint32_t port_idx = odp_to_u32(port_no);
1579cf67 491 size_t i;
989fd548 492
69c51582
MC
493 if (!dpif->handlers || port_idx >= dpif->uc_array_size
494 || !dpif->channels[port_idx].sock) {
989fd548
JP
495 return;
496 }
497
1579cf67
AW
498 for (i = 0; i < dpif->n_handlers; i++) {
499 struct dpif_handler *handler = &dpif->handlers[i];
09cac43f 500#ifndef _WIN32
1579cf67 501 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
69c51582 502 nl_sock_fd(dpif->channels[port_idx].sock), NULL);
09cac43f 503#endif
1579cf67
AW
504 handler->event_offset = handler->n_events = 0;
505 }
69c51582
MC
506#ifndef _WIN32
507 nl_sock_destroy(dpif->channels[port_idx].sock);
508#endif
509 dpif->channels[port_idx].sock = NULL;
1579cf67
AW
510}
511
512static void
93451a0a
AS
513destroy_all_channels(struct dpif_netlink *dpif)
514 OVS_REQ_WRLOCK(dpif->upcall_lock)
1579cf67
AW
515{
516 unsigned int i;
517
518 if (!dpif->handlers) {
519 return;
520 }
521
522 for (i = 0; i < dpif->uc_array_size; i++ ) {
93451a0a 523 struct dpif_netlink_vport vport_request;
1579cf67
AW
524 uint32_t upcall_pids = 0;
525
69c51582 526 if (!dpif->channels[i].sock) {
1579cf67
AW
527 continue;
528 }
529
530 /* Turn off upcalls. */
93451a0a 531 dpif_netlink_vport_init(&vport_request);
1579cf67
AW
532 vport_request.cmd = OVS_VPORT_CMD_SET;
533 vport_request.dp_ifindex = dpif->dp_ifindex;
534 vport_request.port_no = u32_to_odp(i);
a78f446a 535 vport_request.n_upcall_pids = 1;
1579cf67 536 vport_request.upcall_pids = &upcall_pids;
93451a0a 537 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67
AW
538
539 vport_del_channels(dpif, u32_to_odp(i));
540 }
541
542 for (i = 0; i < dpif->n_handlers; i++) {
543 struct dpif_handler *handler = &dpif->handlers[i];
544
09cac43f 545 dpif_netlink_handler_uninit(handler);
1579cf67 546 free(handler->epoll_events);
1579cf67 547 }
69c51582 548 free(dpif->channels);
1579cf67
AW
549 free(dpif->handlers);
550 dpif->handlers = NULL;
69c51582 551 dpif->channels = NULL;
1579cf67
AW
552 dpif->n_handlers = 0;
553 dpif->uc_array_size = 0;
17411ecf
JG
554}
555
96fba48f 556static void
93451a0a 557dpif_netlink_close(struct dpif *dpif_)
96fba48f 558{
93451a0a 559 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
c7178a0b 560
e4516b20 561 nl_sock_destroy(dpif->port_notifier);
1579cf67
AW
562
563 fat_rwlock_wrlock(&dpif->upcall_lock);
564 destroy_all_channels(dpif);
565 fat_rwlock_unlock(&dpif->upcall_lock);
566
567 fat_rwlock_destroy(&dpif->upcall_lock);
96fba48f
BP
568 free(dpif);
569}
570
571static int
93451a0a 572dpif_netlink_destroy(struct dpif *dpif_)
96fba48f 573{
93451a0a
AS
574 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
575 struct dpif_netlink_dp dp;
d6569377 576
93451a0a 577 dpif_netlink_dp_init(&dp);
df2c07f4 578 dp.cmd = OVS_DP_CMD_DEL;
254f2dc8 579 dp.dp_ifindex = dpif->dp_ifindex;
93451a0a 580 return dpif_netlink_dp_transact(&dp, NULL, NULL);
96fba48f
BP
581}
582
a36de779 583static bool
93451a0a 584dpif_netlink_run(struct dpif *dpif_)
61eae437 585{
93451a0a 586 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67 587
61eae437
BP
588 if (dpif->refresh_channels) {
589 dpif->refresh_channels = false;
1579cf67 590 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 591 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
1579cf67 592 fat_rwlock_unlock(&dpif->upcall_lock);
61eae437 593 }
a36de779 594 return false;
61eae437
BP
595}
596
96fba48f 597static int
93451a0a 598dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
96fba48f 599{
93451a0a 600 struct dpif_netlink_dp dp;
d6569377
BP
601 struct ofpbuf *buf;
602 int error;
603
93451a0a 604 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
d6569377 605 if (!error) {
6a54dedc
BP
606 memset(stats, 0, sizeof *stats);
607
608 if (dp.stats) {
609 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
610 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
611 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
612 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
613 }
614
615 if (dp.megaflow_stats) {
616 stats->n_masks = dp.megaflow_stats->n_masks;
617 stats->n_mask_hit = get_32aligned_u64(
618 &dp.megaflow_stats->n_mask_hit);
619 } else {
620 stats->n_masks = UINT32_MAX;
621 stats->n_mask_hit = UINT64_MAX;
622 }
d6569377
BP
623 ofpbuf_delete(buf);
624 }
625 return error;
96fba48f
BP
626}
627
b9ad7294 628static const char *
93451a0a 629get_vport_type(const struct dpif_netlink_vport *vport)
b9ad7294
EJ
630{
631 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
632
633 switch (vport->type) {
5ed51209
JS
634 case OVS_VPORT_TYPE_NETDEV: {
635 const char *type = netdev_get_type_from_name(vport->name);
636
637 return type ? type : "system";
638 }
b9ad7294
EJ
639
640 case OVS_VPORT_TYPE_INTERNAL:
641 return "internal";
642
c1fc1411
JG
643 case OVS_VPORT_TYPE_GENEVE:
644 return "geneve";
645
b9ad7294
EJ
646 case OVS_VPORT_TYPE_GRE:
647 return "gre";
648
b9ad7294
EJ
649 case OVS_VPORT_TYPE_VXLAN:
650 return "vxlan";
651
a6ae068b
LJ
652 case OVS_VPORT_TYPE_LISP:
653 return "lisp";
654
4237026e
PS
655 case OVS_VPORT_TYPE_STT:
656 return "stt";
657
c387d817 658 case OVS_VPORT_TYPE_ERSPAN:
98514eea
WT
659 return "erspan";
660
c387d817 661 case OVS_VPORT_TYPE_IP6ERSPAN:
3b10ceee
GR
662 return "ip6erspan";
663
c387d817 664 case OVS_VPORT_TYPE_IP6GRE:
3b10ceee 665 return "ip6gre";
c387d817 666
b9ad7294
EJ
667 case OVS_VPORT_TYPE_UNSPEC:
668 case __OVS_VPORT_TYPE_MAX:
669 break;
670 }
671
672 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
673 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
674 return "unknown";
675}
676
c4e08753 677enum ovs_vport_type
20c57607 678netdev_to_ovs_vport_type(const char *type)
c060c4cf 679{
c060c4cf
EJ
680 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
681 return OVS_VPORT_TYPE_NETDEV;
682 } else if (!strcmp(type, "internal")) {
683 return OVS_VPORT_TYPE_INTERNAL;
4237026e
PS
684 } else if (strstr(type, "stt")) {
685 return OVS_VPORT_TYPE_STT;
c1fc1411
JG
686 } else if (!strcmp(type, "geneve")) {
687 return OVS_VPORT_TYPE_GENEVE;
c060c4cf
EJ
688 } else if (!strcmp(type, "vxlan")) {
689 return OVS_VPORT_TYPE_VXLAN;
a6ae068b
LJ
690 } else if (!strcmp(type, "lisp")) {
691 return OVS_VPORT_TYPE_LISP;
7dc18ae9
WT
692 } else if (!strcmp(type, "erspan")) {
693 return OVS_VPORT_TYPE_ERSPAN;
694 } else if (!strcmp(type, "ip6erspan")) {
695 return OVS_VPORT_TYPE_IP6ERSPAN;
3b10ceee
GR
696 } else if (!strcmp(type, "ip6gre")) {
697 return OVS_VPORT_TYPE_IP6GRE;
1c385f49
GR
698 } else if (!strcmp(type, "gre")) {
699 return OVS_VPORT_TYPE_GRE;
c060c4cf
EJ
700 } else {
701 return OVS_VPORT_TYPE_UNSPEC;
702 }
703}
704
96fba48f 705static int
20c57607
EG
706dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
707 enum ovs_vport_type type,
708 struct ofpbuf *options,
93451a0a 709 odp_port_t *port_nop)
b90de034 710 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 711{
93451a0a 712 struct dpif_netlink_vport request, reply;
c19e6535 713 struct ofpbuf *buf;
69c51582 714 struct nl_sock *socksp = NULL;
790a4372 715 uint32_t upcall_pids = 0;
1579cf67 716 int error = 0;
96fba48f 717
1579cf67 718 if (dpif->handlers) {
69c51582 719 if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
989fd548
JP
720 return error;
721 }
722 }
723
93451a0a 724 dpif_netlink_vport_init(&request);
df2c07f4 725 request.cmd = OVS_VPORT_CMD_NEW;
254f2dc8 726 request.dp_ifindex = dpif->dp_ifindex;
20c57607
EG
727 request.type = type;
728 request.name = name;
729
730 request.port_no = *port_nop;
790a4372
MC
731 if (socksp) {
732 upcall_pids = nl_sock_pid(socksp);
733 }
69c51582
MC
734 request.n_upcall_pids = 1;
735 request.upcall_pids = &upcall_pids;
20c57607
EG
736
737 if (options) {
738 request.options = options->data;
739 request.options_len = options->size;
740 }
741
742 error = dpif_netlink_vport_transact(&request, &reply, &buf);
743 if (!error) {
744 *port_nop = reply.port_no;
745 } else {
746 if (error == EBUSY && *port_nop != ODPP_NONE) {
747 VLOG_INFO("%s: requested port %"PRIu32" is in use",
748 dpif_name(&dpif->dpif), *port_nop);
749 }
750
69c51582 751 nl_sock_destroy(socksp);
20c57607
EG
752 goto exit;
753 }
754
69c51582
MC
755 error = vport_add_channel(dpif, *port_nop, socksp);
756 if (error) {
757 VLOG_INFO("%s: could not add channel for port %s",
758 dpif_name(&dpif->dpif), name);
759
760 /* Delete the port. */
761 dpif_netlink_vport_init(&request);
762 request.cmd = OVS_VPORT_CMD_DEL;
763 request.dp_ifindex = dpif->dp_ifindex;
764 request.port_no = *port_nop;
765 dpif_netlink_vport_transact(&request, NULL, NULL);
766 nl_sock_destroy(socksp);
767 goto exit;
20c57607 768 }
20c57607
EG
769
770exit:
771 ofpbuf_delete(buf);
20c57607
EG
772
773 return error;
774}
775
776static int
777dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
778 odp_port_t *port_nop)
779 OVS_REQ_WRLOCK(dpif->upcall_lock)
780{
781 const struct netdev_tunnel_config *tnl_cfg;
782 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
783 const char *type = netdev_get_type(netdev);
784 uint64_t options_stub[64 / 8];
785 enum ovs_vport_type ovs_type;
786 struct ofpbuf options;
787 const char *name;
788
789 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
790
791 ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
792 if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
c283069c
BP
793 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
794 "unsupported type `%s'",
9b00386b 795 dpif_name(&dpif->dpif), name, type);
c283069c
BP
796 return EINVAL;
797 }
c3827f61 798
20c57607 799 if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
93451a0a 800#ifdef _WIN32
09cac43f 801 /* XXX : Map appropiate Windows handle */
93451a0a 802#else
24b019f8 803 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
93451a0a 804#endif
24b019f8
JP
805 }
806
da467899 807#ifdef _WIN32
20c57607 808 if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
da467899
AS
809 if (!create_wmi_port(name)){
810 VLOG_ERR("Could not create wmi internal port with name:%s", name);
da467899
AS
811 return EINVAL;
812 };
813 }
814#endif
815
26508d9a 816 tnl_cfg = netdev_get_tunnel_config(netdev);
526df7d8 817 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
26508d9a 818 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
526df7d8
TG
819 if (tnl_cfg->dst_port) {
820 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
821 ntohs(tnl_cfg->dst_port));
822 }
823 if (tnl_cfg->exts) {
824 size_t ext_ofs;
825 int i;
826
827 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
828 for (i = 0; i < 32; i++) {
829 if (tnl_cfg->exts & (1 << i)) {
830 nl_msg_put_flag(&options, i);
831 }
832 }
833 nl_msg_end_nested(&options, ext_ofs);
834 }
20c57607
EG
835 return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
836 port_nop);
2510ba7c 837 } else {
20c57607 838 return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
78a2d59c 839 }
c3827f61 840
20c57607 841}
989fd548 842
921c370a 843static int
c4e08753
EG
844dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
845 struct netdev *netdev,
846 odp_port_t *port_nop)
847 OVS_REQ_WRLOCK(dpif->upcall_lock)
848{
849 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
850 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
851 const char *name;
852 int error;
989fd548 853
c4e08753
EG
854 error = dpif_netlink_rtnl_port_create(netdev);
855 if (error) {
856 if (error != EOPNOTSUPP) {
d52ef4eb 857 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
c4e08753
EG
858 netdev_get_name(netdev), ovs_strerror(error));
859 }
860 return error;
861 }
1579cf67 862
c4e08753
EG
863 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
864 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
865 port_nop);
a38dccb3 866 if (error && error != EEXIST) {
c4e08753
EG
867 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
868 }
869 return error;
870}
96fba48f
BP
871
872static int
93451a0a
AS
873dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
874 odp_port_t *port_nop)
9fafa796 875{
93451a0a 876 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
921c370a 877 int error = EOPNOTSUPP;
9fafa796 878
1579cf67 879 fat_rwlock_wrlock(&dpif->upcall_lock);
921c370a
EG
880 if (!ovs_tunnels_out_of_tree) {
881 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
882 }
a38dccb3 883 if (error && error != EEXIST) {
921c370a
EG
884 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
885 }
1579cf67 886 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
887
888 return error;
889}
890
891static int
93451a0a 892dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
b90de034 893 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 894{
93451a0a 895 struct dpif_netlink_vport vport;
921c370a 896 struct dpif_port dpif_port;
773cd538 897 int error;
c19e6535 898
921c370a
EG
899 error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
900 if (error) {
901 return error;
902 }
903
93451a0a 904 dpif_netlink_vport_init(&vport);
df2c07f4 905 vport.cmd = OVS_VPORT_CMD_DEL;
254f2dc8 906 vport.dp_ifindex = dpif->dp_ifindex;
c19e6535 907 vport.port_no = port_no;
da467899 908#ifdef _WIN32
921c370a
EG
909 if (!strcmp(dpif_port.type, "internal")) {
910 if (!delete_wmi_port(dpif_port.name)) {
da467899 911 VLOG_ERR("Could not delete wmi port with name: %s",
921c370a 912 dpif_port.name);
da467899
AS
913 };
914 }
915#endif
93451a0a 916 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
773cd538 917
1579cf67 918 vport_del_channels(dpif, port_no);
989fd548 919
921c370a
EG
920 if (!error && !ovs_tunnels_out_of_tree) {
921 error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
922 if (error == EOPNOTSUPP) {
923 error = 0;
924 }
925 }
926
927 dpif_port_destroy(&dpif_port);
928
773cd538 929 return error;
c3827f61 930}
3abc4a1a 931
9fafa796 932static int
93451a0a 933dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
9fafa796 934{
93451a0a 935 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
936 int error;
937
1579cf67 938 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 939 error = dpif_netlink_port_del__(dpif, port_no);
1579cf67 940 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
941
942 return error;
943}
944
c3827f61 945static int
93451a0a
AS
946dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
947 const char *port_name, struct dpif_port *dpif_port)
c3827f61 948{
93451a0a
AS
949 struct dpif_netlink_vport request;
950 struct dpif_netlink_vport reply;
c19e6535 951 struct ofpbuf *buf;
4c738a8d
BP
952 int error;
953
93451a0a 954 dpif_netlink_vport_init(&request);
df2c07f4 955 request.cmd = OVS_VPORT_CMD_GET;
9b00386b 956 request.dp_ifindex = dpif->dp_ifindex;
c19e6535
BP
957 request.port_no = port_no;
958 request.name = port_name;
4c738a8d 959
93451a0a 960 error = dpif_netlink_vport_transact(&request, &reply, &buf);
c19e6535 961 if (!error) {
33db1592
BP
962 if (reply.dp_ifindex != request.dp_ifindex) {
963 /* A query by name reported that 'port_name' is in some datapath
964 * other than 'dpif', but the caller wants to know about 'dpif'. */
965 error = ENODEV;
4afba28d 966 } else if (dpif_port) {
33db1592 967 dpif_port->name = xstrdup(reply.name);
b9ad7294 968 dpif_port->type = xstrdup(get_vport_type(&reply));
33db1592
BP
969 dpif_port->port_no = reply.port_no;
970 }
c19e6535 971 ofpbuf_delete(buf);
3abc4a1a 972 }
c19e6535 973 return error;
96fba48f
BP
974}
975
976static int
93451a0a
AS
977dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
978 struct dpif_port *dpif_port)
96fba48f 979{
93451a0a 980 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9b00386b 981
93451a0a 982 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
96fba48f
BP
983}
984
/* Queries 'dpif_' for the port named 'devname', passing 0 as the port number.
 * Results are stored in 'dpif_port' as described for
 * dpif_netlink_port_query__(). */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    struct dpif_netlink *nl_dpif = dpif_netlink_cast(dpif_);

    return dpif_netlink_port_query__(nl_dpif, 0, devname, dpif_port);
}
993
98403001 994static uint32_t
93451a0a 995dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
769b5034 996 odp_port_t port_no)
b90de034 997 OVS_REQ_RDLOCK(dpif->upcall_lock)
98403001 998{
4e022ec0 999 uint32_t port_idx = odp_to_u32(port_no);
9fafa796 1000 uint32_t pid = 0;
98403001 1001
f8fc5489 1002 if (dpif->handlers && dpif->uc_array_size > 0) {
4e022ec0 1003 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
989fd548 1004 * channel, since it is not heavily loaded. */
4e022ec0 1005 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1579cf67 1006
17f2748d
AW
1007 /* Needs to check in case the socket pointer is changed in between
1008 * the holding of upcall_lock. A known case happens when the main
1009 * thread deletes the vport while the handler thread is handling
1010 * the upcall from that port. */
69c51582
MC
1011 if (dpif->channels[idx].sock) {
1012 pid = nl_sock_pid(dpif->channels[idx].sock);
17f2748d 1013 }
98403001 1014 }
9fafa796
BP
1015
1016 return pid;
98403001
BP
1017}
1018
b90de034 1019static uint32_t
769b5034 1020dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
b90de034 1021{
93451a0a 1022 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
1023 uint32_t ret;
1024
1025 fat_rwlock_rdlock(&dpif->upcall_lock);
769b5034 1026 ret = dpif_netlink_port_get_pid__(dpif, port_no);
b90de034
AW
1027 fat_rwlock_unlock(&dpif->upcall_lock);
1028
1029 return ret;
1030}
1031
96fba48f 1032static int
93451a0a 1033dpif_netlink_flow_flush(struct dpif *dpif_)
96fba48f 1034{
93451a0a
AS
1035 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1036 struct dpif_netlink_flow flow;
37a1300c 1037
93451a0a 1038 dpif_netlink_flow_init(&flow);
df2c07f4 1039 flow.cmd = OVS_FLOW_CMD_DEL;
254f2dc8 1040 flow.dp_ifindex = dpif->dp_ifindex;
f7dde6df
PB
1041
1042 if (netdev_is_flow_api_enabled()) {
dfaf79dd 1043 netdev_ports_flow_flush(dpif_->dpif_class);
f7dde6df
PB
1044 }
1045
93451a0a 1046 return dpif_netlink_flow_transact(&flow, NULL, NULL);
96fba48f
BP
1047}
1048
93451a0a 1049struct dpif_netlink_port_state {
f0fef760 1050 struct nl_dump dump;
d57695d7 1051 struct ofpbuf buf;
c19e6535
BP
1052};
1053
222837c4 1054static void
93451a0a
AS
1055dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1056 struct nl_dump *dump)
96fba48f 1057{
93451a0a 1058 struct dpif_netlink_vport request;
f0fef760
BP
1059 struct ofpbuf *buf;
1060
93451a0a 1061 dpif_netlink_vport_init(&request);
067f1e23 1062 request.cmd = OVS_VPORT_CMD_GET;
254f2dc8 1063 request.dp_ifindex = dpif->dp_ifindex;
f0fef760
BP
1064
1065 buf = ofpbuf_new(1024);
93451a0a 1066 dpif_netlink_vport_to_ofpbuf(&request, buf);
222837c4 1067 nl_dump_start(dump, NETLINK_GENERIC, buf);
f0fef760 1068 ofpbuf_delete(buf);
222837c4
BP
1069}
1070
1071static int
93451a0a 1072dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
222837c4 1073{
93451a0a
AS
1074 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1075 struct dpif_netlink_port_state *state;
222837c4
BP
1076
1077 *statep = state = xmalloc(sizeof *state);
93451a0a 1078 dpif_netlink_port_dump_start__(dpif, &state->dump);
f0fef760 1079
d57695d7 1080 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
b0ec0f27
BP
1081 return 0;
1082}
1083
7c1ef244 1084static int
93451a0a
AS
1085dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1086 struct nl_dump *dump,
1087 struct dpif_netlink_vport *vport,
1088 struct ofpbuf *buffer)
222837c4 1089{
222837c4
BP
1090 struct ofpbuf buf;
1091 int error;
1092
d57695d7 1093 if (!nl_dump_next(dump, &buf, buffer)) {
222837c4
BP
1094 return EOF;
1095 }
1096
93451a0a 1097 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
222837c4
BP
1098 if (error) {
1099 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1100 dpif_name(&dpif->dpif), ovs_strerror(error));
1101 }
1102 return error;
1103}
1104
b0ec0f27 1105static int
93451a0a
AS
1106dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1107 struct dpif_port *dpif_port)
b0ec0f27 1108{
93451a0a
AS
1109 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1110 struct dpif_netlink_port_state *state = state_;
1111 struct dpif_netlink_vport vport;
96fba48f
BP
1112 int error;
1113
93451a0a
AS
1114 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1115 &state->buf);
c3827f61 1116 if (error) {
f0fef760 1117 return error;
c3827f61 1118 }
ebc56baa 1119 dpif_port->name = CONST_CAST(char *, vport.name);
b9ad7294 1120 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
f0fef760
BP
1121 dpif_port->port_no = vport.port_no;
1122 return 0;
b0ec0f27
BP
1123}
1124
1125static int
93451a0a 1126dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
b0ec0f27 1127{
93451a0a 1128 struct dpif_netlink_port_state *state = state_;
f0fef760 1129 int error = nl_dump_done(&state->dump);
8522b383 1130
d57695d7 1131 ofpbuf_uninit(&state->buf);
b0ec0f27 1132 free(state);
f0fef760 1133 return error;
96fba48f
BP
1134}
1135
e9e28be3 1136static int
93451a0a 1137dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
e9e28be3 1138{
93451a0a 1139 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e9e28be3 1140
e4516b20
BP
1141 /* Lazily create the Netlink socket to listen for notifications. */
1142 if (!dpif->port_notifier) {
1143 struct nl_sock *sock;
1144 int error;
1145
1146 error = nl_sock_create(NETLINK_GENERIC, &sock);
1147 if (error) {
1148 return error;
1149 }
1150
1151 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1152 if (error) {
1153 nl_sock_destroy(sock);
1154 return error;
1155 }
1156 dpif->port_notifier = sock;
1157
1158 /* We have no idea of the current state so report that everything
1159 * changed. */
1160 return ENOBUFS;
1161 }
1162
1163 for (;;) {
1164 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1165 uint64_t buf_stub[4096 / 8];
1166 struct ofpbuf buf;
1167 int error;
1168
1169 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
a86bd14e 1170 error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
e4516b20 1171 if (!error) {
93451a0a 1172 struct dpif_netlink_vport vport;
e4516b20 1173
93451a0a 1174 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
e4516b20
BP
1175 if (!error) {
1176 if (vport.dp_ifindex == dpif->dp_ifindex
1177 && (vport.cmd == OVS_VPORT_CMD_NEW
1178 || vport.cmd == OVS_VPORT_CMD_DEL
1179 || vport.cmd == OVS_VPORT_CMD_SET)) {
1180 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1181 dpif->dpif.full_name, vport.name, vport.cmd);
1579cf67 1182 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
61eae437
BP
1183 dpif->refresh_channels = true;
1184 }
e4516b20 1185 *devnamep = xstrdup(vport.name);
59e0c910 1186 ofpbuf_uninit(&buf);
e4516b20 1187 return 0;
e4516b20
BP
1188 }
1189 }
59e0c910
BP
1190 } else if (error != EAGAIN) {
1191 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1192 ovs_strerror(error));
1193 nl_sock_drain(dpif->port_notifier);
1194 error = ENOBUFS;
e4516b20
BP
1195 }
1196
59e0c910
BP
1197 ofpbuf_uninit(&buf);
1198 if (error) {
1199 return error;
1200 }
e9e28be3 1201 }
e9e28be3
BP
1202}
1203
1204static void
93451a0a 1205dpif_netlink_port_poll_wait(const struct dpif *dpif_)
e9e28be3 1206{
93451a0a 1207 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e4516b20
BP
1208
1209 if (dpif->port_notifier) {
1210 nl_sock_wait(dpif->port_notifier, POLLIN);
1211 } else {
e9e28be3 1212 poll_immediate_wake();
e9e28be3
BP
1213 }
1214}
1215
6fe09f8c 1216static void
70e5ed6f
JS
1217dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1218 const ovs_u128 *ufid, bool terse)
1219{
1220 if (ufid) {
1221 request->ufid = *ufid;
1222 request->ufid_present = true;
1223 } else {
1224 request->ufid_present = false;
1225 }
1226 request->ufid_terse = terse;
1227}
1228
1229static void
1230dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1231 const struct nlattr *key, size_t key_len,
1232 const ovs_u128 *ufid, bool terse,
1233 struct dpif_netlink_flow *request)
96fba48f 1234{
93451a0a 1235 dpif_netlink_flow_init(request);
6fe09f8c
JS
1236 request->cmd = OVS_FLOW_CMD_GET;
1237 request->dp_ifindex = dpif->dp_ifindex;
1238 request->key = key;
1239 request->key_len = key_len;
70e5ed6f
JS
1240 dpif_netlink_flow_init_ufid(request, ufid, terse);
1241}
1242
1243static void
1244dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1245 const struct dpif_flow_get *get,
1246 struct dpif_netlink_flow *request)
1247{
1248 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1249 false, request);
30053024
BP
1250}
1251
1252static int
70e5ed6f
JS
1253dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1254 const struct nlattr *key, size_t key_len,
1255 const ovs_u128 *ufid, bool terse,
1256 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
30053024 1257{
93451a0a 1258 struct dpif_netlink_flow request;
30053024 1259
70e5ed6f 1260 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
93451a0a 1261 return dpif_netlink_flow_transact(&request, reply, bufp);
96fba48f
BP
1262}
1263
70e5ed6f
JS
1264static int
1265dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1266 const struct dpif_netlink_flow *flow,
1267 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1268{
1269 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1270 flow->ufid_present ? &flow->ufid : NULL,
1271 false, reply, bufp);
1272}
1273
6bc60024 1274static void
93451a0a
AS
1275dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1276 const struct dpif_flow_put *put,
1277 struct dpif_netlink_flow *request)
6bc60024 1278{
d64e176c 1279 static const struct nlattr dummy_action;
6bc60024 1280
93451a0a 1281 dpif_netlink_flow_init(request);
89625d1e 1282 request->cmd = (put->flags & DPIF_FP_CREATE
6bc60024
BP
1283 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1284 request->dp_ifindex = dpif->dp_ifindex;
89625d1e
BP
1285 request->key = put->key;
1286 request->key_len = put->key_len;
e6cc0bab
AZ
1287 request->mask = put->mask;
1288 request->mask_len = put->mask_len;
70e5ed6f
JS
1289 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1290
6bc60024 1291 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
d64e176c
BP
1292 request->actions = (put->actions
1293 ? put->actions
1294 : CONST_CAST(struct nlattr *, &dummy_action));
89625d1e
BP
1295 request->actions_len = put->actions_len;
1296 if (put->flags & DPIF_FP_ZERO_STATS) {
6bc60024
BP
1297 request->clear = true;
1298 }
43f9ac0a
JR
1299 if (put->flags & DPIF_FP_PROBE) {
1300 request->probe = true;
1301 }
89625d1e 1302 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
6bc60024
BP
1303}
1304
b99d3cee 1305static void
70e5ed6f
JS
1306dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1307 const struct nlattr *key, size_t key_len,
1308 const ovs_u128 *ufid, bool terse,
1309 struct dpif_netlink_flow *request)
96fba48f 1310{
93451a0a 1311 dpif_netlink_flow_init(request);
b99d3cee
BP
1312 request->cmd = OVS_FLOW_CMD_DEL;
1313 request->dp_ifindex = dpif->dp_ifindex;
70e5ed6f
JS
1314 request->key = key;
1315 request->key_len = key_len;
1316 dpif_netlink_flow_init_ufid(request, ufid, terse);
1317}
1318
1319static void
1320dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1321 const struct dpif_flow_del *del,
1322 struct dpif_netlink_flow *request)
1323{
37382aa6
AS
1324 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1325 del->ufid, del->terse, request);
70e5ed6f
JS
1326}
1327
93451a0a 1328struct dpif_netlink_flow_dump {
ac64794a
BP
1329 struct dpif_flow_dump up;
1330 struct nl_dump nl_dump;
d2ad7ef1 1331 atomic_int status;
f2280b41
PB
1332 struct netdev_flow_dump **netdev_dumps;
1333 int netdev_dumps_num; /* Number of netdev_flow_dumps */
1334 struct ovs_mutex netdev_lock; /* Guards the following. */
1335 int netdev_current_dump OVS_GUARDED; /* Shared current dump */
a692410a 1336 struct dpif_flow_dump_types types; /* Type of dump */
e723fd32
JS
1337};
1338
93451a0a
AS
1339static struct dpif_netlink_flow_dump *
1340dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
e723fd32 1341{
93451a0a 1342 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
e723fd32
JS
1343}
1344
f2280b41
PB
1345static void
1346start_netdev_dump(const struct dpif *dpif_,
1347 struct dpif_netlink_flow_dump *dump)
1348{
1349 ovs_mutex_init(&dump->netdev_lock);
1350
a692410a 1351 if (!(dump->types.netdev_flows)) {
f2280b41
PB
1352 dump->netdev_dumps_num = 0;
1353 dump->netdev_dumps = NULL;
1354 return;
1355 }
1356
1357 ovs_mutex_lock(&dump->netdev_lock);
1358 dump->netdev_current_dump = 0;
1359 dump->netdev_dumps
dfaf79dd 1360 = netdev_ports_flow_dump_create(dpif_->dpif_class,
f2280b41
PB
1361 &dump->netdev_dumps_num);
1362 ovs_mutex_unlock(&dump->netdev_lock);
1363}
1364
a692410a
GT
1365static void
1366dpif_netlink_populate_flow_dump_types(struct dpif_netlink_flow_dump *dump,
1367 struct dpif_flow_dump_types *types)
1368{
1369 if (!types) {
1370 dump->types.ovs_flows = true;
1371 dump->types.netdev_flows = true;
1372 } else {
1373 memcpy(&dump->types, types, sizeof *types);
494a7455 1374 }
7e8b7199
PB
1375}
1376
ac64794a 1377static struct dpif_flow_dump *
7e8b7199 1378dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
a692410a 1379 struct dpif_flow_dump_types *types)
96fba48f 1380{
93451a0a
AS
1381 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1382 struct dpif_netlink_flow_dump *dump;
1383 struct dpif_netlink_flow request;
37a1300c
BP
1384 struct ofpbuf *buf;
1385
ac64794a
BP
1386 dump = xmalloc(sizeof *dump);
1387 dpif_flow_dump_init(&dump->up, dpif_);
37a1300c 1388
a692410a 1389 dpif_netlink_populate_flow_dump_types(dump, types);
37a1300c 1390
a692410a 1391 if (dump->types.ovs_flows) {
7e8b7199
PB
1392 dpif_netlink_flow_init(&request);
1393 request.cmd = OVS_FLOW_CMD_GET;
1394 request.dp_ifindex = dpif->dp_ifindex;
1395 request.ufid_present = false;
1396 request.ufid_terse = terse;
1397
1398 buf = ofpbuf_new(1024);
1399 dpif_netlink_flow_to_ofpbuf(&request, buf);
1400 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1401 ofpbuf_delete(buf);
1402 }
ac64794a 1403 atomic_init(&dump->status, 0);
64bb477f 1404 dump->up.terse = terse;
30053024 1405
f2280b41
PB
1406 start_netdev_dump(dpif_, dump);
1407
ac64794a 1408 return &dump->up;
704a1e09
BP
1409}
1410
1411static int
93451a0a 1412dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
704a1e09 1413{
93451a0a 1414 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
7e8b7199 1415 unsigned int nl_status = 0;
ac64794a 1416 int dump_status;
96fba48f 1417
a692410a 1418 if (dump->types.ovs_flows) {
7e8b7199
PB
1419 nl_status = nl_dump_done(&dump->nl_dump);
1420 }
1421
f2280b41
PB
1422 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1423 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1424
1425 if (err != 0 && err != EOPNOTSUPP) {
1426 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1427 }
1428 }
1429
1430 free(dump->netdev_dumps);
1431 ovs_mutex_destroy(&dump->netdev_lock);
1432
7424fc44
JR
1433 /* No other thread has access to 'dump' at this point. */
1434 atomic_read_relaxed(&dump->status, &dump_status);
ac64794a
BP
1435 free(dump);
1436 return dump_status ? dump_status : nl_status;
1437}
feebdea2 1438
93451a0a 1439struct dpif_netlink_flow_dump_thread {
ac64794a 1440 struct dpif_flow_dump_thread up;
93451a0a
AS
1441 struct dpif_netlink_flow_dump *dump;
1442 struct dpif_netlink_flow flow;
ac64794a
BP
1443 struct dpif_flow_stats stats;
1444 struct ofpbuf nl_flows; /* Always used to store flows. */
1445 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
f2280b41
PB
1446 int netdev_dump_idx; /* This thread current netdev dump index */
1447 bool netdev_done; /* If we are finished dumping netdevs */
1448
1449 /* (Key/Mask/Actions) Buffers for netdev dumping */
1450 struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
1451 struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
1452 struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
ac64794a
BP
1453};
1454
93451a0a
AS
1455static struct dpif_netlink_flow_dump_thread *
1456dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
ac64794a 1457{
93451a0a 1458 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
ac64794a
BP
1459}
1460
1461static struct dpif_flow_dump_thread *
93451a0a 1462dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
ac64794a 1463{
93451a0a
AS
1464 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1465 struct dpif_netlink_flow_dump_thread *thread;
ac64794a
BP
1466
1467 thread = xmalloc(sizeof *thread);
1468 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1469 thread->dump = dump;
1470 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1471 thread->nl_actions = NULL;
f2280b41
PB
1472 thread->netdev_dump_idx = 0;
1473 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
ac64794a
BP
1474
1475 return &thread->up;
1476}
1477
1478static void
93451a0a 1479dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
ac64794a 1480{
93451a0a
AS
1481 struct dpif_netlink_flow_dump_thread *thread
1482 = dpif_netlink_flow_dump_thread_cast(thread_);
ac64794a
BP
1483
1484 ofpbuf_uninit(&thread->nl_flows);
1485 ofpbuf_delete(thread->nl_actions);
1486 free(thread);
1487}
1488
1489static void
7af12bd7 1490dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
7fe98598 1491 const struct dpif_netlink_flow *datapath_flow)
ac64794a 1492{
7fe98598
NR
1493 dpif_flow->key = datapath_flow->key;
1494 dpif_flow->key_len = datapath_flow->key_len;
1495 dpif_flow->mask = datapath_flow->mask;
1496 dpif_flow->mask_len = datapath_flow->mask_len;
1497 dpif_flow->actions = datapath_flow->actions;
1498 dpif_flow->actions_len = datapath_flow->actions_len;
70e5ed6f 1499 dpif_flow->ufid_present = datapath_flow->ufid_present;
ec97c2df 1500 dpif_flow->pmd_id = PMD_ID_NULL;
70e5ed6f
JS
1501 if (datapath_flow->ufid_present) {
1502 dpif_flow->ufid = datapath_flow->ufid;
1503 } else {
1504 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1505 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1506 &dpif_flow->ufid);
1507 }
7fe98598 1508 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
d63ca532
GT
1509 dpif_flow->attrs.offloaded = false;
1510 dpif_flow->attrs.dp_layer = "ovs";
ac64794a
BP
1511}
1512
f2280b41
PB
1513/* The design is such that all threads are working together on the first dump
1514 * to the last, in order (at first they all on dump 0).
1515 * When the first thread finds that the given dump is finished,
1516 * they all move to the next. If two or more threads find the same dump
1517 * is finished at the same time, the first one will advance the shared
1518 * netdev_current_dump and the others will catch up. */
1519static void
1520dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1521{
1522 struct dpif_netlink_flow_dump *dump = thread->dump;
1523
1524 ovs_mutex_lock(&dump->netdev_lock);
1525 /* if we haven't finished (dumped everything) */
1526 if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1527 /* if we are the first to find that current dump is finished
1528 * advance it. */
1529 if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1530 thread->netdev_dump_idx = ++dump->netdev_current_dump;
1531 /* did we just finish the last dump? done. */
1532 if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1533 thread->netdev_done = true;
1534 }
1535 } else {
1536 /* otherwise, we are behind, catch up */
1537 thread->netdev_dump_idx = dump->netdev_current_dump;
1538 }
1539 } else {
1540 /* some other thread finished */
1541 thread->netdev_done = true;
1542 }
1543 ovs_mutex_unlock(&dump->netdev_lock);
1544}
1545
1546static int
1547dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
1548 struct ofpbuf *key_buf,
1549 struct ofpbuf *mask_buf,
1550 struct nlattr *actions,
1551 struct dpif_flow_stats *stats,
d63ca532 1552 struct dpif_flow_attrs *attrs,
f2280b41
PB
1553 ovs_u128 *ufid,
1554 struct dpif_flow *flow,
1555 bool terse OVS_UNUSED)
1556{
1557
1558 struct odp_flow_key_parms odp_parms = {
1559 .flow = &match->flow,
1560 .mask = &match->wc.masks,
1561 .support = {
f9885dc5 1562 .max_vlan_headers = 2,
f2280b41
PB
1563 },
1564 };
1565 size_t offset;
1566
1567 memset(flow, 0, sizeof *flow);
1568
1569 /* Key */
1570 offset = key_buf->size;
1571 flow->key = ofpbuf_tail(key_buf);
1572 odp_flow_key_from_flow(&odp_parms, key_buf);
1573 flow->key_len = key_buf->size - offset;
1574
1575 /* Mask */
1576 offset = mask_buf->size;
1577 flow->mask = ofpbuf_tail(mask_buf);
1578 odp_parms.key_buf = key_buf;
1579 odp_flow_key_from_mask(&odp_parms, mask_buf);
1580 flow->mask_len = mask_buf->size - offset;
1581
1582 /* Actions */
1583 flow->actions = nl_attr_get(actions);
1584 flow->actions_len = nl_attr_get_size(actions);
1585
1586 /* Stats */
1587 memcpy(&flow->stats, stats, sizeof *stats);
1588
1589 /* UFID */
1590 flow->ufid_present = true;
1591 flow->ufid = *ufid;
1592
1593 flow->pmd_id = PMD_ID_NULL;
4742003c 1594
d63ca532 1595 memcpy(&flow->attrs, attrs, sizeof *attrs);
4742003c 1596
f2280b41
PB
1597 return 0;
1598}
1599
ac64794a 1600static int
93451a0a
AS
1601dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1602 struct dpif_flow *flows, int max_flows)
ac64794a 1603{
93451a0a
AS
1604 struct dpif_netlink_flow_dump_thread *thread
1605 = dpif_netlink_flow_dump_thread_cast(thread_);
1606 struct dpif_netlink_flow_dump *dump = thread->dump;
1607 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
ac64794a
BP
1608 int n_flows;
1609
1610 ofpbuf_delete(thread->nl_actions);
1611 thread->nl_actions = NULL;
1612
1613 n_flows = 0;
f2280b41
PB
1614 max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
1615
1616 while (!thread->netdev_done && n_flows < max_flows) {
1617 struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
1618 struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
1619 struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
1620 struct ofpbuf key, mask, act;
1621 struct dpif_flow *f = &flows[n_flows];
1622 int cur = thread->netdev_dump_idx;
1623 struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
1624 struct match match;
1625 struct nlattr *actions;
1626 struct dpif_flow_stats stats;
d63ca532 1627 struct dpif_flow_attrs attrs;
f2280b41
PB
1628 ovs_u128 ufid;
1629 bool has_next;
1630
1631 ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
1632 ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
1633 ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
1634 has_next = netdev_flow_dump_next(netdev_dump, &match,
d63ca532 1635 &actions, &stats, &attrs,
f2280b41
PB
1636 &ufid,
1637 &thread->nl_flows,
1638 &act);
1639 if (has_next) {
1640 dpif_netlink_netdev_match_to_dpif_flow(&match,
1641 &key, &mask,
1642 actions,
1643 &stats,
d63ca532 1644 &attrs,
f2280b41
PB
1645 &ufid,
1646 f,
1647 dump->up.terse);
1648 n_flows++;
1649 } else {
1650 dpif_netlink_advance_netdev_dump(thread);
1651 }
1652 }
1653
a692410a 1654 if (!(dump->types.ovs_flows)) {
7e8b7199
PB
1655 return n_flows;
1656 }
1657
ac64794a 1658 while (!n_flows
6fd6ed71 1659 || (n_flows < max_flows && thread->nl_flows.size)) {
7fe98598 1660 struct dpif_netlink_flow datapath_flow;
ac64794a
BP
1661 struct ofpbuf nl_flow;
1662 int error;
1663
1664 /* Try to grab another flow. */
1665 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1666 break;
feebdea2 1667 }
30053024 1668
ac64794a 1669 /* Convert the flow to our output format. */
7fe98598 1670 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
30053024 1671 if (error) {
7424fc44 1672 atomic_store_relaxed(&dump->status, error);
ac64794a 1673 break;
feebdea2 1674 }
30053024 1675
64bb477f
JS
1676 if (dump->up.terse || datapath_flow.actions) {
1677 /* Common case: we don't want actions, or the flow includes
1678 * actions. */
7af12bd7
JS
1679 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1680 &datapath_flow);
ac64794a
BP
1681 } else {
1682 /* Rare case: the flow does not include actions. Retrieve this
1683 * individual flow again to get the actions. */
70e5ed6f 1684 error = dpif_netlink_flow_get(dpif, &datapath_flow,
7fe98598 1685 &datapath_flow, &thread->nl_actions);
30053024
BP
1686 if (error == ENOENT) {
1687 VLOG_DBG("dumped flow disappeared on get");
ac64794a 1688 continue;
30053024 1689 } else if (error) {
10a89ef0
BP
1690 VLOG_WARN("error fetching dumped flow: %s",
1691 ovs_strerror(error));
7424fc44 1692 atomic_store_relaxed(&dump->status, error);
ac64794a 1693 break;
30053024 1694 }
30053024 1695
ac64794a
BP
1696 /* Save this flow. Then exit, because we only have one buffer to
1697 * handle this case. */
7af12bd7
JS
1698 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1699 &datapath_flow);
ac64794a
BP
1700 break;
1701 }
feebdea2 1702 }
ac64794a 1703 return n_flows;
96fba48f
BP
1704}
1705
eabe7c68 1706static void
93451a0a
AS
1707dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1708 struct ofpbuf *buf)
96fba48f 1709{
89625d1e 1710 struct ovs_header *k_exec;
758c456d 1711 size_t key_ofs;
f7cd0081 1712
eabe7c68 1713 ofpbuf_prealloc_tailroom(buf, (64
cf62fa4c 1714 + dp_packet_size(d_exec->packet)
758c456d 1715 + ODP_KEY_METADATA_SIZE
eabe7c68 1716 + d_exec->actions_len));
f7cd0081 1717
df2c07f4 1718 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
69685a88 1719 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
f7cd0081 1720
89625d1e
BP
1721 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1722 k_exec->dp_ifindex = dp_ifindex;
f7cd0081 1723
89625d1e 1724 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
cf62fa4c
PS
1725 dp_packet_data(d_exec->packet),
1726 dp_packet_size(d_exec->packet));
758c456d
JR
1727
1728 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
beb75a40 1729 odp_key_from_dp_packet(buf, d_exec->packet);
758c456d
JR
1730 nl_msg_end_nested(buf, key_ofs);
1731
89625d1e
BP
1732 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1733 d_exec->actions, d_exec->actions_len);
43f9ac0a 1734 if (d_exec->probe) {
2e460098 1735 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
43f9ac0a 1736 }
27130224
AZ
1737 if (d_exec->mtu) {
1738 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1739 }
6bc60024
BP
1740}
1741
0f3358ea
BP
1742/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1743 * Returns the number actually executed (at least 1, if 'n_ops' is
1744 * positive). */
1745static size_t
93451a0a
AS
1746dpif_netlink_operate__(struct dpif_netlink *dpif,
1747 struct dpif_op **ops, size_t n_ops)
6bc60024 1748{
eabe7c68
BP
1749 struct op_auxdata {
1750 struct nl_transaction txn;
72d32ac0 1751
eabe7c68
BP
1752 struct ofpbuf request;
1753 uint64_t request_stub[1024 / 8];
72d32ac0
BP
1754
1755 struct ofpbuf reply;
1756 uint64_t reply_stub[1024 / 8];
8b668ee3 1757 } auxes[OPERATE_MAX_OPS];
eabe7c68 1758
8b668ee3 1759 struct nl_transaction *txnsp[OPERATE_MAX_OPS];
6bc60024
BP
1760 size_t i;
1761
8b668ee3 1762 n_ops = MIN(n_ops, OPERATE_MAX_OPS);
6bc60024 1763 for (i = 0; i < n_ops; i++) {
eabe7c68 1764 struct op_auxdata *aux = &auxes[i];
c2b565b5 1765 struct dpif_op *op = ops[i];
b99d3cee
BP
1766 struct dpif_flow_put *put;
1767 struct dpif_flow_del *del;
6fe09f8c 1768 struct dpif_flow_get *get;
93451a0a 1769 struct dpif_netlink_flow flow;
eabe7c68
BP
1770
1771 ofpbuf_use_stub(&aux->request,
1772 aux->request_stub, sizeof aux->request_stub);
1773 aux->txn.request = &aux->request;
b99d3cee 1774
72d32ac0
BP
1775 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1776 aux->txn.reply = NULL;
1777
b99d3cee
BP
1778 switch (op->type) {
1779 case DPIF_OP_FLOW_PUT:
fa37affa 1780 put = &op->flow_put;
93451a0a 1781 dpif_netlink_init_flow_put(dpif, put, &flow);
6bc60024 1782 if (put->stats) {
eabe7c68 1783 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1784 aux->txn.reply = &aux->reply;
6bc60024 1785 }
93451a0a 1786 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee
BP
1787 break;
1788
1789 case DPIF_OP_FLOW_DEL:
fa37affa 1790 del = &op->flow_del;
93451a0a 1791 dpif_netlink_init_flow_del(dpif, del, &flow);
b99d3cee 1792 if (del->stats) {
eabe7c68 1793 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1794 aux->txn.reply = &aux->reply;
b99d3cee 1795 }
93451a0a 1796 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee 1797 break;
6bc60024 1798
b99d3cee 1799 case DPIF_OP_EXECUTE:
0f3358ea
BP
1800 /* Can't execute a packet that won't fit in a Netlink attribute. */
1801 if (OVS_UNLIKELY(nl_attr_oversized(
fa37affa 1802 dp_packet_size(op->execute.packet)))) {
0f3358ea
BP
1803 /* Report an error immediately if this is the first operation.
1804 * Otherwise the easiest thing to do is to postpone to the next
1805 * call (when this will be the first operation). */
1806 if (i == 0) {
1807 VLOG_ERR_RL(&error_rl,
1808 "dropping oversized %"PRIu32"-byte packet",
fa37affa 1809 dp_packet_size(op->execute.packet));
0f3358ea
BP
1810 op->error = ENOBUFS;
1811 return 1;
1812 }
1813 n_ops = i;
1814 } else {
fa37affa 1815 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
0f3358ea
BP
1816 &aux->request);
1817 }
b99d3cee
BP
1818 break;
1819
6fe09f8c 1820 case DPIF_OP_FLOW_GET:
fa37affa 1821 get = &op->flow_get;
70e5ed6f 1822 dpif_netlink_init_flow_get(dpif, get, &flow);
6fe09f8c 1823 aux->txn.reply = get->buffer;
93451a0a 1824 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
6fe09f8c
JS
1825 break;
1826
b99d3cee 1827 default:
428b2edd 1828 OVS_NOT_REACHED();
6bc60024
BP
1829 }
1830 }
1831
6bc60024 1832 for (i = 0; i < n_ops; i++) {
eabe7c68 1833 txnsp[i] = &auxes[i].txn;
6bc60024 1834 }
a88b4e04 1835 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
6bc60024 1836
6bc60024 1837 for (i = 0; i < n_ops; i++) {
72d32ac0 1838 struct op_auxdata *aux = &auxes[i];
eabe7c68 1839 struct nl_transaction *txn = &auxes[i].txn;
c2b565b5 1840 struct dpif_op *op = ops[i];
b99d3cee
BP
1841 struct dpif_flow_put *put;
1842 struct dpif_flow_del *del;
6fe09f8c 1843 struct dpif_flow_get *get;
6bc60024 1844
b99d3cee 1845 op->error = txn->error;
6bc60024 1846
b99d3cee
BP
1847 switch (op->type) {
1848 case DPIF_OP_FLOW_PUT:
fa37affa 1849 put = &op->flow_put;
cfceb2b5 1850 if (put->stats) {
b99d3cee 1851 if (!op->error) {
93451a0a 1852 struct dpif_netlink_flow reply;
cfceb2b5 1853
93451a0a
AS
1854 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1855 txn->reply);
cfceb2b5 1856 if (!op->error) {
93451a0a 1857 dpif_netlink_flow_get_stats(&reply, put->stats);
cfceb2b5
BP
1858 }
1859 }
6bc60024 1860 }
b99d3cee
BP
1861 break;
1862
1863 case DPIF_OP_FLOW_DEL:
fa37affa 1864 del = &op->flow_del;
cfceb2b5 1865 if (del->stats) {
b99d3cee 1866 if (!op->error) {
93451a0a 1867 struct dpif_netlink_flow reply;
cfceb2b5 1868
93451a0a
AS
1869 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1870 txn->reply);
cfceb2b5 1871 if (!op->error) {
93451a0a 1872 dpif_netlink_flow_get_stats(&reply, del->stats);
cfceb2b5
BP
1873 }
1874 }
b99d3cee
BP
1875 }
1876 break;
1877
1878 case DPIF_OP_EXECUTE:
1879 break;
1880
6fe09f8c 1881 case DPIF_OP_FLOW_GET:
fa37affa 1882 get = &op->flow_get;
6fe09f8c 1883 if (!op->error) {
93451a0a 1884 struct dpif_netlink_flow reply;
6fe09f8c 1885
93451a0a 1886 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
6fe09f8c 1887 if (!op->error) {
7af12bd7
JS
1888 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
1889 &reply);
6fe09f8c
JS
1890 }
1891 }
1892 break;
1893
b99d3cee 1894 default:
428b2edd 1895 OVS_NOT_REACHED();
6bc60024
BP
1896 }
1897
72d32ac0
BP
1898 ofpbuf_uninit(&aux->request);
1899 ofpbuf_uninit(&aux->reply);
6bc60024 1900 }
0f3358ea
BP
1901
1902 return n_ops;
eabe7c68
BP
1903}
1904
6c343984
PB
1905static int
1906parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
1907{
1908 struct dpif_flow *dpif_flow = get->flow;
1909 struct match match;
1910 struct nlattr *actions;
1911 struct dpif_flow_stats stats;
d63ca532 1912 struct dpif_flow_attrs attrs;
6c343984
PB
1913 struct ofpbuf buf;
1914 uint64_t act_buf[1024 / 8];
1915 struct odputil_keybuf maskbuf;
1916 struct odputil_keybuf keybuf;
1917 struct odputil_keybuf actbuf;
1918 struct ofpbuf key, mask, act;
1919 int err;
1920
1921 ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
dfaf79dd 1922 err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
d63ca532 1923 &actions, get->ufid, &stats, &attrs, &buf);
6c343984
PB
1924 if (err) {
1925 return err;
1926 }
1927
1928 VLOG_DBG("found flow from netdev, translating to dpif flow");
1929
1930 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
1931 ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
1932 ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
1933 dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
d63ca532 1934 &stats, &attrs,
6c343984
PB
1935 (ovs_u128 *) get->ufid,
1936 dpif_flow,
1937 false);
1938 ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
1939 dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
1940 dpif_flow->actions_len = nl_attr_get_size(actions);
1941
1942 return 0;
1943}
1944
8b668ee3
PB
1945static int
1946parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
1947{
1948 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
dfaf79dd 1949 const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
8b668ee3
PB
1950 struct match match;
1951 odp_port_t in_port;
1952 const struct nlattr *nla;
1953 size_t left;
8b668ee3
PB
1954 struct netdev *dev;
1955 struct offload_info info;
1956 ovs_be16 dst_port = 0;
d9677a1f 1957 uint8_t csum_on = false;
8b668ee3
PB
1958 int err;
1959
1960 if (put->flags & DPIF_FP_PROBE) {
1961 return EOPNOTSUPP;
1962 }
1963
1964 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
1965 put->mask_len, &match);
1966 if (err) {
1967 return err;
1968 }
1969
1970 /* When we try to install a dummy flow from a probed feature. */
1971 if (match.flow.dl_type == htons(0x1234)) {
1972 return EOPNOTSUPP;
1973 }
1974
1975 in_port = match.flow.in_port.odp_port;
dfaf79dd 1976 dev = netdev_ports_get(in_port, dpif_class);
8b668ee3
PB
1977 if (!dev) {
1978 return EOPNOTSUPP;
1979 }
1980
00a0a011 1981 /* Get tunnel dst port */
8b668ee3
PB
1982 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
1983 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
1984 const struct netdev_tunnel_config *tnl_cfg;
1985 struct netdev *outdev;
1986 odp_port_t out_port;
1987
8b668ee3 1988 out_port = nl_attr_get_odp_port(nla);
dfaf79dd 1989 outdev = netdev_ports_get(out_port, dpif_class);
8b668ee3
PB
1990 if (!outdev) {
1991 err = EOPNOTSUPP;
1992 goto out;
1993 }
1994 tnl_cfg = netdev_get_tunnel_config(outdev);
1995 if (tnl_cfg && tnl_cfg->dst_port != 0) {
1996 dst_port = tnl_cfg->dst_port;
1997 }
d9677a1f
EB
1998 if (tnl_cfg) {
1999 csum_on = tnl_cfg->csum;
2000 }
8b668ee3
PB
2001 netdev_close(outdev);
2002 }
2003 }
2004
dfaf79dd 2005 info.dpif_class = dpif_class;
8b668ee3 2006 info.tp_dst_port = dst_port;
d9677a1f 2007 info.tunnel_csum_on = csum_on;
8b668ee3
PB
2008 err = netdev_flow_put(dev, &match,
2009 CONST_CAST(struct nlattr *, put->actions),
2010 put->actions_len,
2011 CONST_CAST(ovs_u128 *, put->ufid),
2012 &info, put->stats);
2013
2014 if (!err) {
2015 if (put->flags & DPIF_FP_MODIFY) {
2016 struct dpif_op *opp;
2017 struct dpif_op op;
2018
2019 op.type = DPIF_OP_FLOW_DEL;
fa37affa
BP
2020 op.flow_del.key = put->key;
2021 op.flow_del.key_len = put->key_len;
2022 op.flow_del.ufid = put->ufid;
2023 op.flow_del.pmd_id = put->pmd_id;
2024 op.flow_del.stats = NULL;
2025 op.flow_del.terse = false;
8b668ee3
PB
2026
2027 opp = &op;
2028 dpif_netlink_operate__(dpif, &opp, 1);
2029 }
2030
2031 VLOG_DBG("added flow");
2032 } else if (err != EEXIST) {
738c785f
SB
2033 struct netdev *oor_netdev = NULL;
2034 if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) {
2035 /*
2036 * We need to set OOR on the input netdev (i.e, 'dev') for the
2037 * flow. But if the flow has a tunnel attribute (i.e, decap action,
2038 * with a virtual device like a VxLAN interface as its in-port),
2039 * then lookup and set OOR on the underlying tunnel (real) netdev.
2040 */
2041 oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
2042 if (!oor_netdev) {
2043 /* Not a 'tunnel' flow */
2044 oor_netdev = dev;
2045 }
2046 netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true);
2047 }
2048 VLOG_ERR_RL(&rl, "failed to offload flow: %s: %s", ovs_strerror(err),
2049 (oor_netdev ? oor_netdev->name : dev->name));
8b668ee3
PB
2050 }
2051
2052out:
2053 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2054 /* Modified rule can't be offloaded, try and delete from HW */
2055 int del_err = netdev_flow_del(dev, put->ufid, put->stats);
2056
2057 if (!del_err) {
2058 /* Delete from hw success, so old flow was offloaded.
2059 * Change flags to create the flow in kernel */
2060 put->flags &= ~DPIF_FP_MODIFY;
2061 put->flags |= DPIF_FP_CREATE;
2062 } else if (del_err != ENOENT) {
2063 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2064 ovs_strerror(del_err));
2065 /* stop proccesing the flow in kernel */
2066 err = 0;
2067 }
2068 }
2069
2070 netdev_close(dev);
2071
2072 return err;
2073}
2074
8b668ee3
PB
2075static int
2076try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
eabe7c68 2077{
8b668ee3 2078 int err = EOPNOTSUPP;
9b00386b 2079
8b668ee3
PB
2080 switch (op->type) {
2081 case DPIF_OP_FLOW_PUT: {
fa37affa 2082 struct dpif_flow_put *put = &op->flow_put;
8b668ee3
PB
2083
2084 if (!put->ufid) {
2085 break;
2086 }
3cd99886
RD
2087
2088 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
8b668ee3
PB
2089 err = parse_flow_put(dpif, put);
2090 break;
2091 }
0335a89c 2092 case DPIF_OP_FLOW_DEL: {
fa37affa 2093 struct dpif_flow_del *del = &op->flow_del;
0335a89c
PB
2094
2095 if (!del->ufid) {
2096 break;
2097 }
3cd99886
RD
2098
2099 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
dfaf79dd 2100 err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
0335a89c
PB
2101 del->stats);
2102 break;
2103 }
6c343984 2104 case DPIF_OP_FLOW_GET: {
fa37affa 2105 struct dpif_flow_get *get = &op->flow_get;
6c343984 2106
fa37affa 2107 if (!op->flow_get.ufid) {
6c343984
PB
2108 break;
2109 }
3cd99886
RD
2110
2111 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
6c343984
PB
2112 err = parse_flow_get(dpif, get);
2113 break;
2114 }
8b668ee3
PB
2115 case DPIF_OP_EXECUTE:
2116 default:
2117 break;
2118 }
2119
2120 return err;
2121}
2122
/* Runs all 'n_ops' operations in 'ops' through dpif_netlink_operate__(),
 * which reports how many operations it handled per call; keeps calling it
 * on the remainder until nothing is left. */
static void
dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
                            size_t n_ops)
{
    size_t handled;

    for (; n_ops > 0; ops += handled, n_ops -= handled) {
        handled = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
2134
8b668ee3 2135static void
57924fc9
SB
2136dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
2137 enum dpif_offload_type offload_type)
8b668ee3
PB
2138{
2139 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2140 struct dpif_op *new_ops[OPERATE_MAX_OPS];
2141 int count = 0;
2142 int i = 0;
2143 int err = 0;
2144
57924fc9
SB
2145 if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) {
2146 VLOG_DBG("Invalid offload_type: %d", offload_type);
2147 return;
2148 }
2149
2150 if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) {
8b668ee3
PB
2151 while (n_ops > 0) {
2152 count = 0;
2153
2154 while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2155 struct dpif_op *op = ops[i++];
2156
2157 err = try_send_to_netdev(dpif, op);
2158 if (err && err != EEXIST) {
57924fc9
SB
2159 if (offload_type == DPIF_OFFLOAD_ALWAYS) {
2160 /* We got an error while offloading an op. Since
2161 * OFFLOAD_ALWAYS is specified, we stop further
2162 * processing and return to the caller without
2163 * invoking kernel datapath as fallback. But the
2164 * interface requires us to process all n_ops; so
2165 * return the same error in the remaining ops too.
2166 */
2167 op->error = err;
2168 n_ops--;
2169 while (n_ops > 0) {
2170 op = ops[i++];
2171 op->error = err;
2172 n_ops--;
2173 }
2174 return;
2175 }
8b668ee3
PB
2176 new_ops[count++] = op;
2177 } else {
2178 op->error = err;
2179 }
2180
2181 n_ops--;
2182 }
2183
2184 dpif_netlink_operate_chunks(dpif, new_ops, count);
2185 }
57924fc9 2186 } else if (offload_type != DPIF_OFFLOAD_ALWAYS) {
8b668ee3
PB
2187 dpif_netlink_operate_chunks(dpif, ops, n_ops);
2188 }
2189}
2190
09cac43f
NR
2191#if _WIN32
2192static void
2193dpif_netlink_handler_uninit(struct dpif_handler *handler)
2194{
2195 vport_delete_sock_pool(handler);
2196}
2197
2198static int
2199dpif_netlink_handler_init(struct dpif_handler *handler)
2200{
2201 return vport_create_sock_pool(handler);
2202}
2203#else
2204
2205static int
2206dpif_netlink_handler_init(struct dpif_handler *handler)
2207{
2208 handler->epoll_fd = epoll_create(10);
2209 return handler->epoll_fd < 0 ? errno : 0;
2210}
2211
2212static void
2213dpif_netlink_handler_uninit(struct dpif_handler *handler)
2214{
2215 close(handler->epoll_fd);
2216}
2217#endif
2218
1579cf67
AW
2219/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2220 * currently in 'dpif' in the kernel, by adding a new set of channels for
2221 * any kernel vport that lacks one and deleting any channels that have no
2222 * backing kernel vports. */
96fba48f 2223static int
93451a0a 2224dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
b90de034 2225 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 2226{
8381a3d3 2227 unsigned long int *keep_channels;
93451a0a 2228 struct dpif_netlink_vport vport;
8381a3d3
BP
2229 size_t keep_channels_nbits;
2230 struct nl_dump dump;
d57695d7
JS
2231 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2232 struct ofpbuf buf;
8381a3d3
BP
2233 int retval = 0;
2234 size_t i;
982b8810 2235
09cac43f
NR
2236 ovs_assert(!WINDOWS || n_handlers <= 1);
2237 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2238
1579cf67
AW
2239 if (dpif->n_handlers != n_handlers) {
2240 destroy_all_channels(dpif);
2241 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2242 for (i = 0; i < n_handlers; i++) {
09cac43f 2243 int error;
1579cf67
AW
2244 struct dpif_handler *handler = &dpif->handlers[i];
2245
09cac43f
NR
2246 error = dpif_netlink_handler_init(handler);
2247 if (error) {
1579cf67
AW
2248 size_t j;
2249
2250 for (j = 0; j < i; j++) {
aa5c0216 2251 struct dpif_handler *tmp = &dpif->handlers[j];
09cac43f 2252 dpif_netlink_handler_uninit(tmp);
1579cf67
AW
2253 }
2254 free(dpif->handlers);
2255 dpif->handlers = NULL;
2256
09cac43f 2257 return error;
1579cf67 2258 }
8381a3d3 2259 }
1579cf67
AW
2260 dpif->n_handlers = n_handlers;
2261 }
2262
2263 for (i = 0; i < n_handlers; i++) {
2264 struct dpif_handler *handler = &dpif->handlers[i];
2265
2266 handler->event_offset = handler->n_events = 0;
17411ecf 2267 }
b063d9f0 2268
8381a3d3
BP
2269 keep_channels_nbits = dpif->uc_array_size;
2270 keep_channels = bitmap_allocate(keep_channels_nbits);
982b8810 2271
d57695d7 2272 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a
AS
2273 dpif_netlink_port_dump_start__(dpif, &dump);
2274 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
8381a3d3 2275 uint32_t port_no = odp_to_u32(vport.port_no);
69c51582 2276 uint32_t upcall_pid;
8381a3d3 2277 int error;
50f80534 2278
1579cf67 2279 if (port_no >= dpif->uc_array_size
69c51582
MC
2280 || !vport_get_pid(dpif, port_no, &upcall_pid)) {
2281 struct nl_sock *socksp;
1579cf67 2282
69c51582 2283 if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
1579cf67
AW
2284 goto error;
2285 }
2286
69c51582 2287 error = vport_add_channel(dpif, vport.port_no, socksp);
b063d9f0 2288 if (error) {
1579cf67 2289 VLOG_INFO("%s: could not add channels for port %s",
9b00386b 2290 dpif_name(&dpif->dpif), vport.name);
69c51582 2291 nl_sock_destroy(socksp);
8381a3d3
BP
2292 retval = error;
2293 goto error;
982b8810 2294 }
69c51582 2295 upcall_pid = nl_sock_pid(socksp);
8381a3d3 2296 }
50f80534 2297
8381a3d3 2298 /* Configure the vport to deliver misses to 'sock'. */
1579cf67 2299 if (vport.upcall_pids[0] == 0
69c51582
MC
2300 || vport.n_upcall_pids != 1
2301 || upcall_pid != vport.upcall_pids[0]) {
93451a0a 2302 struct dpif_netlink_vport vport_request;
989fd548 2303
93451a0a 2304 dpif_netlink_vport_init(&vport_request);
989fd548
JP
2305 vport_request.cmd = OVS_VPORT_CMD_SET;
2306 vport_request.dp_ifindex = dpif->dp_ifindex;
8381a3d3 2307 vport_request.port_no = vport.port_no;
69c51582
MC
2308 vport_request.n_upcall_pids = 1;
2309 vport_request.upcall_pids = &upcall_pid;
93451a0a 2310 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67 2311 if (error) {
989fd548
JP
2312 VLOG_WARN_RL(&error_rl,
2313 "%s: failed to set upcall pid on port: %s",
10a89ef0 2314 dpif_name(&dpif->dpif), ovs_strerror(error));
989fd548 2315
8381a3d3
BP
2316 if (error != ENODEV && error != ENOENT) {
2317 retval = error;
989fd548 2318 } else {
8381a3d3
BP
2319 /* The vport isn't really there, even though the dump says
2320 * it is. Probably we just hit a race after a port
2321 * disappeared. */
989fd548 2322 }
8381a3d3 2323 goto error;
50f80534 2324 }
8381a3d3 2325 }
14b4d2f9 2326
8381a3d3
BP
2327 if (port_no < keep_channels_nbits) {
2328 bitmap_set1(keep_channels, port_no);
2329 }
2330 continue;
2331
2332 error:
1579cf67 2333 vport_del_channels(dpif, vport.port_no);
982b8810 2334 }
8381a3d3 2335 nl_dump_done(&dump);
d57695d7 2336 ofpbuf_uninit(&buf);
b063d9f0 2337
8381a3d3
BP
2338 /* Discard any saved channels that we didn't reuse. */
2339 for (i = 0; i < keep_channels_nbits; i++) {
2340 if (!bitmap_is_set(keep_channels, i)) {
1579cf67 2341 vport_del_channels(dpif, u32_to_odp(i));
8381a3d3
BP
2342 }
2343 }
2344 free(keep_channels);
2345
2346 return retval;
2347}
2348
2349static int
93451a0a 2350dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
b90de034 2351 OVS_REQ_WRLOCK(dpif->upcall_lock)
8381a3d3 2352{
1579cf67 2353 if ((dpif->handlers != NULL) == enable) {
8381a3d3
BP
2354 return 0;
2355 } else if (!enable) {
1579cf67 2356 destroy_all_channels(dpif);
8381a3d3
BP
2357 return 0;
2358 } else {
93451a0a 2359 return dpif_netlink_refresh_channels(dpif, 1);
8381a3d3 2360 }
96fba48f
BP
2361}
2362
9fafa796 2363static int
93451a0a 2364dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
9fafa796 2365{
93451a0a 2366 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2367 int error;
2368
1579cf67 2369 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2370 error = dpif_netlink_recv_set__(dpif, enable);
1579cf67 2371 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2372
2373 return error;
2374}
2375
1954e6bb 2376static int
93451a0a 2377dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
1954e6bb 2378{
93451a0a 2379 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67
AW
2380 int error = 0;
2381
09cac43f
NR
2382#ifdef _WIN32
2383 /* Multiple upcall handlers will be supported once kernel datapath supports
2384 * it. */
2385 if (n_handlers > 1) {
2386 return error;
2387 }
2388#endif
2389
1579cf67
AW
2390 fat_rwlock_wrlock(&dpif->upcall_lock);
2391 if (dpif->handlers) {
93451a0a 2392 error = dpif_netlink_refresh_channels(dpif, n_handlers);
1579cf67
AW
2393 }
2394 fat_rwlock_unlock(&dpif->upcall_lock);
2395
2396 return error;
1954e6bb
AW
2397}
2398
aae51f53 2399static int
93451a0a 2400dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
aae51f53
BP
2401 uint32_t queue_id, uint32_t *priority)
2402{
2403 if (queue_id < 0xf000) {
17ee3c1f 2404 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
aae51f53
BP
2405 return 0;
2406 } else {
2407 return EINVAL;
2408 }
2409}
2410
96fba48f 2411static int
7af12bd7
JS
2412parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
2413 struct dpif_upcall *upcall, int *dp_ifindex)
856081f6 2414{
df2c07f4 2415 static const struct nl_policy ovs_packet_policy[] = {
856081f6 2416 /* Always present. */
df2c07f4 2417 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
856081f6 2418 .min_len = ETH_HEADER_LEN },
df2c07f4 2419 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
856081f6 2420
df2c07f4 2421 /* OVS_PACKET_CMD_ACTION only. */
e995e3df 2422 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
8b7ea2d4 2423 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
7321bda3 2424 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
27130224 2425 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
856081f6
BP
2426 };
2427
0a2869d5
BP
2428 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2429 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2430 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2431 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
982b8810 2432
0a2869d5 2433 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
df2c07f4
JP
2434 if (!nlmsg || !genl || !ovs_header
2435 || nlmsg->nlmsg_type != ovs_packet_family
2436 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2437 ARRAY_SIZE(ovs_packet_policy))) {
856081f6
BP
2438 return EINVAL;
2439 }
2440
0a2869d5
BP
2441 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2442 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2443 : -1);
aaff4b55
BP
2444 if (type < 0) {
2445 return EINVAL;
2446 }
82272ede 2447
877c9270 2448 /* (Re)set ALL fields of '*upcall' on successful return. */
aaff4b55 2449 upcall->type = type;
ebc56baa
BP
2450 upcall->key = CONST_CAST(struct nlattr *,
2451 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
df2c07f4 2452 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
7af12bd7 2453 dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
e995e3df 2454 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
8b7ea2d4 2455 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
7321bda3 2456 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
27130224 2457 upcall->mru = a[OVS_PACKET_ATTR_MRU];
da546e07
JR
2458
2459 /* Allow overwriting the netlink attribute header without reallocating. */
cf62fa4c 2460 dp_packet_use_stub(&upcall->packet,
da546e07
JR
2461 CONST_CAST(struct nlattr *,
2462 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2463 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2464 sizeof(struct nlattr));
cf62fa4c
PS
2465 dp_packet_set_data(&upcall->packet,
2466 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2467 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
da546e07 2468
2482b0b0
JS
2469 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2470 /* Ethernet frame */
2471 upcall->packet.packet_type = htonl(PT_ETH);
2472 } else {
2473 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2474 ovs_be16 ethertype = 0;
2475 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2476 upcall->key_len,
2477 OVS_KEY_ATTR_ETHERTYPE);
2478 if (et_nla) {
2479 ethertype = nl_attr_get_be16(et_nla);
2480 }
2481 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2482 ntohs(ethertype));
2483 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2484 }
2485
df2c07f4 2486 *dp_ifindex = ovs_header->dp_ifindex;
982b8810 2487
856081f6
BP
2488 return 0;
2489}
2490
09cac43f
NR
2491#ifdef _WIN32
2492#define PACKET_RECV_BATCH_SIZE 50
2493static int
2494dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2495 struct dpif_upcall *upcall, struct ofpbuf *buf)
2496 OVS_REQ_RDLOCK(dpif->upcall_lock)
2497{
2498 struct dpif_handler *handler;
2499 int read_tries = 0;
2500 struct dpif_windows_vport_sock *sock_pool;
2501 uint32_t i;
2502
2503 if (!dpif->handlers) {
2504 return EAGAIN;
2505 }
2506
2507 /* Only one handler is supported currently. */
2508 if (handler_id >= 1) {
2509 return EAGAIN;
2510 }
2511
2512 if (handler_id >= dpif->n_handlers) {
2513 return EAGAIN;
2514 }
2515
2516 handler = &dpif->handlers[handler_id];
2517 sock_pool = handler->vport_sock_pool;
2518
2519 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2520 for (;;) {
2521 int dp_ifindex;
2522 int error;
2523
2524 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2525 return EAGAIN;
2526 }
2527
a86bd14e 2528 error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
09cac43f
NR
2529 if (error == ENOBUFS) {
2530 /* ENOBUFS typically means that we've received so many
2531 * packets that the buffer overflowed. Try again
2532 * immediately because there's almost certainly a packet
2533 * waiting for us. */
2534 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2535 continue;
2536 }
2537
2538 /* XXX: ch->last_poll = time_msec(); */
2539 if (error) {
2540 if (error == EAGAIN) {
2541 break;
2542 }
2543 return error;
2544 }
2545
27edb4aa 2546 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
09cac43f
NR
2547 if (!error && dp_ifindex == dpif->dp_ifindex) {
2548 return 0;
2549 } else if (error) {
2550 return error;
2551 }
2552 }
2553 }
2554
2555 return EAGAIN;
2556}
2557#else
856081f6 2558static int
93451a0a
AS
2559dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2560 struct dpif_upcall *upcall, struct ofpbuf *buf)
b90de034 2561 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2562{
1579cf67 2563 struct dpif_handler *handler;
17411ecf 2564 int read_tries = 0;
96fba48f 2565
1579cf67
AW
2566 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2567 return EAGAIN;
982b8810
BP
2568 }
2569
1579cf67
AW
2570 handler = &dpif->handlers[handler_id];
2571 if (handler->event_offset >= handler->n_events) {
8522ba09 2572 int retval;
989fd548 2573
1579cf67 2574 handler->event_offset = handler->n_events = 0;
f6d1465c 2575
8522ba09 2576 do {
1579cf67 2577 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
989fd548 2578 dpif->uc_array_size, 0);
8522ba09 2579 } while (retval < 0 && errno == EINTR);
09cac43f 2580
8522ba09
BP
2581 if (retval < 0) {
2582 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
10a89ef0 2583 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
989fd548 2584 } else if (retval > 0) {
1579cf67 2585 handler->n_events = retval;
8522ba09 2586 }
8522ba09
BP
2587 }
2588
1579cf67
AW
2589 while (handler->event_offset < handler->n_events) {
2590 int idx = handler->epoll_events[handler->event_offset].data.u32;
69c51582 2591 struct dpif_channel *ch = &dpif->channels[idx];
8522ba09 2592
1579cf67 2593 handler->event_offset++;
17411ecf 2594
f6d1465c 2595 for (;;) {
8522ba09 2596 int dp_ifindex;
f6d1465c 2597 int error;
17411ecf 2598
f6d1465c
BP
2599 if (++read_tries > 50) {
2600 return EAGAIN;
2601 }
17411ecf 2602
a86bd14e 2603 error = nl_sock_recv(ch->sock, buf, NULL, false);
14b4d2f9
BP
2604 if (error == ENOBUFS) {
2605 /* ENOBUFS typically means that we've received so many
2606 * packets that the buffer overflowed. Try again
2607 * immediately because there's almost certainly a packet
2608 * waiting for us. */
9b00386b 2609 report_loss(dpif, ch, idx, handler_id);
14b4d2f9
BP
2610 continue;
2611 }
2612
2613 ch->last_poll = time_msec();
72d32ac0 2614 if (error) {
72d32ac0
BP
2615 if (error == EAGAIN) {
2616 break;
2617 }
f6d1465c
BP
2618 return error;
2619 }
17411ecf 2620
7af12bd7 2621 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
a12b3ead 2622 if (!error && dp_ifindex == dpif->dp_ifindex) {
f6d1465c 2623 return 0;
989fd548 2624 } else if (error) {
f6d1465c 2625 return error;
17411ecf 2626 }
982b8810 2627 }
50f80534 2628 }
982b8810
BP
2629
2630 return EAGAIN;
96fba48f 2631}
09cac43f 2632#endif
96fba48f 2633
9fafa796 2634static int
93451a0a
AS
2635dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2636 struct dpif_upcall *upcall, struct ofpbuf *buf)
9fafa796 2637{
93451a0a 2638 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2639 int error;
2640
1579cf67 2641 fat_rwlock_rdlock(&dpif->upcall_lock);
09cac43f
NR
2642#ifdef _WIN32
2643 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2644#else
93451a0a 2645 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
09cac43f 2646#endif
1579cf67 2647 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2648
2649 return error;
2650}
2651
96fba48f 2652static void
93451a0a 2653dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
b90de034 2654 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2655{
93451a0a 2656#ifdef _WIN32
09cac43f
NR
2657 uint32_t i;
2658 struct dpif_windows_vport_sock *sock_pool =
2659 dpif->handlers[handler_id].vport_sock_pool;
2660
2661 /* Only one handler is supported currently. */
2662 if (handler_id >= 1) {
2663 return;
2664 }
2665
2666 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2667 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2668 }
93451a0a 2669#else
1579cf67
AW
2670 if (dpif->handlers && handler_id < dpif->n_handlers) {
2671 struct dpif_handler *handler = &dpif->handlers[handler_id];
2672
2673 poll_fd_wait(handler->epoll_fd, POLLIN);
17411ecf 2674 }
93451a0a 2675#endif
96fba48f
BP
2676}
2677
1ba530f4 2678static void
93451a0a 2679dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
1ba530f4 2680{
93451a0a 2681 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
17411ecf 2682
b90de034 2683 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 2684 dpif_netlink_recv_wait__(dpif, handler_id);
b90de034
AW
2685 fat_rwlock_unlock(&dpif->upcall_lock);
2686}
2687
2688static void
93451a0a 2689dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
b90de034
AW
2690 OVS_REQ_WRLOCK(dpif->upcall_lock)
2691{
1579cf67 2692 if (dpif->handlers) {
69c51582 2693 size_t i;
1579cf67 2694
69c51582
MC
2695 if (!dpif->channels[0].sock) {
2696 return;
2697 }
1579cf67 2698 for (i = 0; i < dpif->uc_array_size; i++ ) {
1ba530f4 2699
69c51582 2700 nl_sock_drain(dpif->channels[i].sock);
989fd548 2701 }
1ba530f4 2702 }
b90de034
AW
2703}
2704
2705static void
93451a0a 2706dpif_netlink_recv_purge(struct dpif *dpif_)
b90de034 2707{
93451a0a 2708 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
2709
2710 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2711 dpif_netlink_recv_purge__(dpif);
1579cf67 2712 fat_rwlock_unlock(&dpif->upcall_lock);
1ba530f4
BP
2713}
2714
b5cbbcf6
AZ
/* Returns the kernel datapath version as a newly allocated string (made with
 * xstrdup()), or NULL if it cannot be determined.
 *
 * On Linux the version is the first line of
 * /sys/module/openvswitch/version, without its trailing newline.  On other
 * platforms this always returns NULL. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            char *eol = strchr(line, '\n');

            if (eol) {
                *eol = '\0';
            }
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2744
c11c9f4a
DDP
2745struct dpif_netlink_ct_dump_state {
2746 struct ct_dpif_dump_state up;
2747 struct nl_ct_dump_state *nl_ct_dump;
2748};
2749
2750static int
2751dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2752 struct ct_dpif_dump_state **dump_,
ded30c74 2753 const uint16_t *zone, int *ptot_bkts)
c11c9f4a
DDP
2754{
2755 struct dpif_netlink_ct_dump_state *dump;
2756 int err;
2757
2758 dump = xzalloc(sizeof *dump);
ded30c74 2759 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
c11c9f4a
DDP
2760 if (err) {
2761 free(dump);
2762 return err;
2763 }
2764
2765 *dump_ = &dump->up;
2766
2767 return 0;
2768}
2769
2770static int
2771dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2772 struct ct_dpif_dump_state *dump_,
2773 struct ct_dpif_entry *entry)
2774{
2775 struct dpif_netlink_ct_dump_state *dump;
2776
2777 INIT_CONTAINER(dump, dump_, up);
2778
2779 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2780}
2781
2782static int
2783dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2784 struct ct_dpif_dump_state *dump_)
2785{
2786 struct dpif_netlink_ct_dump_state *dump;
2787 int err;
2788
2789 INIT_CONTAINER(dump, dump_, up);
2790
2791 err = nl_ct_dump_done(dump->nl_ct_dump);
2792 free(dump);
2793 return err;
2794}
15eabc97
DDP
2795
2796static int
817a7657
YHW
2797dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2798 const struct ct_dpif_tuple *tuple)
15eabc97 2799{
817a7657
YHW
2800 if (tuple) {
2801 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2802 } else if (zone) {
15eabc97
DDP
2803 return nl_ct_flush_zone(*zone);
2804 } else {
2805 return nl_ct_flush();
2806 }
2807}
c11c9f4a 2808
906ff9d2
YHW
2809static int
2810dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2811 const uint32_t *default_limits,
2812 const struct ovs_list *zone_limits)
2813{
2814 struct ovs_zone_limit req_zone_limit;
2815
2816 if (ovs_ct_limit_family < 0) {
2817 return EOPNOTSUPP;
2818 }
2819
2820 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2821 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2822 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2823 OVS_CT_LIMIT_VERSION);
2824
2825 struct ovs_header *ovs_header;
2826 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2827 ovs_header->dp_ifindex = 0;
2828
2829 size_t opt_offset;
2830 opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2831 if (default_limits) {
2832 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2833 req_zone_limit.limit = *default_limits;
2834 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2835 }
2836
2837 if (!ovs_list_is_empty(zone_limits)) {
2838 struct ct_dpif_zone_limit *zone_limit;
2839
2840 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2841 req_zone_limit.zone_id = zone_limit->zone;
2842 req_zone_limit.limit = zone_limit->limit;
2843 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2844 }
2845 }
2846 nl_msg_end_nested(request, opt_offset);
2847
2848 int err = nl_transact(NETLINK_GENERIC, request, NULL);
2849 ofpbuf_uninit(request);
2850 return err;
2851}
2852
2853static int
2854dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
2855 uint32_t *default_limit,
2856 struct ovs_list *zone_limits)
2857{
2858 static const struct nl_policy ovs_ct_limit_policy[] = {
2859 [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
2860 .optional = true },
2861 };
2862
2863 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2864 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2865 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2866 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2867
2868 struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
2869
2870 if (!nlmsg || !genl || !ovs_header
2871 || nlmsg->nlmsg_type != ovs_ct_limit_family
2872 || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
2873 ARRAY_SIZE(ovs_ct_limit_policy))) {
2874 return EINVAL;
2875 }
2876
2877
2878 if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
2879 return EINVAL;
2880 }
2881
2882 int rem = NLA_ALIGN(
2883 nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
2884 const struct ovs_zone_limit *zone_limit =
2885 nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
2886
2887 while (rem >= sizeof *zone_limit) {
2888 if (zone_limit->zone_id == OVS_ZONE_LIMIT_DEFAULT_ZONE) {
2889 *default_limit = zone_limit->limit;
2890 } else if (zone_limit->zone_id < OVS_ZONE_LIMIT_DEFAULT_ZONE ||
2891 zone_limit->zone_id > UINT16_MAX) {
2892 } else {
2893 ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
2894 zone_limit->limit, zone_limit->count);
2895 }
2896 rem -= NLA_ALIGN(sizeof *zone_limit);
2897 zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
2898 (unsigned char *) zone_limit + NLA_ALIGN(sizeof *zone_limit));
2899 }
2900 return 0;
2901}
2902
2903static int
2904dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
2905 uint32_t *default_limit,
2906 const struct ovs_list *zone_limits_request,
2907 struct ovs_list *zone_limits_reply)
2908{
2909 if (ovs_ct_limit_family < 0) {
2910 return EOPNOTSUPP;
2911 }
2912
2913 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2914 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2915 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
2916 OVS_CT_LIMIT_VERSION);
2917
2918 struct ovs_header *ovs_header;
2919 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2920 ovs_header->dp_ifindex = 0;
2921
2922 if (!ovs_list_is_empty(zone_limits_request)) {
2923 size_t opt_offset = nl_msg_start_nested(request,
2924 OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2925
2926 struct ovs_zone_limit req_zone_limit;
2927 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2928 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2929
2930 struct ct_dpif_zone_limit *zone_limit;
2931 LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
2932 req_zone_limit.zone_id = zone_limit->zone;
2933 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2934 }
2935
2936 nl_msg_end_nested(request, opt_offset);
2937 }
2938
2939 struct ofpbuf *reply;
2940 int err = nl_transact(NETLINK_GENERIC, request, &reply);
2941 if (err) {
2942 goto out;
2943 }
2944
2945 err = dpif_netlink_zone_limits_from_ofpbuf(reply, default_limit,
2946 zone_limits_reply);
2947
2948out:
2949 ofpbuf_uninit(request);
2950 ofpbuf_uninit(reply);
2951 return err;
2952}
2953
2954static int
2955dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
2956 const struct ovs_list *zone_limits)
2957{
2958 if (ovs_ct_limit_family < 0) {
2959 return EOPNOTSUPP;
2960 }
2961
2962 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2963 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2964 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
2965 OVS_CT_LIMIT_VERSION);
2966
2967 struct ovs_header *ovs_header;
2968 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2969 ovs_header->dp_ifindex = 0;
2970
2971 if (!ovs_list_is_empty(zone_limits)) {
2972 size_t opt_offset =
2973 nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2974
2975 struct ct_dpif_zone_limit *zone_limit;
2976 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2977 struct ovs_zone_limit req_zone_limit;
2978 req_zone_limit.zone_id = zone_limit->zone;
2979 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2980 }
2981 nl_msg_end_nested(request, opt_offset);
2982 }
2983
2984 int err = nl_transact(NETLINK_GENERIC, request, NULL);
2985
2986 ofpbuf_uninit(request);
2987 return err;
2988}
5dddf960
JR
2989\f
2990/* Meters */
80738e5f
AZ
2991
2992/* Set of supported meter flags */
2993#define DP_SUPPORTED_METER_FLAGS_MASK \
2994 (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
2995
92d0d515
JP
2996/* Meter support was introduced in Linux 4.15. In some versions of
2997 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
2998 * when the meter was created, so all meters essentially had an id of
2999 * zero. Check for that condition and disable meters on those kernels. */
3000static bool probe_broken_meters(struct dpif *);
3001
5dddf960 3002static void
80738e5f
AZ
3003dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
3004 void *stub, size_t size, uint32_t command)
3005{
3006 ofpbuf_use_stub(buf, stub, size);
3007
3008 nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
3009 command, OVS_METER_VERSION);
3010
3011 struct ovs_header *ovs_header;
3012 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3013 ovs_header->dp_ifindex = dpif->dp_ifindex;
3014}
3015
3016/* Execute meter 'request' in the kernel datapath. If the command
3017 * fails, returns a positive errno value. Otherwise, stores the reply
3018 * in '*replyp', parses the policy according to 'reply_policy' into the
3019 * array of Netlink attribute in 'a', and returns 0. On success, the
3020 * caller is responsible for calling ofpbuf_delete() on '*replyp'
3021 * ('replyp' will contain pointers into 'a'). */
3022static int
3023dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3024 const struct nl_policy *reply_policy,
3025 struct nlattr **a, size_t size_a)
3026{
3027 int error = nl_transact(NETLINK_GENERIC, request, replyp);
3028 ofpbuf_uninit(request);
3029
3030 if (error) {
3031 return error;
3032 }
3033
3034 struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3035 struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3036 struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3037 sizeof *ovs_header);
3038 if (!nlmsg || !genl || !ovs_header
3039 || nlmsg->nlmsg_type != ovs_meter_family
3040 || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3041 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3042 VLOG_DBG_RL(&rl,
3043 "Kernel module response to meter tranaction is invalid");
3044 return EINVAL;
3045 }
3046 return 0;
3047}
3048
3049static void
3050dpif_netlink_meter_get_features(const struct dpif *dpif_,
5dddf960
JR
3051 struct ofputil_meter_features *features)
3052{
92d0d515
JP
3053 if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
3054 features = NULL;
3055 return;
3056 }
3057
80738e5f
AZ
3058 struct ofpbuf buf, *msg;
3059 uint64_t stub[1024 / 8];
3060
3061 static const struct nl_policy ovs_meter_features_policy[] = {
3062 [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
3063 [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
3064 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3065 };
3066 struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
3067
3068 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3069 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
3070 OVS_METER_CMD_FEATURES);
3071 if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
3072 ARRAY_SIZE(ovs_meter_features_policy))) {
3073 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3074 VLOG_INFO_RL(&rl,
3075 "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
3076 return;
3077 }
3078
3079 features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
3080 features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
3081
3082 /* Bands is a nested attribute of zero or more nested
3083 * band attributes. */
3084 if (a[OVS_METER_ATTR_BANDS]) {
3085 const struct nlattr *nla;
3086 size_t left;
3087
3088 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3089 const struct nlattr *band_nla;
3090 size_t band_left;
3091
3092 NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
3093 if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
3094 if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
3095 switch (nl_attr_get_u32(band_nla)) {
3096 case OVS_METER_BAND_TYPE_DROP:
3097 features->band_types |= 1 << OFPMBT13_DROP;
3098 break;
3099 }
3100 }
3101 }
3102 }
3103 }
3104 }
3105 features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
3106
3107 ofpbuf_delete(msg);
5dddf960
JR
3108}
3109
3110static int
60ebc04d
JP
3111dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
3112 struct ofputil_meter_config *config)
5dddf960 3113{
80738e5f
AZ
3114 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3115 struct ofpbuf buf, *msg;
3116 uint64_t stub[1024 / 8];
3117
3118 static const struct nl_policy ovs_meter_set_response_policy[] = {
3119 [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
3120 };
3121 struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
3122
3123 if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
3124 return EBADF; /* Unsupported flags set */
3125 }
3126
3127 for (size_t i = 0; i < config->n_bands; i++) {
3128 switch (config->bands[i].type) {
3129 case OFPMBT13_DROP:
3130 break;
3131 default:
3132 return ENODEV; /* Unsupported band type */
3133 }
3134 }
3135
3136 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
3137
8101f03f
JP
3138 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3139
80738e5f
AZ
3140 if (config->flags & OFPMF13_KBPS) {
3141 nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
3142 }
3143
3144 size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
3145 /* Bands */
3146 for (size_t i = 0; i < config->n_bands; ++i) {
3147 struct ofputil_meter_band * band = &config->bands[i];
3148 uint32_t band_type;
3149
3150 size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
3151
3152 switch (band->type) {
3153 case OFPMBT13_DROP:
3154 band_type = OVS_METER_BAND_TYPE_DROP;
3155 break;
3156 default:
3157 band_type = OVS_METER_BAND_TYPE_UNSPEC;
3158 }
3159 nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
3160 nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
3161 nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
3162 config->flags & OFPMF13_BURST ?
3163 band->burst_size : band->rate);
3164 nl_msg_end_nested(&buf, band_offset);
3165 }
3166 nl_msg_end_nested(&buf, bands_offset);
3167
3168 int error = dpif_netlink_meter_transact(&buf, &msg,
3169 ovs_meter_set_response_policy, a,
3170 ARRAY_SIZE(ovs_meter_set_response_policy));
3171 if (error) {
3172 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3173 VLOG_INFO_RL(&rl,
3174 "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
3175 return error;
3176 }
3177
8101f03f
JP
3178 if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3179 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3180 VLOG_INFO_RL(&rl,
3181 "Kernel returned a different meter id than requested");
3182 }
80738e5f
AZ
3183 ofpbuf_delete(msg);
3184 return 0;
5dddf960
JR
3185}
3186
60ebc04d
JP
3187static int
3188dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
3189 struct ofputil_meter_config *config)
3190{
3191 if (probe_broken_meters(dpif_)) {
3192 return ENOMEM;
3193 }
3194
3195 return dpif_netlink_meter_set__(dpif_, meter_id, config);
3196}
3197
80738e5f
AZ
3198/* Retrieve statistics and/or delete meter 'meter_id'. Statistics are
3199 * stored in 'stats', if it is not null. If 'command' is
3200 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3201 * retrieved. If 'command' is OVS_METER_CMD_GET, then statistics are
3202 * simply retrieved. */
5dddf960 3203static int
80738e5f
AZ
3204dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3205 ofproto_meter_id meter_id,
3206 struct ofputil_meter_stats *stats,
3207 uint16_t max_bands,
3208 enum ovs_meter_cmd command)
5dddf960 3209{
80738e5f
AZ
3210 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3211 struct ofpbuf buf, *msg;
3212 uint64_t stub[1024 / 8];
3213
3214 static const struct nl_policy ovs_meter_stats_policy[] = {
3215 [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3216 [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3217 .optional = true},
3218 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3219 };
3220 struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3221
3222 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3223
3224 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3225
3226 int error = dpif_netlink_meter_transact(&buf, &msg,
3227 ovs_meter_stats_policy, a,
3228 ARRAY_SIZE(ovs_meter_stats_policy));
3229 if (error) {
3230 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3231 VLOG_INFO_RL(&rl, "dpif_netlink_meter_transact %s failed",
3232 command == OVS_METER_CMD_GET ? "get" : "del");
3233 return error;
3234 }
3235
3236 if (stats
3237 && a[OVS_METER_ATTR_ID]
3238 && a[OVS_METER_ATTR_STATS]
3239 && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) == meter_id.uint32) {
3240 /* return stats */
3241 const struct ovs_flow_stats *stat;
3242 const struct nlattr *nla;
3243 size_t left;
3244
3245 stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3246 stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3247 stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3248
3249 if (a[OVS_METER_ATTR_BANDS]) {
3250 size_t n_bands = 0;
3251 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3252 const struct nlattr *band_nla;
3253 band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3254 if (band_nla && nl_attr_get_size(band_nla) \
3255 == sizeof(struct ovs_flow_stats)) {
3256 stat = nl_attr_get(band_nla);
3257
3258 if (n_bands < max_bands) {
3259 stats->bands[n_bands].packet_count
3260 = get_32aligned_u64(&stat->n_packets);
3261 stats->bands[n_bands].byte_count
3262 = get_32aligned_u64(&stat->n_bytes);
3263 ++n_bands;
3264 }
3265 } else {
3266 stats->bands[n_bands].packet_count = 0;
3267 stats->bands[n_bands].byte_count = 0;
3268 ++n_bands;
3269 }
3270 }
3271 stats->n_bands = n_bands;
3272 } else {
3273 /* For a non-existent meter, return 0 stats. */
3274 stats->n_bands = 0;
3275 }
3276 }
3277
3278 ofpbuf_delete(msg);
3279 return error;
5dddf960
JR
3280}
3281
3282static int
80738e5f
AZ
3283dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3284 struct ofputil_meter_stats *stats, uint16_t max_bands)
5dddf960 3285{
80738e5f
AZ
3286 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3287 OVS_METER_CMD_GET);
3288}
3289
3290static int
3291dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3292 struct ofputil_meter_stats *stats, uint16_t max_bands)
3293{
3294 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3295 OVS_METER_CMD_DEL);
5dddf960
JR
3296}
3297
92d0d515
JP
3298static bool
3299probe_broken_meters__(struct dpif *dpif)
3300{
3301 /* This test is destructive if a probe occurs while ovs-vswitchd is
3302 * running (e.g., an ovs-dpctl meter command is called), so choose a
3303 * random high meter id to make this less likely to occur. */
3304 ofproto_meter_id id1 = { 54545401 };
3305 ofproto_meter_id id2 = { 54545402 };
3306 struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
3307 struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
3308 struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
3309
3310 /* Try adding two meters and make sure that they both come back with
60ebc04d
JP
3311 * the proper meter id. Use the "__" version so that we don't cause
3312 * a recurve deadlock. */
3313 dpif_netlink_meter_set__(dpif, id1, &config1);
3314 dpif_netlink_meter_set__(dpif, id2, &config2);
92d0d515
JP
3315
3316 if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
3317 || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3318 VLOG_INFO("The kernel module has a broken meter implementation.");
3319 return true;
3320 }
3321
3322 dpif_netlink_meter_del(dpif, id1, NULL, 0);
3323 dpif_netlink_meter_del(dpif, id2, NULL, 0);
3324
3325 return false;
3326}
3327
3328static bool
3329probe_broken_meters(struct dpif *dpif)
3330{
3331 /* This is a once-only test because currently OVS only has at most a single
3332 * Netlink capable datapath on any given platform. */
3333 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3334
3335 static bool broken_meters = false;
3336 if (ovsthread_once_start(&once)) {
3337 broken_meters = probe_broken_meters__(dpif);
3338 ovsthread_once_done(&once);
3339 }
3340 return broken_meters;
3341}
5dddf960 3342\f
/* The 'struct dpif_class' instance for this Netlink-based datapath provider.
 *
 * This is a positional initializer: every entry must stay in the member order
 * of 'struct dpif_class', which is declared outside this file chunk.  Entries
 * set to NULL carry the name of the member they occupy in a trailing comment
 * where the original author provided one.
 *
 * NOTE(review): NULL entries presumably mean "operation not provided by this
 * class" -- confirm against the 'struct dpif_class' documentation. */
const struct dpif_class dpif_netlink_class = {
    "system",
    NULL,                       /* init */
    dpif_netlink_enumerate,
    NULL,
    dpif_netlink_open,
    dpif_netlink_close,
    dpif_netlink_destroy,
    dpif_netlink_run,
    NULL,                       /* wait */
    dpif_netlink_get_stats,
    dpif_netlink_port_add,
    dpif_netlink_port_del,
    NULL,                       /* port_set_config */
    dpif_netlink_port_query_by_number,
    dpif_netlink_port_query_by_name,
    dpif_netlink_port_get_pid,
    dpif_netlink_port_dump_start,
    dpif_netlink_port_dump_next,
    dpif_netlink_port_dump_done,
    dpif_netlink_port_poll,
    dpif_netlink_port_poll_wait,
    dpif_netlink_flow_flush,
    dpif_netlink_flow_dump_create,
    dpif_netlink_flow_dump_destroy,
    dpif_netlink_flow_dump_thread_create,
    dpif_netlink_flow_dump_thread_destroy,
    dpif_netlink_flow_dump_next,
    dpif_netlink_operate,
    dpif_netlink_recv_set,
    dpif_netlink_handlers_set,
    NULL,                       /* set_config */
    dpif_netlink_queue_to_priority,
    dpif_netlink_recv,
    dpif_netlink_recv_wait,
    dpif_netlink_recv_purge,
    NULL,                       /* register_dp_purge_cb */
    NULL,                       /* register_upcall_cb */
    NULL,                       /* enable_upcall */
    NULL,                       /* disable_upcall */
    dpif_netlink_get_datapath_version, /* get_datapath_version */
    dpif_netlink_ct_dump_start,
    dpif_netlink_ct_dump_next,
    dpif_netlink_ct_dump_done,
    dpif_netlink_ct_flush,
    NULL,                       /* ct_set_maxconns */
    NULL,                       /* ct_get_maxconns */
    NULL,                       /* ct_get_nconns */
    dpif_netlink_ct_set_limits,
    dpif_netlink_ct_get_limits,
    dpif_netlink_ct_del_limits,
    dpif_netlink_meter_get_features,
    dpif_netlink_meter_set,
    dpif_netlink_meter_get,
    dpif_netlink_meter_del,
};
93451a0a 3399
96fba48f 3400static int
93451a0a 3401dpif_netlink_init(void)
96fba48f 3402{
eb8ed438
BP
3403 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3404 static int error;
982b8810 3405
eb8ed438 3406 if (ovsthread_once_start(&once)) {
df2c07f4
JP
3407 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
3408 &ovs_datapath_family);
37a1300c 3409 if (error) {
e0e2410d 3410 VLOG_INFO("Generic Netlink family '%s' does not exist. "
cae7529c
CL
3411 "The Open vSwitch kernel module is probably not loaded.",
3412 OVS_DATAPATH_FAMILY);
37a1300c 3413 }
f0fef760 3414 if (!error) {
df2c07f4 3415 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
f0fef760 3416 }
37a1300c 3417 if (!error) {
df2c07f4 3418 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
37a1300c 3419 }
aaff4b55 3420 if (!error) {
df2c07f4
JP
3421 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
3422 &ovs_packet_family);
aaff4b55 3423 }
c7178a0b
EJ
3424 if (!error) {
3425 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
b3dcb73c 3426 &ovs_vport_mcgroup);
c7178a0b 3427 }
80738e5f
AZ
3428 if (!error) {
3429 if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
3430 VLOG_INFO("The kernel module does not support meters.");
3431 }
3432 }
906ff9d2
YHW
3433 if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
3434 &ovs_ct_limit_family) < 0) {
3435 VLOG_INFO("Generic Netlink family '%s' does not exist. "
3436 "Please update the Open vSwitch kernel module to enable "
3437 "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
3438 }
eb8ed438 3439
921c370a
EG
3440 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
3441
eb8ed438 3442 ovsthread_once_done(&once);
982b8810
BP
3443 }
3444
3445 return error;
96fba48f
BP
3446}
3447
c19e6535 3448bool
93451a0a 3449dpif_netlink_is_internal_device(const char *name)
9fe3b9a2 3450{
93451a0a 3451 struct dpif_netlink_vport reply;
c19e6535 3452 struct ofpbuf *buf;
9fe3b9a2 3453 int error;
96fba48f 3454
93451a0a 3455 error = dpif_netlink_vport_get(name, &reply, &buf);
c19e6535
BP
3456 if (!error) {
3457 ofpbuf_delete(buf);
141d9ce4 3458 } else if (error != ENODEV && error != ENOENT) {
c19e6535 3459 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
10a89ef0 3460 name, ovs_strerror(error));
96fba48f
BP
3461 }
3462
df2c07f4 3463 return reply.type == OVS_VPORT_TYPE_INTERNAL;
96fba48f 3464}
e0467f6d 3465
df2c07f4 3466/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
c19e6535
BP
3467 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
3468 * positive errno value.
3469 *
3470 * 'vport' will contain pointers into 'buf', so the caller should not free
3471 * 'buf' while 'vport' is still in use. */
3472static int
93451a0a 3473dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
c19e6535
BP
3474 const struct ofpbuf *buf)
3475{
df2c07f4
JP
3476 static const struct nl_policy ovs_vport_policy[] = {
3477 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
3478 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
3479 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1579cf67 3480 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
f7df9823 3481 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
c19e6535 3482 .optional = true },
df2c07f4 3483 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
bfda5239 3484 [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
c19e6535
BP
3485 };
3486
93451a0a 3487 dpif_netlink_vport_init(vport);
c19e6535 3488
0a2869d5
BP
3489 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3490 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3491 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3492 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3493
3494 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
df2c07f4
JP
3495 if (!nlmsg || !genl || !ovs_header
3496 || nlmsg->nlmsg_type != ovs_vport_family
3497 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
3498 ARRAY_SIZE(ovs_vport_policy))) {
c19e6535
BP
3499 return EINVAL;
3500 }
c19e6535 3501
f0fef760 3502 vport->cmd = genl->cmd;
df2c07f4 3503 vport->dp_ifindex = ovs_header->dp_ifindex;
4e022ec0 3504 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
df2c07f4
JP
3505 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
3506 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
b063d9f0 3507 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1579cf67
AW
3508 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
3509 / (sizeof *vport->upcall_pids);
3510 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
3511
b063d9f0 3512 }
df2c07f4
JP
3513 if (a[OVS_VPORT_ATTR_STATS]) {
3514 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
3515 }
df2c07f4
JP
3516 if (a[OVS_VPORT_ATTR_OPTIONS]) {
3517 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
3518 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535 3519 }
bfda5239
FL
3520 if (a[OVS_VPORT_ATTR_NETNSID]) {
3521 netnsid_set(&vport->netnsid,
3522 nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
3523 } else {
3524 netnsid_set_local(&vport->netnsid);
3525 }
c19e6535
BP
3526 return 0;
3527}
3528
df2c07f4 3529/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
c19e6535
BP
3530 * followed by Netlink attributes corresponding to 'vport'. */
3531static void
93451a0a
AS
3532dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
3533 struct ofpbuf *buf)
c19e6535 3534{
df2c07f4 3535 struct ovs_header *ovs_header;
f0fef760 3536
df2c07f4 3537 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
69685a88 3538 vport->cmd, OVS_VPORT_VERSION);
c19e6535 3539
df2c07f4
JP
3540 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3541 ovs_header->dp_ifindex = vport->dp_ifindex;
c19e6535 3542
4e022ec0
AW
3543 if (vport->port_no != ODPP_NONE) {
3544 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
c19e6535
BP
3545 }
3546
df2c07f4
JP
3547 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
3548 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
c19e6535
BP
3549 }
3550
3551 if (vport->name) {
df2c07f4 3552 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
c19e6535
BP
3553 }
3554
1579cf67
AW
3555 if (vport->upcall_pids) {
3556 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
3557 vport->upcall_pids,
3558 vport->n_upcall_pids * sizeof *vport->upcall_pids);
a24a6574 3559 }
b063d9f0 3560
c19e6535 3561 if (vport->stats) {
df2c07f4 3562 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
c19e6535
BP
3563 vport->stats, sizeof *vport->stats);
3564 }
3565
c19e6535 3566 if (vport->options) {
df2c07f4 3567 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
c19e6535
BP
3568 vport->options, vport->options_len);
3569 }
c19e6535
BP
3570}
3571
/* Clears 'vport' to "empty" values: every byte of the structure is set to
 * zero and then 'port_no' is set to ODPP_NONE, which is the value
 * dpif_netlink_vport_to_ofpbuf() tests for to decide that no port number
 * attribute should be emitted. */
void
dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
{
    memset(vport, 0, sizeof *vport);
    vport->port_no = ODPP_NONE;
}
3579
3580/* Executes 'request' in the kernel datapath. If the command fails, returns a
3581 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3582 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
df2c07f4 3583 * result of the command is expected to be an ovs_vport also, which is decoded
c19e6535
BP
3584 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3585 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
3586int
93451a0a
AS
3587dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
3588 struct dpif_netlink_vport *reply,
3589 struct ofpbuf **bufp)
c19e6535 3590{
f0fef760 3591 struct ofpbuf *request_buf;
c19e6535
BP
3592 int error;
3593
cb22974d 3594 ovs_assert((reply != NULL) == (bufp != NULL));
c19e6535 3595
93451a0a 3596 error = dpif_netlink_init();
42bb6c72
BP
3597 if (error) {
3598 if (reply) {
3599 *bufp = NULL;
93451a0a 3600 dpif_netlink_vport_init(reply);
42bb6c72
BP
3601 }
3602 return error;
3603 }
3604
f0fef760 3605 request_buf = ofpbuf_new(1024);
93451a0a 3606 dpif_netlink_vport_to_ofpbuf(request, request_buf);
a88b4e04 3607 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
f0fef760 3608 ofpbuf_delete(request_buf);
c19e6535 3609
f0fef760
BP
3610 if (reply) {
3611 if (!error) {
93451a0a 3612 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
f0fef760 3613 }
c19e6535 3614 if (error) {
93451a0a 3615 dpif_netlink_vport_init(reply);
f0fef760
BP
3616 ofpbuf_delete(*bufp);
3617 *bufp = NULL;
c19e6535 3618 }
c19e6535
BP
3619 }
3620 return error;
3621}
3622
3623/* Obtains information about the kernel vport named 'name' and stores it into
3624 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
3625 * longer needed ('reply' will contain pointers into '*bufp'). */
3626int
93451a0a
AS
3627dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
3628 struct ofpbuf **bufp)
c19e6535 3629{
93451a0a 3630 struct dpif_netlink_vport request;
c19e6535 3631
93451a0a 3632 dpif_netlink_vport_init(&request);
df2c07f4 3633 request.cmd = OVS_VPORT_CMD_GET;
c19e6535
BP
3634 request.name = name;
3635
93451a0a 3636 return dpif_netlink_vport_transact(&request, reply, bufp);
c19e6535 3637}
93451a0a 3638
df2c07f4 3639/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
aaff4b55
BP
3640 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
3641 * positive errno value.
d6569377
BP
3642 *
3643 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
3644 * while 'dp' is still in use. */
3645static int
93451a0a 3646dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
d6569377 3647{
df2c07f4
JP
3648 static const struct nl_policy ovs_datapath_policy[] = {
3649 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
f7df9823 3650 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
d6569377 3651 .optional = true },
847108dc
AZ
3652 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
3653 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
3654 .optional = true },
d6569377
BP
3655 };
3656
93451a0a 3657 dpif_netlink_dp_init(dp);
d6569377 3658
0a2869d5
BP
3659 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3660 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3661 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3662 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3663
3664 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
df2c07f4
JP
3665 if (!nlmsg || !genl || !ovs_header
3666 || nlmsg->nlmsg_type != ovs_datapath_family
3667 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
3668 ARRAY_SIZE(ovs_datapath_policy))) {
d6569377
BP
3669 return EINVAL;
3670 }
d6569377 3671
aaff4b55 3672 dp->cmd = genl->cmd;
df2c07f4
JP
3673 dp->dp_ifindex = ovs_header->dp_ifindex;
3674 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
3675 if (a[OVS_DP_ATTR_STATS]) {
6a54dedc 3676 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
d6569377 3677 }
982b8810 3678
847108dc 3679 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
6a54dedc 3680 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
847108dc
AZ
3681 }
3682
d6569377
BP
3683 return 0;
3684}
3685
aaff4b55 3686/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
d6569377 3687static void
93451a0a 3688dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
d6569377 3689{
df2c07f4 3690 struct ovs_header *ovs_header;
d6569377 3691
df2c07f4 3692 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
69685a88
JG
3693 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
3694 OVS_DATAPATH_VERSION);
aaff4b55 3695
df2c07f4
JP
3696 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3697 ovs_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
3698
3699 if (dp->name) {
df2c07f4 3700 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
d6569377
BP
3701 }
3702
a24a6574
BP
3703 if (dp->upcall_pid) {
3704 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
3705 }
b063d9f0 3706
b7fd5e38
TG
3707 if (dp->user_features) {
3708 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
3709 }
3710
df2c07f4 3711 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
d6569377
BP
3712}
3713
/* Clears 'dp' to "empty" values by setting every byte of the structure to
 * zero.  dpif_netlink_dp_to_ofpbuf() skips the name, upcall pid and user
 * features attributes for a structure in this state. */
static void
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
{
    memset(dp, 0, sizeof *dp);
}
3720
aaff4b55 3721static void
93451a0a 3722dpif_netlink_dp_dump_start(struct nl_dump *dump)
aaff4b55 3723{
93451a0a 3724 struct dpif_netlink_dp request;
aaff4b55
BP
3725 struct ofpbuf *buf;
3726
93451a0a 3727 dpif_netlink_dp_init(&request);
df2c07f4 3728 request.cmd = OVS_DP_CMD_GET;
aaff4b55
BP
3729
3730 buf = ofpbuf_new(1024);
93451a0a 3731 dpif_netlink_dp_to_ofpbuf(&request, buf);
a88b4e04 3732 nl_dump_start(dump, NETLINK_GENERIC, buf);
aaff4b55
BP
3733 ofpbuf_delete(buf);
3734}
3735
d6569377
BP
3736/* Executes 'request' in the kernel datapath. If the command fails, returns a
3737 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3738 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
aaff4b55
BP
3739 * result of the command is expected to be of the same form, which is decoded
3740 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3741 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 3742static int
93451a0a
AS
3743dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
3744 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
d6569377 3745{
aaff4b55 3746 struct ofpbuf *request_buf;
d6569377 3747 int error;
d6569377 3748
cb22974d 3749 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 3750
aaff4b55 3751 request_buf = ofpbuf_new(1024);
93451a0a 3752 dpif_netlink_dp_to_ofpbuf(request, request_buf);
a88b4e04 3753 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
aaff4b55 3754 ofpbuf_delete(request_buf);
d6569377 3755
aaff4b55 3756 if (reply) {
93451a0a 3757 dpif_netlink_dp_init(reply);
aaff4b55 3758 if (!error) {
93451a0a 3759 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
aaff4b55 3760 }
d6569377 3761 if (error) {
aaff4b55
BP
3762 ofpbuf_delete(*bufp);
3763 *bufp = NULL;
d6569377 3764 }
d6569377
BP
3765 }
3766 return error;
3767}
3768
3769/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
3770 * The caller must free '*bufp' when the reply is no longer needed ('reply'
3771 * will contain pointers into '*bufp'). */
d3d8f1f7 3772static int
93451a0a
AS
3773dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
3774 struct ofpbuf **bufp)
d6569377 3775{
93451a0a
AS
3776 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3777 struct dpif_netlink_dp request;
d6569377 3778
93451a0a 3779 dpif_netlink_dp_init(&request);
df2c07f4 3780 request.cmd = OVS_DP_CMD_GET;
254f2dc8 3781 request.dp_ifindex = dpif->dp_ifindex;
d6569377 3782
93451a0a 3783 return dpif_netlink_dp_transact(&request, reply, bufp);
d6569377 3784}
93451a0a 3785
df2c07f4 3786/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
37a1300c 3787 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
d6569377
BP
3788 * positive errno value.
3789 *
3790 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
3791 * while 'flow' is still in use. */
3792static int
93451a0a
AS
3793dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
3794 const struct ofpbuf *buf)
d6569377 3795{
70e5ed6f
JS
3796 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
3797 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
e6cc0bab 3798 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
df2c07f4 3799 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
f7df9823 3800 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
d6569377 3801 .optional = true },
df2c07f4
JP
3802 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
3803 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
ab79d262 3804 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
df2c07f4 3805 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 3806 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
70e5ed6f 3807 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
d6569377
BP
3808 };
3809
93451a0a 3810 dpif_netlink_flow_init(flow);
d6569377 3811
0a2869d5
BP
3812 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3813 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3814 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3815 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3816
3817 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
df2c07f4
JP
3818 if (!nlmsg || !genl || !ovs_header
3819 || nlmsg->nlmsg_type != ovs_flow_family
3820 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
3821 ARRAY_SIZE(ovs_flow_policy))) {
d6569377
BP
3822 return EINVAL;
3823 }
70e5ed6f
JS
3824 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
3825 return EINVAL;
3826 }
d6569377 3827
37a1300c 3828 flow->nlmsg_flags = nlmsg->nlmsg_flags;
df2c07f4 3829 flow->dp_ifindex = ovs_header->dp_ifindex;
70e5ed6f
JS
3830 if (a[OVS_FLOW_ATTR_KEY]) {
3831 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
3832 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
3833 }
e6cc0bab 3834
70e5ed6f 3835 if (a[OVS_FLOW_ATTR_UFID]) {
ab79d262 3836 flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
70e5ed6f
JS
3837 flow->ufid_present = true;
3838 }
e6cc0bab
AZ
3839 if (a[OVS_FLOW_ATTR_MASK]) {
3840 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
3841 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
3842 }
df2c07f4
JP
3843 if (a[OVS_FLOW_ATTR_ACTIONS]) {
3844 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
3845 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377 3846 }
df2c07f4
JP
3847 if (a[OVS_FLOW_ATTR_STATS]) {
3848 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
d6569377 3849 }
df2c07f4
JP
3850 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
3851 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
d6569377 3852 }
df2c07f4
JP
3853 if (a[OVS_FLOW_ATTR_USED]) {
3854 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
9e980142 3855 }
d6569377
BP
3856 return 0;
3857}
3858
beb75a40
JS
3859
3860/*
a8a3eee4
JS
3861 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
3862 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
3863 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
beb75a40
JS
3864 */
3865static void
3866put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
3867 const struct nlattr *data, uint16_t data_len)
3868{
3869 const struct nlattr *packet_type;
3870
3871 packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
3872
3873 if (packet_type) {
3874 /* exclude PACKET_TYPE Netlink attribute. */
3875 ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
3876 size_t packet_type_len = NL_A_U32_SIZE;
3877 size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
3878 size_t second_chunk_size = data_len - first_chunk_size
3879 - packet_type_len;
beb75a40 3880 struct nlattr *next_attr = nl_attr_next(packet_type);
1ca5b61b 3881 size_t ofs;
beb75a40 3882
1ca5b61b
JS
3883 ofs = nl_msg_start_nested(buf, type);
3884 nl_msg_put(buf, data, first_chunk_size);
3885 nl_msg_put(buf, next_attr, second_chunk_size);
a8a3eee4
JS
3886 if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
3887 ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
3888 const struct nlattr *nla;
3889
3890 nla = nl_attr_find(buf, NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
3891 if (nla) {
3892 ovs_be16 *ethertype;
3893
3894 ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
3895 *ethertype = pt;
3896 } else {
3897 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
3898 }
3899 }
1ca5b61b 3900 nl_msg_end_nested(buf, ofs);
beb75a40
JS
3901 } else {
3902 nl_msg_put_unspec(buf, type, data, data_len);
3903 }
3904}
3905
df2c07f4 3906/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
d6569377
BP
3907 * followed by Netlink attributes corresponding to 'flow'. */
3908static void
93451a0a
AS
3909dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
3910 struct ofpbuf *buf)
d6569377 3911{
df2c07f4 3912 struct ovs_header *ovs_header;
d6569377 3913
df2c07f4 3914 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
30b44744 3915 NLM_F_REQUEST | flow->nlmsg_flags,
69685a88 3916 flow->cmd, OVS_FLOW_VERSION);
37a1300c 3917
df2c07f4
JP
3918 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3919 ovs_header->dp_ifindex = flow->dp_ifindex;
d6569377 3920
70e5ed6f 3921 if (flow->ufid_present) {
ab79d262 3922 nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
70e5ed6f
JS
3923 }
3924 if (flow->ufid_terse) {
3925 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
3926 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
3927 | OVS_UFID_F_OMIT_ACTIONS);
3928 }
64bb477f
JS
3929 if (!flow->ufid_terse || !flow->ufid_present) {
3930 if (flow->key_len) {
beb75a40
JS
3931 put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
3932 flow->key_len);
64bb477f 3933 }
64bb477f 3934 if (flow->mask_len) {
beb75a40
JS
3935 put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
3936 flow->mask_len);
64bb477f
JS
3937 }
3938 if (flow->actions || flow->actions_len) {
3939 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
3940 flow->actions, flow->actions_len);
3941 }
d6569377
BP
3942 }
3943
3944 /* We never need to send these to the kernel. */
cb22974d
BP
3945 ovs_assert(!flow->stats);
3946 ovs_assert(!flow->tcp_flags);
3947 ovs_assert(!flow->used);
d6569377
BP
3948
3949 if (flow->clear) {
df2c07f4 3950 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
d6569377 3951 }
43f9ac0a
JR
3952 if (flow->probe) {
3953 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
3954 }
d6569377
BP
3955}
3956
3957/* Clears 'flow' to "empty" values. */
d3d8f1f7 3958static void
93451a0a 3959dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
d6569377
BP
3960{
3961 memset(flow, 0, sizeof *flow);
3962}
3963
3964/* Executes 'request' in the kernel datapath. If the command fails, returns a
3965 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3966 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
37a1300c
BP
3967 * result of the command is expected to be a flow also, which is decoded and
3968 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
3969 * is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 3970static int
93451a0a
AS
3971dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
3972 struct dpif_netlink_flow *reply,
3973 struct ofpbuf **bufp)
d6569377 3974{
37a1300c 3975 struct ofpbuf *request_buf;
d6569377 3976 int error;
d6569377 3977
cb22974d 3978 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 3979
30b44744
BP
3980 if (reply) {
3981 request->nlmsg_flags |= NLM_F_ECHO;
3982 }
3983
37a1300c 3984 request_buf = ofpbuf_new(1024);
93451a0a 3985 dpif_netlink_flow_to_ofpbuf(request, request_buf);
a88b4e04 3986 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
37a1300c 3987 ofpbuf_delete(request_buf);
d6569377 3988
37a1300c
BP
3989 if (reply) {
3990 if (!error) {
93451a0a 3991 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
37a1300c 3992 }
d6569377 3993 if (error) {
93451a0a 3994 dpif_netlink_flow_init(reply);
37a1300c
BP
3995 ofpbuf_delete(*bufp);
3996 *bufp = NULL;
d6569377 3997 }
d6569377
BP
3998 }
3999 return error;
4000}
4001
4002static void
93451a0a
AS
4003dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
4004 struct dpif_flow_stats *stats)
d6569377
BP
4005{
4006 if (flow->stats) {
6a54dedc
BP
4007 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
4008 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
d6569377
BP
4009 } else {
4010 stats->n_packets = 0;
4011 stats->n_bytes = 0;
4012 }
0e70cdcb 4013 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
d6569377
BP
4014 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
4015}
e0467f6d 4016
14b4d2f9
BP
4017/* Logs information about a packet that was recently lost in 'ch' (in
4018 * 'dpif_'). */
4019static void
93451a0a 4020report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
1579cf67 4021 uint32_t handler_id)
14b4d2f9 4022{
14b4d2f9 4023 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
14b4d2f9
BP
4024 struct ds s;
4025
8d675c5a 4026 if (VLOG_DROP_WARN(&rl)) {
14b4d2f9
BP
4027 return;
4028 }
4029
4030 ds_init(&s);
4031 if (ch->last_poll != LLONG_MIN) {
4032 ds_put_format(&s, " (last polled %lld ms ago)",
4033 time_msec() - ch->last_poll);
4034 }
14b4d2f9 4035
1579cf67 4036 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
9b00386b 4037 dpif_name(&dpif->dpif), ch_idx, handler_id);
14b4d2f9
BP
4038 ds_destroy(&s);
4039}