]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netlink.c
tunnel: Bareudp Tunnel Support.
[mirror_ovs.git] / lib / dpif-netlink.c
CommitLineData
96fba48f 1/*
4ea96698 2 * Copyright (c) 2008-2018 Nicira, Inc.
96fba48f
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
9fe3b9a2 18
93451a0a 19#include "dpif-netlink.h"
96fba48f 20
96fba48f
BP
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <net/if.h>
b90fa799 26#include <linux/types.h>
aae51f53 27#include <linux/pkt_sched.h>
8522ba09 28#include <poll.h>
96fba48f 29#include <stdlib.h>
8522ba09 30#include <strings.h>
50f80534 31#include <sys/epoll.h>
10dcf8de 32#include <sys/stat.h>
96fba48f
BP
33#include <unistd.h>
34
773cd538 35#include "bitmap.h"
c4e08753 36#include "dpif-netlink-rtnl.h"
0d71302e 37#include "dpif-provider.h"
1579cf67 38#include "fat-rwlock.h"
0d71302e 39#include "flow.h"
032aa6a3 40#include "netdev-linux.h"
b6cabb8f 41#include "netdev-offload.h"
0d71302e 42#include "netdev-provider.h"
c3827f61 43#include "netdev-vport.h"
0d71302e 44#include "netdev.h"
c11c9f4a 45#include "netlink-conntrack.h"
45c8d3a1 46#include "netlink-notifier.h"
982b8810 47#include "netlink-socket.h"
856081f6 48#include "netlink.h"
bfda5239 49#include "netnsid.h"
feebdea2 50#include "odp-util.h"
0d71302e
BP
51#include "openvswitch/dynamic-string.h"
52#include "openvswitch/flow.h"
1f161318 53#include "openvswitch/hmap.h"
0d71302e 54#include "openvswitch/match.h"
64c96779 55#include "openvswitch/ofpbuf.h"
fd016ae3 56#include "openvswitch/poll-loop.h"
ee89ea7b 57#include "openvswitch/shash.h"
92d0d515 58#include "openvswitch/thread.h"
0d71302e
BP
59#include "openvswitch/vlog.h"
60#include "packets.h"
61#include "random.h"
b3c01ed3 62#include "sset.h"
14b4d2f9 63#include "timeval.h"
d6569377 64#include "unaligned.h"
96fba48f 65#include "util.h"
5136ce49 66
93451a0a 67VLOG_DEFINE_THIS_MODULE(dpif_netlink);
09cac43f 68#ifdef _WIN32
da467899 69#include "wmi.h"
09cac43f
NR
70enum { WINDOWS = 1 };
71#else
72enum { WINDOWS = 0 };
73#endif
95b1d73a 74enum { MAX_PORTS = USHRT_MAX };
773cd538 75
24b019f8
JP
76/* This ethtool flag was introduced in Linux 2.6.24, so it might be
77 * missing if we have old headers. */
78#define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
79
f2280b41 80#define FLOW_DUMP_MAX_BATCH 50
8b668ee3 81#define OPERATE_MAX_OPS 50
f2280b41 82
69c51582
MC
83#ifndef EPOLLEXCLUSIVE
84#define EPOLLEXCLUSIVE (1u << 28)
85#endif
86
/* Unserialized view of a datapath (OVS_DP_CMD_*) Generic Netlink message, used
 * both for building requests and for holding parsed replies. */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;                        /* One of OVS_DP_CMD_*. */

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes.  Each member mirrors the Netlink attribute named in its
     * comment. */
    const char *name;                   /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;         /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;             /* OVS_DP_ATTR_USER_FEATURES. */
    const struct ovs_dp_stats *stats;   /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                        /* OVS_DP_ATTR_MEGAFLOW_STATS. */
};
102
93451a0a
AS
103static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
104static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
105 const struct ofpbuf *);
106static void dpif_netlink_dp_dump_start(struct nl_dump *);
107static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
108 struct dpif_netlink_dp *reply,
109 struct ofpbuf **bufp);
110static int dpif_netlink_dp_get(const struct dpif *,
111 struct dpif_netlink_dp *reply,
112 struct ofpbuf **bufp);
b2ae4069
PB
113static int
114dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features);
93451a0a
AS
115
116struct dpif_netlink_flow {
37a1300c
BP
117 /* Generic Netlink header. */
118 uint8_t cmd;
d6569377 119
df2c07f4 120 /* struct ovs_header. */
d6569377 121 unsigned int nlmsg_flags;
254f2dc8 122 int dp_ifindex;
d6569377
BP
123
124 /* Attributes.
125 *
0e70cdcb
BP
126 * The 'stats' member points to 64-bit data that might only be aligned on
127 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
128 * values.
d2a23af2 129 *
df2c07f4 130 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
d2a23af2 131 * the Netlink version of the command, even if actions_len is zero. */
df2c07f4 132 const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
d6569377 133 size_t key_len;
e6cc0bab
AZ
134 const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */
135 size_t mask_len;
df2c07f4 136 const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
d6569377 137 size_t actions_len;
70e5ed6f
JS
138 ovs_u128 ufid; /* OVS_FLOW_ATTR_FLOW_ID. */
139 bool ufid_present; /* Is there a UFID? */
140 bool ufid_terse; /* Skip serializing key/mask/acts? */
df2c07f4
JP
141 const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
142 const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
0e70cdcb 143 const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
df2c07f4 144 bool clear; /* OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 145 bool probe; /* OVS_FLOW_ATTR_PROBE. */
d6569377
BP
146};
147
93451a0a
AS
148static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
149static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
150 const struct ofpbuf *);
151static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
152 struct ofpbuf *);
153static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
154 struct dpif_netlink_flow *reply,
155 struct ofpbuf **bufp);
156static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
157 struct dpif_flow_stats *);
7a5e0ee7 158static void dpif_netlink_flow_to_dpif_flow(struct dpif_flow *,
93451a0a 159 const struct dpif_netlink_flow *);
d6569377 160
/* A single upcall channel between the kernel datapath and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket; NULL if slot is unused. */
    long long int last_poll;    /* Last time this channel was polled. */
};
166
09cac43f
NR
167#ifdef _WIN32
168#define VPORT_SOCK_POOL_SIZE 1
/* On Windows, there is no native support for epoll.  There are equivalent
 * interfaces though, that are not used currently.  For simplicity, a pool of
 * netlink sockets is used.  Each socket is represented by 'struct
 * dpif_windows_vport_sock'.  Since it is a pool, multiple OVS ports may be
 * sharing the same socket.  In the future, we can add a reference count and
 * such fields. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* Netlink socket. */
};
178#endif
179
/* Per-handler upcall state: the epoll instance the handler waits on and its
 * cursor into the most recently returned batch of events. */
struct dpif_handler {
    struct epoll_event *epoll_events;   /* Event buffer for epoll_wait(). */
    int epoll_fd;                       /* epoll fd that includes channel
                                         * socks. */
    int n_events;                       /* Num events returned by
                                         * epoll_wait(). */
    int event_offset;                   /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;          /* Index to aid in allocating a
                                         * socket in the pool to a port. */
#endif
};
14b4d2f9 193
96fba48f 194/* Datapath interface for the openvswitch Linux kernel module. */
93451a0a 195struct dpif_netlink {
96fba48f 196 struct dpif dpif;
254f2dc8 197 int dp_ifindex;
dcdcad68 198 uint32_t user_features;
e9e28be3 199
b063d9f0 200 /* Upcall messages. */
1579cf67
AW
201 struct fat_rwlock upcall_lock;
202 struct dpif_handler *handlers;
203 uint32_t n_handlers; /* Num of upcall handlers. */
69c51582 204 struct dpif_channel *channels; /* Array of channels for each port. */
1579cf67
AW
205 int uc_array_size; /* Size of 'handler->channels' and */
206 /* 'handler->epoll_events'. */
982b8810 207
e9e28be3 208 /* Change notification. */
e4516b20 209 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
61eae437 210 bool refresh_channels;
96fba48f
BP
211};
212
93451a0a 213static void report_loss(struct dpif_netlink *, struct dpif_channel *,
9b00386b 214 uint32_t ch_idx, uint32_t handler_id);
1579cf67 215
96fba48f
BP
216static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
217
e4516b20
BP
218/* Generic Netlink family numbers for OVS.
219 *
93451a0a 220 * Initialized by dpif_netlink_init(). */
df2c07f4
JP
221static int ovs_datapath_family;
222static int ovs_vport_family;
223static int ovs_flow_family;
224static int ovs_packet_family;
80738e5f 225static int ovs_meter_family;
906ff9d2 226static int ovs_ct_limit_family;
982b8810 227
e4516b20
BP
228/* Generic Netlink multicast groups for OVS.
229 *
93451a0a 230 * Initialized by dpif_netlink_init(). */
e4516b20 231static unsigned int ovs_vport_mcgroup;
982b8810 232
921c370a
EG
/* If true, tunnel devices are created using OVS compat/genetlink.
 * If false, tunnel devices are created with rtnetlink and using light weight
 * tunnels.  If we fail to create the tunnel with rtnetlink+LWT, then we fall
 * back to using the compat interface. */
237static bool ovs_tunnels_out_of_tree = true;
238
93451a0a
AS
239static int dpif_netlink_init(void);
240static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
241static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
769b5034 242 odp_port_t port_no);
09cac43f 243static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
93451a0a
AS
244static int dpif_netlink_refresh_channels(struct dpif_netlink *,
245 uint32_t n_handlers);
246static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
247 struct ofpbuf *);
248static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
249 const struct ofpbuf *);
921c370a
EG
250static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
251 odp_port_t port_no, const char *port_name,
252 struct dpif_port *dpif_port);
f0fef760 253
d240e46a 254static int
622ea8fd 255create_nl_sock(struct dpif_netlink *dpif OVS_UNUSED, struct nl_sock **sockp)
d240e46a
AGS
256 OVS_REQ_WRLOCK(dpif->upcall_lock)
257{
258#ifndef _WIN32
622ea8fd 259 return nl_sock_create(NETLINK_GENERIC, sockp);
d240e46a
AGS
260#else
261 /* Pick netlink sockets to use in a round-robin fashion from each
262 * handler's pool of sockets. */
263 struct dpif_handler *handler = &dpif->handlers[0];
264 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
265 size_t index = handler->last_used_pool_idx;
266
267 /* A pool of sockets is allocated when the handler is initialized. */
268 if (sock_pool == NULL) {
622ea8fd 269 *sockp = NULL;
d240e46a
AGS
270 return EINVAL;
271 }
272
273 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
622ea8fd
BP
274 *sockp = sock_pool[index].nl_sock;
275 ovs_assert(*sockp);
d240e46a
AGS
276 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
277 handler->last_used_pool_idx = index;
278 return 0;
279#endif
280}
281
/* Releases a socket obtained from create_nl_sock().  On Windows this does
 * nothing, because there the socket comes from a handler's socket pool rather
 * than being created per call. */
static void
close_nl_sock(struct nl_sock *sock)
{
#ifdef _WIN32
    (void) sock;
#else
    nl_sock_destroy(sock);
#endif
}
289
93451a0a
AS
290static struct dpif_netlink *
291dpif_netlink_cast(const struct dpif *dpif)
96fba48f 292{
93451a0a
AS
293 dpif_assert_class(dpif, &dpif_netlink_class);
294 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
96fba48f
BP
295}
296
d3d22744 297static int
93451a0a
AS
298dpif_netlink_enumerate(struct sset *all_dps,
299 const struct dpif_class *dpif_class OVS_UNUSED)
d3d22744 300{
aaff4b55 301 struct nl_dump dump;
d57695d7
JS
302 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
303 struct ofpbuf msg, buf;
aaff4b55 304 int error;
982b8810 305
93451a0a 306 error = dpif_netlink_init();
aaff4b55
BP
307 if (error) {
308 return error;
982b8810 309 }
d3d22744 310
d57695d7 311 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a 312 dpif_netlink_dp_dump_start(&dump);
d57695d7 313 while (nl_dump_next(&dump, &msg, &buf)) {
93451a0a 314 struct dpif_netlink_dp dp;
d6569377 315
93451a0a 316 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
d0c23a1a 317 sset_add(all_dps, dp.name);
d3d22744
BP
318 }
319 }
d57695d7 320 ofpbuf_uninit(&buf);
aaff4b55 321 return nl_dump_done(&dump);
d3d22744
BP
322}
323
96fba48f 324static int
93451a0a
AS
325dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
326 bool create, struct dpif **dpifp)
96fba48f 327{
93451a0a 328 struct dpif_netlink_dp dp_request, dp;
c19e6535 329 struct ofpbuf *buf;
ea36840f 330 uint32_t upcall_pid;
c19e6535 331 int error;
96fba48f 332
93451a0a 333 error = dpif_netlink_init();
982b8810
BP
334 if (error) {
335 return error;
336 }
337
982b8810 338 /* Create or look up datapath. */
93451a0a 339 dpif_netlink_dp_init(&dp_request);
dcdcad68
PB
340 upcall_pid = 0;
341 dp_request.upcall_pid = &upcall_pid;
342 dp_request.name = name;
343
ea36840f
BP
344 if (create) {
345 dp_request.cmd = OVS_DP_CMD_NEW;
ea36840f 346 } else {
dcdcad68
PB
347 dp_request.cmd = OVS_DP_CMD_GET;
348
349 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
350 if (error) {
351 return error;
352 }
353 dp_request.user_features = dp.user_features;
354 ofpbuf_delete(buf);
355
b7fd5e38
TG
356 /* Use OVS_DP_CMD_SET to report user features */
357 dp_request.cmd = OVS_DP_CMD_SET;
ea36840f 358 }
dcdcad68 359
b7fd5e38 360 dp_request.user_features |= OVS_DP_F_UNALIGNED;
1579cf67 361 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
93451a0a 362 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
982b8810
BP
363 if (error) {
364 return error;
c19e6535 365 }
254f2dc8 366
e4516b20 367 error = open_dpif(&dp, dpifp);
b2ae4069 368 dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
8f4a4df5 369 ofpbuf_delete(buf);
b2ae4069 370
e4516b20 371 return error;
c19e6535
BP
372}
373
e4516b20 374static int
93451a0a 375open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
c19e6535 376{
93451a0a 377 struct dpif_netlink *dpif;
c19e6535 378
17411ecf 379 dpif = xzalloc(sizeof *dpif);
e4516b20 380 dpif->port_notifier = NULL;
1579cf67 381 fat_rwlock_init(&dpif->upcall_lock);
c19e6535 382
93451a0a 383 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
254f2dc8 384 dp->dp_ifindex, dp->dp_ifindex);
c19e6535 385
254f2dc8 386 dpif->dp_ifindex = dp->dp_ifindex;
dcdcad68 387 dpif->user_features = dp->user_features;
c19e6535 388 *dpifp = &dpif->dpif;
e4516b20
BP
389
390 return 0;
96fba48f
BP
391}
392
09cac43f
NR
393#ifdef _WIN32
394static void
395vport_delete_sock_pool(struct dpif_handler *handler)
396 OVS_REQ_WRLOCK(dpif->upcall_lock)
397{
398 if (handler->vport_sock_pool) {
399 uint32_t i;
400 struct dpif_windows_vport_sock *sock_pool =
401 handler->vport_sock_pool;
402
403 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
404 if (sock_pool[i].nl_sock) {
405 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
406 nl_sock_destroy(sock_pool[i].nl_sock);
407 sock_pool[i].nl_sock = NULL;
408 }
409 }
410
411 free(handler->vport_sock_pool);
412 handler->vport_sock_pool = NULL;
413 }
414}
415
416static int
417vport_create_sock_pool(struct dpif_handler *handler)
418 OVS_REQ_WRLOCK(dpif->upcall_lock)
419{
420 struct dpif_windows_vport_sock *sock_pool;
421 size_t i;
422 int error = 0;
423
424 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
425 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
426 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
427 if (error) {
428 goto error;
429 }
430
431 /* Enable the netlink socket to receive packets. This is equivalent to
432 * calling nl_sock_join_mcgroup() to receive events. */
433 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
434 if (error) {
435 goto error;
436 }
437 }
438
439 handler->vport_sock_pool = sock_pool;
440 handler->last_used_pool_idx = 0;
441 return 0;
442
443error:
444 vport_delete_sock_pool(handler);
445 return error;
446}
09cac43f
NR
447#endif /* _WIN32 */
448
69c51582
MC
449/* Given the port number 'port_idx', extracts the pid of netlink socket
450 * associated to the port and assigns it to 'upcall_pid'. */
1579cf67 451static bool
69c51582
MC
452vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
453 uint32_t *upcall_pid)
1579cf67 454{
1579cf67 455 /* Since the nl_sock can only be assigned in either all
69c51582 456 * or none "dpif" channels, the following check
1579cf67 457 * would suffice. */
69c51582 458 if (!dpif->channels[port_idx].sock) {
1579cf67
AW
459 return false;
460 }
09cac43f 461 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1579cf67 462
69c51582 463 *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
989fd548 464
1579cf67 465 return true;
989fd548
JP
466}
467
468static int
69c51582 469vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
622ea8fd 470 struct nl_sock *sock)
989fd548
JP
471{
472 struct epoll_event event;
4e022ec0 473 uint32_t port_idx = odp_to_u32(port_no);
69c51582 474 size_t i;
1579cf67 475 int error;
989fd548 476
1579cf67 477 if (dpif->handlers == NULL) {
622ea8fd 478 close_nl_sock(sock);
989fd548
JP
479 return 0;
480 }
481
1579cf67
AW
482 /* We assume that the datapath densely chooses port numbers, which can
483 * therefore be used as an index into 'channels' and 'epoll_events' of
69c51582 484 * 'dpif'. */
4e022ec0
AW
485 if (port_idx >= dpif->uc_array_size) {
486 uint32_t new_size = port_idx + 1;
989fd548 487
12d76859 488 if (new_size > MAX_PORTS) {
989fd548
JP
489 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
490 dpif_name(&dpif->dpif), port_no);
491 return EFBIG;
492 }
493
69c51582
MC
494 dpif->channels = xrealloc(dpif->channels,
495 new_size * sizeof *dpif->channels);
1579cf67 496
69c51582
MC
497 for (i = dpif->uc_array_size; i < new_size; i++) {
498 dpif->channels[i].sock = NULL;
499 }
1579cf67 500
69c51582
MC
501 for (i = 0; i < dpif->n_handlers; i++) {
502 struct dpif_handler *handler = &dpif->handlers[i];
1579cf67
AW
503
504 handler->epoll_events = xrealloc(handler->epoll_events,
505 new_size * sizeof *handler->epoll_events);
989fd548 506
1579cf67 507 }
989fd548
JP
508 dpif->uc_array_size = new_size;
509 }
510
511 memset(&event, 0, sizeof event);
69c51582 512 event.events = EPOLLIN | EPOLLEXCLUSIVE;
4e022ec0 513 event.data.u32 = port_idx;
989fd548 514
1579cf67
AW
515 for (i = 0; i < dpif->n_handlers; i++) {
516 struct dpif_handler *handler = &dpif->handlers[i];
517
09cac43f 518#ifndef _WIN32
622ea8fd 519 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock),
1579cf67
AW
520 &event) < 0) {
521 error = errno;
522 goto error;
523 }
93451a0a 524#endif
1579cf67 525 }
622ea8fd 526 dpif->channels[port_idx].sock = sock;
69c51582 527 dpif->channels[port_idx].last_poll = LLONG_MIN;
989fd548
JP
528
529 return 0;
1579cf67
AW
530
531error:
09cac43f 532#ifndef _WIN32
69c51582
MC
533 while (i--) {
534 epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
622ea8fd 535 nl_sock_fd(sock), NULL);
1579cf67 536 }
69c51582
MC
537#endif
538 dpif->channels[port_idx].sock = NULL;
1579cf67
AW
539
540 return error;
989fd548
JP
541}
542
543static void
93451a0a 544vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
989fd548 545{
4e022ec0 546 uint32_t port_idx = odp_to_u32(port_no);
1579cf67 547 size_t i;
989fd548 548
69c51582
MC
549 if (!dpif->handlers || port_idx >= dpif->uc_array_size
550 || !dpif->channels[port_idx].sock) {
989fd548
JP
551 return;
552 }
553
1579cf67
AW
554 for (i = 0; i < dpif->n_handlers; i++) {
555 struct dpif_handler *handler = &dpif->handlers[i];
09cac43f 556#ifndef _WIN32
1579cf67 557 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
69c51582 558 nl_sock_fd(dpif->channels[port_idx].sock), NULL);
09cac43f 559#endif
1579cf67
AW
560 handler->event_offset = handler->n_events = 0;
561 }
69c51582
MC
562#ifndef _WIN32
563 nl_sock_destroy(dpif->channels[port_idx].sock);
564#endif
565 dpif->channels[port_idx].sock = NULL;
1579cf67
AW
566}
567
568static void
93451a0a
AS
569destroy_all_channels(struct dpif_netlink *dpif)
570 OVS_REQ_WRLOCK(dpif->upcall_lock)
1579cf67
AW
571{
572 unsigned int i;
573
574 if (!dpif->handlers) {
575 return;
576 }
577
578 for (i = 0; i < dpif->uc_array_size; i++ ) {
93451a0a 579 struct dpif_netlink_vport vport_request;
1579cf67
AW
580 uint32_t upcall_pids = 0;
581
69c51582 582 if (!dpif->channels[i].sock) {
1579cf67
AW
583 continue;
584 }
585
586 /* Turn off upcalls. */
93451a0a 587 dpif_netlink_vport_init(&vport_request);
1579cf67
AW
588 vport_request.cmd = OVS_VPORT_CMD_SET;
589 vport_request.dp_ifindex = dpif->dp_ifindex;
590 vport_request.port_no = u32_to_odp(i);
a78f446a 591 vport_request.n_upcall_pids = 1;
1579cf67 592 vport_request.upcall_pids = &upcall_pids;
93451a0a 593 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67
AW
594
595 vport_del_channels(dpif, u32_to_odp(i));
596 }
597
598 for (i = 0; i < dpif->n_handlers; i++) {
599 struct dpif_handler *handler = &dpif->handlers[i];
600
09cac43f 601 dpif_netlink_handler_uninit(handler);
1579cf67 602 free(handler->epoll_events);
1579cf67 603 }
69c51582 604 free(dpif->channels);
1579cf67
AW
605 free(dpif->handlers);
606 dpif->handlers = NULL;
69c51582 607 dpif->channels = NULL;
1579cf67
AW
608 dpif->n_handlers = 0;
609 dpif->uc_array_size = 0;
17411ecf
JG
610}
611
96fba48f 612static void
93451a0a 613dpif_netlink_close(struct dpif *dpif_)
96fba48f 614{
93451a0a 615 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
c7178a0b 616
e4516b20 617 nl_sock_destroy(dpif->port_notifier);
1579cf67
AW
618
619 fat_rwlock_wrlock(&dpif->upcall_lock);
620 destroy_all_channels(dpif);
621 fat_rwlock_unlock(&dpif->upcall_lock);
622
623 fat_rwlock_destroy(&dpif->upcall_lock);
96fba48f
BP
624 free(dpif);
625}
626
627static int
93451a0a 628dpif_netlink_destroy(struct dpif *dpif_)
96fba48f 629{
93451a0a
AS
630 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
631 struct dpif_netlink_dp dp;
d6569377 632
93451a0a 633 dpif_netlink_dp_init(&dp);
df2c07f4 634 dp.cmd = OVS_DP_CMD_DEL;
254f2dc8 635 dp.dp_ifindex = dpif->dp_ifindex;
93451a0a 636 return dpif_netlink_dp_transact(&dp, NULL, NULL);
96fba48f
BP
637}
638
a36de779 639static bool
93451a0a 640dpif_netlink_run(struct dpif *dpif_)
61eae437 641{
93451a0a 642 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67 643
61eae437
BP
644 if (dpif->refresh_channels) {
645 dpif->refresh_channels = false;
1579cf67 646 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 647 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
1579cf67 648 fat_rwlock_unlock(&dpif->upcall_lock);
61eae437 649 }
a36de779 650 return false;
61eae437
BP
651}
652
96fba48f 653static int
93451a0a 654dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
96fba48f 655{
93451a0a 656 struct dpif_netlink_dp dp;
d6569377
BP
657 struct ofpbuf *buf;
658 int error;
659
93451a0a 660 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
d6569377 661 if (!error) {
6a54dedc
BP
662 memset(stats, 0, sizeof *stats);
663
664 if (dp.stats) {
665 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
666 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
667 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
668 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
669 }
670
671 if (dp.megaflow_stats) {
672 stats->n_masks = dp.megaflow_stats->n_masks;
673 stats->n_mask_hit = get_32aligned_u64(
674 &dp.megaflow_stats->n_mask_hit);
675 } else {
676 stats->n_masks = UINT32_MAX;
677 stats->n_mask_hit = UINT64_MAX;
678 }
d6569377
BP
679 ofpbuf_delete(buf);
680 }
681 return error;
96fba48f
BP
682}
683
dcdcad68
PB
684static int
685dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
686{
687 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
688 struct dpif_netlink_dp request, reply;
689 struct ofpbuf *bufp;
690 int error;
691
692 dpif_netlink_dp_init(&request);
693 request.cmd = OVS_DP_CMD_SET;
25a2af4f 694 request.name = dpif_->base_name;
dcdcad68
PB
695 request.dp_ifindex = dpif->dp_ifindex;
696 request.user_features = dpif->user_features | new_features;
697
698 error = dpif_netlink_dp_transact(&request, &reply, &bufp);
699 if (!error) {
700 dpif->user_features = reply.user_features;
701 ofpbuf_delete(bufp);
702 if (!(dpif->user_features & new_features)) {
703 return -EOPNOTSUPP;
704 }
705 }
706
707 return error;
708}
709
b9ad7294 710static const char *
93451a0a 711get_vport_type(const struct dpif_netlink_vport *vport)
b9ad7294
EJ
712{
713 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
714
715 switch (vport->type) {
5ed51209
JS
716 case OVS_VPORT_TYPE_NETDEV: {
717 const char *type = netdev_get_type_from_name(vport->name);
718
719 return type ? type : "system";
720 }
b9ad7294
EJ
721
722 case OVS_VPORT_TYPE_INTERNAL:
723 return "internal";
724
c1fc1411
JG
725 case OVS_VPORT_TYPE_GENEVE:
726 return "geneve";
727
b9ad7294
EJ
728 case OVS_VPORT_TYPE_GRE:
729 return "gre";
730
b9ad7294
EJ
731 case OVS_VPORT_TYPE_VXLAN:
732 return "vxlan";
733
a6ae068b
LJ
734 case OVS_VPORT_TYPE_LISP:
735 return "lisp";
736
4237026e
PS
737 case OVS_VPORT_TYPE_STT:
738 return "stt";
739
c387d817 740 case OVS_VPORT_TYPE_ERSPAN:
98514eea
WT
741 return "erspan";
742
c387d817 743 case OVS_VPORT_TYPE_IP6ERSPAN:
3b10ceee
GR
744 return "ip6erspan";
745
c387d817 746 case OVS_VPORT_TYPE_IP6GRE:
3b10ceee 747 return "ip6gre";
c387d817 748
3c6d05a0
WT
749 case OVS_VPORT_TYPE_GTPU:
750 return "gtpu";
751
ebe0e518
MV
752 case OVS_VPORT_TYPE_BAREUDP:
753 return "bareudp";
754
b9ad7294
EJ
755 case OVS_VPORT_TYPE_UNSPEC:
756 case __OVS_VPORT_TYPE_MAX:
757 break;
758 }
759
760 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
761 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
762 return "unknown";
763}
764
c4e08753 765enum ovs_vport_type
20c57607 766netdev_to_ovs_vport_type(const char *type)
c060c4cf 767{
c060c4cf
EJ
768 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
769 return OVS_VPORT_TYPE_NETDEV;
770 } else if (!strcmp(type, "internal")) {
771 return OVS_VPORT_TYPE_INTERNAL;
4237026e
PS
772 } else if (strstr(type, "stt")) {
773 return OVS_VPORT_TYPE_STT;
c1fc1411
JG
774 } else if (!strcmp(type, "geneve")) {
775 return OVS_VPORT_TYPE_GENEVE;
c060c4cf
EJ
776 } else if (!strcmp(type, "vxlan")) {
777 return OVS_VPORT_TYPE_VXLAN;
a6ae068b
LJ
778 } else if (!strcmp(type, "lisp")) {
779 return OVS_VPORT_TYPE_LISP;
7dc18ae9
WT
780 } else if (!strcmp(type, "erspan")) {
781 return OVS_VPORT_TYPE_ERSPAN;
782 } else if (!strcmp(type, "ip6erspan")) {
783 return OVS_VPORT_TYPE_IP6ERSPAN;
3b10ceee
GR
784 } else if (!strcmp(type, "ip6gre")) {
785 return OVS_VPORT_TYPE_IP6GRE;
1c385f49
GR
786 } else if (!strcmp(type, "gre")) {
787 return OVS_VPORT_TYPE_GRE;
3c6d05a0
WT
788 } else if (!strcmp(type, "gtpu")) {
789 return OVS_VPORT_TYPE_GTPU;
ebe0e518
MV
790 } else if (!strcmp(type, "bareudp")) {
791 return OVS_VPORT_TYPE_BAREUDP;
c060c4cf
EJ
792 } else {
793 return OVS_VPORT_TYPE_UNSPEC;
794 }
795}
796
96fba48f 797static int
20c57607
EG
798dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
799 enum ovs_vport_type type,
800 struct ofpbuf *options,
93451a0a 801 odp_port_t *port_nop)
b90de034 802 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 803{
93451a0a 804 struct dpif_netlink_vport request, reply;
c19e6535 805 struct ofpbuf *buf;
622ea8fd 806 struct nl_sock *sock = NULL;
790a4372 807 uint32_t upcall_pids = 0;
1579cf67 808 int error = 0;
96fba48f 809
1579cf67 810 if (dpif->handlers) {
622ea8fd 811 error = create_nl_sock(dpif, &sock);
713a45db 812 if (error) {
989fd548
JP
813 return error;
814 }
815 }
816
93451a0a 817 dpif_netlink_vport_init(&request);
df2c07f4 818 request.cmd = OVS_VPORT_CMD_NEW;
254f2dc8 819 request.dp_ifindex = dpif->dp_ifindex;
20c57607
EG
820 request.type = type;
821 request.name = name;
822
823 request.port_no = *port_nop;
622ea8fd
BP
824 if (sock) {
825 upcall_pids = nl_sock_pid(sock);
790a4372 826 }
69c51582
MC
827 request.n_upcall_pids = 1;
828 request.upcall_pids = &upcall_pids;
20c57607
EG
829
830 if (options) {
831 request.options = options->data;
832 request.options_len = options->size;
833 }
834
835 error = dpif_netlink_vport_transact(&request, &reply, &buf);
836 if (!error) {
837 *port_nop = reply.port_no;
838 } else {
839 if (error == EBUSY && *port_nop != ODPP_NONE) {
840 VLOG_INFO("%s: requested port %"PRIu32" is in use",
841 dpif_name(&dpif->dpif), *port_nop);
842 }
843
622ea8fd 844 close_nl_sock(sock);
20c57607
EG
845 goto exit;
846 }
847
622ea8fd 848 error = vport_add_channel(dpif, *port_nop, sock);
69c51582
MC
849 if (error) {
850 VLOG_INFO("%s: could not add channel for port %s",
851 dpif_name(&dpif->dpif), name);
852
853 /* Delete the port. */
854 dpif_netlink_vport_init(&request);
855 request.cmd = OVS_VPORT_CMD_DEL;
856 request.dp_ifindex = dpif->dp_ifindex;
857 request.port_no = *port_nop;
858 dpif_netlink_vport_transact(&request, NULL, NULL);
622ea8fd 859 close_nl_sock(sock);
69c51582 860 goto exit;
20c57607 861 }
20c57607
EG
862
863exit:
864 ofpbuf_delete(buf);
20c57607
EG
865
866 return error;
867}
868
869static int
870dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
871 odp_port_t *port_nop)
872 OVS_REQ_WRLOCK(dpif->upcall_lock)
873{
874 const struct netdev_tunnel_config *tnl_cfg;
875 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
876 const char *type = netdev_get_type(netdev);
877 uint64_t options_stub[64 / 8];
878 enum ovs_vport_type ovs_type;
879 struct ofpbuf options;
880 const char *name;
881
882 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
883
884 ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
885 if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
c283069c
BP
886 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
887 "unsupported type `%s'",
9b00386b 888 dpif_name(&dpif->dpif), name, type);
c283069c
BP
889 return EINVAL;
890 }
c3827f61 891
20c57607 892 if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
93451a0a 893#ifdef _WIN32
09cac43f 894 /* XXX : Map appropiate Windows handle */
93451a0a 895#else
24b019f8 896 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
93451a0a 897#endif
24b019f8
JP
898 }
899
da467899 900#ifdef _WIN32
20c57607 901 if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
da467899
AS
902 if (!create_wmi_port(name)){
903 VLOG_ERR("Could not create wmi internal port with name:%s", name);
da467899
AS
904 return EINVAL;
905 };
906 }
907#endif
908
26508d9a 909 tnl_cfg = netdev_get_tunnel_config(netdev);
526df7d8 910 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
26508d9a 911 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
526df7d8
TG
912 if (tnl_cfg->dst_port) {
913 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
914 ntohs(tnl_cfg->dst_port));
915 }
916 if (tnl_cfg->exts) {
917 size_t ext_ofs;
918 int i;
919
920 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
921 for (i = 0; i < 32; i++) {
922 if (tnl_cfg->exts & (1 << i)) {
923 nl_msg_put_flag(&options, i);
924 }
925 }
926 nl_msg_end_nested(&options, ext_ofs);
927 }
20c57607
EG
928 return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
929 port_nop);
2510ba7c 930 } else {
20c57607 931 return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
78a2d59c 932 }
c3827f61 933
20c57607 934}
989fd548 935
921c370a 936static int
c4e08753
EG
937dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
938 struct netdev *netdev,
939 odp_port_t *port_nop)
940 OVS_REQ_WRLOCK(dpif->upcall_lock)
941{
942 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
943 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
944 const char *name;
945 int error;
989fd548 946
c4e08753
EG
947 error = dpif_netlink_rtnl_port_create(netdev);
948 if (error) {
949 if (error != EOPNOTSUPP) {
d52ef4eb 950 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
c4e08753
EG
951 netdev_get_name(netdev), ovs_strerror(error));
952 }
953 return error;
954 }
1579cf67 955
c4e08753
EG
956 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
957 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
958 port_nop);
c37cb3ee 959 if (error) {
c4e08753
EG
960 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
961 }
962 return error;
963}
96fba48f
BP
964
965static int
93451a0a
AS
966dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
967 odp_port_t *port_nop)
9fafa796 968{
93451a0a 969 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
921c370a 970 int error = EOPNOTSUPP;
9fafa796 971
1579cf67 972 fat_rwlock_wrlock(&dpif->upcall_lock);
921c370a
EG
973 if (!ovs_tunnels_out_of_tree) {
974 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
975 }
c37cb3ee 976 if (error) {
921c370a
EG
977 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
978 }
1579cf67 979 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
980
981 return error;
982}
983
984static int
93451a0a 985dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
b90de034 986 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 987{
93451a0a 988 struct dpif_netlink_vport vport;
921c370a 989 struct dpif_port dpif_port;
773cd538 990 int error;
c19e6535 991
921c370a
EG
992 error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
993 if (error) {
994 return error;
995 }
996
93451a0a 997 dpif_netlink_vport_init(&vport);
df2c07f4 998 vport.cmd = OVS_VPORT_CMD_DEL;
254f2dc8 999 vport.dp_ifindex = dpif->dp_ifindex;
c19e6535 1000 vport.port_no = port_no;
da467899 1001#ifdef _WIN32
921c370a
EG
1002 if (!strcmp(dpif_port.type, "internal")) {
1003 if (!delete_wmi_port(dpif_port.name)) {
da467899 1004 VLOG_ERR("Could not delete wmi port with name: %s",
921c370a 1005 dpif_port.name);
da467899
AS
1006 };
1007 }
1008#endif
93451a0a 1009 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
773cd538 1010
1579cf67 1011 vport_del_channels(dpif, port_no);
989fd548 1012
921c370a
EG
1013 if (!error && !ovs_tunnels_out_of_tree) {
1014 error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1015 if (error == EOPNOTSUPP) {
1016 error = 0;
1017 }
1018 }
1019
1020 dpif_port_destroy(&dpif_port);
1021
773cd538 1022 return error;
c3827f61 1023}
3abc4a1a 1024
9fafa796 1025static int
93451a0a 1026dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
9fafa796 1027{
93451a0a 1028 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
1029 int error;
1030
1579cf67 1031 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 1032 error = dpif_netlink_port_del__(dpif, port_no);
1579cf67 1033 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1034
1035 return error;
1036}
1037
c3827f61 1038static int
93451a0a
AS
1039dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1040 const char *port_name, struct dpif_port *dpif_port)
c3827f61 1041{
93451a0a
AS
1042 struct dpif_netlink_vport request;
1043 struct dpif_netlink_vport reply;
c19e6535 1044 struct ofpbuf *buf;
4c738a8d
BP
1045 int error;
1046
93451a0a 1047 dpif_netlink_vport_init(&request);
df2c07f4 1048 request.cmd = OVS_VPORT_CMD_GET;
9b00386b 1049 request.dp_ifindex = dpif->dp_ifindex;
c19e6535
BP
1050 request.port_no = port_no;
1051 request.name = port_name;
4c738a8d 1052
93451a0a 1053 error = dpif_netlink_vport_transact(&request, &reply, &buf);
c19e6535 1054 if (!error) {
33db1592
BP
1055 if (reply.dp_ifindex != request.dp_ifindex) {
1056 /* A query by name reported that 'port_name' is in some datapath
1057 * other than 'dpif', but the caller wants to know about 'dpif'. */
1058 error = ENODEV;
4afba28d 1059 } else if (dpif_port) {
33db1592 1060 dpif_port->name = xstrdup(reply.name);
b9ad7294 1061 dpif_port->type = xstrdup(get_vport_type(&reply));
33db1592
BP
1062 dpif_port->port_no = reply.port_no;
1063 }
c19e6535 1064 ofpbuf_delete(buf);
3abc4a1a 1065 }
c19e6535 1066 return error;
96fba48f
BP
1067}
1068
1069static int
93451a0a
AS
1070dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1071 struct dpif_port *dpif_port)
96fba48f 1072{
93451a0a 1073 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9b00386b 1074
93451a0a 1075 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
96fba48f
BP
1076}
1077
/* Queries 'dpif_' for the port named 'devname' (port number passed as 0) and
 * returns the result of dpif_netlink_port_query__(). */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
1086
98403001 1087static uint32_t
93451a0a 1088dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
769b5034 1089 odp_port_t port_no)
b90de034 1090 OVS_REQ_RDLOCK(dpif->upcall_lock)
98403001 1091{
4e022ec0 1092 uint32_t port_idx = odp_to_u32(port_no);
9fafa796 1093 uint32_t pid = 0;
98403001 1094
f8fc5489 1095 if (dpif->handlers && dpif->uc_array_size > 0) {
4e022ec0 1096 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
989fd548 1097 * channel, since it is not heavily loaded. */
4e022ec0 1098 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1579cf67 1099
17f2748d
AW
1100 /* Needs to check in case the socket pointer is changed in between
1101 * the holding of upcall_lock. A known case happens when the main
1102 * thread deletes the vport while the handler thread is handling
1103 * the upcall from that port. */
69c51582
MC
1104 if (dpif->channels[idx].sock) {
1105 pid = nl_sock_pid(dpif->channels[idx].sock);
17f2748d 1106 }
98403001 1107 }
9fafa796
BP
1108
1109 return pid;
98403001
BP
1110}
1111
b90de034 1112static uint32_t
769b5034 1113dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
b90de034 1114{
93451a0a 1115 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
1116 uint32_t ret;
1117
1118 fat_rwlock_rdlock(&dpif->upcall_lock);
769b5034 1119 ret = dpif_netlink_port_get_pid__(dpif, port_no);
b90de034
AW
1120 fat_rwlock_unlock(&dpif->upcall_lock);
1121
1122 return ret;
1123}
1124
96fba48f 1125static int
93451a0a 1126dpif_netlink_flow_flush(struct dpif *dpif_)
96fba48f 1127{
8842fdf1 1128 const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif_));
93451a0a
AS
1129 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1130 struct dpif_netlink_flow flow;
37a1300c 1131
93451a0a 1132 dpif_netlink_flow_init(&flow);
df2c07f4 1133 flow.cmd = OVS_FLOW_CMD_DEL;
254f2dc8 1134 flow.dp_ifindex = dpif->dp_ifindex;
f7dde6df
PB
1135
1136 if (netdev_is_flow_api_enabled()) {
8842fdf1 1137 netdev_ports_flow_flush(dpif_type_str);
f7dde6df
PB
1138 }
1139
93451a0a 1140 return dpif_netlink_flow_transact(&flow, NULL, NULL);
96fba48f
BP
1141}
1142
93451a0a 1143struct dpif_netlink_port_state {
f0fef760 1144 struct nl_dump dump;
d57695d7 1145 struct ofpbuf buf;
c19e6535
BP
1146};
1147
222837c4 1148static void
93451a0a
AS
1149dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1150 struct nl_dump *dump)
96fba48f 1151{
93451a0a 1152 struct dpif_netlink_vport request;
f0fef760
BP
1153 struct ofpbuf *buf;
1154
93451a0a 1155 dpif_netlink_vport_init(&request);
067f1e23 1156 request.cmd = OVS_VPORT_CMD_GET;
254f2dc8 1157 request.dp_ifindex = dpif->dp_ifindex;
f0fef760
BP
1158
1159 buf = ofpbuf_new(1024);
93451a0a 1160 dpif_netlink_vport_to_ofpbuf(&request, buf);
222837c4 1161 nl_dump_start(dump, NETLINK_GENERIC, buf);
f0fef760 1162 ofpbuf_delete(buf);
222837c4
BP
1163}
1164
1165static int
93451a0a 1166dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
222837c4 1167{
93451a0a
AS
1168 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1169 struct dpif_netlink_port_state *state;
222837c4
BP
1170
1171 *statep = state = xmalloc(sizeof *state);
93451a0a 1172 dpif_netlink_port_dump_start__(dpif, &state->dump);
f0fef760 1173
d57695d7 1174 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
b0ec0f27
BP
1175 return 0;
1176}
1177
7c1ef244 1178static int
93451a0a
AS
1179dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1180 struct nl_dump *dump,
1181 struct dpif_netlink_vport *vport,
1182 struct ofpbuf *buffer)
222837c4 1183{
222837c4
BP
1184 struct ofpbuf buf;
1185 int error;
1186
d57695d7 1187 if (!nl_dump_next(dump, &buf, buffer)) {
222837c4
BP
1188 return EOF;
1189 }
1190
93451a0a 1191 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
222837c4
BP
1192 if (error) {
1193 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1194 dpif_name(&dpif->dpif), ovs_strerror(error));
1195 }
1196 return error;
1197}
1198
b0ec0f27 1199static int
93451a0a
AS
1200dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1201 struct dpif_port *dpif_port)
b0ec0f27 1202{
93451a0a
AS
1203 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1204 struct dpif_netlink_port_state *state = state_;
1205 struct dpif_netlink_vport vport;
96fba48f
BP
1206 int error;
1207
93451a0a
AS
1208 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1209 &state->buf);
c3827f61 1210 if (error) {
f0fef760 1211 return error;
c3827f61 1212 }
ebc56baa 1213 dpif_port->name = CONST_CAST(char *, vport.name);
b9ad7294 1214 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
f0fef760
BP
1215 dpif_port->port_no = vport.port_no;
1216 return 0;
b0ec0f27
BP
1217}
1218
1219static int
93451a0a 1220dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
b0ec0f27 1221{
93451a0a 1222 struct dpif_netlink_port_state *state = state_;
f0fef760 1223 int error = nl_dump_done(&state->dump);
8522b383 1224
d57695d7 1225 ofpbuf_uninit(&state->buf);
b0ec0f27 1226 free(state);
f0fef760 1227 return error;
96fba48f
BP
1228}
1229
e9e28be3 1230static int
93451a0a 1231dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
e9e28be3 1232{
93451a0a 1233 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e9e28be3 1234
e4516b20
BP
1235 /* Lazily create the Netlink socket to listen for notifications. */
1236 if (!dpif->port_notifier) {
1237 struct nl_sock *sock;
1238 int error;
1239
1240 error = nl_sock_create(NETLINK_GENERIC, &sock);
1241 if (error) {
1242 return error;
1243 }
1244
1245 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1246 if (error) {
1247 nl_sock_destroy(sock);
1248 return error;
1249 }
1250 dpif->port_notifier = sock;
1251
1252 /* We have no idea of the current state so report that everything
1253 * changed. */
1254 return ENOBUFS;
1255 }
1256
1257 for (;;) {
1258 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1259 uint64_t buf_stub[4096 / 8];
1260 struct ofpbuf buf;
1261 int error;
1262
1263 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
a86bd14e 1264 error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
e4516b20 1265 if (!error) {
93451a0a 1266 struct dpif_netlink_vport vport;
e4516b20 1267
93451a0a 1268 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
e4516b20
BP
1269 if (!error) {
1270 if (vport.dp_ifindex == dpif->dp_ifindex
1271 && (vport.cmd == OVS_VPORT_CMD_NEW
1272 || vport.cmd == OVS_VPORT_CMD_DEL
1273 || vport.cmd == OVS_VPORT_CMD_SET)) {
1274 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1275 dpif->dpif.full_name, vport.name, vport.cmd);
1579cf67 1276 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
61eae437
BP
1277 dpif->refresh_channels = true;
1278 }
e4516b20 1279 *devnamep = xstrdup(vport.name);
59e0c910 1280 ofpbuf_uninit(&buf);
e4516b20 1281 return 0;
e4516b20
BP
1282 }
1283 }
59e0c910
BP
1284 } else if (error != EAGAIN) {
1285 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1286 ovs_strerror(error));
1287 nl_sock_drain(dpif->port_notifier);
1288 error = ENOBUFS;
e4516b20
BP
1289 }
1290
59e0c910
BP
1291 ofpbuf_uninit(&buf);
1292 if (error) {
1293 return error;
1294 }
e9e28be3 1295 }
e9e28be3
BP
1296}
1297
1298static void
93451a0a 1299dpif_netlink_port_poll_wait(const struct dpif *dpif_)
e9e28be3 1300{
93451a0a 1301 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e4516b20
BP
1302
1303 if (dpif->port_notifier) {
1304 nl_sock_wait(dpif->port_notifier, POLLIN);
1305 } else {
e9e28be3 1306 poll_immediate_wake();
e9e28be3
BP
1307 }
1308}
1309
6fe09f8c 1310static void
70e5ed6f
JS
1311dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1312 const ovs_u128 *ufid, bool terse)
1313{
1314 if (ufid) {
1315 request->ufid = *ufid;
1316 request->ufid_present = true;
1317 } else {
1318 request->ufid_present = false;
1319 }
1320 request->ufid_terse = terse;
1321}
1322
1323static void
1324dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1325 const struct nlattr *key, size_t key_len,
1326 const ovs_u128 *ufid, bool terse,
1327 struct dpif_netlink_flow *request)
96fba48f 1328{
93451a0a 1329 dpif_netlink_flow_init(request);
6fe09f8c
JS
1330 request->cmd = OVS_FLOW_CMD_GET;
1331 request->dp_ifindex = dpif->dp_ifindex;
1332 request->key = key;
1333 request->key_len = key_len;
70e5ed6f
JS
1334 dpif_netlink_flow_init_ufid(request, ufid, terse);
1335}
1336
1337static void
1338dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1339 const struct dpif_flow_get *get,
1340 struct dpif_netlink_flow *request)
1341{
1342 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1343 false, request);
30053024
BP
1344}
1345
1346static int
70e5ed6f
JS
1347dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1348 const struct nlattr *key, size_t key_len,
1349 const ovs_u128 *ufid, bool terse,
1350 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
30053024 1351{
93451a0a 1352 struct dpif_netlink_flow request;
30053024 1353
70e5ed6f 1354 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
93451a0a 1355 return dpif_netlink_flow_transact(&request, reply, bufp);
96fba48f
BP
1356}
1357
70e5ed6f
JS
1358static int
1359dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1360 const struct dpif_netlink_flow *flow,
1361 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1362{
1363 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1364 flow->ufid_present ? &flow->ufid : NULL,
1365 false, reply, bufp);
1366}
1367
6bc60024 1368static void
93451a0a
AS
1369dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1370 const struct dpif_flow_put *put,
1371 struct dpif_netlink_flow *request)
6bc60024 1372{
d64e176c 1373 static const struct nlattr dummy_action;
6bc60024 1374
93451a0a 1375 dpif_netlink_flow_init(request);
89625d1e 1376 request->cmd = (put->flags & DPIF_FP_CREATE
6bc60024
BP
1377 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1378 request->dp_ifindex = dpif->dp_ifindex;
89625d1e
BP
1379 request->key = put->key;
1380 request->key_len = put->key_len;
e6cc0bab
AZ
1381 request->mask = put->mask;
1382 request->mask_len = put->mask_len;
70e5ed6f
JS
1383 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1384
6bc60024 1385 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
d64e176c
BP
1386 request->actions = (put->actions
1387 ? put->actions
1388 : CONST_CAST(struct nlattr *, &dummy_action));
89625d1e
BP
1389 request->actions_len = put->actions_len;
1390 if (put->flags & DPIF_FP_ZERO_STATS) {
6bc60024
BP
1391 request->clear = true;
1392 }
43f9ac0a
JR
1393 if (put->flags & DPIF_FP_PROBE) {
1394 request->probe = true;
1395 }
89625d1e 1396 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
6bc60024
BP
1397}
1398
b99d3cee 1399static void
70e5ed6f
JS
1400dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1401 const struct nlattr *key, size_t key_len,
1402 const ovs_u128 *ufid, bool terse,
1403 struct dpif_netlink_flow *request)
96fba48f 1404{
93451a0a 1405 dpif_netlink_flow_init(request);
b99d3cee
BP
1406 request->cmd = OVS_FLOW_CMD_DEL;
1407 request->dp_ifindex = dpif->dp_ifindex;
70e5ed6f
JS
1408 request->key = key;
1409 request->key_len = key_len;
1410 dpif_netlink_flow_init_ufid(request, ufid, terse);
1411}
1412
1413static void
1414dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1415 const struct dpif_flow_del *del,
1416 struct dpif_netlink_flow *request)
1417{
37382aa6
AS
1418 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1419 del->ufid, del->terse, request);
70e5ed6f
JS
1420}
1421
93451a0a 1422struct dpif_netlink_flow_dump {
ac64794a
BP
1423 struct dpif_flow_dump up;
1424 struct nl_dump nl_dump;
d2ad7ef1 1425 atomic_int status;
f2280b41
PB
1426 struct netdev_flow_dump **netdev_dumps;
1427 int netdev_dumps_num; /* Number of netdev_flow_dumps */
1428 struct ovs_mutex netdev_lock; /* Guards the following. */
1429 int netdev_current_dump OVS_GUARDED; /* Shared current dump */
a692410a 1430 struct dpif_flow_dump_types types; /* Type of dump */
e723fd32
JS
1431};
1432
93451a0a
AS
1433static struct dpif_netlink_flow_dump *
1434dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
e723fd32 1435{
93451a0a 1436 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
e723fd32
JS
1437}
1438
f2280b41
PB
1439static void
1440start_netdev_dump(const struct dpif *dpif_,
1441 struct dpif_netlink_flow_dump *dump)
1442{
1443 ovs_mutex_init(&dump->netdev_lock);
1444
a692410a 1445 if (!(dump->types.netdev_flows)) {
f2280b41
PB
1446 dump->netdev_dumps_num = 0;
1447 dump->netdev_dumps = NULL;
1448 return;
1449 }
1450
1451 ovs_mutex_lock(&dump->netdev_lock);
1452 dump->netdev_current_dump = 0;
1453 dump->netdev_dumps
8842fdf1 1454 = netdev_ports_flow_dump_create(dpif_normalize_type(dpif_type(dpif_)),
19153657
VB
1455 &dump->netdev_dumps_num,
1456 dump->up.terse);
f2280b41
PB
1457 ovs_mutex_unlock(&dump->netdev_lock);
1458}
1459
a692410a
GT
1460static void
1461dpif_netlink_populate_flow_dump_types(struct dpif_netlink_flow_dump *dump,
1462 struct dpif_flow_dump_types *types)
1463{
1464 if (!types) {
1465 dump->types.ovs_flows = true;
1466 dump->types.netdev_flows = true;
1467 } else {
1468 memcpy(&dump->types, types, sizeof *types);
494a7455 1469 }
7e8b7199
PB
1470}
1471
ac64794a 1472static struct dpif_flow_dump *
7e8b7199 1473dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
a692410a 1474 struct dpif_flow_dump_types *types)
96fba48f 1475{
93451a0a
AS
1476 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1477 struct dpif_netlink_flow_dump *dump;
1478 struct dpif_netlink_flow request;
37a1300c
BP
1479 struct ofpbuf *buf;
1480
ac64794a
BP
1481 dump = xmalloc(sizeof *dump);
1482 dpif_flow_dump_init(&dump->up, dpif_);
37a1300c 1483
a692410a 1484 dpif_netlink_populate_flow_dump_types(dump, types);
37a1300c 1485
a692410a 1486 if (dump->types.ovs_flows) {
7e8b7199
PB
1487 dpif_netlink_flow_init(&request);
1488 request.cmd = OVS_FLOW_CMD_GET;
1489 request.dp_ifindex = dpif->dp_ifindex;
1490 request.ufid_present = false;
1491 request.ufid_terse = terse;
1492
1493 buf = ofpbuf_new(1024);
1494 dpif_netlink_flow_to_ofpbuf(&request, buf);
1495 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1496 ofpbuf_delete(buf);
1497 }
ac64794a 1498 atomic_init(&dump->status, 0);
64bb477f 1499 dump->up.terse = terse;
30053024 1500
f2280b41
PB
1501 start_netdev_dump(dpif_, dump);
1502
ac64794a 1503 return &dump->up;
704a1e09
BP
1504}
1505
1506static int
93451a0a 1507dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
704a1e09 1508{
93451a0a 1509 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
7e8b7199 1510 unsigned int nl_status = 0;
ac64794a 1511 int dump_status;
96fba48f 1512
a692410a 1513 if (dump->types.ovs_flows) {
7e8b7199
PB
1514 nl_status = nl_dump_done(&dump->nl_dump);
1515 }
1516
f2280b41
PB
1517 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1518 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1519
1520 if (err != 0 && err != EOPNOTSUPP) {
1521 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1522 }
1523 }
1524
1525 free(dump->netdev_dumps);
1526 ovs_mutex_destroy(&dump->netdev_lock);
1527
7424fc44
JR
1528 /* No other thread has access to 'dump' at this point. */
1529 atomic_read_relaxed(&dump->status, &dump_status);
ac64794a
BP
1530 free(dump);
1531 return dump_status ? dump_status : nl_status;
1532}
feebdea2 1533
93451a0a 1534struct dpif_netlink_flow_dump_thread {
ac64794a 1535 struct dpif_flow_dump_thread up;
93451a0a
AS
1536 struct dpif_netlink_flow_dump *dump;
1537 struct dpif_netlink_flow flow;
ac64794a
BP
1538 struct dpif_flow_stats stats;
1539 struct ofpbuf nl_flows; /* Always used to store flows. */
1540 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
f2280b41
PB
1541 int netdev_dump_idx; /* This thread current netdev dump index */
1542 bool netdev_done; /* If we are finished dumping netdevs */
1543
1544 /* (Key/Mask/Actions) Buffers for netdev dumping */
1545 struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
1546 struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
1547 struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
ac64794a
BP
1548};
1549
93451a0a
AS
1550static struct dpif_netlink_flow_dump_thread *
1551dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
ac64794a 1552{
93451a0a 1553 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
ac64794a
BP
1554}
1555
1556static struct dpif_flow_dump_thread *
93451a0a 1557dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
ac64794a 1558{
93451a0a
AS
1559 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1560 struct dpif_netlink_flow_dump_thread *thread;
ac64794a
BP
1561
1562 thread = xmalloc(sizeof *thread);
1563 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1564 thread->dump = dump;
1565 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1566 thread->nl_actions = NULL;
f2280b41
PB
1567 thread->netdev_dump_idx = 0;
1568 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
ac64794a
BP
1569
1570 return &thread->up;
1571}
1572
1573static void
93451a0a 1574dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
ac64794a 1575{
93451a0a
AS
1576 struct dpif_netlink_flow_dump_thread *thread
1577 = dpif_netlink_flow_dump_thread_cast(thread_);
ac64794a
BP
1578
1579 ofpbuf_uninit(&thread->nl_flows);
1580 ofpbuf_delete(thread->nl_actions);
1581 free(thread);
1582}
1583
1584static void
7a5e0ee7 1585dpif_netlink_flow_to_dpif_flow(struct dpif_flow *dpif_flow,
7fe98598 1586 const struct dpif_netlink_flow *datapath_flow)
ac64794a 1587{
7fe98598
NR
1588 dpif_flow->key = datapath_flow->key;
1589 dpif_flow->key_len = datapath_flow->key_len;
1590 dpif_flow->mask = datapath_flow->mask;
1591 dpif_flow->mask_len = datapath_flow->mask_len;
1592 dpif_flow->actions = datapath_flow->actions;
1593 dpif_flow->actions_len = datapath_flow->actions_len;
70e5ed6f 1594 dpif_flow->ufid_present = datapath_flow->ufid_present;
ec97c2df 1595 dpif_flow->pmd_id = PMD_ID_NULL;
70e5ed6f
JS
1596 if (datapath_flow->ufid_present) {
1597 dpif_flow->ufid = datapath_flow->ufid;
1598 } else {
1599 ovs_assert(datapath_flow->key && datapath_flow->key_len);
7a5e0ee7
IM
1600 odp_flow_key_hash(datapath_flow->key, datapath_flow->key_len,
1601 &dpif_flow->ufid);
70e5ed6f 1602 }
7fe98598 1603 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
d63ca532
GT
1604 dpif_flow->attrs.offloaded = false;
1605 dpif_flow->attrs.dp_layer = "ovs";
d7b55c5c 1606 dpif_flow->attrs.dp_extra_info = NULL;
ac64794a
BP
1607}
1608
f2280b41
PB
1609/* The design is such that all threads are working together on the first dump
1610 * to the last, in order (at first they all on dump 0).
1611 * When the first thread finds that the given dump is finished,
1612 * they all move to the next. If two or more threads find the same dump
1613 * is finished at the same time, the first one will advance the shared
1614 * netdev_current_dump and the others will catch up. */
1615static void
1616dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1617{
1618 struct dpif_netlink_flow_dump *dump = thread->dump;
1619
1620 ovs_mutex_lock(&dump->netdev_lock);
1621 /* if we haven't finished (dumped everything) */
1622 if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1623 /* if we are the first to find that current dump is finished
1624 * advance it. */
1625 if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1626 thread->netdev_dump_idx = ++dump->netdev_current_dump;
1627 /* did we just finish the last dump? done. */
1628 if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1629 thread->netdev_done = true;
1630 }
1631 } else {
1632 /* otherwise, we are behind, catch up */
1633 thread->netdev_dump_idx = dump->netdev_current_dump;
1634 }
1635 } else {
1636 /* some other thread finished */
1637 thread->netdev_done = true;
1638 }
1639 ovs_mutex_unlock(&dump->netdev_lock);
1640}
1641
1642static int
1643dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
1644 struct ofpbuf *key_buf,
1645 struct ofpbuf *mask_buf,
1646 struct nlattr *actions,
1647 struct dpif_flow_stats *stats,
d63ca532 1648 struct dpif_flow_attrs *attrs,
f2280b41
PB
1649 ovs_u128 *ufid,
1650 struct dpif_flow *flow,
19153657 1651 bool terse)
f2280b41 1652{
f2280b41
PB
1653 memset(flow, 0, sizeof *flow);
1654
19153657
VB
1655 if (!terse) {
1656 struct odp_flow_key_parms odp_parms = {
1657 .flow = &match->flow,
1658 .mask = &match->wc.masks,
1659 .support = {
1660 .max_vlan_headers = 2,
1661 .recirc = true,
1662 .ct_state = true,
1663 .ct_zone = true,
1664 .ct_mark = true,
1665 .ct_label = true,
1666 },
1667 };
1668 size_t offset;
1669
1670 /* Key */
1671 offset = key_buf->size;
1672 flow->key = ofpbuf_tail(key_buf);
1673 odp_flow_key_from_flow(&odp_parms, key_buf);
1674 flow->key_len = key_buf->size - offset;
1675
1676 /* Mask */
1677 offset = mask_buf->size;
1678 flow->mask = ofpbuf_tail(mask_buf);
1679 odp_parms.key_buf = key_buf;
1680 odp_flow_key_from_mask(&odp_parms, mask_buf);
1681 flow->mask_len = mask_buf->size - offset;
1682
1683 /* Actions */
1684 flow->actions = nl_attr_get(actions);
1685 flow->actions_len = nl_attr_get_size(actions);
1686 }
f2280b41
PB
1687
1688 /* Stats */
1689 memcpy(&flow->stats, stats, sizeof *stats);
1690
1691 /* UFID */
1692 flow->ufid_present = true;
1693 flow->ufid = *ufid;
1694
1695 flow->pmd_id = PMD_ID_NULL;
4742003c 1696
d63ca532 1697 memcpy(&flow->attrs, attrs, sizeof *attrs);
4742003c 1698
f2280b41
PB
1699 return 0;
1700}
1701
ac64794a 1702static int
93451a0a
AS
1703dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1704 struct dpif_flow *flows, int max_flows)
ac64794a 1705{
93451a0a
AS
1706 struct dpif_netlink_flow_dump_thread *thread
1707 = dpif_netlink_flow_dump_thread_cast(thread_);
1708 struct dpif_netlink_flow_dump *dump = thread->dump;
1709 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
ac64794a
BP
1710 int n_flows;
1711
1712 ofpbuf_delete(thread->nl_actions);
1713 thread->nl_actions = NULL;
1714
1715 n_flows = 0;
f2280b41
PB
1716 max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
1717
1718 while (!thread->netdev_done && n_flows < max_flows) {
1719 struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
1720 struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
1721 struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
1722 struct ofpbuf key, mask, act;
1723 struct dpif_flow *f = &flows[n_flows];
1724 int cur = thread->netdev_dump_idx;
1725 struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
1726 struct match match;
1727 struct nlattr *actions;
1728 struct dpif_flow_stats stats;
d63ca532 1729 struct dpif_flow_attrs attrs;
f2280b41
PB
1730 ovs_u128 ufid;
1731 bool has_next;
1732
1733 ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
1734 ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
1735 ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
1736 has_next = netdev_flow_dump_next(netdev_dump, &match,
d63ca532 1737 &actions, &stats, &attrs,
f2280b41
PB
1738 &ufid,
1739 &thread->nl_flows,
1740 &act);
1741 if (has_next) {
1742 dpif_netlink_netdev_match_to_dpif_flow(&match,
1743 &key, &mask,
1744 actions,
1745 &stats,
d63ca532 1746 &attrs,
f2280b41
PB
1747 &ufid,
1748 f,
1749 dump->up.terse);
1750 n_flows++;
1751 } else {
1752 dpif_netlink_advance_netdev_dump(thread);
1753 }
1754 }
1755
a692410a 1756 if (!(dump->types.ovs_flows)) {
7e8b7199
PB
1757 return n_flows;
1758 }
1759
ac64794a 1760 while (!n_flows
6fd6ed71 1761 || (n_flows < max_flows && thread->nl_flows.size)) {
7fe98598 1762 struct dpif_netlink_flow datapath_flow;
ac64794a
BP
1763 struct ofpbuf nl_flow;
1764 int error;
1765
1766 /* Try to grab another flow. */
1767 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1768 break;
feebdea2 1769 }
30053024 1770
ac64794a 1771 /* Convert the flow to our output format. */
7fe98598 1772 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
30053024 1773 if (error) {
7424fc44 1774 atomic_store_relaxed(&dump->status, error);
ac64794a 1775 break;
feebdea2 1776 }
30053024 1777
64bb477f
JS
1778 if (dump->up.terse || datapath_flow.actions) {
1779 /* Common case: we don't want actions, or the flow includes
1780 * actions. */
7a5e0ee7 1781 dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
ac64794a
BP
1782 } else {
1783 /* Rare case: the flow does not include actions. Retrieve this
1784 * individual flow again to get the actions. */
70e5ed6f 1785 error = dpif_netlink_flow_get(dpif, &datapath_flow,
7fe98598 1786 &datapath_flow, &thread->nl_actions);
30053024
BP
1787 if (error == ENOENT) {
1788 VLOG_DBG("dumped flow disappeared on get");
ac64794a 1789 continue;
30053024 1790 } else if (error) {
10a89ef0
BP
1791 VLOG_WARN("error fetching dumped flow: %s",
1792 ovs_strerror(error));
7424fc44 1793 atomic_store_relaxed(&dump->status, error);
ac64794a 1794 break;
30053024 1795 }
30053024 1796
ac64794a
BP
1797 /* Save this flow. Then exit, because we only have one buffer to
1798 * handle this case. */
7a5e0ee7 1799 dpif_netlink_flow_to_dpif_flow(&flows[n_flows++], &datapath_flow);
ac64794a
BP
1800 break;
1801 }
feebdea2 1802 }
ac64794a 1803 return n_flows;
96fba48f
BP
1804}
1805
eabe7c68 1806static void
93451a0a
AS
1807dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1808 struct ofpbuf *buf)
96fba48f 1809{
89625d1e 1810 struct ovs_header *k_exec;
758c456d 1811 size_t key_ofs;
f7cd0081 1812
eabe7c68 1813 ofpbuf_prealloc_tailroom(buf, (64
cf62fa4c 1814 + dp_packet_size(d_exec->packet)
758c456d 1815 + ODP_KEY_METADATA_SIZE
eabe7c68 1816 + d_exec->actions_len));
f7cd0081 1817
df2c07f4 1818 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
69685a88 1819 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
f7cd0081 1820
89625d1e
BP
1821 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1822 k_exec->dp_ifindex = dp_ifindex;
f7cd0081 1823
89625d1e 1824 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
cf62fa4c
PS
1825 dp_packet_data(d_exec->packet),
1826 dp_packet_size(d_exec->packet));
758c456d
JR
1827
1828 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
beb75a40 1829 odp_key_from_dp_packet(buf, d_exec->packet);
758c456d
JR
1830 nl_msg_end_nested(buf, key_ofs);
1831
89625d1e
BP
1832 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1833 d_exec->actions, d_exec->actions_len);
43f9ac0a 1834 if (d_exec->probe) {
2e460098 1835 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
43f9ac0a 1836 }
27130224
AZ
1837 if (d_exec->mtu) {
1838 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1839 }
0442bfb1
TZ
1840
1841 if (d_exec->hash) {
1842 nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash);
1843 }
6bc60024
BP
1844}
1845
0f3358ea
BP
1846/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1847 * Returns the number actually executed (at least 1, if 'n_ops' is
1848 * positive). */
1849static size_t
93451a0a
AS
1850dpif_netlink_operate__(struct dpif_netlink *dpif,
1851 struct dpif_op **ops, size_t n_ops)
6bc60024 1852{
eabe7c68
BP
1853 struct op_auxdata {
1854 struct nl_transaction txn;
72d32ac0 1855
eabe7c68
BP
1856 struct ofpbuf request;
1857 uint64_t request_stub[1024 / 8];
72d32ac0
BP
1858
1859 struct ofpbuf reply;
1860 uint64_t reply_stub[1024 / 8];
8b668ee3 1861 } auxes[OPERATE_MAX_OPS];
eabe7c68 1862
8b668ee3 1863 struct nl_transaction *txnsp[OPERATE_MAX_OPS];
6bc60024
BP
1864 size_t i;
1865
8b668ee3 1866 n_ops = MIN(n_ops, OPERATE_MAX_OPS);
6bc60024 1867 for (i = 0; i < n_ops; i++) {
eabe7c68 1868 struct op_auxdata *aux = &auxes[i];
c2b565b5 1869 struct dpif_op *op = ops[i];
b99d3cee
BP
1870 struct dpif_flow_put *put;
1871 struct dpif_flow_del *del;
6fe09f8c 1872 struct dpif_flow_get *get;
93451a0a 1873 struct dpif_netlink_flow flow;
eabe7c68
BP
1874
1875 ofpbuf_use_stub(&aux->request,
1876 aux->request_stub, sizeof aux->request_stub);
1877 aux->txn.request = &aux->request;
b99d3cee 1878
72d32ac0
BP
1879 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1880 aux->txn.reply = NULL;
1881
b99d3cee
BP
1882 switch (op->type) {
1883 case DPIF_OP_FLOW_PUT:
fa37affa 1884 put = &op->flow_put;
93451a0a 1885 dpif_netlink_init_flow_put(dpif, put, &flow);
6bc60024 1886 if (put->stats) {
eabe7c68 1887 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1888 aux->txn.reply = &aux->reply;
6bc60024 1889 }
93451a0a 1890 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee
BP
1891 break;
1892
1893 case DPIF_OP_FLOW_DEL:
fa37affa 1894 del = &op->flow_del;
93451a0a 1895 dpif_netlink_init_flow_del(dpif, del, &flow);
b99d3cee 1896 if (del->stats) {
eabe7c68 1897 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1898 aux->txn.reply = &aux->reply;
b99d3cee 1899 }
93451a0a 1900 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee 1901 break;
6bc60024 1902
b99d3cee 1903 case DPIF_OP_EXECUTE:
0f3358ea
BP
1904 /* Can't execute a packet that won't fit in a Netlink attribute. */
1905 if (OVS_UNLIKELY(nl_attr_oversized(
fa37affa 1906 dp_packet_size(op->execute.packet)))) {
0f3358ea
BP
1907 /* Report an error immediately if this is the first operation.
1908 * Otherwise the easiest thing to do is to postpone to the next
1909 * call (when this will be the first operation). */
1910 if (i == 0) {
1911 VLOG_ERR_RL(&error_rl,
1912 "dropping oversized %"PRIu32"-byte packet",
fa37affa 1913 dp_packet_size(op->execute.packet));
0f3358ea
BP
1914 op->error = ENOBUFS;
1915 return 1;
1916 }
1917 n_ops = i;
1918 } else {
fa37affa 1919 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
0f3358ea
BP
1920 &aux->request);
1921 }
b99d3cee
BP
1922 break;
1923
6fe09f8c 1924 case DPIF_OP_FLOW_GET:
fa37affa 1925 get = &op->flow_get;
70e5ed6f 1926 dpif_netlink_init_flow_get(dpif, get, &flow);
6fe09f8c 1927 aux->txn.reply = get->buffer;
93451a0a 1928 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
6fe09f8c
JS
1929 break;
1930
b99d3cee 1931 default:
428b2edd 1932 OVS_NOT_REACHED();
6bc60024
BP
1933 }
1934 }
1935
6bc60024 1936 for (i = 0; i < n_ops; i++) {
eabe7c68 1937 txnsp[i] = &auxes[i].txn;
6bc60024 1938 }
a88b4e04 1939 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
6bc60024 1940
6bc60024 1941 for (i = 0; i < n_ops; i++) {
72d32ac0 1942 struct op_auxdata *aux = &auxes[i];
eabe7c68 1943 struct nl_transaction *txn = &auxes[i].txn;
c2b565b5 1944 struct dpif_op *op = ops[i];
b99d3cee
BP
1945 struct dpif_flow_put *put;
1946 struct dpif_flow_del *del;
6fe09f8c 1947 struct dpif_flow_get *get;
6bc60024 1948
b99d3cee 1949 op->error = txn->error;
6bc60024 1950
b99d3cee
BP
1951 switch (op->type) {
1952 case DPIF_OP_FLOW_PUT:
fa37affa 1953 put = &op->flow_put;
cfceb2b5 1954 if (put->stats) {
b99d3cee 1955 if (!op->error) {
93451a0a 1956 struct dpif_netlink_flow reply;
cfceb2b5 1957
93451a0a
AS
1958 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1959 txn->reply);
cfceb2b5 1960 if (!op->error) {
93451a0a 1961 dpif_netlink_flow_get_stats(&reply, put->stats);
cfceb2b5
BP
1962 }
1963 }
6bc60024 1964 }
b99d3cee
BP
1965 break;
1966
1967 case DPIF_OP_FLOW_DEL:
fa37affa 1968 del = &op->flow_del;
cfceb2b5 1969 if (del->stats) {
b99d3cee 1970 if (!op->error) {
93451a0a 1971 struct dpif_netlink_flow reply;
cfceb2b5 1972
93451a0a
AS
1973 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1974 txn->reply);
cfceb2b5 1975 if (!op->error) {
93451a0a 1976 dpif_netlink_flow_get_stats(&reply, del->stats);
cfceb2b5
BP
1977 }
1978 }
b99d3cee
BP
1979 }
1980 break;
1981
1982 case DPIF_OP_EXECUTE:
1983 break;
1984
6fe09f8c 1985 case DPIF_OP_FLOW_GET:
fa37affa 1986 get = &op->flow_get;
6fe09f8c 1987 if (!op->error) {
93451a0a 1988 struct dpif_netlink_flow reply;
6fe09f8c 1989
93451a0a 1990 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
6fe09f8c 1991 if (!op->error) {
7a5e0ee7 1992 dpif_netlink_flow_to_dpif_flow(get->flow, &reply);
6fe09f8c
JS
1993 }
1994 }
1995 break;
1996
b99d3cee 1997 default:
428b2edd 1998 OVS_NOT_REACHED();
6bc60024
BP
1999 }
2000
72d32ac0
BP
2001 ofpbuf_uninit(&aux->request);
2002 ofpbuf_uninit(&aux->reply);
6bc60024 2003 }
0f3358ea
BP
2004
2005 return n_ops;
eabe7c68
BP
2006}
2007
6c343984
PB
2008static int
2009parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
2010{
8842fdf1 2011 const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
6c343984
PB
2012 struct dpif_flow *dpif_flow = get->flow;
2013 struct match match;
2014 struct nlattr *actions;
2015 struct dpif_flow_stats stats;
d63ca532 2016 struct dpif_flow_attrs attrs;
6c343984
PB
2017 struct ofpbuf buf;
2018 uint64_t act_buf[1024 / 8];
2019 struct odputil_keybuf maskbuf;
2020 struct odputil_keybuf keybuf;
2021 struct odputil_keybuf actbuf;
2022 struct ofpbuf key, mask, act;
2023 int err;
2024
2025 ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
8842fdf1
IM
2026 err = netdev_ports_flow_get(dpif_type_str, &match, &actions, get->ufid,
2027 &stats, &attrs, &buf);
6c343984
PB
2028 if (err) {
2029 return err;
2030 }
2031
2032 VLOG_DBG("found flow from netdev, translating to dpif flow");
2033
2034 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2035 ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
2036 ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
2037 dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
d63ca532 2038 &stats, &attrs,
6c343984
PB
2039 (ovs_u128 *) get->ufid,
2040 dpif_flow,
2041 false);
2042 ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
2043 dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
2044 dpif_flow->actions_len = nl_attr_get_size(actions);
2045
2046 return 0;
2047}
2048
8b668ee3
PB
2049static int
2050parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
2051{
8842fdf1 2052 const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
8b668ee3
PB
2053 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
2054 struct match match;
2055 odp_port_t in_port;
2056 const struct nlattr *nla;
2057 size_t left;
8b668ee3
PB
2058 struct netdev *dev;
2059 struct offload_info info;
2060 ovs_be16 dst_port = 0;
d9677a1f 2061 uint8_t csum_on = false;
8b668ee3
PB
2062 int err;
2063
2064 if (put->flags & DPIF_FP_PROBE) {
2065 return EOPNOTSUPP;
2066 }
2067
2068 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
2069 put->mask_len, &match);
2070 if (err) {
2071 return err;
2072 }
2073
8b668ee3 2074 in_port = match.flow.in_port.odp_port;
8842fdf1 2075 dev = netdev_ports_get(in_port, dpif_type_str);
8b668ee3
PB
2076 if (!dev) {
2077 return EOPNOTSUPP;
2078 }
2079
00a0a011 2080 /* Get tunnel dst port */
8b668ee3
PB
2081 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
2082 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
2083 const struct netdev_tunnel_config *tnl_cfg;
2084 struct netdev *outdev;
2085 odp_port_t out_port;
2086
8b668ee3 2087 out_port = nl_attr_get_odp_port(nla);
8842fdf1 2088 outdev = netdev_ports_get(out_port, dpif_type_str);
8b668ee3
PB
2089 if (!outdev) {
2090 err = EOPNOTSUPP;
2091 goto out;
2092 }
2093 tnl_cfg = netdev_get_tunnel_config(outdev);
2094 if (tnl_cfg && tnl_cfg->dst_port != 0) {
2095 dst_port = tnl_cfg->dst_port;
2096 }
d9677a1f
EB
2097 if (tnl_cfg) {
2098 csum_on = tnl_cfg->csum;
2099 }
8b668ee3
PB
2100 netdev_close(outdev);
2101 }
2102 }
2103
8b668ee3 2104 info.tp_dst_port = dst_port;
d9677a1f 2105 info.tunnel_csum_on = csum_on;
b2ae4069
PB
2106 info.recirc_id_shared_with_tc = (dpif->user_features
2107 & OVS_DP_F_TC_RECIRC_SHARING);
65b84d4a 2108 info.tc_modify_flow_deleted = false;
8b668ee3
PB
2109 err = netdev_flow_put(dev, &match,
2110 CONST_CAST(struct nlattr *, put->actions),
2111 put->actions_len,
2112 CONST_CAST(ovs_u128 *, put->ufid),
2113 &info, put->stats);
2114
2115 if (!err) {
2116 if (put->flags & DPIF_FP_MODIFY) {
2117 struct dpif_op *opp;
2118 struct dpif_op op;
2119
2120 op.type = DPIF_OP_FLOW_DEL;
fa37affa
BP
2121 op.flow_del.key = put->key;
2122 op.flow_del.key_len = put->key_len;
2123 op.flow_del.ufid = put->ufid;
2124 op.flow_del.pmd_id = put->pmd_id;
2125 op.flow_del.stats = NULL;
2126 op.flow_del.terse = false;
8b668ee3
PB
2127
2128 opp = &op;
2129 dpif_netlink_operate__(dpif, &opp, 1);
2130 }
2131
2132 VLOG_DBG("added flow");
2133 } else if (err != EEXIST) {
738c785f 2134 struct netdev *oor_netdev = NULL;
1028cb71 2135 enum vlog_level level;
738c785f
SB
2136 if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) {
2137 /*
2138 * We need to set OOR on the input netdev (i.e, 'dev') for the
2139 * flow. But if the flow has a tunnel attribute (i.e, decap action,
2140 * with a virtual device like a VxLAN interface as its in-port),
2141 * then lookup and set OOR on the underlying tunnel (real) netdev.
2142 */
2143 oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
2144 if (!oor_netdev) {
2145 /* Not a 'tunnel' flow */
2146 oor_netdev = dev;
2147 }
2148 netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true);
2149 }
1028cb71 2150 level = (err == ENOSPC || err == EOPNOTSUPP) ? VLL_DBG : VLL_ERR;
2151 VLOG_RL(&rl, level, "failed to offload flow: %s: %s",
2152 ovs_strerror(err),
2153 (oor_netdev ? oor_netdev->name : dev->name));
8b668ee3
PB
2154 }
2155
2156out:
2157 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2158 /* Modified rule can't be offloaded, try and delete from HW */
65b84d4a 2159 int del_err = 0;
2160
2161 if (!info.tc_modify_flow_deleted) {
2162 del_err = netdev_flow_del(dev, put->ufid, put->stats);
2163 }
8b668ee3
PB
2164
2165 if (!del_err) {
2166 /* Delete from hw success, so old flow was offloaded.
2167 * Change flags to create the flow in kernel */
2168 put->flags &= ~DPIF_FP_MODIFY;
2169 put->flags |= DPIF_FP_CREATE;
2170 } else if (del_err != ENOENT) {
2171 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2172 ovs_strerror(del_err));
2173 /* stop proccesing the flow in kernel */
2174 err = 0;
2175 }
2176 }
2177
2178 netdev_close(dev);
2179
2180 return err;
2181}
2182
8b668ee3
PB
2183static int
2184try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
eabe7c68 2185{
8b668ee3 2186 int err = EOPNOTSUPP;
9b00386b 2187
8b668ee3
PB
2188 switch (op->type) {
2189 case DPIF_OP_FLOW_PUT: {
fa37affa 2190 struct dpif_flow_put *put = &op->flow_put;
8b668ee3
PB
2191
2192 if (!put->ufid) {
2193 break;
2194 }
3cd99886 2195
8b668ee3 2196 err = parse_flow_put(dpif, put);
f7392b44 2197 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
8b668ee3
PB
2198 break;
2199 }
0335a89c 2200 case DPIF_OP_FLOW_DEL: {
fa37affa 2201 struct dpif_flow_del *del = &op->flow_del;
0335a89c
PB
2202
2203 if (!del->ufid) {
2204 break;
2205 }
3cd99886 2206
8842fdf1
IM
2207 err = netdev_ports_flow_del(
2208 dpif_normalize_type(dpif_type(&dpif->dpif)),
2209 del->ufid,
2210 del->stats);
f7392b44 2211 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
0335a89c
PB
2212 break;
2213 }
6c343984 2214 case DPIF_OP_FLOW_GET: {
fa37affa 2215 struct dpif_flow_get *get = &op->flow_get;
6c343984 2216
fa37affa 2217 if (!op->flow_get.ufid) {
6c343984
PB
2218 break;
2219 }
3cd99886 2220
6c343984 2221 err = parse_flow_get(dpif, get);
f7392b44 2222 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
6c343984
PB
2223 break;
2224 }
8b668ee3
PB
2225 case DPIF_OP_EXECUTE:
2226 default:
2227 break;
2228 }
2229
2230 return err;
2231}
2232
/* Runs all 'n_ops' operations in 'ops' against 'dpif', repeatedly calling
 * dpif_netlink_operate__() and advancing past however many operations each
 * call reports as executed. */
static void
dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
                            size_t n_ops)
{
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
2244
8b668ee3 2245static void
57924fc9
SB
2246dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops,
2247 enum dpif_offload_type offload_type)
8b668ee3
PB
2248{
2249 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2250 struct dpif_op *new_ops[OPERATE_MAX_OPS];
2251 int count = 0;
2252 int i = 0;
2253 int err = 0;
2254
57924fc9
SB
2255 if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) {
2256 VLOG_DBG("Invalid offload_type: %d", offload_type);
2257 return;
2258 }
2259
2260 if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) {
8b668ee3
PB
2261 while (n_ops > 0) {
2262 count = 0;
2263
2264 while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2265 struct dpif_op *op = ops[i++];
2266
2267 err = try_send_to_netdev(dpif, op);
2268 if (err && err != EEXIST) {
57924fc9
SB
2269 if (offload_type == DPIF_OFFLOAD_ALWAYS) {
2270 /* We got an error while offloading an op. Since
2271 * OFFLOAD_ALWAYS is specified, we stop further
2272 * processing and return to the caller without
2273 * invoking kernel datapath as fallback. But the
2274 * interface requires us to process all n_ops; so
2275 * return the same error in the remaining ops too.
2276 */
2277 op->error = err;
2278 n_ops--;
2279 while (n_ops > 0) {
2280 op = ops[i++];
2281 op->error = err;
2282 n_ops--;
2283 }
2284 return;
2285 }
8b668ee3
PB
2286 new_ops[count++] = op;
2287 } else {
2288 op->error = err;
2289 }
2290
2291 n_ops--;
2292 }
2293
2294 dpif_netlink_operate_chunks(dpif, new_ops, count);
2295 }
57924fc9 2296 } else if (offload_type != DPIF_OFFLOAD_ALWAYS) {
8b668ee3
PB
2297 dpif_netlink_operate_chunks(dpif, ops, n_ops);
2298 }
2299}
2300
09cac43f
NR
2301#if _WIN32
2302static void
2303dpif_netlink_handler_uninit(struct dpif_handler *handler)
2304{
2305 vport_delete_sock_pool(handler);
2306}
2307
2308static int
2309dpif_netlink_handler_init(struct dpif_handler *handler)
2310{
2311 return vport_create_sock_pool(handler);
2312}
2313#else
2314
2315static int
2316dpif_netlink_handler_init(struct dpif_handler *handler)
2317{
2318 handler->epoll_fd = epoll_create(10);
2319 return handler->epoll_fd < 0 ? errno : 0;
2320}
2321
2322static void
2323dpif_netlink_handler_uninit(struct dpif_handler *handler)
2324{
2325 close(handler->epoll_fd);
2326}
2327#endif
2328
1579cf67
AW
2329/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2330 * currently in 'dpif' in the kernel, by adding a new set of channels for
2331 * any kernel vport that lacks one and deleting any channels that have no
2332 * backing kernel vports. */
96fba48f 2333static int
93451a0a 2334dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
b90de034 2335 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 2336{
8381a3d3 2337 unsigned long int *keep_channels;
93451a0a 2338 struct dpif_netlink_vport vport;
8381a3d3
BP
2339 size_t keep_channels_nbits;
2340 struct nl_dump dump;
d57695d7
JS
2341 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2342 struct ofpbuf buf;
8381a3d3
BP
2343 int retval = 0;
2344 size_t i;
982b8810 2345
09cac43f
NR
2346 ovs_assert(!WINDOWS || n_handlers <= 1);
2347 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2348
1579cf67
AW
2349 if (dpif->n_handlers != n_handlers) {
2350 destroy_all_channels(dpif);
2351 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2352 for (i = 0; i < n_handlers; i++) {
09cac43f 2353 int error;
1579cf67
AW
2354 struct dpif_handler *handler = &dpif->handlers[i];
2355
09cac43f
NR
2356 error = dpif_netlink_handler_init(handler);
2357 if (error) {
1579cf67
AW
2358 size_t j;
2359
2360 for (j = 0; j < i; j++) {
aa5c0216 2361 struct dpif_handler *tmp = &dpif->handlers[j];
09cac43f 2362 dpif_netlink_handler_uninit(tmp);
1579cf67
AW
2363 }
2364 free(dpif->handlers);
2365 dpif->handlers = NULL;
2366
09cac43f 2367 return error;
1579cf67 2368 }
8381a3d3 2369 }
1579cf67
AW
2370 dpif->n_handlers = n_handlers;
2371 }
2372
2373 for (i = 0; i < n_handlers; i++) {
2374 struct dpif_handler *handler = &dpif->handlers[i];
2375
2376 handler->event_offset = handler->n_events = 0;
17411ecf 2377 }
b063d9f0 2378
8381a3d3
BP
2379 keep_channels_nbits = dpif->uc_array_size;
2380 keep_channels = bitmap_allocate(keep_channels_nbits);
982b8810 2381
d57695d7 2382 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a
AS
2383 dpif_netlink_port_dump_start__(dpif, &dump);
2384 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
8381a3d3 2385 uint32_t port_no = odp_to_u32(vport.port_no);
69c51582 2386 uint32_t upcall_pid;
8381a3d3 2387 int error;
50f80534 2388
1579cf67 2389 if (port_no >= dpif->uc_array_size
69c51582 2390 || !vport_get_pid(dpif, port_no, &upcall_pid)) {
622ea8fd
BP
2391 struct nl_sock *sock;
2392 error = create_nl_sock(dpif, &sock);
1579cf67 2393
d240e46a 2394 if (error) {
1579cf67
AW
2395 goto error;
2396 }
2397
622ea8fd 2398 error = vport_add_channel(dpif, vport.port_no, sock);
b063d9f0 2399 if (error) {
1579cf67 2400 VLOG_INFO("%s: could not add channels for port %s",
9b00386b 2401 dpif_name(&dpif->dpif), vport.name);
622ea8fd 2402 nl_sock_destroy(sock);
8381a3d3
BP
2403 retval = error;
2404 goto error;
982b8810 2405 }
622ea8fd 2406 upcall_pid = nl_sock_pid(sock);
8381a3d3 2407 }
50f80534 2408
8381a3d3 2409 /* Configure the vport to deliver misses to 'sock'. */
1579cf67 2410 if (vport.upcall_pids[0] == 0
69c51582
MC
2411 || vport.n_upcall_pids != 1
2412 || upcall_pid != vport.upcall_pids[0]) {
93451a0a 2413 struct dpif_netlink_vport vport_request;
989fd548 2414
93451a0a 2415 dpif_netlink_vport_init(&vport_request);
989fd548
JP
2416 vport_request.cmd = OVS_VPORT_CMD_SET;
2417 vport_request.dp_ifindex = dpif->dp_ifindex;
8381a3d3 2418 vport_request.port_no = vport.port_no;
69c51582
MC
2419 vport_request.n_upcall_pids = 1;
2420 vport_request.upcall_pids = &upcall_pid;
93451a0a 2421 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67 2422 if (error) {
989fd548
JP
2423 VLOG_WARN_RL(&error_rl,
2424 "%s: failed to set upcall pid on port: %s",
10a89ef0 2425 dpif_name(&dpif->dpif), ovs_strerror(error));
989fd548 2426
8381a3d3
BP
2427 if (error != ENODEV && error != ENOENT) {
2428 retval = error;
989fd548 2429 } else {
8381a3d3
BP
2430 /* The vport isn't really there, even though the dump says
2431 * it is. Probably we just hit a race after a port
2432 * disappeared. */
989fd548 2433 }
8381a3d3 2434 goto error;
50f80534 2435 }
8381a3d3 2436 }
14b4d2f9 2437
8381a3d3
BP
2438 if (port_no < keep_channels_nbits) {
2439 bitmap_set1(keep_channels, port_no);
2440 }
2441 continue;
2442
2443 error:
1579cf67 2444 vport_del_channels(dpif, vport.port_no);
982b8810 2445 }
8381a3d3 2446 nl_dump_done(&dump);
d57695d7 2447 ofpbuf_uninit(&buf);
b063d9f0 2448
8381a3d3
BP
2449 /* Discard any saved channels that we didn't reuse. */
2450 for (i = 0; i < keep_channels_nbits; i++) {
2451 if (!bitmap_is_set(keep_channels, i)) {
1579cf67 2452 vport_del_channels(dpif, u32_to_odp(i));
8381a3d3
BP
2453 }
2454 }
2455 free(keep_channels);
2456
2457 return retval;
2458}
2459
2460static int
93451a0a 2461dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
b90de034 2462 OVS_REQ_WRLOCK(dpif->upcall_lock)
8381a3d3 2463{
1579cf67 2464 if ((dpif->handlers != NULL) == enable) {
8381a3d3
BP
2465 return 0;
2466 } else if (!enable) {
1579cf67 2467 destroy_all_channels(dpif);
8381a3d3
BP
2468 return 0;
2469 } else {
93451a0a 2470 return dpif_netlink_refresh_channels(dpif, 1);
8381a3d3 2471 }
96fba48f
BP
2472}
2473
9fafa796 2474static int
93451a0a 2475dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
9fafa796 2476{
93451a0a 2477 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2478 int error;
2479
1579cf67 2480 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2481 error = dpif_netlink_recv_set__(dpif, enable);
1579cf67 2482 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2483
2484 return error;
2485}
2486
1954e6bb 2487static int
93451a0a 2488dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
1954e6bb 2489{
93451a0a 2490 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67
AW
2491 int error = 0;
2492
09cac43f
NR
2493#ifdef _WIN32
2494 /* Multiple upcall handlers will be supported once kernel datapath supports
2495 * it. */
2496 if (n_handlers > 1) {
2497 return error;
2498 }
2499#endif
2500
1579cf67
AW
2501 fat_rwlock_wrlock(&dpif->upcall_lock);
2502 if (dpif->handlers) {
93451a0a 2503 error = dpif_netlink_refresh_channels(dpif, n_handlers);
1579cf67
AW
2504 }
2505 fat_rwlock_unlock(&dpif->upcall_lock);
2506
2507 return error;
1954e6bb
AW
2508}
2509
aae51f53 2510static int
93451a0a 2511dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
aae51f53
BP
2512 uint32_t queue_id, uint32_t *priority)
2513{
2514 if (queue_id < 0xf000) {
17ee3c1f 2515 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
aae51f53
BP
2516 return 0;
2517 } else {
2518 return EINVAL;
2519 }
2520}
2521
96fba48f 2522static int
7a5e0ee7
IM
2523parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
2524 int *dp_ifindex)
856081f6 2525{
df2c07f4 2526 static const struct nl_policy ovs_packet_policy[] = {
856081f6 2527 /* Always present. */
df2c07f4 2528 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
856081f6 2529 .min_len = ETH_HEADER_LEN },
df2c07f4 2530 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
856081f6 2531
df2c07f4 2532 /* OVS_PACKET_CMD_ACTION only. */
e995e3df 2533 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
8b7ea2d4 2534 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
7321bda3 2535 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
0442bfb1
TZ
2536 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true },
2537 [OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true }
856081f6
BP
2538 };
2539
0a2869d5
BP
2540 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2541 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2542 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2543 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
982b8810 2544
0a2869d5 2545 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
df2c07f4
JP
2546 if (!nlmsg || !genl || !ovs_header
2547 || nlmsg->nlmsg_type != ovs_packet_family
2548 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2549 ARRAY_SIZE(ovs_packet_policy))) {
856081f6
BP
2550 return EINVAL;
2551 }
2552
0a2869d5
BP
2553 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2554 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2555 : -1);
aaff4b55
BP
2556 if (type < 0) {
2557 return EINVAL;
2558 }
82272ede 2559
877c9270 2560 /* (Re)set ALL fields of '*upcall' on successful return. */
aaff4b55 2561 upcall->type = type;
ebc56baa
BP
2562 upcall->key = CONST_CAST(struct nlattr *,
2563 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
df2c07f4 2564 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
7a5e0ee7 2565 odp_flow_key_hash(upcall->key, upcall->key_len, &upcall->ufid);
e995e3df 2566 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
8b7ea2d4 2567 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
7321bda3 2568 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
27130224 2569 upcall->mru = a[OVS_PACKET_ATTR_MRU];
0442bfb1 2570 upcall->hash = a[OVS_PACKET_ATTR_HASH];
da546e07
JR
2571
2572 /* Allow overwriting the netlink attribute header without reallocating. */
cf62fa4c 2573 dp_packet_use_stub(&upcall->packet,
da546e07
JR
2574 CONST_CAST(struct nlattr *,
2575 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2576 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2577 sizeof(struct nlattr));
cf62fa4c
PS
2578 dp_packet_set_data(&upcall->packet,
2579 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2580 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
da546e07 2581
2482b0b0
JS
2582 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2583 /* Ethernet frame */
2584 upcall->packet.packet_type = htonl(PT_ETH);
2585 } else {
2586 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2587 ovs_be16 ethertype = 0;
2588 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2589 upcall->key_len,
2590 OVS_KEY_ATTR_ETHERTYPE);
2591 if (et_nla) {
2592 ethertype = nl_attr_get_be16(et_nla);
2593 }
2594 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2595 ntohs(ethertype));
2596 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2597 }
2598
df2c07f4 2599 *dp_ifindex = ovs_header->dp_ifindex;
982b8810 2600
856081f6
BP
2601 return 0;
2602}
2603
09cac43f
NR
2604#ifdef _WIN32
2605#define PACKET_RECV_BATCH_SIZE 50
2606static int
2607dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2608 struct dpif_upcall *upcall, struct ofpbuf *buf)
2609 OVS_REQ_RDLOCK(dpif->upcall_lock)
2610{
2611 struct dpif_handler *handler;
2612 int read_tries = 0;
2613 struct dpif_windows_vport_sock *sock_pool;
2614 uint32_t i;
2615
2616 if (!dpif->handlers) {
2617 return EAGAIN;
2618 }
2619
2620 /* Only one handler is supported currently. */
2621 if (handler_id >= 1) {
2622 return EAGAIN;
2623 }
2624
2625 if (handler_id >= dpif->n_handlers) {
2626 return EAGAIN;
2627 }
2628
2629 handler = &dpif->handlers[handler_id];
2630 sock_pool = handler->vport_sock_pool;
2631
2632 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2633 for (;;) {
2634 int dp_ifindex;
2635 int error;
2636
2637 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2638 return EAGAIN;
2639 }
2640
a86bd14e 2641 error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
09cac43f
NR
2642 if (error == ENOBUFS) {
2643 /* ENOBUFS typically means that we've received so many
2644 * packets that the buffer overflowed. Try again
2645 * immediately because there's almost certainly a packet
2646 * waiting for us. */
2647 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2648 continue;
2649 }
2650
2651 /* XXX: ch->last_poll = time_msec(); */
2652 if (error) {
2653 if (error == EAGAIN) {
2654 break;
2655 }
2656 return error;
2657 }
2658
7a5e0ee7 2659 error = parse_odp_packet(buf, upcall, &dp_ifindex);
09cac43f
NR
2660 if (!error && dp_ifindex == dpif->dp_ifindex) {
2661 return 0;
2662 } else if (error) {
2663 return error;
2664 }
2665 }
2666 }
2667
2668 return EAGAIN;
2669}
2670#else
856081f6 2671static int
93451a0a
AS
2672dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2673 struct dpif_upcall *upcall, struct ofpbuf *buf)
b90de034 2674 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2675{
1579cf67 2676 struct dpif_handler *handler;
17411ecf 2677 int read_tries = 0;
96fba48f 2678
1579cf67
AW
2679 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2680 return EAGAIN;
982b8810
BP
2681 }
2682
1579cf67
AW
2683 handler = &dpif->handlers[handler_id];
2684 if (handler->event_offset >= handler->n_events) {
8522ba09 2685 int retval;
989fd548 2686
1579cf67 2687 handler->event_offset = handler->n_events = 0;
f6d1465c 2688
8522ba09 2689 do {
1579cf67 2690 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
989fd548 2691 dpif->uc_array_size, 0);
8522ba09 2692 } while (retval < 0 && errno == EINTR);
09cac43f 2693
8522ba09
BP
2694 if (retval < 0) {
2695 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
10a89ef0 2696 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
989fd548 2697 } else if (retval > 0) {
1579cf67 2698 handler->n_events = retval;
8522ba09 2699 }
8522ba09
BP
2700 }
2701
1579cf67
AW
2702 while (handler->event_offset < handler->n_events) {
2703 int idx = handler->epoll_events[handler->event_offset].data.u32;
69c51582 2704 struct dpif_channel *ch = &dpif->channels[idx];
8522ba09 2705
1579cf67 2706 handler->event_offset++;
17411ecf 2707
f6d1465c 2708 for (;;) {
8522ba09 2709 int dp_ifindex;
f6d1465c 2710 int error;
17411ecf 2711
f6d1465c
BP
2712 if (++read_tries > 50) {
2713 return EAGAIN;
2714 }
17411ecf 2715
a86bd14e 2716 error = nl_sock_recv(ch->sock, buf, NULL, false);
14b4d2f9
BP
2717 if (error == ENOBUFS) {
2718 /* ENOBUFS typically means that we've received so many
2719 * packets that the buffer overflowed. Try again
2720 * immediately because there's almost certainly a packet
2721 * waiting for us. */
9b00386b 2722 report_loss(dpif, ch, idx, handler_id);
14b4d2f9
BP
2723 continue;
2724 }
2725
2726 ch->last_poll = time_msec();
72d32ac0 2727 if (error) {
72d32ac0
BP
2728 if (error == EAGAIN) {
2729 break;
2730 }
f6d1465c
BP
2731 return error;
2732 }
17411ecf 2733
7a5e0ee7 2734 error = parse_odp_packet(buf, upcall, &dp_ifindex);
a12b3ead 2735 if (!error && dp_ifindex == dpif->dp_ifindex) {
f6d1465c 2736 return 0;
989fd548 2737 } else if (error) {
f6d1465c 2738 return error;
17411ecf 2739 }
982b8810 2740 }
50f80534 2741 }
982b8810
BP
2742
2743 return EAGAIN;
96fba48f 2744}
09cac43f 2745#endif
96fba48f 2746
9fafa796 2747static int
93451a0a
AS
2748dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2749 struct dpif_upcall *upcall, struct ofpbuf *buf)
9fafa796 2750{
93451a0a 2751 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2752 int error;
2753
1579cf67 2754 fat_rwlock_rdlock(&dpif->upcall_lock);
09cac43f
NR
2755#ifdef _WIN32
2756 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2757#else
93451a0a 2758 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
09cac43f 2759#endif
1579cf67 2760 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2761
2762 return error;
2763}
2764
96fba48f 2765static void
93451a0a 2766dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
b90de034 2767 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2768{
93451a0a 2769#ifdef _WIN32
09cac43f
NR
2770 uint32_t i;
2771 struct dpif_windows_vport_sock *sock_pool =
2772 dpif->handlers[handler_id].vport_sock_pool;
2773
2774 /* Only one handler is supported currently. */
2775 if (handler_id >= 1) {
2776 return;
2777 }
2778
2779 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2780 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2781 }
93451a0a 2782#else
1579cf67
AW
2783 if (dpif->handlers && handler_id < dpif->n_handlers) {
2784 struct dpif_handler *handler = &dpif->handlers[handler_id];
2785
2786 poll_fd_wait(handler->epoll_fd, POLLIN);
17411ecf 2787 }
93451a0a 2788#endif
96fba48f
BP
2789}
2790
1ba530f4 2791static void
93451a0a 2792dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
1ba530f4 2793{
93451a0a 2794 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
17411ecf 2795
b90de034 2796 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 2797 dpif_netlink_recv_wait__(dpif, handler_id);
b90de034
AW
2798 fat_rwlock_unlock(&dpif->upcall_lock);
2799}
2800
2801static void
93451a0a 2802dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
b90de034
AW
2803 OVS_REQ_WRLOCK(dpif->upcall_lock)
2804{
1579cf67 2805 if (dpif->handlers) {
69c51582 2806 size_t i;
1579cf67 2807
69c51582
MC
2808 if (!dpif->channels[0].sock) {
2809 return;
2810 }
1579cf67 2811 for (i = 0; i < dpif->uc_array_size; i++ ) {
1ba530f4 2812
69c51582 2813 nl_sock_drain(dpif->channels[i].sock);
989fd548 2814 }
1ba530f4 2815 }
b90de034
AW
2816}
2817
2818static void
93451a0a 2819dpif_netlink_recv_purge(struct dpif *dpif_)
b90de034 2820{
93451a0a 2821 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
2822
2823 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2824 dpif_netlink_recv_purge__(dpif);
1579cf67 2825 fat_rwlock_unlock(&dpif->upcall_lock);
1ba530f4
BP
2826}
2827
b5cbbcf6
AZ
/* Returns the first line of the kernel module's version file as a newly
 * allocated string (without the trailing newline), or NULL if the file
 * cannot be opened or read, or when not built for Linux.  The caller owns
 * the returned string. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            /* Cut the string at the first newline, if there is one. */
            line[strcspn(line, "\n")] = '\0';
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2857
c11c9f4a
DDP
2858struct dpif_netlink_ct_dump_state {
2859 struct ct_dpif_dump_state up;
2860 struct nl_ct_dump_state *nl_ct_dump;
2861};
2862
2863static int
2864dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2865 struct ct_dpif_dump_state **dump_,
ded30c74 2866 const uint16_t *zone, int *ptot_bkts)
c11c9f4a
DDP
2867{
2868 struct dpif_netlink_ct_dump_state *dump;
2869 int err;
2870
2871 dump = xzalloc(sizeof *dump);
ded30c74 2872 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
c11c9f4a
DDP
2873 if (err) {
2874 free(dump);
2875 return err;
2876 }
2877
2878 *dump_ = &dump->up;
2879
2880 return 0;
2881}
2882
2883static int
2884dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2885 struct ct_dpif_dump_state *dump_,
2886 struct ct_dpif_entry *entry)
2887{
2888 struct dpif_netlink_ct_dump_state *dump;
2889
2890 INIT_CONTAINER(dump, dump_, up);
2891
2892 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2893}
2894
2895static int
2896dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2897 struct ct_dpif_dump_state *dump_)
2898{
2899 struct dpif_netlink_ct_dump_state *dump;
c11c9f4a
DDP
2900
2901 INIT_CONTAINER(dump, dump_, up);
2902
1f161318 2903 int err = nl_ct_dump_done(dump->nl_ct_dump);
c11c9f4a
DDP
2904 free(dump);
2905 return err;
2906}
15eabc97
DDP
2907
2908static int
817a7657
YHW
2909dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2910 const struct ct_dpif_tuple *tuple)
15eabc97 2911{
817a7657
YHW
2912 if (tuple) {
2913 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2914 } else if (zone) {
15eabc97
DDP
2915 return nl_ct_flush_zone(*zone);
2916 } else {
2917 return nl_ct_flush();
2918 }
2919}
c11c9f4a 2920
906ff9d2
YHW
2921static int
2922dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2923 const uint32_t *default_limits,
2924 const struct ovs_list *zone_limits)
2925{
2926 struct ovs_zone_limit req_zone_limit;
2927
2928 if (ovs_ct_limit_family < 0) {
2929 return EOPNOTSUPP;
2930 }
2931
2932 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2933 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2934 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2935 OVS_CT_LIMIT_VERSION);
2936
2937 struct ovs_header *ovs_header;
2938 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2939 ovs_header->dp_ifindex = 0;
2940
2941 size_t opt_offset;
2942 opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2943 if (default_limits) {
2944 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2945 req_zone_limit.limit = *default_limits;
2946 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2947 }
2948
2949 if (!ovs_list_is_empty(zone_limits)) {
2950 struct ct_dpif_zone_limit *zone_limit;
2951
2952 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2953 req_zone_limit.zone_id = zone_limit->zone;
2954 req_zone_limit.limit = zone_limit->limit;
2955 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2956 }
2957 }
2958 nl_msg_end_nested(request, opt_offset);
2959
2960 int err = nl_transact(NETLINK_GENERIC, request, NULL);
c225ce22 2961 ofpbuf_delete(request);
906ff9d2
YHW
2962 return err;
2963}
2964
2965static int
2966dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
2967 uint32_t *default_limit,
2968 struct ovs_list *zone_limits)
2969{
2970 static const struct nl_policy ovs_ct_limit_policy[] = {
2971 [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
2972 .optional = true },
2973 };
2974
2975 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2976 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2977 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2978 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2979
2980 struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
2981
2982 if (!nlmsg || !genl || !ovs_header
2983 || nlmsg->nlmsg_type != ovs_ct_limit_family
2984 || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
2985 ARRAY_SIZE(ovs_ct_limit_policy))) {
2986 return EINVAL;
2987 }
2988
2989
2990 if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
2991 return EINVAL;
2992 }
2993
2994 int rem = NLA_ALIGN(
2995 nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
2996 const struct ovs_zone_limit *zone_limit =
2997 nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
2998
2999 while (rem >= sizeof *zone_limit) {
3000 if (zone_limit->zone_id == OVS_ZONE_LIMIT_DEFAULT_ZONE) {
3001 *default_limit = zone_limit->limit;
3002 } else if (zone_limit->zone_id < OVS_ZONE_LIMIT_DEFAULT_ZONE ||
3003 zone_limit->zone_id > UINT16_MAX) {
3004 } else {
3005 ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
3006 zone_limit->limit, zone_limit->count);
3007 }
3008 rem -= NLA_ALIGN(sizeof *zone_limit);
3009 zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
3010 (unsigned char *) zone_limit + NLA_ALIGN(sizeof *zone_limit));
3011 }
3012 return 0;
3013}
3014
3015static int
3016dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
3017 uint32_t *default_limit,
3018 const struct ovs_list *zone_limits_request,
3019 struct ovs_list *zone_limits_reply)
3020{
3021 if (ovs_ct_limit_family < 0) {
3022 return EOPNOTSUPP;
3023 }
3024
3025 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3026 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3027 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
3028 OVS_CT_LIMIT_VERSION);
3029
3030 struct ovs_header *ovs_header;
3031 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3032 ovs_header->dp_ifindex = 0;
3033
3034 if (!ovs_list_is_empty(zone_limits_request)) {
3035 size_t opt_offset = nl_msg_start_nested(request,
3036 OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3037
3038 struct ovs_zone_limit req_zone_limit;
3039 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
3040 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3041
3042 struct ct_dpif_zone_limit *zone_limit;
3043 LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
3044 req_zone_limit.zone_id = zone_limit->zone;
3045 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3046 }
3047
3048 nl_msg_end_nested(request, opt_offset);
3049 }
3050
3051 struct ofpbuf *reply;
3052 int err = nl_transact(NETLINK_GENERIC, request, &reply);
3053 if (err) {
3054 goto out;
3055 }
3056
3057 err = dpif_netlink_zone_limits_from_ofpbuf(reply, default_limit,
3058 zone_limits_reply);
3059
3060out:
c225ce22
YS
3061 ofpbuf_delete(request);
3062 ofpbuf_delete(reply);
906ff9d2
YHW
3063 return err;
3064}
3065
3066static int
3067dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
3068 const struct ovs_list *zone_limits)
3069{
3070 if (ovs_ct_limit_family < 0) {
3071 return EOPNOTSUPP;
3072 }
3073
3074 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3075 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3076 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
3077 OVS_CT_LIMIT_VERSION);
3078
3079 struct ovs_header *ovs_header;
3080 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3081 ovs_header->dp_ifindex = 0;
3082
3083 if (!ovs_list_is_empty(zone_limits)) {
3084 size_t opt_offset =
3085 nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3086
3087 struct ct_dpif_zone_limit *zone_limit;
3088 LIST_FOR_EACH (zone_limit, node, zone_limits) {
3089 struct ovs_zone_limit req_zone_limit;
3090 req_zone_limit.zone_id = zone_limit->zone;
3091 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3092 }
3093 nl_msg_end_nested(request, opt_offset);
3094 }
3095
3096 int err = nl_transact(NETLINK_GENERIC, request, NULL);
3097
c225ce22 3098 ofpbuf_delete(request);
906ff9d2
YHW
3099 return err;
3100}
1f161318
YHW
3101
3102#define NL_TP_NAME_PREFIX "ovs_tp_"
3103
/* One L3/L4 protocol pair for which a timeout sub-policy is maintained. */
struct dpif_netlink_timeout_policy_protocol {
    uint16_t l3num;     /* Address family, e.g. AF_INET or AF_INET6. */
    uint8_t l4num;      /* IP protocol, e.g. IPPROTO_TCP. */
};
3108
3109enum OVS_PACKED_ENUM dpif_netlink_support_timeout_policy_protocol {
3110 DPIF_NL_TP_AF_INET_TCP,
3111 DPIF_NL_TP_AF_INET_UDP,
3112 DPIF_NL_TP_AF_INET_ICMP,
3113 DPIF_NL_TP_AF_INET6_TCP,
3114 DPIF_NL_TP_AF_INET6_UDP,
3115 DPIF_NL_TP_AF_INET6_ICMPV6,
3116 DPIF_NL_TP_MAX
3117};
3118
3119#define DPIF_NL_ALL_TP ((1UL << DPIF_NL_TP_MAX) - 1)
3120
3121
3122static struct dpif_netlink_timeout_policy_protocol tp_protos[] = {
3123 [DPIF_NL_TP_AF_INET_TCP] = { .l3num = AF_INET, .l4num = IPPROTO_TCP },
3124 [DPIF_NL_TP_AF_INET_UDP] = { .l3num = AF_INET, .l4num = IPPROTO_UDP },
3125 [DPIF_NL_TP_AF_INET_ICMP] = { .l3num = AF_INET, .l4num = IPPROTO_ICMP },
3126 [DPIF_NL_TP_AF_INET6_TCP] = { .l3num = AF_INET6, .l4num = IPPROTO_TCP },
3127 [DPIF_NL_TP_AF_INET6_UDP] = { .l3num = AF_INET6, .l4num = IPPROTO_UDP },
3128 [DPIF_NL_TP_AF_INET6_ICMPV6] = { .l3num = AF_INET6,
3129 .l4num = IPPROTO_ICMPV6 },
3130};
3131
3132static void
3133dpif_netlink_format_tp_name(uint32_t id, uint16_t l3num, uint8_t l4num,
187bb41f 3134 char **tp_name)
1f161318 3135{
187bb41f
YHW
3136 struct ds ds = DS_EMPTY_INITIALIZER;
3137 ds_put_format(&ds, "%s%"PRIu32"_", NL_TP_NAME_PREFIX, id);
3138 ct_dpif_format_ipproto(&ds, l4num);
1f161318
YHW
3139
3140 if (l3num == AF_INET) {
187bb41f 3141 ds_put_cstr(&ds, "4");
1f161318 3142 } else if (l3num == AF_INET6 && l4num != IPPROTO_ICMPV6) {
187bb41f 3143 ds_put_cstr(&ds, "6");
1f161318
YHW
3144 }
3145
187bb41f
YHW
3146 ovs_assert(ds.length < CTNL_TIMEOUT_NAME_MAX);
3147
3148 *tp_name = ds_steal_cstr(&ds);
3149}
3150
3151static int
3152dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
3153 uint32_t tp_id, uint16_t dl_type,
3154 uint8_t nw_proto, char **tp_name,
3155 bool *is_generic)
3156{
3157 dpif_netlink_format_tp_name(tp_id,
3158 dl_type == ETH_TYPE_IP ? AF_INET : AF_INET6,
3159 nw_proto, tp_name);
3160 *is_generic = false;
3161 return 0;
1f161318
YHW
3162}
3163
/* Attribute mapping lists.
 *
 * Each entry is CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2), which
 * pairs CT_DPIF_TP_ATTR_<PROTO1>_<ATTR1> with CTA_TIMEOUT_<PROTO2>_<ATTR2>.
 * CT_DPIF_NL_TP_MAPPING itself is defined separately before each use of
 * these lists and undefined afterwards. */
#define CT_DPIF_NL_TP_TCP_MAPPINGS                                          \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT)                     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV)                     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, ESTABLISHED, ESTABLISHED)               \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, FIN_WAIT, FIN_WAIT)                     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE_WAIT, CLOSE_WAIT)                 \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, LAST_ACK, LAST_ACK)                     \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, TIME_WAIT, TIME_WAIT)                   \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, CLOSE, CLOSE)                           \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT2, SYN_SENT2)                   \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, RETRANSMIT, RETRANS)                    \
    CT_DPIF_NL_TP_MAPPING(TCP, TCP, UNACK, UNACK)

#define CT_DPIF_NL_TP_UDP_MAPPINGS                                          \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, SINGLE, UNREPLIED)                      \
    CT_DPIF_NL_TP_MAPPING(UDP, UDP, MULTIPLE, REPLIED)

#define CT_DPIF_NL_TP_ICMP_MAPPINGS                                         \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMP, FIRST, TIMEOUT)

#define CT_DPIF_NL_TP_ICMPV6_MAPPINGS                                       \
    CT_DPIF_NL_TP_MAPPING(ICMP, ICMPV6, FIRST, TIMEOUT)
3186
3187
3188#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \
3189if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) { \
3190 nl_tp->present |= 1 << CTA_TIMEOUT_##PROTO2##_##ATTR2; \
3191 nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2] = \
3192 tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]; \
3193}
3194
3195static void
3196dpif_netlink_get_nl_tp_tcp_attrs(const struct ct_dpif_timeout_policy *tp,
3197 struct nl_ct_timeout_policy *nl_tp)
3198{
3199 CT_DPIF_NL_TP_TCP_MAPPINGS
3200}
3201
3202static void
3203dpif_netlink_get_nl_tp_udp_attrs(const struct ct_dpif_timeout_policy *tp,
3204 struct nl_ct_timeout_policy *nl_tp)
3205{
3206 CT_DPIF_NL_TP_UDP_MAPPINGS
3207}
3208
3209static void
3210dpif_netlink_get_nl_tp_icmp_attrs(const struct ct_dpif_timeout_policy *tp,
3211 struct nl_ct_timeout_policy *nl_tp)
3212{
3213 CT_DPIF_NL_TP_ICMP_MAPPINGS
3214}
3215
3216static void
3217dpif_netlink_get_nl_tp_icmpv6_attrs(const struct ct_dpif_timeout_policy *tp,
3218 struct nl_ct_timeout_policy *nl_tp)
3219{
3220 CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3221}
3222
3223#undef CT_DPIF_NL_TP_MAPPING
3224
3225static void
3226dpif_netlink_get_nl_tp_attrs(const struct ct_dpif_timeout_policy *tp,
3227 uint8_t l4num, struct nl_ct_timeout_policy *nl_tp)
3228{
3229 nl_tp->present = 0;
3230
3231 if (l4num == IPPROTO_TCP) {
3232 dpif_netlink_get_nl_tp_tcp_attrs(tp, nl_tp);
3233 } else if (l4num == IPPROTO_UDP) {
3234 dpif_netlink_get_nl_tp_udp_attrs(tp, nl_tp);
3235 } else if (l4num == IPPROTO_ICMP) {
3236 dpif_netlink_get_nl_tp_icmp_attrs(tp, nl_tp);
3237 } else if (l4num == IPPROTO_ICMPV6) {
3238 dpif_netlink_get_nl_tp_icmpv6_attrs(tp, nl_tp);
3239 }
3240}
3241
3242#define CT_DPIF_NL_TP_MAPPING(PROTO1, PROTO2, ATTR1, ATTR2) \
3243if (nl_tp->present & (1 << CTA_TIMEOUT_##PROTO2##_##ATTR2)) { \
3244 if (tp->present & (1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1)) { \
3245 if (tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] != \
3246 nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]) { \
3247 VLOG_WARN_RL(&error_rl, "Inconsistent timeout policy %s " \
3248 "attribute %s=%"PRIu32" while %s=%"PRIu32, \
3249 nl_tp->name, "CTA_TIMEOUT_"#PROTO2"_"#ATTR2, \
3250 nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2], \
3251 "CT_DPIF_TP_ATTR_"#PROTO1"_"#ATTR1, \
3252 tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1]); \
3253 } \
3254 } else { \
3255 tp->present |= 1 << CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1; \
3256 tp->attrs[CT_DPIF_TP_ATTR_##PROTO1##_##ATTR1] = \
3257 nl_tp->attrs[CTA_TIMEOUT_##PROTO2##_##ATTR2]; \
3258 } \
3259}
3260
3261static void
3262dpif_netlink_set_ct_dpif_tp_tcp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3263 struct ct_dpif_timeout_policy *tp)
3264{
3265 CT_DPIF_NL_TP_TCP_MAPPINGS
3266}
3267
3268static void
3269dpif_netlink_set_ct_dpif_tp_udp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3270 struct ct_dpif_timeout_policy *tp)
3271{
3272 CT_DPIF_NL_TP_UDP_MAPPINGS
3273}
3274
3275static void
3276dpif_netlink_set_ct_dpif_tp_icmp_attrs(
3277 const struct nl_ct_timeout_policy *nl_tp,
3278 struct ct_dpif_timeout_policy *tp)
3279{
3280 CT_DPIF_NL_TP_ICMP_MAPPINGS
3281}
3282
3283static void
3284dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(
3285 const struct nl_ct_timeout_policy *nl_tp,
3286 struct ct_dpif_timeout_policy *tp)
3287{
3288 CT_DPIF_NL_TP_ICMPV6_MAPPINGS
3289}
3290
3291#undef CT_DPIF_NL_TP_MAPPING
3292
3293static void
3294dpif_netlink_set_ct_dpif_tp_attrs(const struct nl_ct_timeout_policy *nl_tp,
3295 struct ct_dpif_timeout_policy *tp)
3296{
3297 if (nl_tp->l4num == IPPROTO_TCP) {
3298 dpif_netlink_set_ct_dpif_tp_tcp_attrs(nl_tp, tp);
3299 } else if (nl_tp->l4num == IPPROTO_UDP) {
3300 dpif_netlink_set_ct_dpif_tp_udp_attrs(nl_tp, tp);
3301 } else if (nl_tp->l4num == IPPROTO_ICMP) {
3302 dpif_netlink_set_ct_dpif_tp_icmp_attrs(nl_tp, tp);
3303 } else if (nl_tp->l4num == IPPROTO_ICMPV6) {
3304 dpif_netlink_set_ct_dpif_tp_icmpv6_attrs(nl_tp, tp);
3305 }
3306}
3307
3308#ifdef _WIN32
3309static int
3310dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3311 const struct ct_dpif_timeout_policy *tp)
3312{
3313 return EOPNOTSUPP;
3314}
3315
3316static int
3317dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3318 uint32_t tp_id,
3319 struct ct_dpif_timeout_policy *tp)
3320{
3321 return EOPNOTSUPP;
3322}
3323
3324static int
3325dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3326 uint32_t tp_id)
3327{
3328 return EOPNOTSUPP;
3329}
3330
3331static int
3332dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3333 void **statep)
3334{
3335 return EOPNOTSUPP;
3336}
3337
3338static int
3339dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3340 void *state,
3341 struct ct_dpif_timeout_policy **tp)
3342{
3343 return EOPNOTSUPP;
3344}
3345
3346static int
3347dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3348 void *state)
3349{
3350 return EOPNOTSUPP;
3351}
3352#else
3353static int
3354dpif_netlink_ct_set_timeout_policy(struct dpif *dpif OVS_UNUSED,
3355 const struct ct_dpif_timeout_policy *tp)
3356{
1f161318
YHW
3357 int err = 0;
3358
3359 for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
187bb41f
YHW
3360 struct nl_ct_timeout_policy nl_tp;
3361 char *nl_tp_name;
3362
1f161318
YHW
3363 dpif_netlink_format_tp_name(tp->id, tp_protos[i].l3num,
3364 tp_protos[i].l4num, &nl_tp_name);
187bb41f
YHW
3365 ovs_strlcpy(nl_tp.name, nl_tp_name, sizeof nl_tp.name);
3366 free(nl_tp_name);
3367
1f161318
YHW
3368 nl_tp.l3num = tp_protos[i].l3num;
3369 nl_tp.l4num = tp_protos[i].l4num;
3370 dpif_netlink_get_nl_tp_attrs(tp, tp_protos[i].l4num, &nl_tp);
3371 err = nl_ct_set_timeout_policy(&nl_tp);
3372 if (err) {
3373 VLOG_WARN_RL(&error_rl, "failed to add timeout policy %s (%s)",
3374 nl_tp.name, ovs_strerror(err));
3375 goto out;
3376 }
3377 }
3378
3379out:
1f161318
YHW
3380 return err;
3381}
3382
3383static int
3384dpif_netlink_ct_get_timeout_policy(struct dpif *dpif OVS_UNUSED,
3385 uint32_t tp_id,
3386 struct ct_dpif_timeout_policy *tp)
3387{
1f161318
YHW
3388 int err = 0;
3389
3390 tp->id = tp_id;
3391 tp->present = 0;
3392 for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
187bb41f
YHW
3393 struct nl_ct_timeout_policy nl_tp;
3394 char *nl_tp_name;
3395
1f161318
YHW
3396 dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3397 tp_protos[i].l4num, &nl_tp_name);
187bb41f 3398 err = nl_ct_get_timeout_policy(nl_tp_name, &nl_tp);
1f161318
YHW
3399
3400 if (err) {
3401 VLOG_WARN_RL(&error_rl, "failed to get timeout policy %s (%s)",
187bb41f
YHW
3402 nl_tp_name, ovs_strerror(err));
3403 free(nl_tp_name);
1f161318
YHW
3404 goto out;
3405 }
187bb41f 3406 free(nl_tp_name);
1f161318
YHW
3407 dpif_netlink_set_ct_dpif_tp_attrs(&nl_tp, tp);
3408 }
3409
3410out:
1f161318
YHW
3411 return err;
3412}
3413
3414/* Returns 0 if all the sub timeout policies are deleted or not exist in the
3415 * kernel. Returns 1 if any sub timeout policy deletion failed. */
3416static int
3417dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
3418 uint32_t tp_id)
3419{
1f161318
YHW
3420 int ret = 0;
3421
3422 for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
187bb41f 3423 char *nl_tp_name;
1f161318
YHW
3424 dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
3425 tp_protos[i].l4num, &nl_tp_name);
187bb41f 3426 int err = nl_ct_del_timeout_policy(nl_tp_name);
1f161318
YHW
3427 if (err == ENOENT) {
3428 err = 0;
3429 }
3430 if (err) {
3431 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
3432 VLOG_INFO_RL(&rl, "failed to delete timeout policy %s (%s)",
187bb41f 3433 nl_tp_name, ovs_strerror(err));
1f161318
YHW
3434 ret = 1;
3435 }
187bb41f 3436 free(nl_tp_name);
1f161318
YHW
3437 }
3438
1f161318
YHW
3439 return ret;
3440}
3441
3442struct dpif_netlink_ct_timeout_policy_dump_state {
3443 struct nl_ct_timeout_policy_dump_state *nl_dump_state;
3444 struct hmap tp_dump_map;
3445};
3446
3447struct dpif_netlink_tp_dump_node {
3448 struct hmap_node hmap_node; /* node in tp_dump_map. */
3449 struct ct_dpif_timeout_policy *tp;
3450 uint32_t l3_l4_present;
3451};
3452
3453static struct dpif_netlink_tp_dump_node *
3454get_dpif_netlink_tp_dump_node_by_tp_id(uint32_t tp_id,
3455 struct hmap *tp_dump_map)
3456{
3457 struct dpif_netlink_tp_dump_node *tp_dump_node;
3458
3459 HMAP_FOR_EACH_WITH_HASH (tp_dump_node, hmap_node, hash_int(tp_id, 0),
3460 tp_dump_map) {
3461 if (tp_dump_node->tp->id == tp_id) {
3462 return tp_dump_node;
3463 }
3464 }
3465 return NULL;
3466}
3467
3468static void
3469update_dpif_netlink_tp_dump_node(
3470 const struct nl_ct_timeout_policy *nl_tp,
3471 struct dpif_netlink_tp_dump_node *tp_dump_node)
3472{
3473 dpif_netlink_set_ct_dpif_tp_attrs(nl_tp, tp_dump_node->tp);
3474 for (int i = 0; i < DPIF_NL_TP_MAX; ++i) {
3475 if (nl_tp->l3num == tp_protos[i].l3num &&
3476 nl_tp->l4num == tp_protos[i].l4num) {
3477 tp_dump_node->l3_l4_present |= 1 << i;
3478 break;
3479 }
3480 }
3481}
3482
3483static int
3484dpif_netlink_ct_timeout_policy_dump_start(struct dpif *dpif OVS_UNUSED,
3485 void **statep)
3486{
3487 struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
3488
3489 *statep = dump_state = xzalloc(sizeof *dump_state);
3490 int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
3491 if (err) {
3492 free(dump_state);
3493 return err;
3494 }
3495 hmap_init(&dump_state->tp_dump_map);
3496 return 0;
3497}
3498
3499static void
3500get_and_cleanup_tp_dump_node(struct hmap *hmap,
3501 struct dpif_netlink_tp_dump_node *tp_dump_node,
3502 struct ct_dpif_timeout_policy *tp)
3503{
3504 hmap_remove(hmap, &tp_dump_node->hmap_node);
3505 *tp = *tp_dump_node->tp;
3506 free(tp_dump_node->tp);
3507 free(tp_dump_node);
3508}
3509
3510static int
3511dpif_netlink_ct_timeout_policy_dump_next(struct dpif *dpif OVS_UNUSED,
3512 void *state,
3513 struct ct_dpif_timeout_policy *tp)
3514{
3515 struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3516 struct dpif_netlink_tp_dump_node *tp_dump_node;
3517 int err;
3518
3519 /* Dumps all the timeout policies in the kernel. */
3520 do {
3521 struct nl_ct_timeout_policy nl_tp;
3522 uint32_t tp_id;
3523
3524 err = nl_ct_timeout_policy_dump_next(dump_state->nl_dump_state,
3525 &nl_tp);
3526 if (err) {
3527 break;
3528 }
3529
3530 /* We only interest in OVS installed timeout policies. */
3531 if (!ovs_scan(nl_tp.name, NL_TP_NAME_PREFIX"%"PRIu32, &tp_id)) {
3532 continue;
3533 }
3534
3535 tp_dump_node = get_dpif_netlink_tp_dump_node_by_tp_id(
3536 tp_id, &dump_state->tp_dump_map);
3537 if (!tp_dump_node) {
3538 tp_dump_node = xzalloc(sizeof *tp_dump_node);
3539 tp_dump_node->tp = xzalloc(sizeof *tp_dump_node->tp);
3540 tp_dump_node->tp->id = tp_id;
3541 hmap_insert(&dump_state->tp_dump_map, &tp_dump_node->hmap_node,
3542 hash_int(tp_id, 0));
3543 }
3544
3545 update_dpif_netlink_tp_dump_node(&nl_tp, tp_dump_node);
3546
3547 /* Returns one ct_dpif_timeout_policy if we gather all the L3/L4
3548 * sub-pieces. */
3549 if (tp_dump_node->l3_l4_present == DPIF_NL_ALL_TP) {
3550 get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3551 tp_dump_node, tp);
3552 break;
3553 }
3554 } while (true);
3555
3556 /* Dump the incomplete timeout policies. */
3557 if (err == EOF) {
3558 if (!hmap_is_empty(&dump_state->tp_dump_map)) {
3559 struct hmap_node *hmap_node = hmap_first(&dump_state->tp_dump_map);
3560 tp_dump_node = CONTAINER_OF(hmap_node,
3561 struct dpif_netlink_tp_dump_node,
3562 hmap_node);
3563 get_and_cleanup_tp_dump_node(&dump_state->tp_dump_map,
3564 tp_dump_node, tp);
3565 return 0;
3566 }
3567 }
3568
3569 return err;
3570}
3571
3572static int
3573dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
3574 void *state)
3575{
3576 struct dpif_netlink_ct_timeout_policy_dump_state *dump_state = state;
3577 struct dpif_netlink_tp_dump_node *tp_dump_node;
3578
3579 int err = nl_ct_timeout_policy_dump_done(dump_state->nl_dump_state);
3580 HMAP_FOR_EACH_POP (tp_dump_node, hmap_node, &dump_state->tp_dump_map) {
3581 free(tp_dump_node->tp);
3582 free(tp_dump_node);
3583 }
3584 hmap_destroy(&dump_state->tp_dump_map);
3585 free(dump_state);
3586 return err;
3587}
3588#endif
3589
5dddf960
JR
3590\f
3591/* Meters */
80738e5f
AZ
3592
3593/* Set of supported meter flags */
3594#define DP_SUPPORTED_METER_FLAGS_MASK \
3595 (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
3596
92d0d515
JP
3597/* Meter support was introduced in Linux 4.15. In some versions of
3598 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
3599 * when the meter was created, so all meters essentially had an id of
3600 * zero. Check for that condition and disable meters on those kernels. */
3601static bool probe_broken_meters(struct dpif *);
3602
5dddf960 3603static void
80738e5f
AZ
3604dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
3605 void *stub, size_t size, uint32_t command)
3606{
3607 ofpbuf_use_stub(buf, stub, size);
3608
3609 nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
3610 command, OVS_METER_VERSION);
3611
3612 struct ovs_header *ovs_header;
3613 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3614 ovs_header->dp_ifindex = dpif->dp_ifindex;
3615}
3616
3617/* Execute meter 'request' in the kernel datapath. If the command
3618 * fails, returns a positive errno value. Otherwise, stores the reply
3619 * in '*replyp', parses the policy according to 'reply_policy' into the
3620 * array of Netlink attribute in 'a', and returns 0. On success, the
3621 * caller is responsible for calling ofpbuf_delete() on '*replyp'
3622 * ('replyp' will contain pointers into 'a'). */
3623static int
3624dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3625 const struct nl_policy *reply_policy,
3626 struct nlattr **a, size_t size_a)
3627{
3628 int error = nl_transact(NETLINK_GENERIC, request, replyp);
3629 ofpbuf_uninit(request);
3630
3631 if (error) {
3632 return error;
3633 }
3634
3635 struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3636 struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3637 struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3638 sizeof *ovs_header);
3639 if (!nlmsg || !genl || !ovs_header
3640 || nlmsg->nlmsg_type != ovs_meter_family
3641 || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3642 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3643 VLOG_DBG_RL(&rl,
3644 "Kernel module response to meter tranaction is invalid");
3645 return EINVAL;
3646 }
3647 return 0;
3648}
3649
3650static void
3651dpif_netlink_meter_get_features(const struct dpif *dpif_,
5dddf960
JR
3652 struct ofputil_meter_features *features)
3653{
92d0d515
JP
3654 if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
3655 features = NULL;
3656 return;
3657 }
3658
80738e5f
AZ
3659 struct ofpbuf buf, *msg;
3660 uint64_t stub[1024 / 8];
3661
3662 static const struct nl_policy ovs_meter_features_policy[] = {
3663 [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
3664 [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
3665 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3666 };
3667 struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
3668
3669 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3670 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
3671 OVS_METER_CMD_FEATURES);
3672 if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
3673 ARRAY_SIZE(ovs_meter_features_policy))) {
3674 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3675 VLOG_INFO_RL(&rl,
3676 "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
3677 return;
3678 }
3679
3680 features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
3681 features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
3682
3683 /* Bands is a nested attribute of zero or more nested
3684 * band attributes. */
3685 if (a[OVS_METER_ATTR_BANDS]) {
3686 const struct nlattr *nla;
3687 size_t left;
3688
3689 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3690 const struct nlattr *band_nla;
3691 size_t band_left;
3692
3693 NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
3694 if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
3695 if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
3696 switch (nl_attr_get_u32(band_nla)) {
3697 case OVS_METER_BAND_TYPE_DROP:
3698 features->band_types |= 1 << OFPMBT13_DROP;
3699 break;
3700 }
3701 }
3702 }
3703 }
3704 }
3705 }
3706 features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
3707
3708 ofpbuf_delete(msg);
5dddf960
JR
3709}
3710
3711static int
60ebc04d
JP
3712dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
3713 struct ofputil_meter_config *config)
5dddf960 3714{
80738e5f
AZ
3715 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3716 struct ofpbuf buf, *msg;
3717 uint64_t stub[1024 / 8];
3718
3719 static const struct nl_policy ovs_meter_set_response_policy[] = {
3720 [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
3721 };
3722 struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
3723
3724 if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
3725 return EBADF; /* Unsupported flags set */
3726 }
3727
3728 for (size_t i = 0; i < config->n_bands; i++) {
3729 switch (config->bands[i].type) {
3730 case OFPMBT13_DROP:
3731 break;
3732 default:
3733 return ENODEV; /* Unsupported band type */
3734 }
3735 }
3736
3737 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
3738
8101f03f
JP
3739 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3740
80738e5f
AZ
3741 if (config->flags & OFPMF13_KBPS) {
3742 nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
3743 }
3744
3745 size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
3746 /* Bands */
3747 for (size_t i = 0; i < config->n_bands; ++i) {
3748 struct ofputil_meter_band * band = &config->bands[i];
3749 uint32_t band_type;
3750
3751 size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
3752
3753 switch (band->type) {
3754 case OFPMBT13_DROP:
3755 band_type = OVS_METER_BAND_TYPE_DROP;
3756 break;
3757 default:
3758 band_type = OVS_METER_BAND_TYPE_UNSPEC;
3759 }
3760 nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
3761 nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
3762 nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
3763 config->flags & OFPMF13_BURST ?
3764 band->burst_size : band->rate);
3765 nl_msg_end_nested(&buf, band_offset);
3766 }
3767 nl_msg_end_nested(&buf, bands_offset);
3768
3769 int error = dpif_netlink_meter_transact(&buf, &msg,
3770 ovs_meter_set_response_policy, a,
3771 ARRAY_SIZE(ovs_meter_set_response_policy));
3772 if (error) {
3773 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3774 VLOG_INFO_RL(&rl,
3775 "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
3776 return error;
3777 }
3778
8101f03f
JP
3779 if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3780 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3781 VLOG_INFO_RL(&rl,
3782 "Kernel returned a different meter id than requested");
3783 }
80738e5f
AZ
3784 ofpbuf_delete(msg);
3785 return 0;
5dddf960
JR
3786}
3787
60ebc04d
JP
3788static int
3789dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
3790 struct ofputil_meter_config *config)
3791{
3792 if (probe_broken_meters(dpif_)) {
3793 return ENOMEM;
3794 }
3795
3796 return dpif_netlink_meter_set__(dpif_, meter_id, config);
3797}
3798
80738e5f
AZ
3799/* Retrieve statistics and/or delete meter 'meter_id'. Statistics are
3800 * stored in 'stats', if it is not null. If 'command' is
3801 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3802 * retrieved. If 'command' is OVS_METER_CMD_GET, then statistics are
3803 * simply retrieved. */
5dddf960 3804static int
80738e5f
AZ
3805dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3806 ofproto_meter_id meter_id,
3807 struct ofputil_meter_stats *stats,
3808 uint16_t max_bands,
3809 enum ovs_meter_cmd command)
5dddf960 3810{
80738e5f
AZ
3811 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3812 struct ofpbuf buf, *msg;
3813 uint64_t stub[1024 / 8];
3814
3815 static const struct nl_policy ovs_meter_stats_policy[] = {
3816 [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3817 [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3818 .optional = true},
3819 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3820 };
3821 struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3822
3823 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3824
3825 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3826
3827 int error = dpif_netlink_meter_transact(&buf, &msg,
3828 ovs_meter_stats_policy, a,
3829 ARRAY_SIZE(ovs_meter_stats_policy));
3830 if (error) {
3831 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3832 VLOG_INFO_RL(&rl, "dpif_netlink_meter_transact %s failed",
3833 command == OVS_METER_CMD_GET ? "get" : "del");
3834 return error;
3835 }
3836
3837 if (stats
3838 && a[OVS_METER_ATTR_ID]
3839 && a[OVS_METER_ATTR_STATS]
3840 && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) == meter_id.uint32) {
3841 /* return stats */
3842 const struct ovs_flow_stats *stat;
3843 const struct nlattr *nla;
3844 size_t left;
3845
3846 stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3847 stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3848 stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3849
3850 if (a[OVS_METER_ATTR_BANDS]) {
3851 size_t n_bands = 0;
3852 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3853 const struct nlattr *band_nla;
3854 band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3855 if (band_nla && nl_attr_get_size(band_nla) \
3856 == sizeof(struct ovs_flow_stats)) {
3857 stat = nl_attr_get(band_nla);
3858
3859 if (n_bands < max_bands) {
3860 stats->bands[n_bands].packet_count
3861 = get_32aligned_u64(&stat->n_packets);
3862 stats->bands[n_bands].byte_count
3863 = get_32aligned_u64(&stat->n_bytes);
3864 ++n_bands;
3865 }
3866 } else {
3867 stats->bands[n_bands].packet_count = 0;
3868 stats->bands[n_bands].byte_count = 0;
3869 ++n_bands;
3870 }
3871 }
3872 stats->n_bands = n_bands;
3873 } else {
3874 /* For a non-existent meter, return 0 stats. */
3875 stats->n_bands = 0;
3876 }
3877 }
3878
3879 ofpbuf_delete(msg);
3880 return error;
5dddf960
JR
3881}
3882
3883static int
80738e5f
AZ
3884dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3885 struct ofputil_meter_stats *stats, uint16_t max_bands)
5dddf960 3886{
80738e5f
AZ
3887 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3888 OVS_METER_CMD_GET);
3889}
3890
3891static int
3892dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3893 struct ofputil_meter_stats *stats, uint16_t max_bands)
3894{
3895 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3896 OVS_METER_CMD_DEL);
5dddf960
JR
3897}
3898
92d0d515
JP
3899static bool
3900probe_broken_meters__(struct dpif *dpif)
3901{
3902 /* This test is destructive if a probe occurs while ovs-vswitchd is
3903 * running (e.g., an ovs-dpctl meter command is called), so choose a
3904 * random high meter id to make this less likely to occur. */
3905 ofproto_meter_id id1 = { 54545401 };
3906 ofproto_meter_id id2 = { 54545402 };
3907 struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
3908 struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
3909 struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
3910
3911 /* Try adding two meters and make sure that they both come back with
60ebc04d
JP
3912 * the proper meter id. Use the "__" version so that we don't cause
3913 * a recurve deadlock. */
3914 dpif_netlink_meter_set__(dpif, id1, &config1);
3915 dpif_netlink_meter_set__(dpif, id2, &config2);
92d0d515
JP
3916
3917 if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
3918 || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3919 VLOG_INFO("The kernel module has a broken meter implementation.");
3920 return true;
3921 }
3922
3923 dpif_netlink_meter_del(dpif, id1, NULL, 0);
3924 dpif_netlink_meter_del(dpif, id2, NULL, 0);
3925
3926 return false;
3927}
3928
3929static bool
3930probe_broken_meters(struct dpif *dpif)
3931{
3932 /* This is a once-only test because currently OVS only has at most a single
3933 * Netlink capable datapath on any given platform. */
3934 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3935
3936 static bool broken_meters = false;
3937 if (ovsthread_once_start(&once)) {
3938 broken_meters = probe_broken_meters__(dpif);
3939 ovsthread_once_done(&once);
3940 }
3941 return broken_meters;
3942}
5dddf960 3943\f
93451a0a 3944const struct dpif_class dpif_netlink_class = {
1a6f1e2a 3945 "system",
f87c1357 3946 false, /* cleanup_required */
c8973eb6 3947 NULL, /* init */
93451a0a 3948 dpif_netlink_enumerate,
0aeaabc8 3949 NULL,
93451a0a
AS
3950 dpif_netlink_open,
3951 dpif_netlink_close,
3952 dpif_netlink_destroy,
3953 dpif_netlink_run,
e4516b20 3954 NULL, /* wait */
93451a0a 3955 dpif_netlink_get_stats,
dcdcad68 3956 dpif_netlink_set_features,
93451a0a
AS
3957 dpif_netlink_port_add,
3958 dpif_netlink_port_del,
91364d18 3959 NULL, /* port_set_config */
93451a0a
AS
3960 dpif_netlink_port_query_by_number,
3961 dpif_netlink_port_query_by_name,
3962 dpif_netlink_port_get_pid,
3963 dpif_netlink_port_dump_start,
3964 dpif_netlink_port_dump_next,
3965 dpif_netlink_port_dump_done,
3966 dpif_netlink_port_poll,
3967 dpif_netlink_port_poll_wait,
3968 dpif_netlink_flow_flush,
3969 dpif_netlink_flow_dump_create,
3970 dpif_netlink_flow_dump_destroy,
3971 dpif_netlink_flow_dump_thread_create,
3972 dpif_netlink_flow_dump_thread_destroy,
3973 dpif_netlink_flow_dump_next,
3974 dpif_netlink_operate,
3975 dpif_netlink_recv_set,
3976 dpif_netlink_handlers_set,
d4f6865c 3977 NULL, /* set_config */
93451a0a
AS
3978 dpif_netlink_queue_to_priority,
3979 dpif_netlink_recv,
3980 dpif_netlink_recv_wait,
3981 dpif_netlink_recv_purge,
e4e74c3a 3982 NULL, /* register_dp_purge_cb */
6b31e073
RW
3983 NULL, /* register_upcall_cb */
3984 NULL, /* enable_upcall */
3985 NULL, /* disable_upcall */
b5cbbcf6 3986 dpif_netlink_get_datapath_version, /* get_datapath_version */
c11c9f4a
DDP
3987 dpif_netlink_ct_dump_start,
3988 dpif_netlink_ct_dump_next,
3989 dpif_netlink_ct_dump_done,
5dddf960 3990 dpif_netlink_ct_flush,
c92339ad
DB
3991 NULL, /* ct_set_maxconns */
3992 NULL, /* ct_get_maxconns */
875075b3 3993 NULL, /* ct_get_nconns */
64207120
DB
3994 NULL, /* ct_set_tcp_seq_chk */
3995 NULL, /* ct_get_tcp_seq_chk */
906ff9d2
YHW
3996 dpif_netlink_ct_set_limits,
3997 dpif_netlink_ct_get_limits,
3998 dpif_netlink_ct_del_limits,
1f161318
YHW
3999 dpif_netlink_ct_set_timeout_policy,
4000 dpif_netlink_ct_get_timeout_policy,
4001 dpif_netlink_ct_del_timeout_policy,
4002 dpif_netlink_ct_timeout_policy_dump_start,
4003 dpif_netlink_ct_timeout_policy_dump_next,
4004 dpif_netlink_ct_timeout_policy_dump_done,
187bb41f 4005 dpif_netlink_ct_get_timeout_policy_name,
4ea96698
DB
4006 NULL, /* ipf_set_enabled */
4007 NULL, /* ipf_set_min_frag */
4008 NULL, /* ipf_set_max_nfrags */
4009 NULL, /* ipf_get_status */
4010 NULL, /* ipf_dump_start */
4011 NULL, /* ipf_dump_next */
4012 NULL, /* ipf_dump_done */
5dddf960
JR
4013 dpif_netlink_meter_get_features,
4014 dpif_netlink_meter_set,
4015 dpif_netlink_meter_get,
4016 dpif_netlink_meter_del,
9df65060
VDA
4017 NULL, /* bond_add */
4018 NULL, /* bond_del */
4019 NULL, /* bond_stats_get */
96fba48f 4020};
93451a0a 4021
96fba48f 4022static int
93451a0a 4023dpif_netlink_init(void)
96fba48f 4024{
eb8ed438
BP
4025 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4026 static int error;
982b8810 4027
eb8ed438 4028 if (ovsthread_once_start(&once)) {
df2c07f4
JP
4029 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
4030 &ovs_datapath_family);
37a1300c 4031 if (error) {
e0e2410d 4032 VLOG_INFO("Generic Netlink family '%s' does not exist. "
cae7529c
CL
4033 "The Open vSwitch kernel module is probably not loaded.",
4034 OVS_DATAPATH_FAMILY);
37a1300c 4035 }
f0fef760 4036 if (!error) {
df2c07f4 4037 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
f0fef760 4038 }
37a1300c 4039 if (!error) {
df2c07f4 4040 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
37a1300c 4041 }
aaff4b55 4042 if (!error) {
df2c07f4
JP
4043 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
4044 &ovs_packet_family);
aaff4b55 4045 }
c7178a0b
EJ
4046 if (!error) {
4047 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
b3dcb73c 4048 &ovs_vport_mcgroup);
c7178a0b 4049 }
80738e5f
AZ
4050 if (!error) {
4051 if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
4052 VLOG_INFO("The kernel module does not support meters.");
4053 }
4054 }
906ff9d2
YHW
4055 if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
4056 &ovs_ct_limit_family) < 0) {
4057 VLOG_INFO("Generic Netlink family '%s' does not exist. "
4058 "Please update the Open vSwitch kernel module to enable "
4059 "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
4060 }
eb8ed438 4061
921c370a
EG
4062 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
4063
eb8ed438 4064 ovsthread_once_done(&once);
982b8810
BP
4065 }
4066
4067 return error;
96fba48f
BP
4068}
4069
c19e6535 4070bool
93451a0a 4071dpif_netlink_is_internal_device(const char *name)
9fe3b9a2 4072{
93451a0a 4073 struct dpif_netlink_vport reply;
c19e6535 4074 struct ofpbuf *buf;
9fe3b9a2 4075 int error;
96fba48f 4076
93451a0a 4077 error = dpif_netlink_vport_get(name, &reply, &buf);
c19e6535
BP
4078 if (!error) {
4079 ofpbuf_delete(buf);
141d9ce4 4080 } else if (error != ENODEV && error != ENOENT) {
c19e6535 4081 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
10a89ef0 4082 name, ovs_strerror(error));
96fba48f
BP
4083 }
4084
df2c07f4 4085 return reply.type == OVS_VPORT_TYPE_INTERNAL;
96fba48f 4086}
e0467f6d 4087
df2c07f4 4088/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
c19e6535
BP
4089 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
4090 * positive errno value.
4091 *
4092 * 'vport' will contain pointers into 'buf', so the caller should not free
4093 * 'buf' while 'vport' is still in use. */
4094static int
93451a0a 4095dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
c19e6535
BP
4096 const struct ofpbuf *buf)
4097{
df2c07f4
JP
4098 static const struct nl_policy ovs_vport_policy[] = {
4099 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
4100 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
4101 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1579cf67 4102 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
f7df9823 4103 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
c19e6535 4104 .optional = true },
df2c07f4 4105 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
bfda5239 4106 [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
c19e6535
BP
4107 };
4108
93451a0a 4109 dpif_netlink_vport_init(vport);
c19e6535 4110
0a2869d5
BP
4111 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4112 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4113 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4114 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4115
4116 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
df2c07f4
JP
4117 if (!nlmsg || !genl || !ovs_header
4118 || nlmsg->nlmsg_type != ovs_vport_family
4119 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
4120 ARRAY_SIZE(ovs_vport_policy))) {
c19e6535
BP
4121 return EINVAL;
4122 }
c19e6535 4123
f0fef760 4124 vport->cmd = genl->cmd;
df2c07f4 4125 vport->dp_ifindex = ovs_header->dp_ifindex;
4e022ec0 4126 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
df2c07f4
JP
4127 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
4128 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
b063d9f0 4129 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1579cf67
AW
4130 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
4131 / (sizeof *vport->upcall_pids);
4132 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
4133
b063d9f0 4134 }
df2c07f4
JP
4135 if (a[OVS_VPORT_ATTR_STATS]) {
4136 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
4137 }
df2c07f4
JP
4138 if (a[OVS_VPORT_ATTR_OPTIONS]) {
4139 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
4140 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535 4141 }
bfda5239
FL
4142 if (a[OVS_VPORT_ATTR_NETNSID]) {
4143 netnsid_set(&vport->netnsid,
4144 nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
4145 } else {
4146 netnsid_set_local(&vport->netnsid);
4147 }
c19e6535
BP
4148 return 0;
4149}
4150
df2c07f4 4151/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
c19e6535
BP
4152 * followed by Netlink attributes corresponding to 'vport'. */
4153static void
93451a0a
AS
4154dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
4155 struct ofpbuf *buf)
c19e6535 4156{
df2c07f4 4157 struct ovs_header *ovs_header;
f0fef760 4158
df2c07f4 4159 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
69685a88 4160 vport->cmd, OVS_VPORT_VERSION);
c19e6535 4161
df2c07f4
JP
4162 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4163 ovs_header->dp_ifindex = vport->dp_ifindex;
c19e6535 4164
4e022ec0
AW
4165 if (vport->port_no != ODPP_NONE) {
4166 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
c19e6535
BP
4167 }
4168
df2c07f4
JP
4169 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
4170 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
c19e6535
BP
4171 }
4172
4173 if (vport->name) {
df2c07f4 4174 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
c19e6535
BP
4175 }
4176
1579cf67
AW
4177 if (vport->upcall_pids) {
4178 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
4179 vport->upcall_pids,
4180 vport->n_upcall_pids * sizeof *vport->upcall_pids);
a24a6574 4181 }
b063d9f0 4182
c19e6535 4183 if (vport->stats) {
df2c07f4 4184 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
c19e6535
BP
4185 vport->stats, sizeof *vport->stats);
4186 }
4187
c19e6535 4188 if (vport->options) {
df2c07f4 4189 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
c19e6535
BP
4190 vport->options, vport->options_len);
4191 }
c19e6535
BP
4192}
4193
4194/* Clears 'vport' to "empty" values. */
4195void
93451a0a 4196dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
c19e6535
BP
4197{
4198 memset(vport, 0, sizeof *vport);
4e022ec0 4199 vport->port_no = ODPP_NONE;
c19e6535
BP
4200}
4201
4202/* Executes 'request' in the kernel datapath. If the command fails, returns a
4203 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
4204 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
df2c07f4 4205 * result of the command is expected to be an ovs_vport also, which is decoded
c19e6535
BP
4206 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
4207 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
4208int
93451a0a
AS
4209dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
4210 struct dpif_netlink_vport *reply,
4211 struct ofpbuf **bufp)
c19e6535 4212{
f0fef760 4213 struct ofpbuf *request_buf;
c19e6535
BP
4214 int error;
4215
cb22974d 4216 ovs_assert((reply != NULL) == (bufp != NULL));
c19e6535 4217
93451a0a 4218 error = dpif_netlink_init();
42bb6c72
BP
4219 if (error) {
4220 if (reply) {
4221 *bufp = NULL;
93451a0a 4222 dpif_netlink_vport_init(reply);
42bb6c72
BP
4223 }
4224 return error;
4225 }
4226
f0fef760 4227 request_buf = ofpbuf_new(1024);
93451a0a 4228 dpif_netlink_vport_to_ofpbuf(request, request_buf);
a88b4e04 4229 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
f0fef760 4230 ofpbuf_delete(request_buf);
c19e6535 4231
f0fef760
BP
4232 if (reply) {
4233 if (!error) {
93451a0a 4234 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
f0fef760 4235 }
c19e6535 4236 if (error) {
93451a0a 4237 dpif_netlink_vport_init(reply);
f0fef760
BP
4238 ofpbuf_delete(*bufp);
4239 *bufp = NULL;
c19e6535 4240 }
c19e6535
BP
4241 }
4242 return error;
4243}
4244
4245/* Obtains information about the kernel vport named 'name' and stores it into
4246 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
4247 * longer needed ('reply' will contain pointers into '*bufp'). */
4248int
93451a0a
AS
4249dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
4250 struct ofpbuf **bufp)
c19e6535 4251{
93451a0a 4252 struct dpif_netlink_vport request;
c19e6535 4253
93451a0a 4254 dpif_netlink_vport_init(&request);
df2c07f4 4255 request.cmd = OVS_VPORT_CMD_GET;
c19e6535
BP
4256 request.name = name;
4257
93451a0a 4258 return dpif_netlink_vport_transact(&request, reply, bufp);
c19e6535 4259}
93451a0a 4260
df2c07f4 4261/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
aaff4b55
BP
4262 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
4263 * positive errno value.
d6569377
BP
4264 *
4265 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
4266 * while 'dp' is still in use. */
4267static int
93451a0a 4268dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
d6569377 4269{
df2c07f4
JP
4270 static const struct nl_policy ovs_datapath_policy[] = {
4271 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
f7df9823 4272 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
d6569377 4273 .optional = true },
847108dc
AZ
4274 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
4275 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
4276 .optional = true },
dcdcad68
PB
4277 [OVS_DP_ATTR_USER_FEATURES] = {
4278 .type = NL_A_U32,
4279 .optional = true },
d6569377
BP
4280 };
4281
93451a0a 4282 dpif_netlink_dp_init(dp);
d6569377 4283
0a2869d5
BP
4284 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4285 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4286 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4287 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4288
4289 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
df2c07f4
JP
4290 if (!nlmsg || !genl || !ovs_header
4291 || nlmsg->nlmsg_type != ovs_datapath_family
4292 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
4293 ARRAY_SIZE(ovs_datapath_policy))) {
d6569377
BP
4294 return EINVAL;
4295 }
d6569377 4296
aaff4b55 4297 dp->cmd = genl->cmd;
df2c07f4
JP
4298 dp->dp_ifindex = ovs_header->dp_ifindex;
4299 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
4300 if (a[OVS_DP_ATTR_STATS]) {
6a54dedc 4301 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
d6569377 4302 }
982b8810 4303
847108dc 4304 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
6a54dedc 4305 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
847108dc
AZ
4306 }
4307
dcdcad68
PB
4308 if (a[OVS_DP_ATTR_USER_FEATURES]) {
4309 dp->user_features = nl_attr_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
4310 }
4311
d6569377
BP
4312 return 0;
4313}
4314
aaff4b55 4315/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
d6569377 4316static void
93451a0a 4317dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
d6569377 4318{
df2c07f4 4319 struct ovs_header *ovs_header;
d6569377 4320
df2c07f4 4321 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
69685a88
JG
4322 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
4323 OVS_DATAPATH_VERSION);
aaff4b55 4324
df2c07f4
JP
4325 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4326 ovs_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
4327
4328 if (dp->name) {
df2c07f4 4329 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
d6569377
BP
4330 }
4331
a24a6574
BP
4332 if (dp->upcall_pid) {
4333 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
4334 }
b063d9f0 4335
b7fd5e38
TG
4336 if (dp->user_features) {
4337 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
4338 }
4339
df2c07f4 4340 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
d6569377
BP
4341}
4342
4343/* Clears 'dp' to "empty" values. */
d3d8f1f7 4344static void
93451a0a 4345dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
d6569377
BP
4346{
4347 memset(dp, 0, sizeof *dp);
d6569377
BP
4348}
4349
aaff4b55 4350static void
93451a0a 4351dpif_netlink_dp_dump_start(struct nl_dump *dump)
aaff4b55 4352{
93451a0a 4353 struct dpif_netlink_dp request;
aaff4b55
BP
4354 struct ofpbuf *buf;
4355
93451a0a 4356 dpif_netlink_dp_init(&request);
df2c07f4 4357 request.cmd = OVS_DP_CMD_GET;
aaff4b55
BP
4358
4359 buf = ofpbuf_new(1024);
93451a0a 4360 dpif_netlink_dp_to_ofpbuf(&request, buf);
a88b4e04 4361 nl_dump_start(dump, NETLINK_GENERIC, buf);
aaff4b55
BP
4362 ofpbuf_delete(buf);
4363}
4364
d6569377
BP
4365/* Executes 'request' in the kernel datapath. If the command fails, returns a
4366 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
4367 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
aaff4b55
BP
4368 * result of the command is expected to be of the same form, which is decoded
4369 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
4370 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 4371static int
93451a0a
AS
4372dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
4373 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
d6569377 4374{
aaff4b55 4375 struct ofpbuf *request_buf;
d6569377 4376 int error;
d6569377 4377
cb22974d 4378 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 4379
aaff4b55 4380 request_buf = ofpbuf_new(1024);
93451a0a 4381 dpif_netlink_dp_to_ofpbuf(request, request_buf);
a88b4e04 4382 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
aaff4b55 4383 ofpbuf_delete(request_buf);
d6569377 4384
aaff4b55 4385 if (reply) {
93451a0a 4386 dpif_netlink_dp_init(reply);
aaff4b55 4387 if (!error) {
93451a0a 4388 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
aaff4b55 4389 }
d6569377 4390 if (error) {
aaff4b55
BP
4391 ofpbuf_delete(*bufp);
4392 *bufp = NULL;
d6569377 4393 }
d6569377
BP
4394 }
4395 return error;
4396}
4397
4398/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
4399 * The caller must free '*bufp' when the reply is no longer needed ('reply'
4400 * will contain pointers into '*bufp'). */
d3d8f1f7 4401static int
93451a0a
AS
4402dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
4403 struct ofpbuf **bufp)
d6569377 4404{
93451a0a
AS
4405 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
4406 struct dpif_netlink_dp request;
d6569377 4407
93451a0a 4408 dpif_netlink_dp_init(&request);
df2c07f4 4409 request.cmd = OVS_DP_CMD_GET;
254f2dc8 4410 request.dp_ifindex = dpif->dp_ifindex;
d6569377 4411
93451a0a 4412 return dpif_netlink_dp_transact(&request, reply, bufp);
d6569377 4413}
93451a0a 4414
df2c07f4 4415/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
37a1300c 4416 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
d6569377
BP
4417 * positive errno value.
4418 *
4419 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
4420 * while 'flow' is still in use. */
4421static int
93451a0a
AS
4422dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
4423 const struct ofpbuf *buf)
d6569377 4424{
70e5ed6f
JS
4425 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
4426 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
e6cc0bab 4427 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
df2c07f4 4428 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
f7df9823 4429 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
d6569377 4430 .optional = true },
df2c07f4
JP
4431 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
4432 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
ab79d262 4433 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
df2c07f4 4434 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 4435 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
70e5ed6f 4436 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
d6569377
BP
4437 };
4438
93451a0a 4439 dpif_netlink_flow_init(flow);
d6569377 4440
0a2869d5
BP
4441 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
4442 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
4443 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
4444 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
4445
4446 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
df2c07f4
JP
4447 if (!nlmsg || !genl || !ovs_header
4448 || nlmsg->nlmsg_type != ovs_flow_family
4449 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
4450 ARRAY_SIZE(ovs_flow_policy))) {
d6569377
BP
4451 return EINVAL;
4452 }
70e5ed6f
JS
4453 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
4454 return EINVAL;
4455 }
d6569377 4456
37a1300c 4457 flow->nlmsg_flags = nlmsg->nlmsg_flags;
df2c07f4 4458 flow->dp_ifindex = ovs_header->dp_ifindex;
70e5ed6f
JS
4459 if (a[OVS_FLOW_ATTR_KEY]) {
4460 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
4461 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
4462 }
e6cc0bab 4463
70e5ed6f 4464 if (a[OVS_FLOW_ATTR_UFID]) {
ab79d262 4465 flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
70e5ed6f
JS
4466 flow->ufid_present = true;
4467 }
e6cc0bab
AZ
4468 if (a[OVS_FLOW_ATTR_MASK]) {
4469 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
4470 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
4471 }
df2c07f4
JP
4472 if (a[OVS_FLOW_ATTR_ACTIONS]) {
4473 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
4474 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377 4475 }
df2c07f4
JP
4476 if (a[OVS_FLOW_ATTR_STATS]) {
4477 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
d6569377 4478 }
df2c07f4
JP
4479 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
4480 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
d6569377 4481 }
df2c07f4
JP
4482 if (a[OVS_FLOW_ATTR_USED]) {
4483 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
9e980142 4484 }
d6569377
BP
4485 return 0;
4486}
4487
beb75a40
JS
4488
4489/*
a8a3eee4
JS
4490 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
4491 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
4492 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
beb75a40
JS
4493 */
4494static void
4495put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
4496 const struct nlattr *data, uint16_t data_len)
4497{
4498 const struct nlattr *packet_type;
4499
4500 packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
4501
4502 if (packet_type) {
4503 /* exclude PACKET_TYPE Netlink attribute. */
4504 ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
4505 size_t packet_type_len = NL_A_U32_SIZE;
4506 size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
4507 size_t second_chunk_size = data_len - first_chunk_size
4508 - packet_type_len;
beb75a40 4509 struct nlattr *next_attr = nl_attr_next(packet_type);
1ca5b61b 4510 size_t ofs;
beb75a40 4511
1ca5b61b
JS
4512 ofs = nl_msg_start_nested(buf, type);
4513 nl_msg_put(buf, data, first_chunk_size);
4514 nl_msg_put(buf, next_attr, second_chunk_size);
a8a3eee4
JS
4515 if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
4516 ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
4517 const struct nlattr *nla;
4518
7c5793e6 4519 nla = nl_attr_find(buf, ofs + NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
a8a3eee4
JS
4520 if (nla) {
4521 ovs_be16 *ethertype;
4522
4523 ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
4524 *ethertype = pt;
4525 } else {
4526 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
4527 }
4528 }
1ca5b61b 4529 nl_msg_end_nested(buf, ofs);
beb75a40
JS
4530 } else {
4531 nl_msg_put_unspec(buf, type, data, data_len);
4532 }
4533}
4534
df2c07f4 4535/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
d6569377
BP
4536 * followed by Netlink attributes corresponding to 'flow'. */
4537static void
93451a0a
AS
4538dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
4539 struct ofpbuf *buf)
d6569377 4540{
df2c07f4 4541 struct ovs_header *ovs_header;
d6569377 4542
df2c07f4 4543 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
30b44744 4544 NLM_F_REQUEST | flow->nlmsg_flags,
69685a88 4545 flow->cmd, OVS_FLOW_VERSION);
37a1300c 4546
df2c07f4
JP
4547 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4548 ovs_header->dp_ifindex = flow->dp_ifindex;
d6569377 4549
70e5ed6f 4550 if (flow->ufid_present) {
ab79d262 4551 nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
70e5ed6f
JS
4552 }
4553 if (flow->ufid_terse) {
4554 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
4555 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
4556 | OVS_UFID_F_OMIT_ACTIONS);
4557 }
64bb477f
JS
4558 if (!flow->ufid_terse || !flow->ufid_present) {
4559 if (flow->key_len) {
beb75a40
JS
4560 put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
4561 flow->key_len);
64bb477f 4562 }
64bb477f 4563 if (flow->mask_len) {
beb75a40
JS
4564 put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
4565 flow->mask_len);
64bb477f
JS
4566 }
4567 if (flow->actions || flow->actions_len) {
4568 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
4569 flow->actions, flow->actions_len);
4570 }
d6569377
BP
4571 }
4572
4573 /* We never need to send these to the kernel. */
cb22974d
BP
4574 ovs_assert(!flow->stats);
4575 ovs_assert(!flow->tcp_flags);
4576 ovs_assert(!flow->used);
d6569377
BP
4577
4578 if (flow->clear) {
df2c07f4 4579 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
d6569377 4580 }
43f9ac0a
JR
4581 if (flow->probe) {
4582 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
4583 }
d6569377
BP
4584}
4585
4586/* Clears 'flow' to "empty" values. */
d3d8f1f7 4587static void
93451a0a 4588dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
d6569377
BP
4589{
4590 memset(flow, 0, sizeof *flow);
4591}
4592
4593/* Executes 'request' in the kernel datapath. If the command fails, returns a
4594 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
4595 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
37a1300c
BP
4596 * result of the command is expected to be a flow also, which is decoded and
4597 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
4598 * is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 4599static int
93451a0a
AS
4600dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
4601 struct dpif_netlink_flow *reply,
4602 struct ofpbuf **bufp)
d6569377 4603{
37a1300c 4604 struct ofpbuf *request_buf;
d6569377 4605 int error;
d6569377 4606
cb22974d 4607 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 4608
30b44744
BP
4609 if (reply) {
4610 request->nlmsg_flags |= NLM_F_ECHO;
4611 }
4612
37a1300c 4613 request_buf = ofpbuf_new(1024);
93451a0a 4614 dpif_netlink_flow_to_ofpbuf(request, request_buf);
a88b4e04 4615 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
37a1300c 4616 ofpbuf_delete(request_buf);
d6569377 4617
37a1300c
BP
4618 if (reply) {
4619 if (!error) {
93451a0a 4620 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
37a1300c 4621 }
d6569377 4622 if (error) {
93451a0a 4623 dpif_netlink_flow_init(reply);
37a1300c
BP
4624 ofpbuf_delete(*bufp);
4625 *bufp = NULL;
d6569377 4626 }
d6569377
BP
4627 }
4628 return error;
4629}
4630
4631static void
93451a0a
AS
4632dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
4633 struct dpif_flow_stats *stats)
d6569377
BP
4634{
4635 if (flow->stats) {
6a54dedc
BP
4636 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
4637 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
d6569377
BP
4638 } else {
4639 stats->n_packets = 0;
4640 stats->n_bytes = 0;
4641 }
0e70cdcb 4642 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
d6569377
BP
4643 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
4644}
e0467f6d 4645
14b4d2f9
BP
4646/* Logs information about a packet that was recently lost in 'ch' (in
4647 * 'dpif_'). */
4648static void
93451a0a 4649report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
1579cf67 4650 uint32_t handler_id)
14b4d2f9 4651{
14b4d2f9 4652 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
14b4d2f9
BP
4653 struct ds s;
4654
8d675c5a 4655 if (VLOG_DROP_WARN(&rl)) {
14b4d2f9
BP
4656 return;
4657 }
4658
4659 ds_init(&s);
4660 if (ch->last_poll != LLONG_MIN) {
4661 ds_put_format(&s, " (last polled %lld ms ago)",
4662 time_msec() - ch->last_poll);
4663 }
14b4d2f9 4664
1579cf67 4665 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
9b00386b 4666 dpif_name(&dpif->dpif), ch_idx, handler_id);
14b4d2f9
BP
4667 ds_destroy(&s);
4668}