]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netlink.c
system-traffic: Add conntrack per zone limit test case
[mirror_ovs.git] / lib / dpif-netlink.c
CommitLineData
96fba48f 1/*
aa5c0216 2 * Copyright (c) 2008-2017 Nicira, Inc.
96fba48f
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
9fe3b9a2 18
93451a0a 19#include "dpif-netlink.h"
96fba48f 20
96fba48f
BP
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <net/if.h>
b90fa799 26#include <linux/types.h>
aae51f53 27#include <linux/pkt_sched.h>
8522ba09 28#include <poll.h>
96fba48f 29#include <stdlib.h>
8522ba09 30#include <strings.h>
50f80534 31#include <sys/epoll.h>
10dcf8de 32#include <sys/stat.h>
96fba48f
BP
33#include <unistd.h>
34
773cd538 35#include "bitmap.h"
c4e08753 36#include "dpif-netlink-rtnl.h"
0d71302e 37#include "dpif-provider.h"
1579cf67 38#include "fat-rwlock.h"
0d71302e 39#include "flow.h"
032aa6a3 40#include "netdev-linux.h"
0d71302e 41#include "netdev-provider.h"
c3827f61 42#include "netdev-vport.h"
0d71302e 43#include "netdev.h"
c11c9f4a 44#include "netlink-conntrack.h"
45c8d3a1 45#include "netlink-notifier.h"
982b8810 46#include "netlink-socket.h"
856081f6 47#include "netlink.h"
bfda5239 48#include "netnsid.h"
feebdea2 49#include "odp-util.h"
0d71302e
BP
50#include "openvswitch/dynamic-string.h"
51#include "openvswitch/flow.h"
52#include "openvswitch/match.h"
64c96779 53#include "openvswitch/ofpbuf.h"
fd016ae3 54#include "openvswitch/poll-loop.h"
ee89ea7b 55#include "openvswitch/shash.h"
92d0d515 56#include "openvswitch/thread.h"
0d71302e
BP
57#include "openvswitch/vlog.h"
58#include "packets.h"
59#include "random.h"
b3c01ed3 60#include "sset.h"
14b4d2f9 61#include "timeval.h"
d6569377 62#include "unaligned.h"
96fba48f 63#include "util.h"
5136ce49 64
93451a0a 65VLOG_DEFINE_THIS_MODULE(dpif_netlink);
/* WINDOWS is a compile-time constant that ordinary C expressions can test
 * (for example inside ovs_assert()) instead of using the preprocessor. */
#ifdef _WIN32
#include "wmi.h"
enum { WINDOWS = 1 };
#else
enum { WINDOWS = 0 };
#endif

/* Largest size that the per-handler channel arrays are allowed to reach. */
enum { MAX_PORTS = USHRT_MAX };

/* This ethtool flag was introduced in Linux 2.6.24, so it might be
 * missing if we have old headers. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */

#define FLOW_DUMP_MAX_BATCH 50
#define OPERATE_MAX_OPS 50
f2280b41 80
/* Parsed form of a datapath Generic Netlink message.  Each member is tagged
 * with the header field or OVS_DP_ATTR_* attribute it mirrors. */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
};
96
93451a0a
AS
97static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
98static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
99 const struct ofpbuf *);
100static void dpif_netlink_dp_dump_start(struct nl_dump *);
101static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
102 struct dpif_netlink_dp *reply,
103 struct ofpbuf **bufp);
104static int dpif_netlink_dp_get(const struct dpif *,
105 struct dpif_netlink_dp *reply,
106 struct ofpbuf **bufp);
107
108struct dpif_netlink_flow {
37a1300c
BP
109 /* Generic Netlink header. */
110 uint8_t cmd;
d6569377 111
df2c07f4 112 /* struct ovs_header. */
d6569377 113 unsigned int nlmsg_flags;
254f2dc8 114 int dp_ifindex;
d6569377
BP
115
116 /* Attributes.
117 *
0e70cdcb
BP
118 * The 'stats' member points to 64-bit data that might only be aligned on
119 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
120 * values.
d2a23af2 121 *
df2c07f4 122 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
d2a23af2 123 * the Netlink version of the command, even if actions_len is zero. */
df2c07f4 124 const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
d6569377 125 size_t key_len;
e6cc0bab
AZ
126 const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */
127 size_t mask_len;
df2c07f4 128 const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
d6569377 129 size_t actions_len;
70e5ed6f
JS
130 ovs_u128 ufid; /* OVS_FLOW_ATTR_FLOW_ID. */
131 bool ufid_present; /* Is there a UFID? */
132 bool ufid_terse; /* Skip serializing key/mask/acts? */
df2c07f4
JP
133 const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
134 const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
0e70cdcb 135 const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
df2c07f4 136 bool clear; /* OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 137 bool probe; /* OVS_FLOW_ATTR_PROBE. */
d6569377
BP
138};
139
93451a0a
AS
140static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
141static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
142 const struct ofpbuf *);
143static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
144 struct ofpbuf *);
145static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
146 struct dpif_netlink_flow *reply,
147 struct ofpbuf **bufp);
148static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
149 struct dpif_flow_stats *);
7af12bd7 150static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
93451a0a 151 const struct dpif_netlink_flow *);
d6569377 152
/* A single upcall channel between the kernel datapath and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket backing the channel. */
    long long int last_poll;    /* When the channel was last polled. */
};
158
#ifdef _WIN32
#define VPORT_SOCK_POOL_SIZE 1
/* On Windows, there is no native support for epoll.  There are equivalent
 * interfaces though, that are not used currently.  For simplicity, a pool of
 * netlink sockets is used.  Each socket is represented by 'struct
 * dpif_windows_vport_sock'.  Since it is a pool, multiple OVS ports may be
 * sharing the same socket.  In the future, we can add a reference count and
 * such fields. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* Netlink socket held in the pool. */
};
#endif
171
/* Per-handler upcall state: one channel slot per port plus the epoll
 * bookkeeping used to wait on those channels. */
struct dpif_handler {
    struct dpif_channel *channels;    /* One channel per port index. */
    struct epoll_event *epoll_events; /* Buffer for epoll_wait() results. */
    int epoll_fd;                     /* epoll fd that includes channel
                                       * socks. */
    int n_events;                     /* Num events returned by
                                       * epoll_wait(). */
    int event_offset;                 /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;        /* Index to aid in allocating a
                                       * socket in the pool to a port. */
#endif
};
14b4d2f9 186
96fba48f 187/* Datapath interface for the openvswitch Linux kernel module. */
93451a0a 188struct dpif_netlink {
96fba48f 189 struct dpif dpif;
254f2dc8 190 int dp_ifindex;
e9e28be3 191
b063d9f0 192 /* Upcall messages. */
1579cf67
AW
193 struct fat_rwlock upcall_lock;
194 struct dpif_handler *handlers;
195 uint32_t n_handlers; /* Num of upcall handlers. */
196 int uc_array_size; /* Size of 'handler->channels' and */
197 /* 'handler->epoll_events'. */
982b8810 198
e9e28be3 199 /* Change notification. */
e4516b20 200 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
61eae437 201 bool refresh_channels;
96fba48f
BP
202};
203
93451a0a 204static void report_loss(struct dpif_netlink *, struct dpif_channel *,
9b00386b 205 uint32_t ch_idx, uint32_t handler_id);
1579cf67 206
96fba48f
BP
207static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
208
e4516b20
BP
209/* Generic Netlink family numbers for OVS.
210 *
93451a0a 211 * Initialized by dpif_netlink_init(). */
df2c07f4
JP
212static int ovs_datapath_family;
213static int ovs_vport_family;
214static int ovs_flow_family;
215static int ovs_packet_family;
80738e5f 216static int ovs_meter_family;
906ff9d2 217static int ovs_ct_limit_family;
982b8810 218
e4516b20
BP
219/* Generic Netlink multicast groups for OVS.
220 *
93451a0a 221 * Initialized by dpif_netlink_init(). */
e4516b20 222static unsigned int ovs_vport_mcgroup;
982b8810 223
921c370a
EG
224/* If true, tunnel devices are created using OVS compat/genetlink.
225 * If false, tunnel devices are created with rtnetlink and using light weight
226 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
227 * to using the compat interface. */
228static bool ovs_tunnels_out_of_tree = true;
229
93451a0a
AS
230static int dpif_netlink_init(void);
231static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
232static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
233 odp_port_t port_no, uint32_t hash);
09cac43f 234static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
93451a0a
AS
235static int dpif_netlink_refresh_channels(struct dpif_netlink *,
236 uint32_t n_handlers);
237static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
238 struct ofpbuf *);
239static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
240 const struct ofpbuf *);
921c370a
EG
241static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
242 odp_port_t port_no, const char *port_name,
243 struct dpif_port *dpif_port);
f0fef760 244
93451a0a
AS
245static struct dpif_netlink *
246dpif_netlink_cast(const struct dpif *dpif)
96fba48f 247{
93451a0a
AS
248 dpif_assert_class(dpif, &dpif_netlink_class);
249 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
96fba48f
BP
250}
251
d3d22744 252static int
93451a0a
AS
253dpif_netlink_enumerate(struct sset *all_dps,
254 const struct dpif_class *dpif_class OVS_UNUSED)
d3d22744 255{
aaff4b55 256 struct nl_dump dump;
d57695d7
JS
257 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
258 struct ofpbuf msg, buf;
aaff4b55 259 int error;
982b8810 260
93451a0a 261 error = dpif_netlink_init();
aaff4b55
BP
262 if (error) {
263 return error;
982b8810 264 }
d3d22744 265
d57695d7 266 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a 267 dpif_netlink_dp_dump_start(&dump);
d57695d7 268 while (nl_dump_next(&dump, &msg, &buf)) {
93451a0a 269 struct dpif_netlink_dp dp;
d6569377 270
93451a0a 271 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
d0c23a1a 272 sset_add(all_dps, dp.name);
d3d22744
BP
273 }
274 }
d57695d7 275 ofpbuf_uninit(&buf);
aaff4b55 276 return nl_dump_done(&dump);
d3d22744
BP
277}
278
96fba48f 279static int
93451a0a
AS
280dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
281 bool create, struct dpif **dpifp)
96fba48f 282{
93451a0a 283 struct dpif_netlink_dp dp_request, dp;
c19e6535 284 struct ofpbuf *buf;
ea36840f 285 uint32_t upcall_pid;
c19e6535 286 int error;
96fba48f 287
93451a0a 288 error = dpif_netlink_init();
982b8810
BP
289 if (error) {
290 return error;
291 }
292
982b8810 293 /* Create or look up datapath. */
93451a0a 294 dpif_netlink_dp_init(&dp_request);
ea36840f
BP
295 if (create) {
296 dp_request.cmd = OVS_DP_CMD_NEW;
297 upcall_pid = 0;
298 dp_request.upcall_pid = &upcall_pid;
299 } else {
b7fd5e38
TG
300 /* Use OVS_DP_CMD_SET to report user features */
301 dp_request.cmd = OVS_DP_CMD_SET;
ea36840f 302 }
254f2dc8 303 dp_request.name = name;
b7fd5e38 304 dp_request.user_features |= OVS_DP_F_UNALIGNED;
1579cf67 305 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
93451a0a 306 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
982b8810
BP
307 if (error) {
308 return error;
c19e6535 309 }
254f2dc8 310
e4516b20 311 error = open_dpif(&dp, dpifp);
8f4a4df5 312 ofpbuf_delete(buf);
e4516b20 313 return error;
c19e6535
BP
314}
315
e4516b20 316static int
93451a0a 317open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
c19e6535 318{
93451a0a 319 struct dpif_netlink *dpif;
c19e6535 320
17411ecf 321 dpif = xzalloc(sizeof *dpif);
e4516b20 322 dpif->port_notifier = NULL;
1579cf67 323 fat_rwlock_init(&dpif->upcall_lock);
c19e6535 324
93451a0a 325 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
254f2dc8 326 dp->dp_ifindex, dp->dp_ifindex);
c19e6535 327
254f2dc8 328 dpif->dp_ifindex = dp->dp_ifindex;
c19e6535 329 *dpifp = &dpif->dpif;
e4516b20
BP
330
331 return 0;
96fba48f
BP
332}
333
/* Destroys each of the 'n_socks' netlink sockets pointed to by the elements
 * of 'socksp', then frees the 'socksp' array itself. */
static void
vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
{
    struct nl_sock **sockp = socksp;
    uint32_t remaining;

    for (remaining = n_socks; remaining > 0; remaining--) {
        nl_sock_destroy(*sockp++);
    }

    free(socksp);
}
989fd548 347
1579cf67
AW
348/* Creates an array of netlink sockets. Returns an array of the
349 * corresponding pointers. Records the error in 'error'. */
350static struct nl_sock **
09cac43f 351vport_create_socksp__(uint32_t n_socks, int *error)
1579cf67
AW
352{
353 struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
354 size_t i;
355
356 for (i = 0; i < n_socks; i++) {
357 *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
358 if (*error) {
359 goto error;
989fd548 360 }
1579cf67 361 }
989fd548 362
1579cf67 363 return socksp;
9fafa796 364
1579cf67 365error:
09cac43f 366 vport_del_socksp__(socksp, n_socks);
989fd548 367
1579cf67
AW
368 return NULL;
369}
370
09cac43f
NR
371#ifdef _WIN32
372static void
373vport_delete_sock_pool(struct dpif_handler *handler)
374 OVS_REQ_WRLOCK(dpif->upcall_lock)
375{
376 if (handler->vport_sock_pool) {
377 uint32_t i;
378 struct dpif_windows_vport_sock *sock_pool =
379 handler->vport_sock_pool;
380
381 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
382 if (sock_pool[i].nl_sock) {
383 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
384 nl_sock_destroy(sock_pool[i].nl_sock);
385 sock_pool[i].nl_sock = NULL;
386 }
387 }
388
389 free(handler->vport_sock_pool);
390 handler->vport_sock_pool = NULL;
391 }
392}
393
394static int
395vport_create_sock_pool(struct dpif_handler *handler)
396 OVS_REQ_WRLOCK(dpif->upcall_lock)
397{
398 struct dpif_windows_vport_sock *sock_pool;
399 size_t i;
400 int error = 0;
401
402 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
403 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
404 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
405 if (error) {
406 goto error;
407 }
408
409 /* Enable the netlink socket to receive packets. This is equivalent to
410 * calling nl_sock_join_mcgroup() to receive events. */
411 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
412 if (error) {
413 goto error;
414 }
415 }
416
417 handler->vport_sock_pool = sock_pool;
418 handler->last_used_pool_idx = 0;
419 return 0;
420
421error:
422 vport_delete_sock_pool(handler);
423 return error;
424}
425
426/* Returns an array pointers to netlink sockets. The sockets are picked from a
427 * pool. Records the error in 'error'. */
428static struct nl_sock **
429vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
430 OVS_REQ_WRLOCK(dpif->upcall_lock)
431{
432 uint32_t n_socks = dpif->n_handlers;
433 struct nl_sock **socksp;
434 size_t i;
435
436 ovs_assert(n_socks <= 1);
437 socksp = xzalloc(n_socks * sizeof *socksp);
438
439 /* Pick netlink sockets to use in a round-robin fashion from each
440 * handler's pool of sockets. */
441 for (i = 0; i < n_socks; i++) {
442 struct dpif_handler *handler = &dpif->handlers[i];
443 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
444 size_t index = handler->last_used_pool_idx;
445
446 /* A pool of sockets is allocated when the handler is initialized. */
447 if (sock_pool == NULL) {
448 free(socksp);
449 *error = EINVAL;
450 return NULL;
451 }
452
453 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
454 socksp[i] = sock_pool[index].nl_sock;
455 socksp[i] = sock_pool[index].nl_sock;
456 ovs_assert(socksp[i]);
457 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
458 handler->last_used_pool_idx = index;
459 }
460
461 return socksp;
462}
463
/* Frees the 'socksp' array only.  The sockets it points to are left alone;
 * 'dpif' is not used. */
static void
vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
    free(socksp);
}
469#endif /* _WIN32 */
470
471static struct nl_sock **
472vport_create_socksp(struct dpif_netlink *dpif, int *error)
473{
474#ifdef _WIN32
475 return vport_create_socksp_windows(dpif, error);
476#else
477 return vport_create_socksp__(dpif->n_handlers, error);
478#endif
479}
480
481static void
482vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
483{
484#ifdef _WIN32
485 vport_del_socksp_windows(dpif, socksp);
486#else
487 vport_del_socksp__(socksp, dpif->n_handlers);
488#endif
489}
490
/* Returns a newly allocated array holding the netlink pid of each of the
 * 'n_socks' sockets in 'socksp'.  If 'socksp' is NULL, returns a
 * single-element array whose only value is 0.  The caller frees the
 * result. */
static uint32_t *
vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t *pids;
    size_t idx;

    if (!socksp) {
        return xzalloc(sizeof *pids);
    }

    pids = xzalloc(n_socks * sizeof *pids);
    for (idx = 0; idx < n_socks; idx++) {
        pids[idx] = nl_sock_pid(socksp[idx]);
    }
    return pids;
}
512
513/* Given the port number 'port_idx', extracts the pids of netlink sockets
514 * associated to the port and assigns it to 'upcall_pids'. */
515static bool
93451a0a 516vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
1579cf67
AW
517 uint32_t **upcall_pids)
518{
519 uint32_t *pids;
520 size_t i;
989fd548 521
1579cf67
AW
522 /* Since the nl_sock can only be assigned in either all
523 * or none "dpif->handlers" channels, the following check
524 * would suffice. */
525 if (!dpif->handlers[0].channels[port_idx].sock) {
526 return false;
527 }
09cac43f 528 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1579cf67
AW
529
530 pids = xzalloc(dpif->n_handlers * sizeof *pids);
531
532 for (i = 0; i < dpif->n_handlers; i++) {
533 pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
534 }
535
536 *upcall_pids = pids;
989fd548 537
1579cf67 538 return true;
989fd548
JP
539}
540
541static int
93451a0a 542vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
1579cf67 543 struct nl_sock **socksp)
989fd548
JP
544{
545 struct epoll_event event;
4e022ec0 546 uint32_t port_idx = odp_to_u32(port_no);
1579cf67
AW
547 size_t i, j;
548 int error;
989fd548 549
1579cf67 550 if (dpif->handlers == NULL) {
989fd548
JP
551 return 0;
552 }
553
1579cf67
AW
554 /* We assume that the datapath densely chooses port numbers, which can
555 * therefore be used as an index into 'channels' and 'epoll_events' of
556 * 'dpif->handler'. */
4e022ec0
AW
557 if (port_idx >= dpif->uc_array_size) {
558 uint32_t new_size = port_idx + 1;
989fd548 559
12d76859 560 if (new_size > MAX_PORTS) {
989fd548
JP
561 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
562 dpif_name(&dpif->dpif), port_no);
563 return EFBIG;
564 }
565
1579cf67
AW
566 for (i = 0; i < dpif->n_handlers; i++) {
567 struct dpif_handler *handler = &dpif->handlers[i];
568
569 handler->channels = xrealloc(handler->channels,
570 new_size * sizeof *handler->channels);
571
572 for (j = dpif->uc_array_size; j < new_size; j++) {
573 handler->channels[j].sock = NULL;
574 }
575
576 handler->epoll_events = xrealloc(handler->epoll_events,
577 new_size * sizeof *handler->epoll_events);
989fd548 578
1579cf67 579 }
989fd548
JP
580 dpif->uc_array_size = new_size;
581 }
582
583 memset(&event, 0, sizeof event);
584 event.events = EPOLLIN;
4e022ec0 585 event.data.u32 = port_idx;
989fd548 586
1579cf67
AW
587 for (i = 0; i < dpif->n_handlers; i++) {
588 struct dpif_handler *handler = &dpif->handlers[i];
589
09cac43f 590#ifndef _WIN32
1579cf67
AW
591 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
592 &event) < 0) {
593 error = errno;
594 goto error;
595 }
93451a0a 596#endif
1579cf67
AW
597 dpif->handlers[i].channels[port_idx].sock = socksp[i];
598 dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
599 }
989fd548
JP
600
601 return 0;
1579cf67
AW
602
603error:
604 for (j = 0; j < i; j++) {
09cac43f 605#ifndef _WIN32
1579cf67
AW
606 epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
607 nl_sock_fd(socksp[j]), NULL);
93451a0a 608#endif
1579cf67
AW
609 dpif->handlers[j].channels[port_idx].sock = NULL;
610 }
611
612 return error;
989fd548
JP
613}
614
615static void
93451a0a 616vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
989fd548 617{
4e022ec0 618 uint32_t port_idx = odp_to_u32(port_no);
1579cf67 619 size_t i;
989fd548 620
1579cf67 621 if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
989fd548
JP
622 return;
623 }
624
1579cf67
AW
625 /* Since the sock can only be assigned in either all or none
626 * of "dpif->handlers" channels, the following check would
627 * suffice. */
628 if (!dpif->handlers[0].channels[port_idx].sock) {
989fd548
JP
629 return;
630 }
631
1579cf67
AW
632 for (i = 0; i < dpif->n_handlers; i++) {
633 struct dpif_handler *handler = &dpif->handlers[i];
09cac43f 634#ifndef _WIN32
1579cf67
AW
635 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
636 nl_sock_fd(handler->channels[port_idx].sock), NULL);
637 nl_sock_destroy(handler->channels[port_idx].sock);
09cac43f 638#endif
1579cf67
AW
639 handler->channels[port_idx].sock = NULL;
640 handler->event_offset = handler->n_events = 0;
641 }
642}
643
644static void
93451a0a
AS
645destroy_all_channels(struct dpif_netlink *dpif)
646 OVS_REQ_WRLOCK(dpif->upcall_lock)
1579cf67
AW
647{
648 unsigned int i;
649
650 if (!dpif->handlers) {
651 return;
652 }
653
654 for (i = 0; i < dpif->uc_array_size; i++ ) {
93451a0a 655 struct dpif_netlink_vport vport_request;
1579cf67
AW
656 uint32_t upcall_pids = 0;
657
658 /* Since the sock can only be assigned in either all or none
659 * of "dpif->handlers" channels, the following check would
660 * suffice. */
661 if (!dpif->handlers[0].channels[i].sock) {
662 continue;
663 }
664
665 /* Turn off upcalls. */
93451a0a 666 dpif_netlink_vport_init(&vport_request);
1579cf67
AW
667 vport_request.cmd = OVS_VPORT_CMD_SET;
668 vport_request.dp_ifindex = dpif->dp_ifindex;
669 vport_request.port_no = u32_to_odp(i);
a78f446a 670 vport_request.n_upcall_pids = 1;
1579cf67 671 vport_request.upcall_pids = &upcall_pids;
93451a0a 672 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67
AW
673
674 vport_del_channels(dpif, u32_to_odp(i));
675 }
676
677 for (i = 0; i < dpif->n_handlers; i++) {
678 struct dpif_handler *handler = &dpif->handlers[i];
679
09cac43f 680 dpif_netlink_handler_uninit(handler);
1579cf67
AW
681 free(handler->epoll_events);
682 free(handler->channels);
683 }
989fd548 684
1579cf67
AW
685 free(dpif->handlers);
686 dpif->handlers = NULL;
687 dpif->n_handlers = 0;
688 dpif->uc_array_size = 0;
17411ecf
JG
689}
690
96fba48f 691static void
93451a0a 692dpif_netlink_close(struct dpif *dpif_)
96fba48f 693{
93451a0a 694 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
c7178a0b 695
e4516b20 696 nl_sock_destroy(dpif->port_notifier);
1579cf67
AW
697
698 fat_rwlock_wrlock(&dpif->upcall_lock);
699 destroy_all_channels(dpif);
700 fat_rwlock_unlock(&dpif->upcall_lock);
701
702 fat_rwlock_destroy(&dpif->upcall_lock);
96fba48f
BP
703 free(dpif);
704}
705
706static int
93451a0a 707dpif_netlink_destroy(struct dpif *dpif_)
96fba48f 708{
93451a0a
AS
709 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
710 struct dpif_netlink_dp dp;
d6569377 711
93451a0a 712 dpif_netlink_dp_init(&dp);
df2c07f4 713 dp.cmd = OVS_DP_CMD_DEL;
254f2dc8 714 dp.dp_ifindex = dpif->dp_ifindex;
93451a0a 715 return dpif_netlink_dp_transact(&dp, NULL, NULL);
96fba48f
BP
716}
717
a36de779 718static bool
93451a0a 719dpif_netlink_run(struct dpif *dpif_)
61eae437 720{
93451a0a 721 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67 722
61eae437
BP
723 if (dpif->refresh_channels) {
724 dpif->refresh_channels = false;
1579cf67 725 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 726 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
1579cf67 727 fat_rwlock_unlock(&dpif->upcall_lock);
61eae437 728 }
a36de779 729 return false;
61eae437
BP
730}
731
96fba48f 732static int
93451a0a 733dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
96fba48f 734{
93451a0a 735 struct dpif_netlink_dp dp;
d6569377
BP
736 struct ofpbuf *buf;
737 int error;
738
93451a0a 739 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
d6569377 740 if (!error) {
6a54dedc
BP
741 memset(stats, 0, sizeof *stats);
742
743 if (dp.stats) {
744 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
745 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
746 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
747 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
748 }
749
750 if (dp.megaflow_stats) {
751 stats->n_masks = dp.megaflow_stats->n_masks;
752 stats->n_mask_hit = get_32aligned_u64(
753 &dp.megaflow_stats->n_mask_hit);
754 } else {
755 stats->n_masks = UINT32_MAX;
756 stats->n_mask_hit = UINT64_MAX;
757 }
d6569377
BP
758 ofpbuf_delete(buf);
759 }
760 return error;
96fba48f
BP
761}
762
b9ad7294 763static const char *
93451a0a 764get_vport_type(const struct dpif_netlink_vport *vport)
b9ad7294
EJ
765{
766 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
767
768 switch (vport->type) {
5ed51209
JS
769 case OVS_VPORT_TYPE_NETDEV: {
770 const char *type = netdev_get_type_from_name(vport->name);
771
772 return type ? type : "system";
773 }
b9ad7294
EJ
774
775 case OVS_VPORT_TYPE_INTERNAL:
776 return "internal";
777
c1fc1411
JG
778 case OVS_VPORT_TYPE_GENEVE:
779 return "geneve";
780
b9ad7294
EJ
781 case OVS_VPORT_TYPE_GRE:
782 return "gre";
783
b9ad7294
EJ
784 case OVS_VPORT_TYPE_VXLAN:
785 return "vxlan";
786
a6ae068b
LJ
787 case OVS_VPORT_TYPE_LISP:
788 return "lisp";
789
4237026e
PS
790 case OVS_VPORT_TYPE_STT:
791 return "stt";
792
c387d817 793 case OVS_VPORT_TYPE_ERSPAN:
98514eea
WT
794 return "erspan";
795
c387d817 796 case OVS_VPORT_TYPE_IP6ERSPAN:
3b10ceee
GR
797 return "ip6erspan";
798
c387d817 799 case OVS_VPORT_TYPE_IP6GRE:
3b10ceee 800 return "ip6gre";
c387d817 801
b9ad7294
EJ
802 case OVS_VPORT_TYPE_UNSPEC:
803 case __OVS_VPORT_TYPE_MAX:
804 break;
805 }
806
807 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
808 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
809 return "unknown";
810}
811
c4e08753 812enum ovs_vport_type
20c57607 813netdev_to_ovs_vport_type(const char *type)
c060c4cf 814{
c060c4cf
EJ
815 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
816 return OVS_VPORT_TYPE_NETDEV;
817 } else if (!strcmp(type, "internal")) {
818 return OVS_VPORT_TYPE_INTERNAL;
4237026e
PS
819 } else if (strstr(type, "stt")) {
820 return OVS_VPORT_TYPE_STT;
c1fc1411
JG
821 } else if (!strcmp(type, "geneve")) {
822 return OVS_VPORT_TYPE_GENEVE;
c060c4cf
EJ
823 } else if (!strcmp(type, "vxlan")) {
824 return OVS_VPORT_TYPE_VXLAN;
a6ae068b
LJ
825 } else if (!strcmp(type, "lisp")) {
826 return OVS_VPORT_TYPE_LISP;
7dc18ae9
WT
827 } else if (!strcmp(type, "erspan")) {
828 return OVS_VPORT_TYPE_ERSPAN;
829 } else if (!strcmp(type, "ip6erspan")) {
830 return OVS_VPORT_TYPE_IP6ERSPAN;
3b10ceee
GR
831 } else if (!strcmp(type, "ip6gre")) {
832 return OVS_VPORT_TYPE_IP6GRE;
1c385f49
GR
833 } else if (!strcmp(type, "gre")) {
834 return OVS_VPORT_TYPE_GRE;
c060c4cf
EJ
835 } else {
836 return OVS_VPORT_TYPE_UNSPEC;
837 }
838}
839
96fba48f 840static int
20c57607
EG
841dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
842 enum ovs_vport_type type,
843 struct ofpbuf *options,
93451a0a 844 odp_port_t *port_nop)
b90de034 845 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 846{
93451a0a 847 struct dpif_netlink_vport request, reply;
c19e6535 848 struct ofpbuf *buf;
1579cf67
AW
849 struct nl_sock **socksp = NULL;
850 uint32_t *upcall_pids;
851 int error = 0;
96fba48f 852
1579cf67 853 if (dpif->handlers) {
09cac43f 854 socksp = vport_create_socksp(dpif, &error);
1579cf67 855 if (!socksp) {
989fd548
JP
856 return error;
857 }
858 }
859
93451a0a 860 dpif_netlink_vport_init(&request);
df2c07f4 861 request.cmd = OVS_VPORT_CMD_NEW;
254f2dc8 862 request.dp_ifindex = dpif->dp_ifindex;
20c57607
EG
863 request.type = type;
864 request.name = name;
865
866 request.port_no = *port_nop;
867 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
868 request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
869 request.upcall_pids = upcall_pids;
870
871 if (options) {
872 request.options = options->data;
873 request.options_len = options->size;
874 }
875
876 error = dpif_netlink_vport_transact(&request, &reply, &buf);
877 if (!error) {
878 *port_nop = reply.port_no;
879 } else {
880 if (error == EBUSY && *port_nop != ODPP_NONE) {
881 VLOG_INFO("%s: requested port %"PRIu32" is in use",
882 dpif_name(&dpif->dpif), *port_nop);
883 }
884
885 vport_del_socksp(dpif, socksp);
886 goto exit;
887 }
888
889 if (socksp) {
890 error = vport_add_channels(dpif, *port_nop, socksp);
891 if (error) {
892 VLOG_INFO("%s: could not add channel for port %s",
893 dpif_name(&dpif->dpif), name);
894
895 /* Delete the port. */
896 dpif_netlink_vport_init(&request);
897 request.cmd = OVS_VPORT_CMD_DEL;
898 request.dp_ifindex = dpif->dp_ifindex;
899 request.port_no = *port_nop;
900 dpif_netlink_vport_transact(&request, NULL, NULL);
901 vport_del_socksp(dpif, socksp);
902 goto exit;
903 }
904 }
905 free(socksp);
906
907exit:
908 ofpbuf_delete(buf);
909 free(upcall_pids);
910
911 return error;
912}
913
914static int
915dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
916 odp_port_t *port_nop)
917 OVS_REQ_WRLOCK(dpif->upcall_lock)
918{
919 const struct netdev_tunnel_config *tnl_cfg;
920 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
921 const char *type = netdev_get_type(netdev);
922 uint64_t options_stub[64 / 8];
923 enum ovs_vport_type ovs_type;
924 struct ofpbuf options;
925 const char *name;
926
927 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
928
929 ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
930 if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
c283069c
BP
931 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
932 "unsupported type `%s'",
9b00386b 933 dpif_name(&dpif->dpif), name, type);
c283069c
BP
934 return EINVAL;
935 }
c3827f61 936
20c57607 937 if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
93451a0a 938#ifdef _WIN32
09cac43f 939 /* XXX : Map appropiate Windows handle */
93451a0a 940#else
24b019f8 941 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
93451a0a 942#endif
24b019f8
JP
943 }
944
da467899 945#ifdef _WIN32
20c57607 946 if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
da467899
AS
947 if (!create_wmi_port(name)){
948 VLOG_ERR("Could not create wmi internal port with name:%s", name);
da467899
AS
949 return EINVAL;
950 };
951 }
952#endif
953
26508d9a 954 tnl_cfg = netdev_get_tunnel_config(netdev);
526df7d8 955 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
26508d9a 956 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
526df7d8
TG
957 if (tnl_cfg->dst_port) {
958 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
959 ntohs(tnl_cfg->dst_port));
960 }
961 if (tnl_cfg->exts) {
962 size_t ext_ofs;
963 int i;
964
965 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
966 for (i = 0; i < 32; i++) {
967 if (tnl_cfg->exts & (1 << i)) {
968 nl_msg_put_flag(&options, i);
969 }
970 }
971 nl_msg_end_nested(&options, ext_ofs);
972 }
20c57607
EG
973 return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
974 port_nop);
2510ba7c 975 } else {
20c57607 976 return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
78a2d59c 977 }
c3827f61 978
20c57607 979}
989fd548 980
921c370a 981static int
c4e08753
EG
982dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
983 struct netdev *netdev,
984 odp_port_t *port_nop)
985 OVS_REQ_WRLOCK(dpif->upcall_lock)
986{
987 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
988 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
989 const char *name;
990 int error;
989fd548 991
c4e08753
EG
992 error = dpif_netlink_rtnl_port_create(netdev);
993 if (error) {
994 if (error != EOPNOTSUPP) {
d52ef4eb 995 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
c4e08753
EG
996 netdev_get_name(netdev), ovs_strerror(error));
997 }
998 return error;
999 }
1579cf67 1000
c4e08753
EG
1001 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1002 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
1003 port_nop);
1004 if (error) {
1005 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
1006 }
1007 return error;
1008}
96fba48f
BP
1009
1010static int
93451a0a
AS
1011dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
1012 odp_port_t *port_nop)
9fafa796 1013{
93451a0a 1014 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
921c370a 1015 int error = EOPNOTSUPP;
9fafa796 1016
1579cf67 1017 fat_rwlock_wrlock(&dpif->upcall_lock);
921c370a
EG
1018 if (!ovs_tunnels_out_of_tree) {
1019 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
1020 }
1021 if (error) {
1022 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
1023 }
1579cf67 1024 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1025
1026 return error;
1027}
1028
1029static int
93451a0a 1030dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
b90de034 1031 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 1032{
93451a0a 1033 struct dpif_netlink_vport vport;
921c370a 1034 struct dpif_port dpif_port;
773cd538 1035 int error;
c19e6535 1036
921c370a
EG
1037 error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1038 if (error) {
1039 return error;
1040 }
1041
93451a0a 1042 dpif_netlink_vport_init(&vport);
df2c07f4 1043 vport.cmd = OVS_VPORT_CMD_DEL;
254f2dc8 1044 vport.dp_ifindex = dpif->dp_ifindex;
c19e6535 1045 vport.port_no = port_no;
da467899 1046#ifdef _WIN32
921c370a
EG
1047 if (!strcmp(dpif_port.type, "internal")) {
1048 if (!delete_wmi_port(dpif_port.name)) {
da467899 1049 VLOG_ERR("Could not delete wmi port with name: %s",
921c370a 1050 dpif_port.name);
da467899
AS
1051 };
1052 }
1053#endif
93451a0a 1054 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
773cd538 1055
1579cf67 1056 vport_del_channels(dpif, port_no);
989fd548 1057
921c370a
EG
1058 if (!error && !ovs_tunnels_out_of_tree) {
1059 error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1060 if (error == EOPNOTSUPP) {
1061 error = 0;
1062 }
1063 }
1064
1065 dpif_port_destroy(&dpif_port);
1066
773cd538 1067 return error;
c3827f61 1068}
3abc4a1a 1069
9fafa796 1070static int
93451a0a 1071dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
9fafa796 1072{
93451a0a 1073 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
1074 int error;
1075
1579cf67 1076 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 1077 error = dpif_netlink_port_del__(dpif, port_no);
1579cf67 1078 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1079
1080 return error;
1081}
1082
c3827f61 1083static int
93451a0a
AS
1084dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1085 const char *port_name, struct dpif_port *dpif_port)
c3827f61 1086{
93451a0a
AS
1087 struct dpif_netlink_vport request;
1088 struct dpif_netlink_vport reply;
c19e6535 1089 struct ofpbuf *buf;
4c738a8d
BP
1090 int error;
1091
93451a0a 1092 dpif_netlink_vport_init(&request);
df2c07f4 1093 request.cmd = OVS_VPORT_CMD_GET;
9b00386b 1094 request.dp_ifindex = dpif->dp_ifindex;
c19e6535
BP
1095 request.port_no = port_no;
1096 request.name = port_name;
4c738a8d 1097
93451a0a 1098 error = dpif_netlink_vport_transact(&request, &reply, &buf);
c19e6535 1099 if (!error) {
33db1592
BP
1100 if (reply.dp_ifindex != request.dp_ifindex) {
1101 /* A query by name reported that 'port_name' is in some datapath
1102 * other than 'dpif', but the caller wants to know about 'dpif'. */
1103 error = ENODEV;
4afba28d 1104 } else if (dpif_port) {
33db1592 1105 dpif_port->name = xstrdup(reply.name);
b9ad7294 1106 dpif_port->type = xstrdup(get_vport_type(&reply));
33db1592
BP
1107 dpif_port->port_no = reply.port_no;
1108 }
c19e6535 1109 ofpbuf_delete(buf);
3abc4a1a 1110 }
c19e6535 1111 return error;
96fba48f
BP
1112}
1113
1114static int
93451a0a
AS
1115dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1116 struct dpif_port *dpif_port)
96fba48f 1117{
93451a0a 1118 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9b00386b 1119
93451a0a 1120 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
96fba48f
BP
1121}
1122
/* Looks up the port named 'devname' in 'dpif_' (the port number in the
 * request is left as 0); see dpif_netlink_port_query__() for the meaning of
 * 'dpif_port' and the return value. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    struct dpif_netlink *netlink_dpif = dpif_netlink_cast(dpif_);

    return dpif_netlink_port_query__(netlink_dpif, 0, devname, dpif_port);
}
1131
98403001 1132static uint32_t
93451a0a
AS
1133dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1134 odp_port_t port_no, uint32_t hash)
b90de034 1135 OVS_REQ_RDLOCK(dpif->upcall_lock)
98403001 1136{
4e022ec0 1137 uint32_t port_idx = odp_to_u32(port_no);
9fafa796 1138 uint32_t pid = 0;
98403001 1139
f8fc5489 1140 if (dpif->handlers && dpif->uc_array_size > 0) {
4e022ec0 1141 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
989fd548 1142 * channel, since it is not heavily loaded. */
4e022ec0 1143 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1579cf67
AW
1144 struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
1145
17f2748d
AW
1146 /* Needs to check in case the socket pointer is changed in between
1147 * the holding of upcall_lock. A known case happens when the main
1148 * thread deletes the vport while the handler thread is handling
1149 * the upcall from that port. */
1150 if (h->channels[idx].sock) {
1151 pid = nl_sock_pid(h->channels[idx].sock);
1152 }
98403001 1153 }
9fafa796
BP
1154
1155 return pid;
98403001
BP
1156}
1157
b90de034 1158static uint32_t
93451a0a
AS
1159dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
1160 uint32_t hash)
b90de034 1161{
93451a0a 1162 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
1163 uint32_t ret;
1164
1165 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 1166 ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
b90de034
AW
1167 fat_rwlock_unlock(&dpif->upcall_lock);
1168
1169 return ret;
1170}
1171
96fba48f 1172static int
93451a0a 1173dpif_netlink_flow_flush(struct dpif *dpif_)
96fba48f 1174{
93451a0a
AS
1175 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1176 struct dpif_netlink_flow flow;
37a1300c 1177
93451a0a 1178 dpif_netlink_flow_init(&flow);
df2c07f4 1179 flow.cmd = OVS_FLOW_CMD_DEL;
254f2dc8 1180 flow.dp_ifindex = dpif->dp_ifindex;
f7dde6df
PB
1181
1182 if (netdev_is_flow_api_enabled()) {
dfaf79dd 1183 netdev_ports_flow_flush(dpif_->dpif_class);
f7dde6df
PB
1184 }
1185
93451a0a 1186 return dpif_netlink_flow_transact(&flow, NULL, NULL);
96fba48f
BP
1187}
1188
93451a0a 1189struct dpif_netlink_port_state {
f0fef760 1190 struct nl_dump dump;
d57695d7 1191 struct ofpbuf buf;
c19e6535
BP
1192};
1193
222837c4 1194static void
93451a0a
AS
1195dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1196 struct nl_dump *dump)
96fba48f 1197{
93451a0a 1198 struct dpif_netlink_vport request;
f0fef760
BP
1199 struct ofpbuf *buf;
1200
93451a0a 1201 dpif_netlink_vport_init(&request);
067f1e23 1202 request.cmd = OVS_VPORT_CMD_GET;
254f2dc8 1203 request.dp_ifindex = dpif->dp_ifindex;
f0fef760
BP
1204
1205 buf = ofpbuf_new(1024);
93451a0a 1206 dpif_netlink_vport_to_ofpbuf(&request, buf);
222837c4 1207 nl_dump_start(dump, NETLINK_GENERIC, buf);
f0fef760 1208 ofpbuf_delete(buf);
222837c4
BP
1209}
1210
1211static int
93451a0a 1212dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
222837c4 1213{
93451a0a
AS
1214 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1215 struct dpif_netlink_port_state *state;
222837c4
BP
1216
1217 *statep = state = xmalloc(sizeof *state);
93451a0a 1218 dpif_netlink_port_dump_start__(dpif, &state->dump);
f0fef760 1219
d57695d7 1220 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
b0ec0f27
BP
1221 return 0;
1222}
1223
7c1ef244 1224static int
93451a0a
AS
1225dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1226 struct nl_dump *dump,
1227 struct dpif_netlink_vport *vport,
1228 struct ofpbuf *buffer)
222837c4 1229{
222837c4
BP
1230 struct ofpbuf buf;
1231 int error;
1232
d57695d7 1233 if (!nl_dump_next(dump, &buf, buffer)) {
222837c4
BP
1234 return EOF;
1235 }
1236
93451a0a 1237 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
222837c4
BP
1238 if (error) {
1239 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1240 dpif_name(&dpif->dpif), ovs_strerror(error));
1241 }
1242 return error;
1243}
1244
b0ec0f27 1245static int
93451a0a
AS
1246dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1247 struct dpif_port *dpif_port)
b0ec0f27 1248{
93451a0a
AS
1249 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1250 struct dpif_netlink_port_state *state = state_;
1251 struct dpif_netlink_vport vport;
96fba48f
BP
1252 int error;
1253
93451a0a
AS
1254 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1255 &state->buf);
c3827f61 1256 if (error) {
f0fef760 1257 return error;
c3827f61 1258 }
ebc56baa 1259 dpif_port->name = CONST_CAST(char *, vport.name);
b9ad7294 1260 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
f0fef760
BP
1261 dpif_port->port_no = vport.port_no;
1262 return 0;
b0ec0f27
BP
1263}
1264
1265static int
93451a0a 1266dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
b0ec0f27 1267{
93451a0a 1268 struct dpif_netlink_port_state *state = state_;
f0fef760 1269 int error = nl_dump_done(&state->dump);
8522b383 1270
d57695d7 1271 ofpbuf_uninit(&state->buf);
b0ec0f27 1272 free(state);
f0fef760 1273 return error;
96fba48f
BP
1274}
1275
e9e28be3 1276static int
93451a0a 1277dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
e9e28be3 1278{
93451a0a 1279 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e9e28be3 1280
e4516b20
BP
1281 /* Lazily create the Netlink socket to listen for notifications. */
1282 if (!dpif->port_notifier) {
1283 struct nl_sock *sock;
1284 int error;
1285
1286 error = nl_sock_create(NETLINK_GENERIC, &sock);
1287 if (error) {
1288 return error;
1289 }
1290
1291 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1292 if (error) {
1293 nl_sock_destroy(sock);
1294 return error;
1295 }
1296 dpif->port_notifier = sock;
1297
1298 /* We have no idea of the current state so report that everything
1299 * changed. */
1300 return ENOBUFS;
1301 }
1302
1303 for (;;) {
1304 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1305 uint64_t buf_stub[4096 / 8];
1306 struct ofpbuf buf;
1307 int error;
1308
1309 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
a86bd14e 1310 error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
e4516b20 1311 if (!error) {
93451a0a 1312 struct dpif_netlink_vport vport;
e4516b20 1313
93451a0a 1314 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
e4516b20
BP
1315 if (!error) {
1316 if (vport.dp_ifindex == dpif->dp_ifindex
1317 && (vport.cmd == OVS_VPORT_CMD_NEW
1318 || vport.cmd == OVS_VPORT_CMD_DEL
1319 || vport.cmd == OVS_VPORT_CMD_SET)) {
1320 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1321 dpif->dpif.full_name, vport.name, vport.cmd);
1579cf67 1322 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
61eae437
BP
1323 dpif->refresh_channels = true;
1324 }
e4516b20 1325 *devnamep = xstrdup(vport.name);
59e0c910 1326 ofpbuf_uninit(&buf);
e4516b20 1327 return 0;
e4516b20
BP
1328 }
1329 }
59e0c910
BP
1330 } else if (error != EAGAIN) {
1331 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1332 ovs_strerror(error));
1333 nl_sock_drain(dpif->port_notifier);
1334 error = ENOBUFS;
e4516b20
BP
1335 }
1336
59e0c910
BP
1337 ofpbuf_uninit(&buf);
1338 if (error) {
1339 return error;
1340 }
e9e28be3 1341 }
e9e28be3
BP
1342}
1343
1344static void
93451a0a 1345dpif_netlink_port_poll_wait(const struct dpif *dpif_)
e9e28be3 1346{
93451a0a 1347 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e4516b20
BP
1348
1349 if (dpif->port_notifier) {
1350 nl_sock_wait(dpif->port_notifier, POLLIN);
1351 } else {
e9e28be3 1352 poll_immediate_wake();
e9e28be3
BP
1353 }
1354}
1355
6fe09f8c 1356static void
70e5ed6f
JS
1357dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1358 const ovs_u128 *ufid, bool terse)
1359{
1360 if (ufid) {
1361 request->ufid = *ufid;
1362 request->ufid_present = true;
1363 } else {
1364 request->ufid_present = false;
1365 }
1366 request->ufid_terse = terse;
1367}
1368
1369static void
1370dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1371 const struct nlattr *key, size_t key_len,
1372 const ovs_u128 *ufid, bool terse,
1373 struct dpif_netlink_flow *request)
96fba48f 1374{
93451a0a 1375 dpif_netlink_flow_init(request);
6fe09f8c
JS
1376 request->cmd = OVS_FLOW_CMD_GET;
1377 request->dp_ifindex = dpif->dp_ifindex;
1378 request->key = key;
1379 request->key_len = key_len;
70e5ed6f
JS
1380 dpif_netlink_flow_init_ufid(request, ufid, terse);
1381}
1382
1383static void
1384dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1385 const struct dpif_flow_get *get,
1386 struct dpif_netlink_flow *request)
1387{
1388 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1389 false, request);
30053024
BP
1390}
1391
1392static int
70e5ed6f
JS
1393dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1394 const struct nlattr *key, size_t key_len,
1395 const ovs_u128 *ufid, bool terse,
1396 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
30053024 1397{
93451a0a 1398 struct dpif_netlink_flow request;
30053024 1399
70e5ed6f 1400 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
93451a0a 1401 return dpif_netlink_flow_transact(&request, reply, bufp);
96fba48f
BP
1402}
1403
70e5ed6f
JS
1404static int
1405dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1406 const struct dpif_netlink_flow *flow,
1407 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1408{
1409 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1410 flow->ufid_present ? &flow->ufid : NULL,
1411 false, reply, bufp);
1412}
1413
6bc60024 1414static void
93451a0a
AS
1415dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1416 const struct dpif_flow_put *put,
1417 struct dpif_netlink_flow *request)
6bc60024 1418{
d64e176c 1419 static const struct nlattr dummy_action;
6bc60024 1420
93451a0a 1421 dpif_netlink_flow_init(request);
89625d1e 1422 request->cmd = (put->flags & DPIF_FP_CREATE
6bc60024
BP
1423 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1424 request->dp_ifindex = dpif->dp_ifindex;
89625d1e
BP
1425 request->key = put->key;
1426 request->key_len = put->key_len;
e6cc0bab
AZ
1427 request->mask = put->mask;
1428 request->mask_len = put->mask_len;
70e5ed6f
JS
1429 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1430
6bc60024 1431 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
d64e176c
BP
1432 request->actions = (put->actions
1433 ? put->actions
1434 : CONST_CAST(struct nlattr *, &dummy_action));
89625d1e
BP
1435 request->actions_len = put->actions_len;
1436 if (put->flags & DPIF_FP_ZERO_STATS) {
6bc60024
BP
1437 request->clear = true;
1438 }
43f9ac0a
JR
1439 if (put->flags & DPIF_FP_PROBE) {
1440 request->probe = true;
1441 }
89625d1e 1442 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
6bc60024
BP
1443}
1444
b99d3cee 1445static void
70e5ed6f
JS
1446dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1447 const struct nlattr *key, size_t key_len,
1448 const ovs_u128 *ufid, bool terse,
1449 struct dpif_netlink_flow *request)
96fba48f 1450{
93451a0a 1451 dpif_netlink_flow_init(request);
b99d3cee
BP
1452 request->cmd = OVS_FLOW_CMD_DEL;
1453 request->dp_ifindex = dpif->dp_ifindex;
70e5ed6f
JS
1454 request->key = key;
1455 request->key_len = key_len;
1456 dpif_netlink_flow_init_ufid(request, ufid, terse);
1457}
1458
1459static void
1460dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1461 const struct dpif_flow_del *del,
1462 struct dpif_netlink_flow *request)
1463{
37382aa6
AS
1464 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1465 del->ufid, del->terse, request);
70e5ed6f
JS
1466}
1467
494a7455
JP
/* Bit positions of the flow dump types. */
enum {
    DUMP_OVS_FLOWS_BIT = 0,
    DUMP_NETDEV_FLOWS_BIT = 1,
};

/* Flow dump type masks, one per bit position above; they may be OR'ed. */
enum {
    DUMP_OVS_FLOWS = (1 << DUMP_OVS_FLOWS_BIT),
    DUMP_NETDEV_FLOWS = (1 << DUMP_NETDEV_FLOWS_BIT),
};
1477
93451a0a 1478struct dpif_netlink_flow_dump {
ac64794a
BP
1479 struct dpif_flow_dump up;
1480 struct nl_dump nl_dump;
d2ad7ef1 1481 atomic_int status;
f2280b41
PB
1482 struct netdev_flow_dump **netdev_dumps;
1483 int netdev_dumps_num; /* Number of netdev_flow_dumps */
1484 struct ovs_mutex netdev_lock; /* Guards the following. */
1485 int netdev_current_dump OVS_GUARDED; /* Shared current dump */
494a7455 1486 int type; /* Type of dump */
e723fd32
JS
1487};
1488
93451a0a
AS
1489static struct dpif_netlink_flow_dump *
1490dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
e723fd32 1491{
93451a0a 1492 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
e723fd32
JS
1493}
1494
f2280b41
PB
1495static void
1496start_netdev_dump(const struct dpif *dpif_,
1497 struct dpif_netlink_flow_dump *dump)
1498{
1499 ovs_mutex_init(&dump->netdev_lock);
1500
494a7455 1501 if (!(dump->type & DUMP_NETDEV_FLOWS)) {
f2280b41
PB
1502 dump->netdev_dumps_num = 0;
1503 dump->netdev_dumps = NULL;
1504 return;
1505 }
1506
1507 ovs_mutex_lock(&dump->netdev_lock);
1508 dump->netdev_current_dump = 0;
1509 dump->netdev_dumps
dfaf79dd 1510 = netdev_ports_flow_dump_create(dpif_->dpif_class,
f2280b41
PB
1511 &dump->netdev_dumps_num);
1512 ovs_mutex_unlock(&dump->netdev_lock);
1513}
1514
494a7455
JP
1515static int
1516dpif_netlink_get_dump_type(char *str) {
1517 int type = 0;
1518
1519 if (!str || !strcmp(str, "ovs") || !strcmp(str, "dpctl")) {
1520 type |= DUMP_OVS_FLOWS;
7e8b7199 1521 }
494a7455
JP
1522 if ((netdev_is_flow_api_enabled() && !str)
1523 || (str && (!strcmp(str, "offloaded") || !strcmp(str, "dpctl")))) {
1524 type |= DUMP_NETDEV_FLOWS;
1525 }
1526
1527 return type;
7e8b7199
PB
1528}
1529
ac64794a 1530static struct dpif_flow_dump *
7e8b7199 1531dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
494a7455 1532 char *type)
96fba48f 1533{
93451a0a
AS
1534 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1535 struct dpif_netlink_flow_dump *dump;
1536 struct dpif_netlink_flow request;
37a1300c
BP
1537 struct ofpbuf *buf;
1538
ac64794a
BP
1539 dump = xmalloc(sizeof *dump);
1540 dpif_flow_dump_init(&dump->up, dpif_);
37a1300c 1541
494a7455 1542 dump->type = dpif_netlink_get_dump_type(type);
37a1300c 1543
494a7455 1544 if (dump->type & DUMP_OVS_FLOWS) {
7e8b7199
PB
1545 dpif_netlink_flow_init(&request);
1546 request.cmd = OVS_FLOW_CMD_GET;
1547 request.dp_ifindex = dpif->dp_ifindex;
1548 request.ufid_present = false;
1549 request.ufid_terse = terse;
1550
1551 buf = ofpbuf_new(1024);
1552 dpif_netlink_flow_to_ofpbuf(&request, buf);
1553 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1554 ofpbuf_delete(buf);
1555 }
ac64794a 1556 atomic_init(&dump->status, 0);
64bb477f 1557 dump->up.terse = terse;
30053024 1558
f2280b41
PB
1559 start_netdev_dump(dpif_, dump);
1560
ac64794a 1561 return &dump->up;
704a1e09
BP
1562}
1563
1564static int
93451a0a 1565dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
704a1e09 1566{
93451a0a 1567 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
7e8b7199 1568 unsigned int nl_status = 0;
ac64794a 1569 int dump_status;
96fba48f 1570
494a7455 1571 if (dump->type & DUMP_OVS_FLOWS) {
7e8b7199
PB
1572 nl_status = nl_dump_done(&dump->nl_dump);
1573 }
1574
f2280b41
PB
1575 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1576 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1577
1578 if (err != 0 && err != EOPNOTSUPP) {
1579 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1580 }
1581 }
1582
1583 free(dump->netdev_dumps);
1584 ovs_mutex_destroy(&dump->netdev_lock);
1585
7424fc44
JR
1586 /* No other thread has access to 'dump' at this point. */
1587 atomic_read_relaxed(&dump->status, &dump_status);
ac64794a
BP
1588 free(dump);
1589 return dump_status ? dump_status : nl_status;
1590}
feebdea2 1591
93451a0a 1592struct dpif_netlink_flow_dump_thread {
ac64794a 1593 struct dpif_flow_dump_thread up;
93451a0a
AS
1594 struct dpif_netlink_flow_dump *dump;
1595 struct dpif_netlink_flow flow;
ac64794a
BP
1596 struct dpif_flow_stats stats;
1597 struct ofpbuf nl_flows; /* Always used to store flows. */
1598 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
f2280b41
PB
1599 int netdev_dump_idx; /* This thread current netdev dump index */
1600 bool netdev_done; /* If we are finished dumping netdevs */
1601
1602 /* (Key/Mask/Actions) Buffers for netdev dumping */
1603 struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
1604 struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
1605 struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
ac64794a
BP
1606};
1607
93451a0a
AS
1608static struct dpif_netlink_flow_dump_thread *
1609dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
ac64794a 1610{
93451a0a 1611 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
ac64794a
BP
1612}
1613
1614static struct dpif_flow_dump_thread *
93451a0a 1615dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
ac64794a 1616{
93451a0a
AS
1617 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1618 struct dpif_netlink_flow_dump_thread *thread;
ac64794a
BP
1619
1620 thread = xmalloc(sizeof *thread);
1621 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1622 thread->dump = dump;
1623 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1624 thread->nl_actions = NULL;
f2280b41
PB
1625 thread->netdev_dump_idx = 0;
1626 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
ac64794a
BP
1627
1628 return &thread->up;
1629}
1630
1631static void
93451a0a 1632dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
ac64794a 1633{
93451a0a
AS
1634 struct dpif_netlink_flow_dump_thread *thread
1635 = dpif_netlink_flow_dump_thread_cast(thread_);
ac64794a
BP
1636
1637 ofpbuf_uninit(&thread->nl_flows);
1638 ofpbuf_delete(thread->nl_actions);
1639 free(thread);
1640}
1641
1642static void
7af12bd7 1643dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
7fe98598 1644 const struct dpif_netlink_flow *datapath_flow)
ac64794a 1645{
7fe98598
NR
1646 dpif_flow->key = datapath_flow->key;
1647 dpif_flow->key_len = datapath_flow->key_len;
1648 dpif_flow->mask = datapath_flow->mask;
1649 dpif_flow->mask_len = datapath_flow->mask_len;
1650 dpif_flow->actions = datapath_flow->actions;
1651 dpif_flow->actions_len = datapath_flow->actions_len;
70e5ed6f 1652 dpif_flow->ufid_present = datapath_flow->ufid_present;
ec97c2df 1653 dpif_flow->pmd_id = PMD_ID_NULL;
70e5ed6f
JS
1654 if (datapath_flow->ufid_present) {
1655 dpif_flow->ufid = datapath_flow->ufid;
1656 } else {
1657 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1658 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1659 &dpif_flow->ufid);
1660 }
7fe98598 1661 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
d63ca532
GT
1662 dpif_flow->attrs.offloaded = false;
1663 dpif_flow->attrs.dp_layer = "ovs";
ac64794a
BP
1664}
1665
f2280b41
PB
1666/* The design is such that all threads are working together on the first dump
1667 * to the last, in order (at first they all on dump 0).
1668 * When the first thread finds that the given dump is finished,
1669 * they all move to the next. If two or more threads find the same dump
1670 * is finished at the same time, the first one will advance the shared
1671 * netdev_current_dump and the others will catch up. */
1672static void
1673dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1674{
1675 struct dpif_netlink_flow_dump *dump = thread->dump;
1676
1677 ovs_mutex_lock(&dump->netdev_lock);
1678 /* if we haven't finished (dumped everything) */
1679 if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1680 /* if we are the first to find that current dump is finished
1681 * advance it. */
1682 if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1683 thread->netdev_dump_idx = ++dump->netdev_current_dump;
1684 /* did we just finish the last dump? done. */
1685 if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1686 thread->netdev_done = true;
1687 }
1688 } else {
1689 /* otherwise, we are behind, catch up */
1690 thread->netdev_dump_idx = dump->netdev_current_dump;
1691 }
1692 } else {
1693 /* some other thread finished */
1694 thread->netdev_done = true;
1695 }
1696 ovs_mutex_unlock(&dump->netdev_lock);
1697}
1698
1699static int
1700dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
1701 struct ofpbuf *key_buf,
1702 struct ofpbuf *mask_buf,
1703 struct nlattr *actions,
1704 struct dpif_flow_stats *stats,
d63ca532 1705 struct dpif_flow_attrs *attrs,
f2280b41
PB
1706 ovs_u128 *ufid,
1707 struct dpif_flow *flow,
1708 bool terse OVS_UNUSED)
1709{
1710
1711 struct odp_flow_key_parms odp_parms = {
1712 .flow = &match->flow,
1713 .mask = &match->wc.masks,
1714 .support = {
f9885dc5 1715 .max_vlan_headers = 2,
f2280b41
PB
1716 },
1717 };
1718 size_t offset;
1719
1720 memset(flow, 0, sizeof *flow);
1721
1722 /* Key */
1723 offset = key_buf->size;
1724 flow->key = ofpbuf_tail(key_buf);
1725 odp_flow_key_from_flow(&odp_parms, key_buf);
1726 flow->key_len = key_buf->size - offset;
1727
1728 /* Mask */
1729 offset = mask_buf->size;
1730 flow->mask = ofpbuf_tail(mask_buf);
1731 odp_parms.key_buf = key_buf;
1732 odp_flow_key_from_mask(&odp_parms, mask_buf);
1733 flow->mask_len = mask_buf->size - offset;
1734
1735 /* Actions */
1736 flow->actions = nl_attr_get(actions);
1737 flow->actions_len = nl_attr_get_size(actions);
1738
1739 /* Stats */
1740 memcpy(&flow->stats, stats, sizeof *stats);
1741
1742 /* UFID */
1743 flow->ufid_present = true;
1744 flow->ufid = *ufid;
1745
1746 flow->pmd_id = PMD_ID_NULL;
4742003c 1747
d63ca532 1748 memcpy(&flow->attrs, attrs, sizeof *attrs);
4742003c 1749
f2280b41
PB
1750 return 0;
1751}
1752
ac64794a 1753static int
93451a0a
AS
1754dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1755 struct dpif_flow *flows, int max_flows)
ac64794a 1756{
93451a0a
AS
1757 struct dpif_netlink_flow_dump_thread *thread
1758 = dpif_netlink_flow_dump_thread_cast(thread_);
1759 struct dpif_netlink_flow_dump *dump = thread->dump;
1760 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
ac64794a
BP
1761 int n_flows;
1762
1763 ofpbuf_delete(thread->nl_actions);
1764 thread->nl_actions = NULL;
1765
1766 n_flows = 0;
f2280b41
PB
1767 max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
1768
1769 while (!thread->netdev_done && n_flows < max_flows) {
1770 struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
1771 struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
1772 struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
1773 struct ofpbuf key, mask, act;
1774 struct dpif_flow *f = &flows[n_flows];
1775 int cur = thread->netdev_dump_idx;
1776 struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
1777 struct match match;
1778 struct nlattr *actions;
1779 struct dpif_flow_stats stats;
d63ca532 1780 struct dpif_flow_attrs attrs;
f2280b41
PB
1781 ovs_u128 ufid;
1782 bool has_next;
1783
1784 ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
1785 ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
1786 ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
1787 has_next = netdev_flow_dump_next(netdev_dump, &match,
d63ca532 1788 &actions, &stats, &attrs,
f2280b41
PB
1789 &ufid,
1790 &thread->nl_flows,
1791 &act);
1792 if (has_next) {
1793 dpif_netlink_netdev_match_to_dpif_flow(&match,
1794 &key, &mask,
1795 actions,
1796 &stats,
d63ca532 1797 &attrs,
f2280b41
PB
1798 &ufid,
1799 f,
1800 dump->up.terse);
1801 n_flows++;
1802 } else {
1803 dpif_netlink_advance_netdev_dump(thread);
1804 }
1805 }
1806
494a7455 1807 if (!(dump->type & DUMP_OVS_FLOWS)) {
7e8b7199
PB
1808 return n_flows;
1809 }
1810
ac64794a 1811 while (!n_flows
6fd6ed71 1812 || (n_flows < max_flows && thread->nl_flows.size)) {
7fe98598 1813 struct dpif_netlink_flow datapath_flow;
ac64794a
BP
1814 struct ofpbuf nl_flow;
1815 int error;
1816
1817 /* Try to grab another flow. */
1818 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1819 break;
feebdea2 1820 }
30053024 1821
ac64794a 1822 /* Convert the flow to our output format. */
7fe98598 1823 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
30053024 1824 if (error) {
7424fc44 1825 atomic_store_relaxed(&dump->status, error);
ac64794a 1826 break;
feebdea2 1827 }
30053024 1828
64bb477f
JS
1829 if (dump->up.terse || datapath_flow.actions) {
1830 /* Common case: we don't want actions, or the flow includes
1831 * actions. */
7af12bd7
JS
1832 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1833 &datapath_flow);
ac64794a
BP
1834 } else {
1835 /* Rare case: the flow does not include actions. Retrieve this
1836 * individual flow again to get the actions. */
70e5ed6f 1837 error = dpif_netlink_flow_get(dpif, &datapath_flow,
7fe98598 1838 &datapath_flow, &thread->nl_actions);
30053024
BP
1839 if (error == ENOENT) {
1840 VLOG_DBG("dumped flow disappeared on get");
ac64794a 1841 continue;
30053024 1842 } else if (error) {
10a89ef0
BP
1843 VLOG_WARN("error fetching dumped flow: %s",
1844 ovs_strerror(error));
7424fc44 1845 atomic_store_relaxed(&dump->status, error);
ac64794a 1846 break;
30053024 1847 }
30053024 1848
ac64794a
BP
1849 /* Save this flow. Then exit, because we only have one buffer to
1850 * handle this case. */
7af12bd7
JS
1851 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1852 &datapath_flow);
ac64794a
BP
1853 break;
1854 }
feebdea2 1855 }
ac64794a 1856 return n_flows;
96fba48f
BP
1857}
1858
eabe7c68 1859static void
93451a0a
AS
1860dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1861 struct ofpbuf *buf)
96fba48f 1862{
89625d1e 1863 struct ovs_header *k_exec;
758c456d 1864 size_t key_ofs;
f7cd0081 1865
eabe7c68 1866 ofpbuf_prealloc_tailroom(buf, (64
cf62fa4c 1867 + dp_packet_size(d_exec->packet)
758c456d 1868 + ODP_KEY_METADATA_SIZE
eabe7c68 1869 + d_exec->actions_len));
f7cd0081 1870
df2c07f4 1871 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
69685a88 1872 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
f7cd0081 1873
89625d1e
BP
1874 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1875 k_exec->dp_ifindex = dp_ifindex;
f7cd0081 1876
89625d1e 1877 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
cf62fa4c
PS
1878 dp_packet_data(d_exec->packet),
1879 dp_packet_size(d_exec->packet));
758c456d
JR
1880
1881 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
beb75a40 1882 odp_key_from_dp_packet(buf, d_exec->packet);
758c456d
JR
1883 nl_msg_end_nested(buf, key_ofs);
1884
89625d1e
BP
1885 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1886 d_exec->actions, d_exec->actions_len);
43f9ac0a 1887 if (d_exec->probe) {
2e460098 1888 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
43f9ac0a 1889 }
27130224
AZ
1890 if (d_exec->mtu) {
1891 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1892 }
6bc60024
BP
1893}
1894
0f3358ea
BP
1895/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1896 * Returns the number actually executed (at least 1, if 'n_ops' is
1897 * positive). */
1898static size_t
93451a0a
AS
1899dpif_netlink_operate__(struct dpif_netlink *dpif,
1900 struct dpif_op **ops, size_t n_ops)
6bc60024 1901{
eabe7c68
BP
1902 struct op_auxdata {
1903 struct nl_transaction txn;
72d32ac0 1904
eabe7c68
BP
1905 struct ofpbuf request;
1906 uint64_t request_stub[1024 / 8];
72d32ac0
BP
1907
1908 struct ofpbuf reply;
1909 uint64_t reply_stub[1024 / 8];
8b668ee3 1910 } auxes[OPERATE_MAX_OPS];
eabe7c68 1911
8b668ee3 1912 struct nl_transaction *txnsp[OPERATE_MAX_OPS];
6bc60024
BP
1913 size_t i;
1914
8b668ee3 1915 n_ops = MIN(n_ops, OPERATE_MAX_OPS);
6bc60024 1916 for (i = 0; i < n_ops; i++) {
eabe7c68 1917 struct op_auxdata *aux = &auxes[i];
c2b565b5 1918 struct dpif_op *op = ops[i];
b99d3cee
BP
1919 struct dpif_flow_put *put;
1920 struct dpif_flow_del *del;
6fe09f8c 1921 struct dpif_flow_get *get;
93451a0a 1922 struct dpif_netlink_flow flow;
eabe7c68
BP
1923
1924 ofpbuf_use_stub(&aux->request,
1925 aux->request_stub, sizeof aux->request_stub);
1926 aux->txn.request = &aux->request;
b99d3cee 1927
72d32ac0
BP
1928 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1929 aux->txn.reply = NULL;
1930
b99d3cee
BP
1931 switch (op->type) {
1932 case DPIF_OP_FLOW_PUT:
fa37affa 1933 put = &op->flow_put;
93451a0a 1934 dpif_netlink_init_flow_put(dpif, put, &flow);
6bc60024 1935 if (put->stats) {
eabe7c68 1936 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1937 aux->txn.reply = &aux->reply;
6bc60024 1938 }
93451a0a 1939 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee
BP
1940 break;
1941
1942 case DPIF_OP_FLOW_DEL:
fa37affa 1943 del = &op->flow_del;
93451a0a 1944 dpif_netlink_init_flow_del(dpif, del, &flow);
b99d3cee 1945 if (del->stats) {
eabe7c68 1946 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1947 aux->txn.reply = &aux->reply;
b99d3cee 1948 }
93451a0a 1949 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee 1950 break;
6bc60024 1951
b99d3cee 1952 case DPIF_OP_EXECUTE:
0f3358ea
BP
1953 /* Can't execute a packet that won't fit in a Netlink attribute. */
1954 if (OVS_UNLIKELY(nl_attr_oversized(
fa37affa 1955 dp_packet_size(op->execute.packet)))) {
0f3358ea
BP
1956 /* Report an error immediately if this is the first operation.
1957 * Otherwise the easiest thing to do is to postpone to the next
1958 * call (when this will be the first operation). */
1959 if (i == 0) {
1960 VLOG_ERR_RL(&error_rl,
1961 "dropping oversized %"PRIu32"-byte packet",
fa37affa 1962 dp_packet_size(op->execute.packet));
0f3358ea
BP
1963 op->error = ENOBUFS;
1964 return 1;
1965 }
1966 n_ops = i;
1967 } else {
fa37affa 1968 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
0f3358ea
BP
1969 &aux->request);
1970 }
b99d3cee
BP
1971 break;
1972
6fe09f8c 1973 case DPIF_OP_FLOW_GET:
fa37affa 1974 get = &op->flow_get;
70e5ed6f 1975 dpif_netlink_init_flow_get(dpif, get, &flow);
6fe09f8c 1976 aux->txn.reply = get->buffer;
93451a0a 1977 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
6fe09f8c
JS
1978 break;
1979
b99d3cee 1980 default:
428b2edd 1981 OVS_NOT_REACHED();
6bc60024
BP
1982 }
1983 }
1984
6bc60024 1985 for (i = 0; i < n_ops; i++) {
eabe7c68 1986 txnsp[i] = &auxes[i].txn;
6bc60024 1987 }
a88b4e04 1988 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
6bc60024 1989
6bc60024 1990 for (i = 0; i < n_ops; i++) {
72d32ac0 1991 struct op_auxdata *aux = &auxes[i];
eabe7c68 1992 struct nl_transaction *txn = &auxes[i].txn;
c2b565b5 1993 struct dpif_op *op = ops[i];
b99d3cee
BP
1994 struct dpif_flow_put *put;
1995 struct dpif_flow_del *del;
6fe09f8c 1996 struct dpif_flow_get *get;
6bc60024 1997
b99d3cee 1998 op->error = txn->error;
6bc60024 1999
b99d3cee
BP
2000 switch (op->type) {
2001 case DPIF_OP_FLOW_PUT:
fa37affa 2002 put = &op->flow_put;
cfceb2b5 2003 if (put->stats) {
b99d3cee 2004 if (!op->error) {
93451a0a 2005 struct dpif_netlink_flow reply;
cfceb2b5 2006
93451a0a
AS
2007 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
2008 txn->reply);
cfceb2b5 2009 if (!op->error) {
93451a0a 2010 dpif_netlink_flow_get_stats(&reply, put->stats);
cfceb2b5
BP
2011 }
2012 }
6bc60024 2013 }
b99d3cee
BP
2014 break;
2015
2016 case DPIF_OP_FLOW_DEL:
fa37affa 2017 del = &op->flow_del;
cfceb2b5 2018 if (del->stats) {
b99d3cee 2019 if (!op->error) {
93451a0a 2020 struct dpif_netlink_flow reply;
cfceb2b5 2021
93451a0a
AS
2022 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
2023 txn->reply);
cfceb2b5 2024 if (!op->error) {
93451a0a 2025 dpif_netlink_flow_get_stats(&reply, del->stats);
cfceb2b5
BP
2026 }
2027 }
b99d3cee
BP
2028 }
2029 break;
2030
2031 case DPIF_OP_EXECUTE:
2032 break;
2033
6fe09f8c 2034 case DPIF_OP_FLOW_GET:
fa37affa 2035 get = &op->flow_get;
6fe09f8c 2036 if (!op->error) {
93451a0a 2037 struct dpif_netlink_flow reply;
6fe09f8c 2038
93451a0a 2039 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
6fe09f8c 2040 if (!op->error) {
7af12bd7
JS
2041 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
2042 &reply);
6fe09f8c
JS
2043 }
2044 }
2045 break;
2046
b99d3cee 2047 default:
428b2edd 2048 OVS_NOT_REACHED();
6bc60024
BP
2049 }
2050
72d32ac0
BP
2051 ofpbuf_uninit(&aux->request);
2052 ofpbuf_uninit(&aux->reply);
6bc60024 2053 }
0f3358ea
BP
2054
2055 return n_ops;
eabe7c68
BP
2056}
2057
6c343984
PB
2058static int
2059parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
2060{
2061 struct dpif_flow *dpif_flow = get->flow;
2062 struct match match;
2063 struct nlattr *actions;
2064 struct dpif_flow_stats stats;
d63ca532 2065 struct dpif_flow_attrs attrs;
6c343984
PB
2066 struct ofpbuf buf;
2067 uint64_t act_buf[1024 / 8];
2068 struct odputil_keybuf maskbuf;
2069 struct odputil_keybuf keybuf;
2070 struct odputil_keybuf actbuf;
2071 struct ofpbuf key, mask, act;
2072 int err;
2073
2074 ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
dfaf79dd 2075 err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
d63ca532 2076 &actions, get->ufid, &stats, &attrs, &buf);
6c343984
PB
2077 if (err) {
2078 return err;
2079 }
2080
2081 VLOG_DBG("found flow from netdev, translating to dpif flow");
2082
2083 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2084 ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
2085 ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
2086 dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
d63ca532 2087 &stats, &attrs,
6c343984
PB
2088 (ovs_u128 *) get->ufid,
2089 dpif_flow,
2090 false);
2091 ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
2092 dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
2093 dpif_flow->actions_len = nl_attr_get_size(actions);
2094
2095 return 0;
2096}
2097
8b668ee3
PB
2098static int
2099parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
2100{
2101 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
dfaf79dd 2102 const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
8b668ee3
PB
2103 struct match match;
2104 odp_port_t in_port;
2105 const struct nlattr *nla;
2106 size_t left;
8b668ee3
PB
2107 struct netdev *dev;
2108 struct offload_info info;
2109 ovs_be16 dst_port = 0;
2110 int err;
2111
2112 if (put->flags & DPIF_FP_PROBE) {
2113 return EOPNOTSUPP;
2114 }
2115
2116 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
2117 put->mask_len, &match);
2118 if (err) {
2119 return err;
2120 }
2121
2122 /* When we try to install a dummy flow from a probed feature. */
2123 if (match.flow.dl_type == htons(0x1234)) {
2124 return EOPNOTSUPP;
2125 }
2126
2127 in_port = match.flow.in_port.odp_port;
dfaf79dd 2128 dev = netdev_ports_get(in_port, dpif_class);
8b668ee3
PB
2129 if (!dev) {
2130 return EOPNOTSUPP;
2131 }
2132
00a0a011 2133 /* Get tunnel dst port */
8b668ee3
PB
2134 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
2135 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
2136 const struct netdev_tunnel_config *tnl_cfg;
2137 struct netdev *outdev;
2138 odp_port_t out_port;
2139
8b668ee3 2140 out_port = nl_attr_get_odp_port(nla);
dfaf79dd 2141 outdev = netdev_ports_get(out_port, dpif_class);
8b668ee3
PB
2142 if (!outdev) {
2143 err = EOPNOTSUPP;
2144 goto out;
2145 }
2146 tnl_cfg = netdev_get_tunnel_config(outdev);
2147 if (tnl_cfg && tnl_cfg->dst_port != 0) {
2148 dst_port = tnl_cfg->dst_port;
2149 }
2150 netdev_close(outdev);
2151 }
2152 }
2153
dfaf79dd 2154 info.dpif_class = dpif_class;
8b668ee3
PB
2155 info.tp_dst_port = dst_port;
2156 err = netdev_flow_put(dev, &match,
2157 CONST_CAST(struct nlattr *, put->actions),
2158 put->actions_len,
2159 CONST_CAST(ovs_u128 *, put->ufid),
2160 &info, put->stats);
2161
2162 if (!err) {
2163 if (put->flags & DPIF_FP_MODIFY) {
2164 struct dpif_op *opp;
2165 struct dpif_op op;
2166
2167 op.type = DPIF_OP_FLOW_DEL;
fa37affa
BP
2168 op.flow_del.key = put->key;
2169 op.flow_del.key_len = put->key_len;
2170 op.flow_del.ufid = put->ufid;
2171 op.flow_del.pmd_id = put->pmd_id;
2172 op.flow_del.stats = NULL;
2173 op.flow_del.terse = false;
8b668ee3
PB
2174
2175 opp = &op;
2176 dpif_netlink_operate__(dpif, &opp, 1);
2177 }
2178
2179 VLOG_DBG("added flow");
2180 } else if (err != EEXIST) {
2181 VLOG_ERR_RL(&rl, "failed to offload flow: %s", ovs_strerror(err));
2182 }
2183
2184out:
2185 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2186 /* Modified rule can't be offloaded, try and delete from HW */
2187 int del_err = netdev_flow_del(dev, put->ufid, put->stats);
2188
2189 if (!del_err) {
2190 /* Delete from hw success, so old flow was offloaded.
2191 * Change flags to create the flow in kernel */
2192 put->flags &= ~DPIF_FP_MODIFY;
2193 put->flags |= DPIF_FP_CREATE;
2194 } else if (del_err != ENOENT) {
2195 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2196 ovs_strerror(del_err));
2197 /* stop proccesing the flow in kernel */
2198 err = 0;
2199 }
2200 }
2201
2202 netdev_close(dev);
2203
2204 return err;
2205}
2206
8b668ee3
PB
2207static int
2208try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
eabe7c68 2209{
8b668ee3 2210 int err = EOPNOTSUPP;
9b00386b 2211
8b668ee3
PB
2212 switch (op->type) {
2213 case DPIF_OP_FLOW_PUT: {
fa37affa 2214 struct dpif_flow_put *put = &op->flow_put;
8b668ee3
PB
2215
2216 if (!put->ufid) {
2217 break;
2218 }
3cd99886
RD
2219
2220 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
8b668ee3
PB
2221 err = parse_flow_put(dpif, put);
2222 break;
2223 }
0335a89c 2224 case DPIF_OP_FLOW_DEL: {
fa37affa 2225 struct dpif_flow_del *del = &op->flow_del;
0335a89c
PB
2226
2227 if (!del->ufid) {
2228 break;
2229 }
3cd99886
RD
2230
2231 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
dfaf79dd 2232 err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
0335a89c
PB
2233 del->stats);
2234 break;
2235 }
6c343984 2236 case DPIF_OP_FLOW_GET: {
fa37affa 2237 struct dpif_flow_get *get = &op->flow_get;
6c343984 2238
fa37affa 2239 if (!op->flow_get.ufid) {
6c343984
PB
2240 break;
2241 }
3cd99886
RD
2242
2243 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
6c343984
PB
2244 err = parse_flow_get(dpif, get);
2245 break;
2246 }
8b668ee3
PB
2247 case DPIF_OP_EXECUTE:
2248 default:
2249 break;
2250 }
2251
2252 return err;
2253}
2254
/* Executes all 'n_ops' operations in 'ops' against 'dpif' by calling
 * dpif_netlink_operate__() repeatedly, each time advancing past however many
 * operations that call reports as executed. */
static void
dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
                            size_t n_ops)
{
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
2266
8b668ee3
PB
2267static void
2268dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
2269{
2270 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2271 struct dpif_op *new_ops[OPERATE_MAX_OPS];
2272 int count = 0;
2273 int i = 0;
2274 int err = 0;
2275
2276 if (netdev_is_flow_api_enabled()) {
2277 while (n_ops > 0) {
2278 count = 0;
2279
2280 while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2281 struct dpif_op *op = ops[i++];
2282
2283 err = try_send_to_netdev(dpif, op);
2284 if (err && err != EEXIST) {
2285 new_ops[count++] = op;
2286 } else {
2287 op->error = err;
2288 }
2289
2290 n_ops--;
2291 }
2292
2293 dpif_netlink_operate_chunks(dpif, new_ops, count);
2294 }
2295 } else {
2296 dpif_netlink_operate_chunks(dpif, ops, n_ops);
2297 }
2298}
2299
09cac43f
NR
2300#if _WIN32
2301static void
2302dpif_netlink_handler_uninit(struct dpif_handler *handler)
2303{
2304 vport_delete_sock_pool(handler);
2305}
2306
2307static int
2308dpif_netlink_handler_init(struct dpif_handler *handler)
2309{
2310 return vport_create_sock_pool(handler);
2311}
2312#else
2313
2314static int
2315dpif_netlink_handler_init(struct dpif_handler *handler)
2316{
2317 handler->epoll_fd = epoll_create(10);
2318 return handler->epoll_fd < 0 ? errno : 0;
2319}
2320
2321static void
2322dpif_netlink_handler_uninit(struct dpif_handler *handler)
2323{
2324 close(handler->epoll_fd);
2325}
2326#endif
2327
1579cf67
AW
2328/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2329 * currently in 'dpif' in the kernel, by adding a new set of channels for
2330 * any kernel vport that lacks one and deleting any channels that have no
2331 * backing kernel vports. */
96fba48f 2332static int
93451a0a 2333dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
b90de034 2334 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 2335{
8381a3d3 2336 unsigned long int *keep_channels;
93451a0a 2337 struct dpif_netlink_vport vport;
8381a3d3
BP
2338 size_t keep_channels_nbits;
2339 struct nl_dump dump;
d57695d7
JS
2340 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2341 struct ofpbuf buf;
8381a3d3
BP
2342 int retval = 0;
2343 size_t i;
982b8810 2344
09cac43f
NR
2345 ovs_assert(!WINDOWS || n_handlers <= 1);
2346 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2347
1579cf67
AW
2348 if (dpif->n_handlers != n_handlers) {
2349 destroy_all_channels(dpif);
2350 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2351 for (i = 0; i < n_handlers; i++) {
09cac43f 2352 int error;
1579cf67
AW
2353 struct dpif_handler *handler = &dpif->handlers[i];
2354
09cac43f
NR
2355 error = dpif_netlink_handler_init(handler);
2356 if (error) {
1579cf67
AW
2357 size_t j;
2358
2359 for (j = 0; j < i; j++) {
aa5c0216 2360 struct dpif_handler *tmp = &dpif->handlers[j];
09cac43f 2361 dpif_netlink_handler_uninit(tmp);
1579cf67
AW
2362 }
2363 free(dpif->handlers);
2364 dpif->handlers = NULL;
2365
09cac43f 2366 return error;
1579cf67 2367 }
8381a3d3 2368 }
1579cf67
AW
2369 dpif->n_handlers = n_handlers;
2370 }
2371
2372 for (i = 0; i < n_handlers; i++) {
2373 struct dpif_handler *handler = &dpif->handlers[i];
2374
2375 handler->event_offset = handler->n_events = 0;
17411ecf 2376 }
b063d9f0 2377
8381a3d3
BP
2378 keep_channels_nbits = dpif->uc_array_size;
2379 keep_channels = bitmap_allocate(keep_channels_nbits);
982b8810 2380
d57695d7 2381 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a
AS
2382 dpif_netlink_port_dump_start__(dpif, &dump);
2383 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
8381a3d3 2384 uint32_t port_no = odp_to_u32(vport.port_no);
1579cf67 2385 uint32_t *upcall_pids = NULL;
8381a3d3 2386 int error;
50f80534 2387
1579cf67
AW
2388 if (port_no >= dpif->uc_array_size
2389 || !vport_get_pids(dpif, port_no, &upcall_pids)) {
09cac43f 2390 struct nl_sock **socksp = vport_create_socksp(dpif, &error);
1579cf67
AW
2391
2392 if (!socksp) {
2393 goto error;
2394 }
2395
2396 error = vport_add_channels(dpif, vport.port_no, socksp);
b063d9f0 2397 if (error) {
1579cf67 2398 VLOG_INFO("%s: could not add channels for port %s",
9b00386b 2399 dpif_name(&dpif->dpif), vport.name);
09cac43f 2400 vport_del_socksp(dpif, socksp);
8381a3d3
BP
2401 retval = error;
2402 goto error;
982b8810 2403 }
1579cf67
AW
2404 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
2405 free(socksp);
8381a3d3 2406 }
50f80534 2407
8381a3d3 2408 /* Configure the vport to deliver misses to 'sock'. */
1579cf67
AW
2409 if (vport.upcall_pids[0] == 0
2410 || vport.n_upcall_pids != dpif->n_handlers
2411 || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
2412 *upcall_pids)) {
93451a0a 2413 struct dpif_netlink_vport vport_request;
989fd548 2414
93451a0a 2415 dpif_netlink_vport_init(&vport_request);
989fd548
JP
2416 vport_request.cmd = OVS_VPORT_CMD_SET;
2417 vport_request.dp_ifindex = dpif->dp_ifindex;
8381a3d3 2418 vport_request.port_no = vport.port_no;
1579cf67
AW
2419 vport_request.n_upcall_pids = dpif->n_handlers;
2420 vport_request.upcall_pids = upcall_pids;
93451a0a 2421 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67 2422 if (error) {
989fd548
JP
2423 VLOG_WARN_RL(&error_rl,
2424 "%s: failed to set upcall pid on port: %s",
10a89ef0 2425 dpif_name(&dpif->dpif), ovs_strerror(error));
989fd548 2426
8381a3d3
BP
2427 if (error != ENODEV && error != ENOENT) {
2428 retval = error;
989fd548 2429 } else {
8381a3d3
BP
2430 /* The vport isn't really there, even though the dump says
2431 * it is. Probably we just hit a race after a port
2432 * disappeared. */
989fd548 2433 }
8381a3d3 2434 goto error;
50f80534 2435 }
8381a3d3 2436 }
14b4d2f9 2437
8381a3d3
BP
2438 if (port_no < keep_channels_nbits) {
2439 bitmap_set1(keep_channels, port_no);
2440 }
1579cf67 2441 free(upcall_pids);
8381a3d3
BP
2442 continue;
2443
2444 error:
1579cf67
AW
2445 free(upcall_pids);
2446 vport_del_channels(dpif, vport.port_no);
982b8810 2447 }
8381a3d3 2448 nl_dump_done(&dump);
d57695d7 2449 ofpbuf_uninit(&buf);
b063d9f0 2450
8381a3d3
BP
2451 /* Discard any saved channels that we didn't reuse. */
2452 for (i = 0; i < keep_channels_nbits; i++) {
2453 if (!bitmap_is_set(keep_channels, i)) {
1579cf67 2454 vport_del_channels(dpif, u32_to_odp(i));
8381a3d3
BP
2455 }
2456 }
2457 free(keep_channels);
2458
2459 return retval;
2460}
2461
2462static int
93451a0a 2463dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
b90de034 2464 OVS_REQ_WRLOCK(dpif->upcall_lock)
8381a3d3 2465{
1579cf67 2466 if ((dpif->handlers != NULL) == enable) {
8381a3d3
BP
2467 return 0;
2468 } else if (!enable) {
1579cf67 2469 destroy_all_channels(dpif);
8381a3d3
BP
2470 return 0;
2471 } else {
93451a0a 2472 return dpif_netlink_refresh_channels(dpif, 1);
8381a3d3 2473 }
96fba48f
BP
2474}
2475
9fafa796 2476static int
93451a0a 2477dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
9fafa796 2478{
93451a0a 2479 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2480 int error;
2481
1579cf67 2482 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2483 error = dpif_netlink_recv_set__(dpif, enable);
1579cf67 2484 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2485
2486 return error;
2487}
2488
1954e6bb 2489static int
93451a0a 2490dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
1954e6bb 2491{
93451a0a 2492 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67
AW
2493 int error = 0;
2494
09cac43f
NR
2495#ifdef _WIN32
2496 /* Multiple upcall handlers will be supported once kernel datapath supports
2497 * it. */
2498 if (n_handlers > 1) {
2499 return error;
2500 }
2501#endif
2502
1579cf67
AW
2503 fat_rwlock_wrlock(&dpif->upcall_lock);
2504 if (dpif->handlers) {
93451a0a 2505 error = dpif_netlink_refresh_channels(dpif, n_handlers);
1579cf67
AW
2506 }
2507 fat_rwlock_unlock(&dpif->upcall_lock);
2508
2509 return error;
1954e6bb
AW
2510}
2511
aae51f53 2512static int
93451a0a 2513dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
aae51f53
BP
2514 uint32_t queue_id, uint32_t *priority)
2515{
2516 if (queue_id < 0xf000) {
17ee3c1f 2517 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
aae51f53
BP
2518 return 0;
2519 } else {
2520 return EINVAL;
2521 }
2522}
2523
96fba48f 2524static int
7af12bd7
JS
2525parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
2526 struct dpif_upcall *upcall, int *dp_ifindex)
856081f6 2527{
df2c07f4 2528 static const struct nl_policy ovs_packet_policy[] = {
856081f6 2529 /* Always present. */
df2c07f4 2530 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
856081f6 2531 .min_len = ETH_HEADER_LEN },
df2c07f4 2532 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
856081f6 2533
df2c07f4 2534 /* OVS_PACKET_CMD_ACTION only. */
e995e3df 2535 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
8b7ea2d4 2536 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
7321bda3 2537 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
27130224 2538 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
856081f6
BP
2539 };
2540
0a2869d5
BP
2541 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2542 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2543 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2544 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
982b8810 2545
0a2869d5 2546 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
df2c07f4
JP
2547 if (!nlmsg || !genl || !ovs_header
2548 || nlmsg->nlmsg_type != ovs_packet_family
2549 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2550 ARRAY_SIZE(ovs_packet_policy))) {
856081f6
BP
2551 return EINVAL;
2552 }
2553
0a2869d5
BP
2554 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2555 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2556 : -1);
aaff4b55
BP
2557 if (type < 0) {
2558 return EINVAL;
2559 }
82272ede 2560
877c9270 2561 /* (Re)set ALL fields of '*upcall' on successful return. */
aaff4b55 2562 upcall->type = type;
ebc56baa
BP
2563 upcall->key = CONST_CAST(struct nlattr *,
2564 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
df2c07f4 2565 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
7af12bd7 2566 dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
e995e3df 2567 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
8b7ea2d4 2568 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
7321bda3 2569 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
27130224 2570 upcall->mru = a[OVS_PACKET_ATTR_MRU];
da546e07
JR
2571
2572 /* Allow overwriting the netlink attribute header without reallocating. */
cf62fa4c 2573 dp_packet_use_stub(&upcall->packet,
da546e07
JR
2574 CONST_CAST(struct nlattr *,
2575 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2576 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2577 sizeof(struct nlattr));
cf62fa4c
PS
2578 dp_packet_set_data(&upcall->packet,
2579 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2580 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
da546e07 2581
2482b0b0
JS
2582 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2583 /* Ethernet frame */
2584 upcall->packet.packet_type = htonl(PT_ETH);
2585 } else {
2586 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2587 ovs_be16 ethertype = 0;
2588 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2589 upcall->key_len,
2590 OVS_KEY_ATTR_ETHERTYPE);
2591 if (et_nla) {
2592 ethertype = nl_attr_get_be16(et_nla);
2593 }
2594 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2595 ntohs(ethertype));
2596 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2597 }
2598
df2c07f4 2599 *dp_ifindex = ovs_header->dp_ifindex;
982b8810 2600
856081f6
BP
2601 return 0;
2602}
2603
09cac43f
NR
2604#ifdef _WIN32
2605#define PACKET_RECV_BATCH_SIZE 50
2606static int
2607dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2608 struct dpif_upcall *upcall, struct ofpbuf *buf)
2609 OVS_REQ_RDLOCK(dpif->upcall_lock)
2610{
2611 struct dpif_handler *handler;
2612 int read_tries = 0;
2613 struct dpif_windows_vport_sock *sock_pool;
2614 uint32_t i;
2615
2616 if (!dpif->handlers) {
2617 return EAGAIN;
2618 }
2619
2620 /* Only one handler is supported currently. */
2621 if (handler_id >= 1) {
2622 return EAGAIN;
2623 }
2624
2625 if (handler_id >= dpif->n_handlers) {
2626 return EAGAIN;
2627 }
2628
2629 handler = &dpif->handlers[handler_id];
2630 sock_pool = handler->vport_sock_pool;
2631
2632 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2633 for (;;) {
2634 int dp_ifindex;
2635 int error;
2636
2637 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2638 return EAGAIN;
2639 }
2640
a86bd14e 2641 error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
09cac43f
NR
2642 if (error == ENOBUFS) {
2643 /* ENOBUFS typically means that we've received so many
2644 * packets that the buffer overflowed. Try again
2645 * immediately because there's almost certainly a packet
2646 * waiting for us. */
2647 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2648 continue;
2649 }
2650
2651 /* XXX: ch->last_poll = time_msec(); */
2652 if (error) {
2653 if (error == EAGAIN) {
2654 break;
2655 }
2656 return error;
2657 }
2658
27edb4aa 2659 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
09cac43f
NR
2660 if (!error && dp_ifindex == dpif->dp_ifindex) {
2661 return 0;
2662 } else if (error) {
2663 return error;
2664 }
2665 }
2666 }
2667
2668 return EAGAIN;
2669}
2670#else
856081f6 2671static int
93451a0a
AS
2672dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2673 struct dpif_upcall *upcall, struct ofpbuf *buf)
b90de034 2674 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2675{
1579cf67 2676 struct dpif_handler *handler;
17411ecf 2677 int read_tries = 0;
96fba48f 2678
1579cf67
AW
2679 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2680 return EAGAIN;
982b8810
BP
2681 }
2682
1579cf67
AW
2683 handler = &dpif->handlers[handler_id];
2684 if (handler->event_offset >= handler->n_events) {
8522ba09 2685 int retval;
989fd548 2686
1579cf67 2687 handler->event_offset = handler->n_events = 0;
f6d1465c 2688
8522ba09 2689 do {
1579cf67 2690 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
989fd548 2691 dpif->uc_array_size, 0);
8522ba09 2692 } while (retval < 0 && errno == EINTR);
09cac43f 2693
8522ba09
BP
2694 if (retval < 0) {
2695 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
10a89ef0 2696 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
989fd548 2697 } else if (retval > 0) {
1579cf67 2698 handler->n_events = retval;
8522ba09 2699 }
8522ba09
BP
2700 }
2701
1579cf67
AW
2702 while (handler->event_offset < handler->n_events) {
2703 int idx = handler->epoll_events[handler->event_offset].data.u32;
2704 struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
8522ba09 2705
1579cf67 2706 handler->event_offset++;
17411ecf 2707
f6d1465c 2708 for (;;) {
8522ba09 2709 int dp_ifindex;
f6d1465c 2710 int error;
17411ecf 2711
f6d1465c
BP
2712 if (++read_tries > 50) {
2713 return EAGAIN;
2714 }
17411ecf 2715
a86bd14e 2716 error = nl_sock_recv(ch->sock, buf, NULL, false);
14b4d2f9
BP
2717 if (error == ENOBUFS) {
2718 /* ENOBUFS typically means that we've received so many
2719 * packets that the buffer overflowed. Try again
2720 * immediately because there's almost certainly a packet
2721 * waiting for us. */
9b00386b 2722 report_loss(dpif, ch, idx, handler_id);
14b4d2f9
BP
2723 continue;
2724 }
2725
2726 ch->last_poll = time_msec();
72d32ac0 2727 if (error) {
72d32ac0
BP
2728 if (error == EAGAIN) {
2729 break;
2730 }
f6d1465c
BP
2731 return error;
2732 }
17411ecf 2733
7af12bd7 2734 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
a12b3ead 2735 if (!error && dp_ifindex == dpif->dp_ifindex) {
f6d1465c 2736 return 0;
989fd548 2737 } else if (error) {
f6d1465c 2738 return error;
17411ecf 2739 }
982b8810 2740 }
50f80534 2741 }
982b8810
BP
2742
2743 return EAGAIN;
96fba48f 2744}
09cac43f 2745#endif
96fba48f 2746
9fafa796 2747static int
93451a0a
AS
2748dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2749 struct dpif_upcall *upcall, struct ofpbuf *buf)
9fafa796 2750{
93451a0a 2751 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2752 int error;
2753
1579cf67 2754 fat_rwlock_rdlock(&dpif->upcall_lock);
09cac43f
NR
2755#ifdef _WIN32
2756 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2757#else
93451a0a 2758 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
09cac43f 2759#endif
1579cf67 2760 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2761
2762 return error;
2763}
2764
96fba48f 2765static void
93451a0a 2766dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
b90de034 2767 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2768{
93451a0a 2769#ifdef _WIN32
09cac43f
NR
2770 uint32_t i;
2771 struct dpif_windows_vport_sock *sock_pool =
2772 dpif->handlers[handler_id].vport_sock_pool;
2773
2774 /* Only one handler is supported currently. */
2775 if (handler_id >= 1) {
2776 return;
2777 }
2778
2779 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2780 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2781 }
93451a0a 2782#else
1579cf67
AW
2783 if (dpif->handlers && handler_id < dpif->n_handlers) {
2784 struct dpif_handler *handler = &dpif->handlers[handler_id];
2785
2786 poll_fd_wait(handler->epoll_fd, POLLIN);
17411ecf 2787 }
93451a0a 2788#endif
96fba48f
BP
2789}
2790
1ba530f4 2791static void
93451a0a 2792dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
1ba530f4 2793{
93451a0a 2794 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
17411ecf 2795
b90de034 2796 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 2797 dpif_netlink_recv_wait__(dpif, handler_id);
b90de034
AW
2798 fat_rwlock_unlock(&dpif->upcall_lock);
2799}
2800
2801static void
93451a0a 2802dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
b90de034
AW
2803 OVS_REQ_WRLOCK(dpif->upcall_lock)
2804{
1579cf67
AW
2805 if (dpif->handlers) {
2806 size_t i, j;
2807
2808 for (i = 0; i < dpif->uc_array_size; i++ ) {
2809 if (!dpif->handlers[0].channels[i].sock) {
2810 continue;
2811 }
1ba530f4 2812
1579cf67
AW
2813 for (j = 0; j < dpif->n_handlers; j++) {
2814 nl_sock_drain(dpif->handlers[j].channels[i].sock);
9fafa796 2815 }
989fd548 2816 }
1ba530f4 2817 }
b90de034
AW
2818}
2819
2820static void
93451a0a 2821dpif_netlink_recv_purge(struct dpif *dpif_)
b90de034 2822{
93451a0a 2823 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
2824
2825 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2826 dpif_netlink_recv_purge__(dpif);
1579cf67 2827 fat_rwlock_unlock(&dpif->upcall_lock);
1ba530f4
BP
2828}
2829
b5cbbcf6
AZ
/* Returns the datapath version as a string obtained from xstrdup(), or NULL
 * if it cannot be determined.
 *
 * On Linux the version is the first line of LINUX_DATAPATH_VERSION_FILE, with
 * the trailing newline removed.  On other platforms NULL is always
 * returned. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            /* Cut the string at the first newline, if there is one. */
            line[strcspn(line, "\n")] = '\0';
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2859
c11c9f4a
DDP
2860struct dpif_netlink_ct_dump_state {
2861 struct ct_dpif_dump_state up;
2862 struct nl_ct_dump_state *nl_ct_dump;
2863};
2864
2865static int
2866dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2867 struct ct_dpif_dump_state **dump_,
ded30c74 2868 const uint16_t *zone, int *ptot_bkts)
c11c9f4a
DDP
2869{
2870 struct dpif_netlink_ct_dump_state *dump;
2871 int err;
2872
2873 dump = xzalloc(sizeof *dump);
ded30c74 2874 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
c11c9f4a
DDP
2875 if (err) {
2876 free(dump);
2877 return err;
2878 }
2879
2880 *dump_ = &dump->up;
2881
2882 return 0;
2883}
2884
2885static int
2886dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2887 struct ct_dpif_dump_state *dump_,
2888 struct ct_dpif_entry *entry)
2889{
2890 struct dpif_netlink_ct_dump_state *dump;
2891
2892 INIT_CONTAINER(dump, dump_, up);
2893
2894 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2895}
2896
2897static int
2898dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2899 struct ct_dpif_dump_state *dump_)
2900{
2901 struct dpif_netlink_ct_dump_state *dump;
2902 int err;
2903
2904 INIT_CONTAINER(dump, dump_, up);
2905
2906 err = nl_ct_dump_done(dump->nl_ct_dump);
2907 free(dump);
2908 return err;
2909}
15eabc97
DDP
2910
2911static int
817a7657
YHW
2912dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2913 const struct ct_dpif_tuple *tuple)
15eabc97 2914{
817a7657
YHW
2915 if (tuple) {
2916 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2917 } else if (zone) {
15eabc97
DDP
2918 return nl_ct_flush_zone(*zone);
2919 } else {
2920 return nl_ct_flush();
2921 }
2922}
c11c9f4a 2923
906ff9d2
YHW
2924static int
2925dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2926 const uint32_t *default_limits,
2927 const struct ovs_list *zone_limits)
2928{
2929 struct ovs_zone_limit req_zone_limit;
2930
2931 if (ovs_ct_limit_family < 0) {
2932 return EOPNOTSUPP;
2933 }
2934
2935 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2936 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2937 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2938 OVS_CT_LIMIT_VERSION);
2939
2940 struct ovs_header *ovs_header;
2941 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2942 ovs_header->dp_ifindex = 0;
2943
2944 size_t opt_offset;
2945 opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2946 if (default_limits) {
2947 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2948 req_zone_limit.limit = *default_limits;
2949 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2950 }
2951
2952 if (!ovs_list_is_empty(zone_limits)) {
2953 struct ct_dpif_zone_limit *zone_limit;
2954
2955 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2956 req_zone_limit.zone_id = zone_limit->zone;
2957 req_zone_limit.limit = zone_limit->limit;
2958 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2959 }
2960 }
2961 nl_msg_end_nested(request, opt_offset);
2962
2963 int err = nl_transact(NETLINK_GENERIC, request, NULL);
2964 ofpbuf_uninit(request);
2965 return err;
2966}
2967
2968static int
2969dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
2970 uint32_t *default_limit,
2971 struct ovs_list *zone_limits)
2972{
2973 static const struct nl_policy ovs_ct_limit_policy[] = {
2974 [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
2975 .optional = true },
2976 };
2977
2978 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2979 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2980 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2981 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2982
2983 struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];
2984
2985 if (!nlmsg || !genl || !ovs_header
2986 || nlmsg->nlmsg_type != ovs_ct_limit_family
2987 || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
2988 ARRAY_SIZE(ovs_ct_limit_policy))) {
2989 return EINVAL;
2990 }
2991
2992
2993 if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
2994 return EINVAL;
2995 }
2996
2997 int rem = NLA_ALIGN(
2998 nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
2999 const struct ovs_zone_limit *zone_limit =
3000 nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
3001
3002 while (rem >= sizeof *zone_limit) {
3003 if (zone_limit->zone_id == OVS_ZONE_LIMIT_DEFAULT_ZONE) {
3004 *default_limit = zone_limit->limit;
3005 } else if (zone_limit->zone_id < OVS_ZONE_LIMIT_DEFAULT_ZONE ||
3006 zone_limit->zone_id > UINT16_MAX) {
3007 } else {
3008 ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
3009 zone_limit->limit, zone_limit->count);
3010 }
3011 rem -= NLA_ALIGN(sizeof *zone_limit);
3012 zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
3013 (unsigned char *) zone_limit + NLA_ALIGN(sizeof *zone_limit));
3014 }
3015 return 0;
3016}
3017
3018static int
3019dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
3020 uint32_t *default_limit,
3021 const struct ovs_list *zone_limits_request,
3022 struct ovs_list *zone_limits_reply)
3023{
3024 if (ovs_ct_limit_family < 0) {
3025 return EOPNOTSUPP;
3026 }
3027
3028 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3029 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3030 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
3031 OVS_CT_LIMIT_VERSION);
3032
3033 struct ovs_header *ovs_header;
3034 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3035 ovs_header->dp_ifindex = 0;
3036
3037 if (!ovs_list_is_empty(zone_limits_request)) {
3038 size_t opt_offset = nl_msg_start_nested(request,
3039 OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3040
3041 struct ovs_zone_limit req_zone_limit;
3042 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
3043 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3044
3045 struct ct_dpif_zone_limit *zone_limit;
3046 LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
3047 req_zone_limit.zone_id = zone_limit->zone;
3048 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3049 }
3050
3051 nl_msg_end_nested(request, opt_offset);
3052 }
3053
3054 struct ofpbuf *reply;
3055 int err = nl_transact(NETLINK_GENERIC, request, &reply);
3056 if (err) {
3057 goto out;
3058 }
3059
3060 err = dpif_netlink_zone_limits_from_ofpbuf(reply, default_limit,
3061 zone_limits_reply);
3062
3063out:
3064 ofpbuf_uninit(request);
3065 ofpbuf_uninit(reply);
3066 return err;
3067}
3068
3069static int
3070dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
3071 const struct ovs_list *zone_limits)
3072{
3073 if (ovs_ct_limit_family < 0) {
3074 return EOPNOTSUPP;
3075 }
3076
3077 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
3078 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
3079 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
3080 OVS_CT_LIMIT_VERSION);
3081
3082 struct ovs_header *ovs_header;
3083 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
3084 ovs_header->dp_ifindex = 0;
3085
3086 if (!ovs_list_is_empty(zone_limits)) {
3087 size_t opt_offset =
3088 nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
3089
3090 struct ct_dpif_zone_limit *zone_limit;
3091 LIST_FOR_EACH (zone_limit, node, zone_limits) {
3092 struct ovs_zone_limit req_zone_limit;
3093 req_zone_limit.zone_id = zone_limit->zone;
3094 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
3095 }
3096 nl_msg_end_nested(request, opt_offset);
3097 }
3098
3099 int err = nl_transact(NETLINK_GENERIC, request, NULL);
3100
3101 ofpbuf_uninit(request);
3102 return err;
3103}
5dddf960
JR
3104\f
3105/* Meters */
80738e5f
AZ
3106
/* OpenFlow meter flags that this datapath accepts.  A meter configuration
 * with any other flag set is rejected. */
#define DP_SUPPORTED_METER_FLAGS_MASK \
    (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)

/* Linux gained meter support in 4.15.  Some 4.15, 4.16 and 4.17 kernels have
 * a bug that never sets the id when a meter is created, so every meter
 * effectively ends up with id zero.  probe_broken_meters() detects that
 * condition so that meters can be disabled on those kernels. */
static bool probe_broken_meters(struct dpif *);
3116
5dddf960 3117static void
80738e5f
AZ
3118dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
3119 void *stub, size_t size, uint32_t command)
3120{
3121 ofpbuf_use_stub(buf, stub, size);
3122
3123 nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
3124 command, OVS_METER_VERSION);
3125
3126 struct ovs_header *ovs_header;
3127 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3128 ovs_header->dp_ifindex = dpif->dp_ifindex;
3129}
3130
3131/* Execute meter 'request' in the kernel datapath. If the command
3132 * fails, returns a positive errno value. Otherwise, stores the reply
3133 * in '*replyp', parses the policy according to 'reply_policy' into the
3134 * array of Netlink attribute in 'a', and returns 0. On success, the
3135 * caller is responsible for calling ofpbuf_delete() on '*replyp'
3136 * ('replyp' will contain pointers into 'a'). */
3137static int
3138dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3139 const struct nl_policy *reply_policy,
3140 struct nlattr **a, size_t size_a)
3141{
3142 int error = nl_transact(NETLINK_GENERIC, request, replyp);
3143 ofpbuf_uninit(request);
3144
3145 if (error) {
3146 return error;
3147 }
3148
3149 struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3150 struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3151 struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3152 sizeof *ovs_header);
3153 if (!nlmsg || !genl || !ovs_header
3154 || nlmsg->nlmsg_type != ovs_meter_family
3155 || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3156 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3157 VLOG_DBG_RL(&rl,
3158 "Kernel module response to meter tranaction is invalid");
3159 return EINVAL;
3160 }
3161 return 0;
3162}
3163
3164static void
3165dpif_netlink_meter_get_features(const struct dpif *dpif_,
5dddf960
JR
3166 struct ofputil_meter_features *features)
3167{
92d0d515
JP
3168 if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
3169 features = NULL;
3170 return;
3171 }
3172
80738e5f
AZ
3173 struct ofpbuf buf, *msg;
3174 uint64_t stub[1024 / 8];
3175
3176 static const struct nl_policy ovs_meter_features_policy[] = {
3177 [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
3178 [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
3179 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3180 };
3181 struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];
3182
3183 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3184 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
3185 OVS_METER_CMD_FEATURES);
3186 if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
3187 ARRAY_SIZE(ovs_meter_features_policy))) {
3188 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3189 VLOG_INFO_RL(&rl,
3190 "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
3191 return;
3192 }
3193
3194 features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
3195 features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);
3196
3197 /* Bands is a nested attribute of zero or more nested
3198 * band attributes. */
3199 if (a[OVS_METER_ATTR_BANDS]) {
3200 const struct nlattr *nla;
3201 size_t left;
3202
3203 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3204 const struct nlattr *band_nla;
3205 size_t band_left;
3206
3207 NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
3208 if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
3209 if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
3210 switch (nl_attr_get_u32(band_nla)) {
3211 case OVS_METER_BAND_TYPE_DROP:
3212 features->band_types |= 1 << OFPMBT13_DROP;
3213 break;
3214 }
3215 }
3216 }
3217 }
3218 }
3219 }
3220 features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
3221
3222 ofpbuf_delete(msg);
5dddf960
JR
3223}
3224
3225static int
8101f03f 3226dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
80738e5f 3227 struct ofputil_meter_config *config)
5dddf960 3228{
92d0d515
JP
3229 if (probe_broken_meters(dpif_)) {
3230 return ENOMEM;
3231 }
3232
80738e5f
AZ
3233 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3234 struct ofpbuf buf, *msg;
3235 uint64_t stub[1024 / 8];
3236
3237 static const struct nl_policy ovs_meter_set_response_policy[] = {
3238 [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
3239 };
3240 struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];
3241
3242 if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
3243 return EBADF; /* Unsupported flags set */
3244 }
3245
3246 for (size_t i = 0; i < config->n_bands; i++) {
3247 switch (config->bands[i].type) {
3248 case OFPMBT13_DROP:
3249 break;
3250 default:
3251 return ENODEV; /* Unsupported band type */
3252 }
3253 }
3254
3255 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
3256
8101f03f
JP
3257 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3258
80738e5f
AZ
3259 if (config->flags & OFPMF13_KBPS) {
3260 nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
3261 }
3262
3263 size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
3264 /* Bands */
3265 for (size_t i = 0; i < config->n_bands; ++i) {
3266 struct ofputil_meter_band * band = &config->bands[i];
3267 uint32_t band_type;
3268
3269 size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);
3270
3271 switch (band->type) {
3272 case OFPMBT13_DROP:
3273 band_type = OVS_METER_BAND_TYPE_DROP;
3274 break;
3275 default:
3276 band_type = OVS_METER_BAND_TYPE_UNSPEC;
3277 }
3278 nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
3279 nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
3280 nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
3281 config->flags & OFPMF13_BURST ?
3282 band->burst_size : band->rate);
3283 nl_msg_end_nested(&buf, band_offset);
3284 }
3285 nl_msg_end_nested(&buf, bands_offset);
3286
3287 int error = dpif_netlink_meter_transact(&buf, &msg,
3288 ovs_meter_set_response_policy, a,
3289 ARRAY_SIZE(ovs_meter_set_response_policy));
3290 if (error) {
3291 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3292 VLOG_INFO_RL(&rl,
3293 "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
3294 return error;
3295 }
3296
8101f03f
JP
3297 if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
3298 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3299 VLOG_INFO_RL(&rl,
3300 "Kernel returned a different meter id than requested");
3301 }
80738e5f
AZ
3302 ofpbuf_delete(msg);
3303 return 0;
5dddf960
JR
3304}
3305
80738e5f
AZ
3306/* Retrieve statistics and/or delete meter 'meter_id'. Statistics are
3307 * stored in 'stats', if it is not null. If 'command' is
3308 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3309 * retrieved. If 'command' is OVS_METER_CMD_GET, then statistics are
3310 * simply retrieved. */
5dddf960 3311static int
80738e5f
AZ
3312dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3313 ofproto_meter_id meter_id,
3314 struct ofputil_meter_stats *stats,
3315 uint16_t max_bands,
3316 enum ovs_meter_cmd command)
5dddf960 3317{
80738e5f
AZ
3318 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3319 struct ofpbuf buf, *msg;
3320 uint64_t stub[1024 / 8];
3321
3322 static const struct nl_policy ovs_meter_stats_policy[] = {
3323 [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3324 [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3325 .optional = true},
3326 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3327 };
3328 struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3329
3330 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3331
3332 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3333
3334 int error = dpif_netlink_meter_transact(&buf, &msg,
3335 ovs_meter_stats_policy, a,
3336 ARRAY_SIZE(ovs_meter_stats_policy));
3337 if (error) {
3338 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3339 VLOG_INFO_RL(&rl, "dpif_netlink_meter_transact %s failed",
3340 command == OVS_METER_CMD_GET ? "get" : "del");
3341 return error;
3342 }
3343
3344 if (stats
3345 && a[OVS_METER_ATTR_ID]
3346 && a[OVS_METER_ATTR_STATS]
3347 && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) == meter_id.uint32) {
3348 /* return stats */
3349 const struct ovs_flow_stats *stat;
3350 const struct nlattr *nla;
3351 size_t left;
3352
3353 stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3354 stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3355 stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3356
3357 if (a[OVS_METER_ATTR_BANDS]) {
3358 size_t n_bands = 0;
3359 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3360 const struct nlattr *band_nla;
3361 band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3362 if (band_nla && nl_attr_get_size(band_nla) \
3363 == sizeof(struct ovs_flow_stats)) {
3364 stat = nl_attr_get(band_nla);
3365
3366 if (n_bands < max_bands) {
3367 stats->bands[n_bands].packet_count
3368 = get_32aligned_u64(&stat->n_packets);
3369 stats->bands[n_bands].byte_count
3370 = get_32aligned_u64(&stat->n_bytes);
3371 ++n_bands;
3372 }
3373 } else {
3374 stats->bands[n_bands].packet_count = 0;
3375 stats->bands[n_bands].byte_count = 0;
3376 ++n_bands;
3377 }
3378 }
3379 stats->n_bands = n_bands;
3380 } else {
3381 /* For a non-existent meter, return 0 stats. */
3382 stats->n_bands = 0;
3383 }
3384 }
3385
3386 ofpbuf_delete(msg);
3387 return error;
5dddf960
JR
3388}
3389
3390static int
80738e5f
AZ
3391dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3392 struct ofputil_meter_stats *stats, uint16_t max_bands)
5dddf960 3393{
80738e5f
AZ
3394 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3395 OVS_METER_CMD_GET);
3396}
3397
3398static int
3399dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3400 struct ofputil_meter_stats *stats, uint16_t max_bands)
3401{
3402 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3403 OVS_METER_CMD_DEL);
5dddf960
JR
3404}
3405
92d0d515
JP
3406static bool
3407probe_broken_meters__(struct dpif *dpif)
3408{
3409 /* This test is destructive if a probe occurs while ovs-vswitchd is
3410 * running (e.g., an ovs-dpctl meter command is called), so choose a
3411 * random high meter id to make this less likely to occur. */
3412 ofproto_meter_id id1 = { 54545401 };
3413 ofproto_meter_id id2 = { 54545402 };
3414 struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
3415 struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
3416 struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};
3417
3418 /* Try adding two meters and make sure that they both come back with
3419 * the proper meter id. */
3420 dpif_netlink_meter_set(dpif, id1, &config1);
3421 dpif_netlink_meter_set(dpif, id2, &config2);
3422
3423 if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
3424 || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
3425 VLOG_INFO("The kernel module has a broken meter implementation.");
3426 return true;
3427 }
3428
3429 dpif_netlink_meter_del(dpif, id1, NULL, 0);
3430 dpif_netlink_meter_del(dpif, id2, NULL, 0);
3431
3432 return false;
3433}
3434
3435static bool
3436probe_broken_meters(struct dpif *dpif)
3437{
3438 /* This is a once-only test because currently OVS only has at most a single
3439 * Netlink capable datapath on any given platform. */
3440 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3441
3442 static bool broken_meters = false;
3443 if (ovsthread_once_start(&once)) {
3444 broken_meters = probe_broken_meters__(dpif);
3445 ovsthread_once_done(&once);
3446 }
3447 return broken_meters;
3448}
5dddf960 3449\f
93451a0a 3450const struct dpif_class dpif_netlink_class = {
1a6f1e2a 3451 "system",
c8973eb6 3452 NULL, /* init */
93451a0a 3453 dpif_netlink_enumerate,
0aeaabc8 3454 NULL,
93451a0a
AS
3455 dpif_netlink_open,
3456 dpif_netlink_close,
3457 dpif_netlink_destroy,
3458 dpif_netlink_run,
e4516b20 3459 NULL, /* wait */
93451a0a
AS
3460 dpif_netlink_get_stats,
3461 dpif_netlink_port_add,
3462 dpif_netlink_port_del,
91364d18 3463 NULL, /* port_set_config */
93451a0a
AS
3464 dpif_netlink_port_query_by_number,
3465 dpif_netlink_port_query_by_name,
3466 dpif_netlink_port_get_pid,
3467 dpif_netlink_port_dump_start,
3468 dpif_netlink_port_dump_next,
3469 dpif_netlink_port_dump_done,
3470 dpif_netlink_port_poll,
3471 dpif_netlink_port_poll_wait,
3472 dpif_netlink_flow_flush,
3473 dpif_netlink_flow_dump_create,
3474 dpif_netlink_flow_dump_destroy,
3475 dpif_netlink_flow_dump_thread_create,
3476 dpif_netlink_flow_dump_thread_destroy,
3477 dpif_netlink_flow_dump_next,
3478 dpif_netlink_operate,
3479 dpif_netlink_recv_set,
3480 dpif_netlink_handlers_set,
d4f6865c 3481 NULL, /* set_config */
93451a0a
AS
3482 dpif_netlink_queue_to_priority,
3483 dpif_netlink_recv,
3484 dpif_netlink_recv_wait,
3485 dpif_netlink_recv_purge,
e4e74c3a 3486 NULL, /* register_dp_purge_cb */
6b31e073
RW
3487 NULL, /* register_upcall_cb */
3488 NULL, /* enable_upcall */
3489 NULL, /* disable_upcall */
b5cbbcf6 3490 dpif_netlink_get_datapath_version, /* get_datapath_version */
c11c9f4a
DDP
3491 dpif_netlink_ct_dump_start,
3492 dpif_netlink_ct_dump_next,
3493 dpif_netlink_ct_dump_done,
5dddf960 3494 dpif_netlink_ct_flush,
c92339ad
DB
3495 NULL, /* ct_set_maxconns */
3496 NULL, /* ct_get_maxconns */
875075b3 3497 NULL, /* ct_get_nconns */
906ff9d2
YHW
3498 dpif_netlink_ct_set_limits,
3499 dpif_netlink_ct_get_limits,
3500 dpif_netlink_ct_del_limits,
5dddf960
JR
3501 dpif_netlink_meter_get_features,
3502 dpif_netlink_meter_set,
3503 dpif_netlink_meter_get,
3504 dpif_netlink_meter_del,
96fba48f 3505};
93451a0a 3506
96fba48f 3507static int
93451a0a 3508dpif_netlink_init(void)
96fba48f 3509{
eb8ed438
BP
3510 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3511 static int error;
982b8810 3512
eb8ed438 3513 if (ovsthread_once_start(&once)) {
df2c07f4
JP
3514 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
3515 &ovs_datapath_family);
37a1300c 3516 if (error) {
e0e2410d 3517 VLOG_INFO("Generic Netlink family '%s' does not exist. "
cae7529c
CL
3518 "The Open vSwitch kernel module is probably not loaded.",
3519 OVS_DATAPATH_FAMILY);
37a1300c 3520 }
f0fef760 3521 if (!error) {
df2c07f4 3522 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
f0fef760 3523 }
37a1300c 3524 if (!error) {
df2c07f4 3525 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
37a1300c 3526 }
aaff4b55 3527 if (!error) {
df2c07f4
JP
3528 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
3529 &ovs_packet_family);
aaff4b55 3530 }
c7178a0b
EJ
3531 if (!error) {
3532 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
b3dcb73c 3533 &ovs_vport_mcgroup);
c7178a0b 3534 }
80738e5f
AZ
3535 if (!error) {
3536 if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
3537 VLOG_INFO("The kernel module does not support meters.");
3538 }
3539 }
906ff9d2
YHW
3540 if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
3541 &ovs_ct_limit_family) < 0) {
3542 VLOG_INFO("Generic Netlink family '%s' does not exist. "
3543 "Please update the Open vSwitch kernel module to enable "
3544 "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
3545 }
eb8ed438 3546
921c370a
EG
3547 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
3548
eb8ed438 3549 ovsthread_once_done(&once);
982b8810
BP
3550 }
3551
3552 return error;
96fba48f
BP
3553}
3554
c19e6535 3555bool
93451a0a 3556dpif_netlink_is_internal_device(const char *name)
9fe3b9a2 3557{
93451a0a 3558 struct dpif_netlink_vport reply;
c19e6535 3559 struct ofpbuf *buf;
9fe3b9a2 3560 int error;
96fba48f 3561
93451a0a 3562 error = dpif_netlink_vport_get(name, &reply, &buf);
c19e6535
BP
3563 if (!error) {
3564 ofpbuf_delete(buf);
141d9ce4 3565 } else if (error != ENODEV && error != ENOENT) {
c19e6535 3566 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
10a89ef0 3567 name, ovs_strerror(error));
96fba48f
BP
3568 }
3569
df2c07f4 3570 return reply.type == OVS_VPORT_TYPE_INTERNAL;
96fba48f 3571}
e0467f6d 3572
df2c07f4 3573/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
c19e6535
BP
3574 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
3575 * positive errno value.
3576 *
3577 * 'vport' will contain pointers into 'buf', so the caller should not free
3578 * 'buf' while 'vport' is still in use. */
3579static int
93451a0a 3580dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
c19e6535
BP
3581 const struct ofpbuf *buf)
3582{
df2c07f4
JP
3583 static const struct nl_policy ovs_vport_policy[] = {
3584 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
3585 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
3586 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1579cf67 3587 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
f7df9823 3588 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
c19e6535 3589 .optional = true },
df2c07f4 3590 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
bfda5239 3591 [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
c19e6535
BP
3592 };
3593
93451a0a 3594 dpif_netlink_vport_init(vport);
c19e6535 3595
0a2869d5
BP
3596 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3597 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3598 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3599 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3600
3601 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
df2c07f4
JP
3602 if (!nlmsg || !genl || !ovs_header
3603 || nlmsg->nlmsg_type != ovs_vport_family
3604 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
3605 ARRAY_SIZE(ovs_vport_policy))) {
c19e6535
BP
3606 return EINVAL;
3607 }
c19e6535 3608
f0fef760 3609 vport->cmd = genl->cmd;
df2c07f4 3610 vport->dp_ifindex = ovs_header->dp_ifindex;
4e022ec0 3611 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
df2c07f4
JP
3612 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
3613 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
b063d9f0 3614 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1579cf67
AW
3615 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
3616 / (sizeof *vport->upcall_pids);
3617 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
3618
b063d9f0 3619 }
df2c07f4
JP
3620 if (a[OVS_VPORT_ATTR_STATS]) {
3621 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
3622 }
df2c07f4
JP
3623 if (a[OVS_VPORT_ATTR_OPTIONS]) {
3624 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
3625 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535 3626 }
bfda5239
FL
3627 if (a[OVS_VPORT_ATTR_NETNSID]) {
3628 netnsid_set(&vport->netnsid,
3629 nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
3630 } else {
3631 netnsid_set_local(&vport->netnsid);
3632 }
c19e6535
BP
3633 return 0;
3634}
3635
df2c07f4 3636/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
c19e6535
BP
3637 * followed by Netlink attributes corresponding to 'vport'. */
3638static void
93451a0a
AS
3639dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
3640 struct ofpbuf *buf)
c19e6535 3641{
df2c07f4 3642 struct ovs_header *ovs_header;
f0fef760 3643
df2c07f4 3644 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
69685a88 3645 vport->cmd, OVS_VPORT_VERSION);
c19e6535 3646
df2c07f4
JP
3647 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3648 ovs_header->dp_ifindex = vport->dp_ifindex;
c19e6535 3649
4e022ec0
AW
3650 if (vport->port_no != ODPP_NONE) {
3651 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
c19e6535
BP
3652 }
3653
df2c07f4
JP
3654 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
3655 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
c19e6535
BP
3656 }
3657
3658 if (vport->name) {
df2c07f4 3659 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
c19e6535
BP
3660 }
3661
1579cf67
AW
3662 if (vport->upcall_pids) {
3663 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
3664 vport->upcall_pids,
3665 vport->n_upcall_pids * sizeof *vport->upcall_pids);
a24a6574 3666 }
b063d9f0 3667
c19e6535 3668 if (vport->stats) {
df2c07f4 3669 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
c19e6535
BP
3670 vport->stats, sizeof *vport->stats);
3671 }
3672
c19e6535 3673 if (vport->options) {
df2c07f4 3674 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
c19e6535
BP
3675 vport->options, vport->options_len);
3676 }
c19e6535
BP
3677}
3678
3679/* Clears 'vport' to "empty" values. */
3680void
93451a0a 3681dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
c19e6535
BP
3682{
3683 memset(vport, 0, sizeof *vport);
4e022ec0 3684 vport->port_no = ODPP_NONE;
c19e6535
BP
3685}
3686
3687/* Executes 'request' in the kernel datapath. If the command fails, returns a
3688 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3689 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
df2c07f4 3690 * result of the command is expected to be an ovs_vport also, which is decoded
c19e6535
BP
3691 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3692 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
3693int
93451a0a
AS
3694dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
3695 struct dpif_netlink_vport *reply,
3696 struct ofpbuf **bufp)
c19e6535 3697{
f0fef760 3698 struct ofpbuf *request_buf;
c19e6535
BP
3699 int error;
3700
cb22974d 3701 ovs_assert((reply != NULL) == (bufp != NULL));
c19e6535 3702
93451a0a 3703 error = dpif_netlink_init();
42bb6c72
BP
3704 if (error) {
3705 if (reply) {
3706 *bufp = NULL;
93451a0a 3707 dpif_netlink_vport_init(reply);
42bb6c72
BP
3708 }
3709 return error;
3710 }
3711
f0fef760 3712 request_buf = ofpbuf_new(1024);
93451a0a 3713 dpif_netlink_vport_to_ofpbuf(request, request_buf);
a88b4e04 3714 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
f0fef760 3715 ofpbuf_delete(request_buf);
c19e6535 3716
f0fef760
BP
3717 if (reply) {
3718 if (!error) {
93451a0a 3719 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
f0fef760 3720 }
c19e6535 3721 if (error) {
93451a0a 3722 dpif_netlink_vport_init(reply);
f0fef760
BP
3723 ofpbuf_delete(*bufp);
3724 *bufp = NULL;
c19e6535 3725 }
c19e6535
BP
3726 }
3727 return error;
3728}
3729
3730/* Obtains information about the kernel vport named 'name' and stores it into
3731 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
3732 * longer needed ('reply' will contain pointers into '*bufp'). */
3733int
93451a0a
AS
3734dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
3735 struct ofpbuf **bufp)
c19e6535 3736{
93451a0a 3737 struct dpif_netlink_vport request;
c19e6535 3738
93451a0a 3739 dpif_netlink_vport_init(&request);
df2c07f4 3740 request.cmd = OVS_VPORT_CMD_GET;
c19e6535
BP
3741 request.name = name;
3742
93451a0a 3743 return dpif_netlink_vport_transact(&request, reply, bufp);
c19e6535 3744}
93451a0a 3745
df2c07f4 3746/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
aaff4b55
BP
3747 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
3748 * positive errno value.
d6569377
BP
3749 *
3750 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
3751 * while 'dp' is still in use. */
3752static int
93451a0a 3753dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
d6569377 3754{
df2c07f4
JP
3755 static const struct nl_policy ovs_datapath_policy[] = {
3756 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
f7df9823 3757 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
d6569377 3758 .optional = true },
847108dc
AZ
3759 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
3760 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
3761 .optional = true },
d6569377
BP
3762 };
3763
93451a0a 3764 dpif_netlink_dp_init(dp);
d6569377 3765
0a2869d5
BP
3766 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3767 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3768 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3769 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3770
3771 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
df2c07f4
JP
3772 if (!nlmsg || !genl || !ovs_header
3773 || nlmsg->nlmsg_type != ovs_datapath_family
3774 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
3775 ARRAY_SIZE(ovs_datapath_policy))) {
d6569377
BP
3776 return EINVAL;
3777 }
d6569377 3778
aaff4b55 3779 dp->cmd = genl->cmd;
df2c07f4
JP
3780 dp->dp_ifindex = ovs_header->dp_ifindex;
3781 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
3782 if (a[OVS_DP_ATTR_STATS]) {
6a54dedc 3783 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
d6569377 3784 }
982b8810 3785
847108dc 3786 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
6a54dedc 3787 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
847108dc
AZ
3788 }
3789
d6569377
BP
3790 return 0;
3791}
3792
aaff4b55 3793/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
d6569377 3794static void
93451a0a 3795dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
d6569377 3796{
df2c07f4 3797 struct ovs_header *ovs_header;
d6569377 3798
df2c07f4 3799 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
69685a88
JG
3800 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
3801 OVS_DATAPATH_VERSION);
aaff4b55 3802
df2c07f4
JP
3803 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3804 ovs_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
3805
3806 if (dp->name) {
df2c07f4 3807 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
d6569377
BP
3808 }
3809
a24a6574
BP
3810 if (dp->upcall_pid) {
3811 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
3812 }
b063d9f0 3813
b7fd5e38
TG
3814 if (dp->user_features) {
3815 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
3816 }
3817
df2c07f4 3818 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
d6569377
BP
3819}
3820
3821/* Clears 'dp' to "empty" values. */
d3d8f1f7 3822static void
93451a0a 3823dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
d6569377
BP
3824{
3825 memset(dp, 0, sizeof *dp);
d6569377
BP
3826}
3827
aaff4b55 3828static void
93451a0a 3829dpif_netlink_dp_dump_start(struct nl_dump *dump)
aaff4b55 3830{
93451a0a 3831 struct dpif_netlink_dp request;
aaff4b55
BP
3832 struct ofpbuf *buf;
3833
93451a0a 3834 dpif_netlink_dp_init(&request);
df2c07f4 3835 request.cmd = OVS_DP_CMD_GET;
aaff4b55
BP
3836
3837 buf = ofpbuf_new(1024);
93451a0a 3838 dpif_netlink_dp_to_ofpbuf(&request, buf);
a88b4e04 3839 nl_dump_start(dump, NETLINK_GENERIC, buf);
aaff4b55
BP
3840 ofpbuf_delete(buf);
3841}
3842
d6569377
BP
3843/* Executes 'request' in the kernel datapath. If the command fails, returns a
3844 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3845 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
aaff4b55
BP
3846 * result of the command is expected to be of the same form, which is decoded
3847 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3848 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 3849static int
93451a0a
AS
3850dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
3851 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
d6569377 3852{
aaff4b55 3853 struct ofpbuf *request_buf;
d6569377 3854 int error;
d6569377 3855
cb22974d 3856 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 3857
aaff4b55 3858 request_buf = ofpbuf_new(1024);
93451a0a 3859 dpif_netlink_dp_to_ofpbuf(request, request_buf);
a88b4e04 3860 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
aaff4b55 3861 ofpbuf_delete(request_buf);
d6569377 3862
aaff4b55 3863 if (reply) {
93451a0a 3864 dpif_netlink_dp_init(reply);
aaff4b55 3865 if (!error) {
93451a0a 3866 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
aaff4b55 3867 }
d6569377 3868 if (error) {
aaff4b55
BP
3869 ofpbuf_delete(*bufp);
3870 *bufp = NULL;
d6569377 3871 }
d6569377
BP
3872 }
3873 return error;
3874}
3875
3876/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
3877 * The caller must free '*bufp' when the reply is no longer needed ('reply'
3878 * will contain pointers into '*bufp'). */
d3d8f1f7 3879static int
93451a0a
AS
3880dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
3881 struct ofpbuf **bufp)
d6569377 3882{
93451a0a
AS
3883 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3884 struct dpif_netlink_dp request;
d6569377 3885
93451a0a 3886 dpif_netlink_dp_init(&request);
df2c07f4 3887 request.cmd = OVS_DP_CMD_GET;
254f2dc8 3888 request.dp_ifindex = dpif->dp_ifindex;
d6569377 3889
93451a0a 3890 return dpif_netlink_dp_transact(&request, reply, bufp);
d6569377 3891}
93451a0a 3892
df2c07f4 3893/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
37a1300c 3894 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
d6569377
BP
3895 * positive errno value.
3896 *
3897 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
3898 * while 'flow' is still in use. */
3899static int
93451a0a
AS
3900dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
3901 const struct ofpbuf *buf)
d6569377 3902{
70e5ed6f
JS
3903 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
3904 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
e6cc0bab 3905 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
df2c07f4 3906 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
f7df9823 3907 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
d6569377 3908 .optional = true },
df2c07f4
JP
3909 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
3910 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
ab79d262 3911 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
df2c07f4 3912 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 3913 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
70e5ed6f 3914 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
d6569377
BP
3915 };
3916
93451a0a 3917 dpif_netlink_flow_init(flow);
d6569377 3918
0a2869d5
BP
3919 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3920 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3921 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3922 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3923
3924 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
df2c07f4
JP
3925 if (!nlmsg || !genl || !ovs_header
3926 || nlmsg->nlmsg_type != ovs_flow_family
3927 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
3928 ARRAY_SIZE(ovs_flow_policy))) {
d6569377
BP
3929 return EINVAL;
3930 }
70e5ed6f
JS
3931 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
3932 return EINVAL;
3933 }
d6569377 3934
37a1300c 3935 flow->nlmsg_flags = nlmsg->nlmsg_flags;
df2c07f4 3936 flow->dp_ifindex = ovs_header->dp_ifindex;
70e5ed6f
JS
3937 if (a[OVS_FLOW_ATTR_KEY]) {
3938 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
3939 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
3940 }
e6cc0bab 3941
70e5ed6f 3942 if (a[OVS_FLOW_ATTR_UFID]) {
ab79d262 3943 flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
70e5ed6f
JS
3944 flow->ufid_present = true;
3945 }
e6cc0bab
AZ
3946 if (a[OVS_FLOW_ATTR_MASK]) {
3947 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
3948 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
3949 }
df2c07f4
JP
3950 if (a[OVS_FLOW_ATTR_ACTIONS]) {
3951 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
3952 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377 3953 }
df2c07f4
JP
3954 if (a[OVS_FLOW_ATTR_STATS]) {
3955 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
d6569377 3956 }
df2c07f4
JP
3957 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
3958 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
d6569377 3959 }
df2c07f4
JP
3960 if (a[OVS_FLOW_ATTR_USED]) {
3961 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
9e980142 3962 }
d6569377
BP
3963 return 0;
3964}
3965
beb75a40
JS
3966
3967/*
a8a3eee4
JS
3968 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
3969 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
3970 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
beb75a40
JS
3971 */
3972static void
3973put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
3974 const struct nlattr *data, uint16_t data_len)
3975{
3976 const struct nlattr *packet_type;
3977
3978 packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
3979
3980 if (packet_type) {
3981 /* exclude PACKET_TYPE Netlink attribute. */
3982 ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
3983 size_t packet_type_len = NL_A_U32_SIZE;
3984 size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
3985 size_t second_chunk_size = data_len - first_chunk_size
3986 - packet_type_len;
beb75a40 3987 struct nlattr *next_attr = nl_attr_next(packet_type);
1ca5b61b 3988 size_t ofs;
beb75a40 3989
1ca5b61b
JS
3990 ofs = nl_msg_start_nested(buf, type);
3991 nl_msg_put(buf, data, first_chunk_size);
3992 nl_msg_put(buf, next_attr, second_chunk_size);
a8a3eee4
JS
3993 if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
3994 ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
3995 const struct nlattr *nla;
3996
3997 nla = nl_attr_find(buf, NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
3998 if (nla) {
3999 ovs_be16 *ethertype;
4000
4001 ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
4002 *ethertype = pt;
4003 } else {
4004 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
4005 }
4006 }
1ca5b61b 4007 nl_msg_end_nested(buf, ofs);
beb75a40
JS
4008 } else {
4009 nl_msg_put_unspec(buf, type, data, data_len);
4010 }
4011}
4012
df2c07f4 4013/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
d6569377
BP
4014 * followed by Netlink attributes corresponding to 'flow'. */
4015static void
93451a0a
AS
4016dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
4017 struct ofpbuf *buf)
d6569377 4018{
df2c07f4 4019 struct ovs_header *ovs_header;
d6569377 4020
df2c07f4 4021 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
30b44744 4022 NLM_F_REQUEST | flow->nlmsg_flags,
69685a88 4023 flow->cmd, OVS_FLOW_VERSION);
37a1300c 4024
df2c07f4
JP
4025 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
4026 ovs_header->dp_ifindex = flow->dp_ifindex;
d6569377 4027
70e5ed6f 4028 if (flow->ufid_present) {
ab79d262 4029 nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
70e5ed6f
JS
4030 }
4031 if (flow->ufid_terse) {
4032 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
4033 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
4034 | OVS_UFID_F_OMIT_ACTIONS);
4035 }
64bb477f
JS
4036 if (!flow->ufid_terse || !flow->ufid_present) {
4037 if (flow->key_len) {
beb75a40
JS
4038 put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
4039 flow->key_len);
64bb477f 4040 }
64bb477f 4041 if (flow->mask_len) {
beb75a40
JS
4042 put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
4043 flow->mask_len);
64bb477f
JS
4044 }
4045 if (flow->actions || flow->actions_len) {
4046 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
4047 flow->actions, flow->actions_len);
4048 }
d6569377
BP
4049 }
4050
4051 /* We never need to send these to the kernel. */
cb22974d
BP
4052 ovs_assert(!flow->stats);
4053 ovs_assert(!flow->tcp_flags);
4054 ovs_assert(!flow->used);
d6569377
BP
4055
4056 if (flow->clear) {
df2c07f4 4057 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
d6569377 4058 }
43f9ac0a
JR
4059 if (flow->probe) {
4060 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
4061 }
d6569377
BP
4062}
4063
4064/* Clears 'flow' to "empty" values. */
d3d8f1f7 4065static void
93451a0a 4066dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
d6569377
BP
4067{
4068 memset(flow, 0, sizeof *flow);
4069}
4070
4071/* Executes 'request' in the kernel datapath. If the command fails, returns a
4072 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
4073 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
37a1300c
BP
4074 * result of the command is expected to be a flow also, which is decoded and
4075 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
4076 * is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 4077static int
93451a0a
AS
4078dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
4079 struct dpif_netlink_flow *reply,
4080 struct ofpbuf **bufp)
d6569377 4081{
37a1300c 4082 struct ofpbuf *request_buf;
d6569377 4083 int error;
d6569377 4084
cb22974d 4085 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 4086
30b44744
BP
4087 if (reply) {
4088 request->nlmsg_flags |= NLM_F_ECHO;
4089 }
4090
37a1300c 4091 request_buf = ofpbuf_new(1024);
93451a0a 4092 dpif_netlink_flow_to_ofpbuf(request, request_buf);
a88b4e04 4093 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
37a1300c 4094 ofpbuf_delete(request_buf);
d6569377 4095
37a1300c
BP
4096 if (reply) {
4097 if (!error) {
93451a0a 4098 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
37a1300c 4099 }
d6569377 4100 if (error) {
93451a0a 4101 dpif_netlink_flow_init(reply);
37a1300c
BP
4102 ofpbuf_delete(*bufp);
4103 *bufp = NULL;
d6569377 4104 }
d6569377
BP
4105 }
4106 return error;
4107}
4108
4109static void
93451a0a
AS
4110dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
4111 struct dpif_flow_stats *stats)
d6569377
BP
4112{
4113 if (flow->stats) {
6a54dedc
BP
4114 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
4115 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
d6569377
BP
4116 } else {
4117 stats->n_packets = 0;
4118 stats->n_bytes = 0;
4119 }
0e70cdcb 4120 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
d6569377
BP
4121 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
4122}
e0467f6d 4123
14b4d2f9
BP
4124/* Logs information about a packet that was recently lost in 'ch' (in
4125 * 'dpif_'). */
4126static void
93451a0a 4127report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
1579cf67 4128 uint32_t handler_id)
14b4d2f9 4129{
14b4d2f9 4130 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
14b4d2f9
BP
4131 struct ds s;
4132
8d675c5a 4133 if (VLOG_DROP_WARN(&rl)) {
14b4d2f9
BP
4134 return;
4135 }
4136
4137 ds_init(&s);
4138 if (ch->last_poll != LLONG_MIN) {
4139 ds_put_format(&s, " (last polled %lld ms ago)",
4140 time_msec() - ch->last_poll);
4141 }
14b4d2f9 4142
1579cf67 4143 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
9b00386b 4144 dpif_name(&dpif->dpif), ch_idx, handler_id);
14b4d2f9
BP
4145 ds_destroy(&s);
4146}