]> git.proxmox.com Git - ovs.git/blame - lib/dpif-netlink.c
compat: Add ipv6 GRE and IPV6 Tunneling
[ovs.git] / lib / dpif-netlink.c
CommitLineData
96fba48f 1/*
aa5c0216 2 * Copyright (c) 2008-2017 Nicira, Inc.
96fba48f
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
9fe3b9a2 18
93451a0a 19#include "dpif-netlink.h"
96fba48f 20
96fba48f
BP
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <net/if.h>
b90fa799 26#include <linux/types.h>
aae51f53 27#include <linux/pkt_sched.h>
8522ba09 28#include <poll.h>
96fba48f 29#include <stdlib.h>
8522ba09 30#include <strings.h>
50f80534 31#include <sys/epoll.h>
10dcf8de 32#include <sys/stat.h>
96fba48f
BP
33#include <unistd.h>
34
773cd538 35#include "bitmap.h"
c4e08753 36#include "dpif-netlink-rtnl.h"
0d71302e 37#include "dpif-provider.h"
1579cf67 38#include "fat-rwlock.h"
0d71302e 39#include "flow.h"
032aa6a3 40#include "netdev-linux.h"
0d71302e 41#include "netdev-provider.h"
c3827f61 42#include "netdev-vport.h"
0d71302e 43#include "netdev.h"
c11c9f4a 44#include "netlink-conntrack.h"
45c8d3a1 45#include "netlink-notifier.h"
982b8810 46#include "netlink-socket.h"
856081f6 47#include "netlink.h"
bfda5239 48#include "netnsid.h"
feebdea2 49#include "odp-util.h"
0d71302e
BP
50#include "openvswitch/dynamic-string.h"
51#include "openvswitch/flow.h"
52#include "openvswitch/match.h"
64c96779 53#include "openvswitch/ofpbuf.h"
fd016ae3 54#include "openvswitch/poll-loop.h"
ee89ea7b 55#include "openvswitch/shash.h"
0d71302e
BP
56#include "openvswitch/vlog.h"
57#include "packets.h"
58#include "random.h"
b3c01ed3 59#include "sset.h"
14b4d2f9 60#include "timeval.h"
d6569377 61#include "unaligned.h"
96fba48f 62#include "util.h"
5136ce49 63
93451a0a 64VLOG_DEFINE_THIS_MODULE(dpif_netlink);
09cac43f 65#ifdef _WIN32
da467899 66#include "wmi.h"
09cac43f
NR
67enum { WINDOWS = 1 };
68#else
69enum { WINDOWS = 0 };
70#endif
95b1d73a 71enum { MAX_PORTS = USHRT_MAX };
773cd538 72
24b019f8
JP
73/* This ethtool flag was introduced in Linux 2.6.24, so it might be
74 * missing if we have old headers. */
75#define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
76
f2280b41 77#define FLOW_DUMP_MAX_BATCH 50
8b668ee3 78#define OPERATE_MAX_OPS 50
f2280b41 79
/* Fields of a datapath message, grouped by where they are carried. */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                   /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;         /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;             /* OVS_DP_ATTR_USER_FEATURES. */
    const struct ovs_dp_stats *stats;   /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                        /* OVS_DP_ATTR_MEGAFLOW_STATS. */
};
95
93451a0a
AS
96static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
97static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
98 const struct ofpbuf *);
99static void dpif_netlink_dp_dump_start(struct nl_dump *);
100static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
101 struct dpif_netlink_dp *reply,
102 struct ofpbuf **bufp);
103static int dpif_netlink_dp_get(const struct dpif *,
104 struct dpif_netlink_dp *reply,
105 struct ofpbuf **bufp);
106
107struct dpif_netlink_flow {
37a1300c
BP
108 /* Generic Netlink header. */
109 uint8_t cmd;
d6569377 110
df2c07f4 111 /* struct ovs_header. */
d6569377 112 unsigned int nlmsg_flags;
254f2dc8 113 int dp_ifindex;
d6569377
BP
114
115 /* Attributes.
116 *
0e70cdcb
BP
117 * The 'stats' member points to 64-bit data that might only be aligned on
118 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
119 * values.
d2a23af2 120 *
df2c07f4 121 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
d2a23af2 122 * the Netlink version of the command, even if actions_len is zero. */
df2c07f4 123 const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
d6569377 124 size_t key_len;
e6cc0bab
AZ
125 const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */
126 size_t mask_len;
df2c07f4 127 const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
d6569377 128 size_t actions_len;
70e5ed6f
JS
129 ovs_u128 ufid; /* OVS_FLOW_ATTR_FLOW_ID. */
130 bool ufid_present; /* Is there a UFID? */
131 bool ufid_terse; /* Skip serializing key/mask/acts? */
df2c07f4
JP
132 const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
133 const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
0e70cdcb 134 const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
df2c07f4 135 bool clear; /* OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 136 bool probe; /* OVS_FLOW_ATTR_PROBE. */
d6569377
BP
137};
138
93451a0a
AS
139static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
140static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
141 const struct ofpbuf *);
142static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
143 struct ofpbuf *);
144static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
145 struct dpif_netlink_flow *reply,
146 struct ofpbuf **bufp);
147static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
148 struct dpif_flow_stats *);
7af12bd7 149static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
93451a0a 150 const struct dpif_netlink_flow *);
d6569377 151
/* A single dpif channel between the kernel and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
157
09cac43f
NR
158#ifdef _WIN32
159#define VPORT_SOCK_POOL_SIZE 1
/* Windows has no native epoll.  Equivalent interfaces exist but are not used
 * currently; to keep things simple, a pool of netlink sockets is used
 * instead.  Each socket in the pool is represented by a 'struct
 * dpif_windows_vport_sock'.  Because it is a pool, several OVS ports may
 * share one socket.  A reference count and similar fields can be added in
 * the future. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* Netlink socket. */
};
169#endif
170
1579cf67
AW
/* Per-handler upcall state. */
struct dpif_handler {
    struct dpif_channel *channels;  /* Array of channels for each handler. */
    struct epoll_event *epoll_events;
    int epoll_fd;                   /* epoll fd that includes channel socks. */
    int n_events;                   /* Num events returned by epoll_wait(). */
    int event_offset;               /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;      /* Index to aid in allocating a
                                     * socket in the pool to a port. */
#endif
};
14b4d2f9 185
96fba48f 186/* Datapath interface for the openvswitch Linux kernel module. */
93451a0a 187struct dpif_netlink {
96fba48f 188 struct dpif dpif;
254f2dc8 189 int dp_ifindex;
e9e28be3 190
b063d9f0 191 /* Upcall messages. */
1579cf67
AW
192 struct fat_rwlock upcall_lock;
193 struct dpif_handler *handlers;
194 uint32_t n_handlers; /* Num of upcall handlers. */
195 int uc_array_size; /* Size of 'handler->channels' and */
196 /* 'handler->epoll_events'. */
982b8810 197
e9e28be3 198 /* Change notification. */
e4516b20 199 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
61eae437 200 bool refresh_channels;
96fba48f
BP
201};
202
93451a0a 203static void report_loss(struct dpif_netlink *, struct dpif_channel *,
9b00386b 204 uint32_t ch_idx, uint32_t handler_id);
1579cf67 205
96fba48f
BP
206static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
207
e4516b20
BP
208/* Generic Netlink family numbers for OVS.
209 *
93451a0a 210 * Initialized by dpif_netlink_init(). */
df2c07f4
JP
211static int ovs_datapath_family;
212static int ovs_vport_family;
213static int ovs_flow_family;
214static int ovs_packet_family;
982b8810 215
e4516b20
BP
216/* Generic Netlink multicast groups for OVS.
217 *
93451a0a 218 * Initialized by dpif_netlink_init(). */
e4516b20 219static unsigned int ovs_vport_mcgroup;
982b8810 220
921c370a
EG
221/* If true, tunnel devices are created using OVS compat/genetlink.
222 * If false, tunnel devices are created with rtnetlink and using light weight
223 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
224 * to using the compat interface. */
225static bool ovs_tunnels_out_of_tree = true;
226
93451a0a
AS
227static int dpif_netlink_init(void);
228static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
229static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
230 odp_port_t port_no, uint32_t hash);
09cac43f 231static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
93451a0a
AS
232static int dpif_netlink_refresh_channels(struct dpif_netlink *,
233 uint32_t n_handlers);
234static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
235 struct ofpbuf *);
236static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
237 const struct ofpbuf *);
921c370a
EG
238static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
239 odp_port_t port_no, const char *port_name,
240 struct dpif_port *dpif_port);
f0fef760 241
93451a0a
AS
242static struct dpif_netlink *
243dpif_netlink_cast(const struct dpif *dpif)
96fba48f 244{
93451a0a
AS
245 dpif_assert_class(dpif, &dpif_netlink_class);
246 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
96fba48f
BP
247}
248
d3d22744 249static int
93451a0a
AS
250dpif_netlink_enumerate(struct sset *all_dps,
251 const struct dpif_class *dpif_class OVS_UNUSED)
d3d22744 252{
aaff4b55 253 struct nl_dump dump;
d57695d7
JS
254 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
255 struct ofpbuf msg, buf;
aaff4b55 256 int error;
982b8810 257
93451a0a 258 error = dpif_netlink_init();
aaff4b55
BP
259 if (error) {
260 return error;
982b8810 261 }
d3d22744 262
d57695d7 263 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a 264 dpif_netlink_dp_dump_start(&dump);
d57695d7 265 while (nl_dump_next(&dump, &msg, &buf)) {
93451a0a 266 struct dpif_netlink_dp dp;
d6569377 267
93451a0a 268 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
d0c23a1a 269 sset_add(all_dps, dp.name);
d3d22744
BP
270 }
271 }
d57695d7 272 ofpbuf_uninit(&buf);
aaff4b55 273 return nl_dump_done(&dump);
d3d22744
BP
274}
275
96fba48f 276static int
93451a0a
AS
277dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
278 bool create, struct dpif **dpifp)
96fba48f 279{
93451a0a 280 struct dpif_netlink_dp dp_request, dp;
c19e6535 281 struct ofpbuf *buf;
ea36840f 282 uint32_t upcall_pid;
c19e6535 283 int error;
96fba48f 284
93451a0a 285 error = dpif_netlink_init();
982b8810
BP
286 if (error) {
287 return error;
288 }
289
982b8810 290 /* Create or look up datapath. */
93451a0a 291 dpif_netlink_dp_init(&dp_request);
ea36840f
BP
292 if (create) {
293 dp_request.cmd = OVS_DP_CMD_NEW;
294 upcall_pid = 0;
295 dp_request.upcall_pid = &upcall_pid;
296 } else {
b7fd5e38
TG
297 /* Use OVS_DP_CMD_SET to report user features */
298 dp_request.cmd = OVS_DP_CMD_SET;
ea36840f 299 }
254f2dc8 300 dp_request.name = name;
b7fd5e38 301 dp_request.user_features |= OVS_DP_F_UNALIGNED;
1579cf67 302 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
93451a0a 303 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
982b8810
BP
304 if (error) {
305 return error;
c19e6535 306 }
254f2dc8 307
e4516b20 308 error = open_dpif(&dp, dpifp);
8f4a4df5 309 ofpbuf_delete(buf);
e4516b20 310 return error;
c19e6535
BP
311}
312
e4516b20 313static int
93451a0a 314open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
c19e6535 315{
93451a0a 316 struct dpif_netlink *dpif;
c19e6535 317
17411ecf 318 dpif = xzalloc(sizeof *dpif);
e4516b20 319 dpif->port_notifier = NULL;
1579cf67 320 fat_rwlock_init(&dpif->upcall_lock);
c19e6535 321
93451a0a 322 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
254f2dc8 323 dp->dp_ifindex, dp->dp_ifindex);
c19e6535 324
254f2dc8 325 dpif->dp_ifindex = dp->dp_ifindex;
c19e6535 326 *dpifp = &dpif->dpif;
e4516b20
BP
327
328 return 0;
96fba48f
BP
329}
330
1579cf67
AW
/* Destroys each of the 'n_socks' netlink sockets referenced by 'socksp',
 * then frees the 'socksp' array itself. */
static void
vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
{
    size_t idx = 0;

    while (idx < n_socks) {
        nl_sock_destroy(socksp[idx]);
        idx++;
    }

    free(socksp);
}
989fd548 344
1579cf67
AW
345/* Creates an array of netlink sockets. Returns an array of the
346 * corresponding pointers. Records the error in 'error'. */
347static struct nl_sock **
09cac43f 348vport_create_socksp__(uint32_t n_socks, int *error)
1579cf67
AW
349{
350 struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
351 size_t i;
352
353 for (i = 0; i < n_socks; i++) {
354 *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
355 if (*error) {
356 goto error;
989fd548 357 }
1579cf67 358 }
989fd548 359
1579cf67 360 return socksp;
9fafa796 361
1579cf67 362error:
09cac43f 363 vport_del_socksp__(socksp, n_socks);
989fd548 364
1579cf67
AW
365 return NULL;
366}
367
09cac43f
NR
368#ifdef _WIN32
369static void
370vport_delete_sock_pool(struct dpif_handler *handler)
371 OVS_REQ_WRLOCK(dpif->upcall_lock)
372{
373 if (handler->vport_sock_pool) {
374 uint32_t i;
375 struct dpif_windows_vport_sock *sock_pool =
376 handler->vport_sock_pool;
377
378 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
379 if (sock_pool[i].nl_sock) {
380 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
381 nl_sock_destroy(sock_pool[i].nl_sock);
382 sock_pool[i].nl_sock = NULL;
383 }
384 }
385
386 free(handler->vport_sock_pool);
387 handler->vport_sock_pool = NULL;
388 }
389}
390
391static int
392vport_create_sock_pool(struct dpif_handler *handler)
393 OVS_REQ_WRLOCK(dpif->upcall_lock)
394{
395 struct dpif_windows_vport_sock *sock_pool;
396 size_t i;
397 int error = 0;
398
399 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
400 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
401 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
402 if (error) {
403 goto error;
404 }
405
406 /* Enable the netlink socket to receive packets. This is equivalent to
407 * calling nl_sock_join_mcgroup() to receive events. */
408 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
409 if (error) {
410 goto error;
411 }
412 }
413
414 handler->vport_sock_pool = sock_pool;
415 handler->last_used_pool_idx = 0;
416 return 0;
417
418error:
419 vport_delete_sock_pool(handler);
420 return error;
421}
422
423/* Returns an array pointers to netlink sockets. The sockets are picked from a
424 * pool. Records the error in 'error'. */
425static struct nl_sock **
426vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
427 OVS_REQ_WRLOCK(dpif->upcall_lock)
428{
429 uint32_t n_socks = dpif->n_handlers;
430 struct nl_sock **socksp;
431 size_t i;
432
433 ovs_assert(n_socks <= 1);
434 socksp = xzalloc(n_socks * sizeof *socksp);
435
436 /* Pick netlink sockets to use in a round-robin fashion from each
437 * handler's pool of sockets. */
438 for (i = 0; i < n_socks; i++) {
439 struct dpif_handler *handler = &dpif->handlers[i];
440 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
441 size_t index = handler->last_used_pool_idx;
442
443 /* A pool of sockets is allocated when the handler is initialized. */
444 if (sock_pool == NULL) {
445 free(socksp);
446 *error = EINVAL;
447 return NULL;
448 }
449
450 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
451 socksp[i] = sock_pool[index].nl_sock;
452 socksp[i] = sock_pool[index].nl_sock;
453 ovs_assert(socksp[i]);
454 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
455 handler->last_used_pool_idx = index;
456 }
457
458 return socksp;
459}
460
/* Frees the 'socksp' array only; the sockets it points to are left alone.
 * 'dpif' is not used. */
static void
vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
    free(socksp);
}
466#endif /* _WIN32 */
467
468static struct nl_sock **
469vport_create_socksp(struct dpif_netlink *dpif, int *error)
470{
471#ifdef _WIN32
472 return vport_create_socksp_windows(dpif, error);
473#else
474 return vport_create_socksp__(dpif->n_handlers, error);
475#endif
476}
477
478static void
479vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
480{
481#ifdef _WIN32
482 vport_del_socksp_windows(dpif, socksp);
483#else
484 vport_del_socksp__(socksp, dpif->n_handlers);
485#endif
486}
487
1579cf67
AW
/* Returns a newly allocated array holding the pid of each of the 'n_socks'
 * netlink sockets in 'socksp'.  When 'socksp' is NULL, the result is a
 * single-element array containing 0. */
static uint32_t *
vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t *pid_array;
    size_t idx;

    if (socksp == NULL) {
        return xzalloc(sizeof *pid_array);
    }

    pid_array = xzalloc(n_socks * sizeof *pid_array);
    for (idx = 0; idx < n_socks; idx++) {
        pid_array[idx] = nl_sock_pid(socksp[idx]);
    }
    return pid_array;
}
509
510/* Given the port number 'port_idx', extracts the pids of netlink sockets
511 * associated to the port and assigns it to 'upcall_pids'. */
512static bool
93451a0a 513vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
1579cf67
AW
514 uint32_t **upcall_pids)
515{
516 uint32_t *pids;
517 size_t i;
989fd548 518
1579cf67
AW
519 /* Since the nl_sock can only be assigned in either all
520 * or none "dpif->handlers" channels, the following check
521 * would suffice. */
522 if (!dpif->handlers[0].channels[port_idx].sock) {
523 return false;
524 }
09cac43f 525 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1579cf67
AW
526
527 pids = xzalloc(dpif->n_handlers * sizeof *pids);
528
529 for (i = 0; i < dpif->n_handlers; i++) {
530 pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
531 }
532
533 *upcall_pids = pids;
989fd548 534
1579cf67 535 return true;
989fd548
JP
536}
537
538static int
93451a0a 539vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
1579cf67 540 struct nl_sock **socksp)
989fd548
JP
541{
542 struct epoll_event event;
4e022ec0 543 uint32_t port_idx = odp_to_u32(port_no);
1579cf67
AW
544 size_t i, j;
545 int error;
989fd548 546
1579cf67 547 if (dpif->handlers == NULL) {
989fd548
JP
548 return 0;
549 }
550
1579cf67
AW
551 /* We assume that the datapath densely chooses port numbers, which can
552 * therefore be used as an index into 'channels' and 'epoll_events' of
553 * 'dpif->handler'. */
4e022ec0
AW
554 if (port_idx >= dpif->uc_array_size) {
555 uint32_t new_size = port_idx + 1;
989fd548 556
12d76859 557 if (new_size > MAX_PORTS) {
989fd548
JP
558 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
559 dpif_name(&dpif->dpif), port_no);
560 return EFBIG;
561 }
562
1579cf67
AW
563 for (i = 0; i < dpif->n_handlers; i++) {
564 struct dpif_handler *handler = &dpif->handlers[i];
565
566 handler->channels = xrealloc(handler->channels,
567 new_size * sizeof *handler->channels);
568
569 for (j = dpif->uc_array_size; j < new_size; j++) {
570 handler->channels[j].sock = NULL;
571 }
572
573 handler->epoll_events = xrealloc(handler->epoll_events,
574 new_size * sizeof *handler->epoll_events);
989fd548 575
1579cf67 576 }
989fd548
JP
577 dpif->uc_array_size = new_size;
578 }
579
580 memset(&event, 0, sizeof event);
581 event.events = EPOLLIN;
4e022ec0 582 event.data.u32 = port_idx;
989fd548 583
1579cf67
AW
584 for (i = 0; i < dpif->n_handlers; i++) {
585 struct dpif_handler *handler = &dpif->handlers[i];
586
09cac43f 587#ifndef _WIN32
1579cf67
AW
588 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
589 &event) < 0) {
590 error = errno;
591 goto error;
592 }
93451a0a 593#endif
1579cf67
AW
594 dpif->handlers[i].channels[port_idx].sock = socksp[i];
595 dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
596 }
989fd548
JP
597
598 return 0;
1579cf67
AW
599
600error:
601 for (j = 0; j < i; j++) {
09cac43f 602#ifndef _WIN32
1579cf67
AW
603 epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
604 nl_sock_fd(socksp[j]), NULL);
93451a0a 605#endif
1579cf67
AW
606 dpif->handlers[j].channels[port_idx].sock = NULL;
607 }
608
609 return error;
989fd548
JP
610}
611
612static void
93451a0a 613vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
989fd548 614{
4e022ec0 615 uint32_t port_idx = odp_to_u32(port_no);
1579cf67 616 size_t i;
989fd548 617
1579cf67 618 if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
989fd548
JP
619 return;
620 }
621
1579cf67
AW
622 /* Since the sock can only be assigned in either all or none
623 * of "dpif->handlers" channels, the following check would
624 * suffice. */
625 if (!dpif->handlers[0].channels[port_idx].sock) {
989fd548
JP
626 return;
627 }
628
1579cf67
AW
629 for (i = 0; i < dpif->n_handlers; i++) {
630 struct dpif_handler *handler = &dpif->handlers[i];
09cac43f 631#ifndef _WIN32
1579cf67
AW
632 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
633 nl_sock_fd(handler->channels[port_idx].sock), NULL);
634 nl_sock_destroy(handler->channels[port_idx].sock);
09cac43f 635#endif
1579cf67
AW
636 handler->channels[port_idx].sock = NULL;
637 handler->event_offset = handler->n_events = 0;
638 }
639}
640
641static void
93451a0a
AS
642destroy_all_channels(struct dpif_netlink *dpif)
643 OVS_REQ_WRLOCK(dpif->upcall_lock)
1579cf67
AW
644{
645 unsigned int i;
646
647 if (!dpif->handlers) {
648 return;
649 }
650
651 for (i = 0; i < dpif->uc_array_size; i++ ) {
93451a0a 652 struct dpif_netlink_vport vport_request;
1579cf67
AW
653 uint32_t upcall_pids = 0;
654
655 /* Since the sock can only be assigned in either all or none
656 * of "dpif->handlers" channels, the following check would
657 * suffice. */
658 if (!dpif->handlers[0].channels[i].sock) {
659 continue;
660 }
661
662 /* Turn off upcalls. */
93451a0a 663 dpif_netlink_vport_init(&vport_request);
1579cf67
AW
664 vport_request.cmd = OVS_VPORT_CMD_SET;
665 vport_request.dp_ifindex = dpif->dp_ifindex;
666 vport_request.port_no = u32_to_odp(i);
a78f446a 667 vport_request.n_upcall_pids = 1;
1579cf67 668 vport_request.upcall_pids = &upcall_pids;
93451a0a 669 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67
AW
670
671 vport_del_channels(dpif, u32_to_odp(i));
672 }
673
674 for (i = 0; i < dpif->n_handlers; i++) {
675 struct dpif_handler *handler = &dpif->handlers[i];
676
09cac43f 677 dpif_netlink_handler_uninit(handler);
1579cf67
AW
678 free(handler->epoll_events);
679 free(handler->channels);
680 }
989fd548 681
1579cf67
AW
682 free(dpif->handlers);
683 dpif->handlers = NULL;
684 dpif->n_handlers = 0;
685 dpif->uc_array_size = 0;
17411ecf
JG
686}
687
96fba48f 688static void
93451a0a 689dpif_netlink_close(struct dpif *dpif_)
96fba48f 690{
93451a0a 691 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
c7178a0b 692
e4516b20 693 nl_sock_destroy(dpif->port_notifier);
1579cf67
AW
694
695 fat_rwlock_wrlock(&dpif->upcall_lock);
696 destroy_all_channels(dpif);
697 fat_rwlock_unlock(&dpif->upcall_lock);
698
699 fat_rwlock_destroy(&dpif->upcall_lock);
96fba48f
BP
700 free(dpif);
701}
702
703static int
93451a0a 704dpif_netlink_destroy(struct dpif *dpif_)
96fba48f 705{
93451a0a
AS
706 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
707 struct dpif_netlink_dp dp;
d6569377 708
93451a0a 709 dpif_netlink_dp_init(&dp);
df2c07f4 710 dp.cmd = OVS_DP_CMD_DEL;
254f2dc8 711 dp.dp_ifindex = dpif->dp_ifindex;
93451a0a 712 return dpif_netlink_dp_transact(&dp, NULL, NULL);
96fba48f
BP
713}
714
a36de779 715static bool
93451a0a 716dpif_netlink_run(struct dpif *dpif_)
61eae437 717{
93451a0a 718 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67 719
61eae437
BP
720 if (dpif->refresh_channels) {
721 dpif->refresh_channels = false;
1579cf67 722 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 723 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
1579cf67 724 fat_rwlock_unlock(&dpif->upcall_lock);
61eae437 725 }
a36de779 726 return false;
61eae437
BP
727}
728
96fba48f 729static int
93451a0a 730dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
96fba48f 731{
93451a0a 732 struct dpif_netlink_dp dp;
d6569377
BP
733 struct ofpbuf *buf;
734 int error;
735
93451a0a 736 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
d6569377 737 if (!error) {
6a54dedc
BP
738 memset(stats, 0, sizeof *stats);
739
740 if (dp.stats) {
741 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
742 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
743 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
744 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
745 }
746
747 if (dp.megaflow_stats) {
748 stats->n_masks = dp.megaflow_stats->n_masks;
749 stats->n_mask_hit = get_32aligned_u64(
750 &dp.megaflow_stats->n_mask_hit);
751 } else {
752 stats->n_masks = UINT32_MAX;
753 stats->n_mask_hit = UINT64_MAX;
754 }
d6569377
BP
755 ofpbuf_delete(buf);
756 }
757 return error;
96fba48f
BP
758}
759
b9ad7294 760static const char *
93451a0a 761get_vport_type(const struct dpif_netlink_vport *vport)
b9ad7294
EJ
762{
763 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
764
765 switch (vport->type) {
5ed51209
JS
766 case OVS_VPORT_TYPE_NETDEV: {
767 const char *type = netdev_get_type_from_name(vport->name);
768
769 return type ? type : "system";
770 }
b9ad7294
EJ
771
772 case OVS_VPORT_TYPE_INTERNAL:
773 return "internal";
774
c1fc1411
JG
775 case OVS_VPORT_TYPE_GENEVE:
776 return "geneve";
777
b9ad7294
EJ
778 case OVS_VPORT_TYPE_GRE:
779 return "gre";
780
b9ad7294
EJ
781 case OVS_VPORT_TYPE_VXLAN:
782 return "vxlan";
783
a6ae068b
LJ
784 case OVS_VPORT_TYPE_LISP:
785 return "lisp";
786
4237026e
PS
787 case OVS_VPORT_TYPE_STT:
788 return "stt";
789
c387d817
GR
790 case OVS_VPORT_TYPE_ERSPAN:
791 case OVS_VPORT_TYPE_IP6ERSPAN:
792 case OVS_VPORT_TYPE_IP6GRE:
793 return "";
794
b9ad7294
EJ
795 case OVS_VPORT_TYPE_UNSPEC:
796 case __OVS_VPORT_TYPE_MAX:
797 break;
798 }
799
800 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
801 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
802 return "unknown";
803}
804
c4e08753 805enum ovs_vport_type
20c57607 806netdev_to_ovs_vport_type(const char *type)
c060c4cf 807{
c060c4cf
EJ
808 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
809 return OVS_VPORT_TYPE_NETDEV;
810 } else if (!strcmp(type, "internal")) {
811 return OVS_VPORT_TYPE_INTERNAL;
4237026e
PS
812 } else if (strstr(type, "stt")) {
813 return OVS_VPORT_TYPE_STT;
c1fc1411
JG
814 } else if (!strcmp(type, "geneve")) {
815 return OVS_VPORT_TYPE_GENEVE;
c060c4cf
EJ
816 } else if (strstr(type, "gre")) {
817 return OVS_VPORT_TYPE_GRE;
c060c4cf
EJ
818 } else if (!strcmp(type, "vxlan")) {
819 return OVS_VPORT_TYPE_VXLAN;
a6ae068b
LJ
820 } else if (!strcmp(type, "lisp")) {
821 return OVS_VPORT_TYPE_LISP;
c060c4cf
EJ
822 } else {
823 return OVS_VPORT_TYPE_UNSPEC;
824 }
825}
826
96fba48f 827static int
20c57607
EG
828dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
829 enum ovs_vport_type type,
830 struct ofpbuf *options,
93451a0a 831 odp_port_t *port_nop)
b90de034 832 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 833{
93451a0a 834 struct dpif_netlink_vport request, reply;
c19e6535 835 struct ofpbuf *buf;
1579cf67
AW
836 struct nl_sock **socksp = NULL;
837 uint32_t *upcall_pids;
838 int error = 0;
96fba48f 839
1579cf67 840 if (dpif->handlers) {
09cac43f 841 socksp = vport_create_socksp(dpif, &error);
1579cf67 842 if (!socksp) {
989fd548
JP
843 return error;
844 }
845 }
846
93451a0a 847 dpif_netlink_vport_init(&request);
df2c07f4 848 request.cmd = OVS_VPORT_CMD_NEW;
254f2dc8 849 request.dp_ifindex = dpif->dp_ifindex;
20c57607
EG
850 request.type = type;
851 request.name = name;
852
853 request.port_no = *port_nop;
854 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
855 request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
856 request.upcall_pids = upcall_pids;
857
858 if (options) {
859 request.options = options->data;
860 request.options_len = options->size;
861 }
862
863 error = dpif_netlink_vport_transact(&request, &reply, &buf);
864 if (!error) {
865 *port_nop = reply.port_no;
866 } else {
867 if (error == EBUSY && *port_nop != ODPP_NONE) {
868 VLOG_INFO("%s: requested port %"PRIu32" is in use",
869 dpif_name(&dpif->dpif), *port_nop);
870 }
871
872 vport_del_socksp(dpif, socksp);
873 goto exit;
874 }
875
876 if (socksp) {
877 error = vport_add_channels(dpif, *port_nop, socksp);
878 if (error) {
879 VLOG_INFO("%s: could not add channel for port %s",
880 dpif_name(&dpif->dpif), name);
881
882 /* Delete the port. */
883 dpif_netlink_vport_init(&request);
884 request.cmd = OVS_VPORT_CMD_DEL;
885 request.dp_ifindex = dpif->dp_ifindex;
886 request.port_no = *port_nop;
887 dpif_netlink_vport_transact(&request, NULL, NULL);
888 vport_del_socksp(dpif, socksp);
889 goto exit;
890 }
891 }
892 free(socksp);
893
894exit:
895 ofpbuf_delete(buf);
896 free(upcall_pids);
897
898 return error;
899}
900
901static int
902dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
903 odp_port_t *port_nop)
904 OVS_REQ_WRLOCK(dpif->upcall_lock)
905{
906 const struct netdev_tunnel_config *tnl_cfg;
907 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
908 const char *type = netdev_get_type(netdev);
909 uint64_t options_stub[64 / 8];
910 enum ovs_vport_type ovs_type;
911 struct ofpbuf options;
912 const char *name;
913
914 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
915
916 ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
917 if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
c283069c
BP
918 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
919 "unsupported type `%s'",
9b00386b 920 dpif_name(&dpif->dpif), name, type);
c283069c
BP
921 return EINVAL;
922 }
c3827f61 923
20c57607 924 if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
93451a0a 925#ifdef _WIN32
09cac43f 926 /* XXX : Map appropiate Windows handle */
93451a0a 927#else
24b019f8 928 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
93451a0a 929#endif
24b019f8
JP
930 }
931
da467899 932#ifdef _WIN32
20c57607 933 if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
da467899
AS
934 if (!create_wmi_port(name)){
935 VLOG_ERR("Could not create wmi internal port with name:%s", name);
da467899
AS
936 return EINVAL;
937 };
938 }
939#endif
940
26508d9a 941 tnl_cfg = netdev_get_tunnel_config(netdev);
526df7d8 942 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
26508d9a 943 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
526df7d8
TG
944 if (tnl_cfg->dst_port) {
945 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
946 ntohs(tnl_cfg->dst_port));
947 }
948 if (tnl_cfg->exts) {
949 size_t ext_ofs;
950 int i;
951
952 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
953 for (i = 0; i < 32; i++) {
954 if (tnl_cfg->exts & (1 << i)) {
955 nl_msg_put_flag(&options, i);
956 }
957 }
958 nl_msg_end_nested(&options, ext_ofs);
959 }
20c57607
EG
960 return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
961 port_nop);
2510ba7c 962 } else {
20c57607 963 return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
78a2d59c 964 }
c3827f61 965
20c57607 966}
989fd548 967
921c370a 968static int
c4e08753
EG
969dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
970 struct netdev *netdev,
971 odp_port_t *port_nop)
972 OVS_REQ_WRLOCK(dpif->upcall_lock)
973{
974 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
975 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
976 const char *name;
977 int error;
989fd548 978
c4e08753
EG
979 error = dpif_netlink_rtnl_port_create(netdev);
980 if (error) {
981 if (error != EOPNOTSUPP) {
d52ef4eb 982 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
c4e08753
EG
983 netdev_get_name(netdev), ovs_strerror(error));
984 }
985 return error;
986 }
1579cf67 987
c4e08753
EG
988 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
989 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
990 port_nop);
991 if (error) {
992 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
993 }
994 return error;
995}
96fba48f
BP
996
997static int
93451a0a
AS
998dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
999 odp_port_t *port_nop)
9fafa796 1000{
93451a0a 1001 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
921c370a 1002 int error = EOPNOTSUPP;
9fafa796 1003
1579cf67 1004 fat_rwlock_wrlock(&dpif->upcall_lock);
921c370a
EG
1005 if (!ovs_tunnels_out_of_tree) {
1006 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
1007 }
1008 if (error) {
1009 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
1010 }
1579cf67 1011 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1012
1013 return error;
1014}
1015
1016static int
93451a0a 1017dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
b90de034 1018 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 1019{
93451a0a 1020 struct dpif_netlink_vport vport;
921c370a 1021 struct dpif_port dpif_port;
773cd538 1022 int error;
c19e6535 1023
921c370a
EG
1024 error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1025 if (error) {
1026 return error;
1027 }
1028
93451a0a 1029 dpif_netlink_vport_init(&vport);
df2c07f4 1030 vport.cmd = OVS_VPORT_CMD_DEL;
254f2dc8 1031 vport.dp_ifindex = dpif->dp_ifindex;
c19e6535 1032 vport.port_no = port_no;
da467899 1033#ifdef _WIN32
921c370a
EG
1034 if (!strcmp(dpif_port.type, "internal")) {
1035 if (!delete_wmi_port(dpif_port.name)) {
da467899 1036 VLOG_ERR("Could not delete wmi port with name: %s",
921c370a 1037 dpif_port.name);
da467899
AS
1038 };
1039 }
1040#endif
93451a0a 1041 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
773cd538 1042
1579cf67 1043 vport_del_channels(dpif, port_no);
989fd548 1044
921c370a
EG
1045 if (!error && !ovs_tunnels_out_of_tree) {
1046 error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1047 if (error == EOPNOTSUPP) {
1048 error = 0;
1049 }
1050 }
1051
1052 dpif_port_destroy(&dpif_port);
1053
773cd538 1054 return error;
c3827f61 1055}
3abc4a1a 1056
9fafa796 1057static int
93451a0a 1058dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
9fafa796 1059{
93451a0a 1060 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
1061 int error;
1062
1579cf67 1063 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 1064 error = dpif_netlink_port_del__(dpif, port_no);
1579cf67 1065 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1066
1067 return error;
1068}
1069
c3827f61 1070static int
93451a0a
AS
1071dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1072 const char *port_name, struct dpif_port *dpif_port)
c3827f61 1073{
93451a0a
AS
1074 struct dpif_netlink_vport request;
1075 struct dpif_netlink_vport reply;
c19e6535 1076 struct ofpbuf *buf;
4c738a8d
BP
1077 int error;
1078
93451a0a 1079 dpif_netlink_vport_init(&request);
df2c07f4 1080 request.cmd = OVS_VPORT_CMD_GET;
9b00386b 1081 request.dp_ifindex = dpif->dp_ifindex;
c19e6535
BP
1082 request.port_no = port_no;
1083 request.name = port_name;
4c738a8d 1084
93451a0a 1085 error = dpif_netlink_vport_transact(&request, &reply, &buf);
c19e6535 1086 if (!error) {
33db1592
BP
1087 if (reply.dp_ifindex != request.dp_ifindex) {
1088 /* A query by name reported that 'port_name' is in some datapath
1089 * other than 'dpif', but the caller wants to know about 'dpif'. */
1090 error = ENODEV;
4afba28d 1091 } else if (dpif_port) {
33db1592 1092 dpif_port->name = xstrdup(reply.name);
b9ad7294 1093 dpif_port->type = xstrdup(get_vport_type(&reply));
33db1592
BP
1094 dpif_port->port_no = reply.port_no;
1095 }
c19e6535 1096 ofpbuf_delete(buf);
3abc4a1a 1097 }
c19e6535 1098 return error;
96fba48f
BP
1099}
1100
1101static int
93451a0a
AS
1102dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1103 struct dpif_port *dpif_port)
96fba48f 1104{
93451a0a 1105 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9b00386b 1106
93451a0a 1107 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
96fba48f
BP
1108}
1109
/* Looks up the port named 'devname' in 'dpif_' (the port number passed to
 * the query is 0) and, on success, stores its details in 'dpif_port'.
 * Returns the result of dpif_netlink_port_query__(). */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    int error;

    error = dpif_netlink_port_query__(dpif, 0, devname, dpif_port);
    return error;
}
1118
98403001 1119static uint32_t
93451a0a
AS
1120dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1121 odp_port_t port_no, uint32_t hash)
b90de034 1122 OVS_REQ_RDLOCK(dpif->upcall_lock)
98403001 1123{
4e022ec0 1124 uint32_t port_idx = odp_to_u32(port_no);
9fafa796 1125 uint32_t pid = 0;
98403001 1126
f8fc5489 1127 if (dpif->handlers && dpif->uc_array_size > 0) {
4e022ec0 1128 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
989fd548 1129 * channel, since it is not heavily loaded. */
4e022ec0 1130 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1579cf67
AW
1131 struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
1132
17f2748d
AW
1133 /* Needs to check in case the socket pointer is changed in between
1134 * the holding of upcall_lock. A known case happens when the main
1135 * thread deletes the vport while the handler thread is handling
1136 * the upcall from that port. */
1137 if (h->channels[idx].sock) {
1138 pid = nl_sock_pid(h->channels[idx].sock);
1139 }
98403001 1140 }
9fafa796
BP
1141
1142 return pid;
98403001
BP
1143}
1144
b90de034 1145static uint32_t
93451a0a
AS
1146dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
1147 uint32_t hash)
b90de034 1148{
93451a0a 1149 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
1150 uint32_t ret;
1151
1152 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 1153 ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
b90de034
AW
1154 fat_rwlock_unlock(&dpif->upcall_lock);
1155
1156 return ret;
1157}
1158
96fba48f 1159static int
93451a0a 1160dpif_netlink_flow_flush(struct dpif *dpif_)
96fba48f 1161{
93451a0a
AS
1162 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1163 struct dpif_netlink_flow flow;
37a1300c 1164
93451a0a 1165 dpif_netlink_flow_init(&flow);
df2c07f4 1166 flow.cmd = OVS_FLOW_CMD_DEL;
254f2dc8 1167 flow.dp_ifindex = dpif->dp_ifindex;
f7dde6df
PB
1168
1169 if (netdev_is_flow_api_enabled()) {
dfaf79dd 1170 netdev_ports_flow_flush(dpif_->dpif_class);
f7dde6df
PB
1171 }
1172
93451a0a 1173 return dpif_netlink_flow_transact(&flow, NULL, NULL);
96fba48f
BP
1174}
1175
93451a0a 1176struct dpif_netlink_port_state {
f0fef760 1177 struct nl_dump dump;
d57695d7 1178 struct ofpbuf buf;
c19e6535
BP
1179};
1180
222837c4 1181static void
93451a0a
AS
1182dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1183 struct nl_dump *dump)
96fba48f 1184{
93451a0a 1185 struct dpif_netlink_vport request;
f0fef760
BP
1186 struct ofpbuf *buf;
1187
93451a0a 1188 dpif_netlink_vport_init(&request);
067f1e23 1189 request.cmd = OVS_VPORT_CMD_GET;
254f2dc8 1190 request.dp_ifindex = dpif->dp_ifindex;
f0fef760
BP
1191
1192 buf = ofpbuf_new(1024);
93451a0a 1193 dpif_netlink_vport_to_ofpbuf(&request, buf);
222837c4 1194 nl_dump_start(dump, NETLINK_GENERIC, buf);
f0fef760 1195 ofpbuf_delete(buf);
222837c4
BP
1196}
1197
1198static int
93451a0a 1199dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
222837c4 1200{
93451a0a
AS
1201 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1202 struct dpif_netlink_port_state *state;
222837c4
BP
1203
1204 *statep = state = xmalloc(sizeof *state);
93451a0a 1205 dpif_netlink_port_dump_start__(dpif, &state->dump);
f0fef760 1206
d57695d7 1207 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
b0ec0f27
BP
1208 return 0;
1209}
1210
7c1ef244 1211static int
93451a0a
AS
1212dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1213 struct nl_dump *dump,
1214 struct dpif_netlink_vport *vport,
1215 struct ofpbuf *buffer)
222837c4 1216{
222837c4
BP
1217 struct ofpbuf buf;
1218 int error;
1219
d57695d7 1220 if (!nl_dump_next(dump, &buf, buffer)) {
222837c4
BP
1221 return EOF;
1222 }
1223
93451a0a 1224 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
222837c4
BP
1225 if (error) {
1226 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1227 dpif_name(&dpif->dpif), ovs_strerror(error));
1228 }
1229 return error;
1230}
1231
b0ec0f27 1232static int
93451a0a
AS
1233dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1234 struct dpif_port *dpif_port)
b0ec0f27 1235{
93451a0a
AS
1236 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1237 struct dpif_netlink_port_state *state = state_;
1238 struct dpif_netlink_vport vport;
96fba48f
BP
1239 int error;
1240
93451a0a
AS
1241 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1242 &state->buf);
c3827f61 1243 if (error) {
f0fef760 1244 return error;
c3827f61 1245 }
ebc56baa 1246 dpif_port->name = CONST_CAST(char *, vport.name);
b9ad7294 1247 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
f0fef760
BP
1248 dpif_port->port_no = vport.port_no;
1249 return 0;
b0ec0f27
BP
1250}
1251
1252static int
93451a0a 1253dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
b0ec0f27 1254{
93451a0a 1255 struct dpif_netlink_port_state *state = state_;
f0fef760 1256 int error = nl_dump_done(&state->dump);
8522b383 1257
d57695d7 1258 ofpbuf_uninit(&state->buf);
b0ec0f27 1259 free(state);
f0fef760 1260 return error;
96fba48f
BP
1261}
1262
e9e28be3 1263static int
93451a0a 1264dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
e9e28be3 1265{
93451a0a 1266 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e9e28be3 1267
e4516b20
BP
1268 /* Lazily create the Netlink socket to listen for notifications. */
1269 if (!dpif->port_notifier) {
1270 struct nl_sock *sock;
1271 int error;
1272
1273 error = nl_sock_create(NETLINK_GENERIC, &sock);
1274 if (error) {
1275 return error;
1276 }
1277
1278 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1279 if (error) {
1280 nl_sock_destroy(sock);
1281 return error;
1282 }
1283 dpif->port_notifier = sock;
1284
1285 /* We have no idea of the current state so report that everything
1286 * changed. */
1287 return ENOBUFS;
1288 }
1289
1290 for (;;) {
1291 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1292 uint64_t buf_stub[4096 / 8];
1293 struct ofpbuf buf;
1294 int error;
1295
1296 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
a86bd14e 1297 error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
e4516b20 1298 if (!error) {
93451a0a 1299 struct dpif_netlink_vport vport;
e4516b20 1300
93451a0a 1301 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
e4516b20
BP
1302 if (!error) {
1303 if (vport.dp_ifindex == dpif->dp_ifindex
1304 && (vport.cmd == OVS_VPORT_CMD_NEW
1305 || vport.cmd == OVS_VPORT_CMD_DEL
1306 || vport.cmd == OVS_VPORT_CMD_SET)) {
1307 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1308 dpif->dpif.full_name, vport.name, vport.cmd);
1579cf67 1309 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
61eae437
BP
1310 dpif->refresh_channels = true;
1311 }
e4516b20 1312 *devnamep = xstrdup(vport.name);
59e0c910 1313 ofpbuf_uninit(&buf);
e4516b20 1314 return 0;
e4516b20
BP
1315 }
1316 }
59e0c910
BP
1317 } else if (error != EAGAIN) {
1318 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1319 ovs_strerror(error));
1320 nl_sock_drain(dpif->port_notifier);
1321 error = ENOBUFS;
e4516b20
BP
1322 }
1323
59e0c910
BP
1324 ofpbuf_uninit(&buf);
1325 if (error) {
1326 return error;
1327 }
e9e28be3 1328 }
e9e28be3
BP
1329}
1330
1331static void
93451a0a 1332dpif_netlink_port_poll_wait(const struct dpif *dpif_)
e9e28be3 1333{
93451a0a 1334 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e4516b20
BP
1335
1336 if (dpif->port_notifier) {
1337 nl_sock_wait(dpif->port_notifier, POLLIN);
1338 } else {
e9e28be3 1339 poll_immediate_wake();
e9e28be3
BP
1340 }
1341}
1342
6fe09f8c 1343static void
70e5ed6f
JS
1344dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1345 const ovs_u128 *ufid, bool terse)
1346{
1347 if (ufid) {
1348 request->ufid = *ufid;
1349 request->ufid_present = true;
1350 } else {
1351 request->ufid_present = false;
1352 }
1353 request->ufid_terse = terse;
1354}
1355
1356static void
1357dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1358 const struct nlattr *key, size_t key_len,
1359 const ovs_u128 *ufid, bool terse,
1360 struct dpif_netlink_flow *request)
96fba48f 1361{
93451a0a 1362 dpif_netlink_flow_init(request);
6fe09f8c
JS
1363 request->cmd = OVS_FLOW_CMD_GET;
1364 request->dp_ifindex = dpif->dp_ifindex;
1365 request->key = key;
1366 request->key_len = key_len;
70e5ed6f
JS
1367 dpif_netlink_flow_init_ufid(request, ufid, terse);
1368}
1369
1370static void
1371dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1372 const struct dpif_flow_get *get,
1373 struct dpif_netlink_flow *request)
1374{
1375 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1376 false, request);
30053024
BP
1377}
1378
1379static int
70e5ed6f
JS
1380dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1381 const struct nlattr *key, size_t key_len,
1382 const ovs_u128 *ufid, bool terse,
1383 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
30053024 1384{
93451a0a 1385 struct dpif_netlink_flow request;
30053024 1386
70e5ed6f 1387 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
93451a0a 1388 return dpif_netlink_flow_transact(&request, reply, bufp);
96fba48f
BP
1389}
1390
70e5ed6f
JS
1391static int
1392dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1393 const struct dpif_netlink_flow *flow,
1394 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1395{
1396 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1397 flow->ufid_present ? &flow->ufid : NULL,
1398 false, reply, bufp);
1399}
1400
6bc60024 1401static void
93451a0a
AS
1402dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1403 const struct dpif_flow_put *put,
1404 struct dpif_netlink_flow *request)
6bc60024 1405{
d64e176c 1406 static const struct nlattr dummy_action;
6bc60024 1407
93451a0a 1408 dpif_netlink_flow_init(request);
89625d1e 1409 request->cmd = (put->flags & DPIF_FP_CREATE
6bc60024
BP
1410 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1411 request->dp_ifindex = dpif->dp_ifindex;
89625d1e
BP
1412 request->key = put->key;
1413 request->key_len = put->key_len;
e6cc0bab
AZ
1414 request->mask = put->mask;
1415 request->mask_len = put->mask_len;
70e5ed6f
JS
1416 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1417
6bc60024 1418 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
d64e176c
BP
1419 request->actions = (put->actions
1420 ? put->actions
1421 : CONST_CAST(struct nlattr *, &dummy_action));
89625d1e
BP
1422 request->actions_len = put->actions_len;
1423 if (put->flags & DPIF_FP_ZERO_STATS) {
6bc60024
BP
1424 request->clear = true;
1425 }
43f9ac0a
JR
1426 if (put->flags & DPIF_FP_PROBE) {
1427 request->probe = true;
1428 }
89625d1e 1429 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
6bc60024
BP
1430}
1431
b99d3cee 1432static void
70e5ed6f
JS
1433dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1434 const struct nlattr *key, size_t key_len,
1435 const ovs_u128 *ufid, bool terse,
1436 struct dpif_netlink_flow *request)
96fba48f 1437{
93451a0a 1438 dpif_netlink_flow_init(request);
b99d3cee
BP
1439 request->cmd = OVS_FLOW_CMD_DEL;
1440 request->dp_ifindex = dpif->dp_ifindex;
70e5ed6f
JS
1441 request->key = key;
1442 request->key_len = key_len;
1443 dpif_netlink_flow_init_ufid(request, ufid, terse);
1444}
1445
1446static void
1447dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1448 const struct dpif_flow_del *del,
1449 struct dpif_netlink_flow *request)
1450{
37382aa6
AS
1451 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1452 del->ufid, del->terse, request);
70e5ed6f
JS
1453}
1454
7e8b7199
PB
/* Bit positions of the flow dump type flags. */
enum {
    DUMP_OVS_FLOWS_BIT = 0,
    DUMP_OFFLOADED_FLOWS_BIT = 1,
};

/* Flow dump type flags, stored in the 'type' member of
 * struct dpif_netlink_flow_dump and combined with bitwise OR. */
enum {
    DUMP_OVS_FLOWS = 1 << DUMP_OVS_FLOWS_BIT,
    DUMP_OFFLOADED_FLOWS = 1 << DUMP_OFFLOADED_FLOWS_BIT,
};
1464
93451a0a 1465struct dpif_netlink_flow_dump {
ac64794a
BP
1466 struct dpif_flow_dump up;
1467 struct nl_dump nl_dump;
d2ad7ef1 1468 atomic_int status;
f2280b41
PB
1469 struct netdev_flow_dump **netdev_dumps;
1470 int netdev_dumps_num; /* Number of netdev_flow_dumps */
1471 struct ovs_mutex netdev_lock; /* Guards the following. */
1472 int netdev_current_dump OVS_GUARDED; /* Shared current dump */
7e8b7199 1473 int type; /* Type of dump */
e723fd32
JS
1474};
1475
93451a0a
AS
1476static struct dpif_netlink_flow_dump *
1477dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
e723fd32 1478{
93451a0a 1479 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
e723fd32
JS
1480}
1481
f2280b41
PB
1482static void
1483start_netdev_dump(const struct dpif *dpif_,
1484 struct dpif_netlink_flow_dump *dump)
1485{
1486 ovs_mutex_init(&dump->netdev_lock);
1487
7e8b7199 1488 if (!(dump->type & DUMP_OFFLOADED_FLOWS)) {
f2280b41
PB
1489 dump->netdev_dumps_num = 0;
1490 dump->netdev_dumps = NULL;
1491 return;
1492 }
1493
1494 ovs_mutex_lock(&dump->netdev_lock);
1495 dump->netdev_current_dump = 0;
1496 dump->netdev_dumps
dfaf79dd 1497 = netdev_ports_flow_dump_create(dpif_->dpif_class,
f2280b41
PB
1498 &dump->netdev_dumps_num);
1499 ovs_mutex_unlock(&dump->netdev_lock);
1500}
1501
7e8b7199
PB
1502static int
1503dpif_netlink_get_dump_type(char *str) {
1504 int type = 0;
1505
1506 if (!str || !strcmp(str, "ovs") || !strcmp(str, "dpctl")) {
1507 type |= DUMP_OVS_FLOWS;
1508 }
1509 if ((netdev_is_flow_api_enabled() && !str)
1510 || (str && (!strcmp(str, "offloaded") || !strcmp(str, "dpctl")))) {
1511 type |= DUMP_OFFLOADED_FLOWS;
1512 }
1513
1514 return type;
1515}
1516
ac64794a 1517static struct dpif_flow_dump *
7e8b7199
PB
1518dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
1519 char *type)
96fba48f 1520{
93451a0a
AS
1521 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1522 struct dpif_netlink_flow_dump *dump;
1523 struct dpif_netlink_flow request;
37a1300c
BP
1524 struct ofpbuf *buf;
1525
ac64794a
BP
1526 dump = xmalloc(sizeof *dump);
1527 dpif_flow_dump_init(&dump->up, dpif_);
37a1300c 1528
7e8b7199 1529 dump->type = dpif_netlink_get_dump_type(type);
37a1300c 1530
7e8b7199
PB
1531 if (dump->type & DUMP_OVS_FLOWS) {
1532 dpif_netlink_flow_init(&request);
1533 request.cmd = OVS_FLOW_CMD_GET;
1534 request.dp_ifindex = dpif->dp_ifindex;
1535 request.ufid_present = false;
1536 request.ufid_terse = terse;
1537
1538 buf = ofpbuf_new(1024);
1539 dpif_netlink_flow_to_ofpbuf(&request, buf);
1540 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1541 ofpbuf_delete(buf);
1542 }
ac64794a 1543 atomic_init(&dump->status, 0);
64bb477f 1544 dump->up.terse = terse;
30053024 1545
f2280b41
PB
1546 start_netdev_dump(dpif_, dump);
1547
ac64794a 1548 return &dump->up;
704a1e09
BP
1549}
1550
1551static int
93451a0a 1552dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
704a1e09 1553{
93451a0a 1554 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
7e8b7199 1555 unsigned int nl_status = 0;
ac64794a 1556 int dump_status;
96fba48f 1557
7e8b7199
PB
1558 if (dump->type & DUMP_OVS_FLOWS) {
1559 nl_status = nl_dump_done(&dump->nl_dump);
1560 }
1561
f2280b41
PB
1562 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1563 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1564
1565 if (err != 0 && err != EOPNOTSUPP) {
1566 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1567 }
1568 }
1569
1570 free(dump->netdev_dumps);
1571 ovs_mutex_destroy(&dump->netdev_lock);
1572
7424fc44
JR
1573 /* No other thread has access to 'dump' at this point. */
1574 atomic_read_relaxed(&dump->status, &dump_status);
ac64794a
BP
1575 free(dump);
1576 return dump_status ? dump_status : nl_status;
1577}
feebdea2 1578
93451a0a 1579struct dpif_netlink_flow_dump_thread {
ac64794a 1580 struct dpif_flow_dump_thread up;
93451a0a
AS
1581 struct dpif_netlink_flow_dump *dump;
1582 struct dpif_netlink_flow flow;
ac64794a
BP
1583 struct dpif_flow_stats stats;
1584 struct ofpbuf nl_flows; /* Always used to store flows. */
1585 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
f2280b41
PB
1586 int netdev_dump_idx; /* This thread current netdev dump index */
1587 bool netdev_done; /* If we are finished dumping netdevs */
1588
1589 /* (Key/Mask/Actions) Buffers for netdev dumping */
1590 struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
1591 struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
1592 struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
ac64794a
BP
1593};
1594
93451a0a
AS
1595static struct dpif_netlink_flow_dump_thread *
1596dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
ac64794a 1597{
93451a0a 1598 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
ac64794a
BP
1599}
1600
1601static struct dpif_flow_dump_thread *
93451a0a 1602dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
ac64794a 1603{
93451a0a
AS
1604 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1605 struct dpif_netlink_flow_dump_thread *thread;
ac64794a
BP
1606
1607 thread = xmalloc(sizeof *thread);
1608 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1609 thread->dump = dump;
1610 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1611 thread->nl_actions = NULL;
f2280b41
PB
1612 thread->netdev_dump_idx = 0;
1613 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
ac64794a
BP
1614
1615 return &thread->up;
1616}
1617
1618static void
93451a0a 1619dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
ac64794a 1620{
93451a0a
AS
1621 struct dpif_netlink_flow_dump_thread *thread
1622 = dpif_netlink_flow_dump_thread_cast(thread_);
ac64794a
BP
1623
1624 ofpbuf_uninit(&thread->nl_flows);
1625 ofpbuf_delete(thread->nl_actions);
1626 free(thread);
1627}
1628
1629static void
7af12bd7 1630dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
7fe98598 1631 const struct dpif_netlink_flow *datapath_flow)
ac64794a 1632{
7fe98598
NR
1633 dpif_flow->key = datapath_flow->key;
1634 dpif_flow->key_len = datapath_flow->key_len;
1635 dpif_flow->mask = datapath_flow->mask;
1636 dpif_flow->mask_len = datapath_flow->mask_len;
1637 dpif_flow->actions = datapath_flow->actions;
1638 dpif_flow->actions_len = datapath_flow->actions_len;
70e5ed6f 1639 dpif_flow->ufid_present = datapath_flow->ufid_present;
ec97c2df 1640 dpif_flow->pmd_id = PMD_ID_NULL;
70e5ed6f
JS
1641 if (datapath_flow->ufid_present) {
1642 dpif_flow->ufid = datapath_flow->ufid;
1643 } else {
1644 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1645 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1646 &dpif_flow->ufid);
1647 }
7fe98598 1648 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
4742003c 1649 dpif_flow->offloaded = false;
ac64794a
BP
1650}
1651
f2280b41
PB
1652/* The design is such that all threads are working together on the first dump
1653 * to the last, in order (at first they all on dump 0).
1654 * When the first thread finds that the given dump is finished,
1655 * they all move to the next. If two or more threads find the same dump
1656 * is finished at the same time, the first one will advance the shared
1657 * netdev_current_dump and the others will catch up. */
1658static void
1659dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1660{
1661 struct dpif_netlink_flow_dump *dump = thread->dump;
1662
1663 ovs_mutex_lock(&dump->netdev_lock);
1664 /* if we haven't finished (dumped everything) */
1665 if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1666 /* if we are the first to find that current dump is finished
1667 * advance it. */
1668 if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1669 thread->netdev_dump_idx = ++dump->netdev_current_dump;
1670 /* did we just finish the last dump? done. */
1671 if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1672 thread->netdev_done = true;
1673 }
1674 } else {
1675 /* otherwise, we are behind, catch up */
1676 thread->netdev_dump_idx = dump->netdev_current_dump;
1677 }
1678 } else {
1679 /* some other thread finished */
1680 thread->netdev_done = true;
1681 }
1682 ovs_mutex_unlock(&dump->netdev_lock);
1683}
1684
1685static int
1686dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
1687 struct ofpbuf *key_buf,
1688 struct ofpbuf *mask_buf,
1689 struct nlattr *actions,
1690 struct dpif_flow_stats *stats,
1691 ovs_u128 *ufid,
1692 struct dpif_flow *flow,
1693 bool terse OVS_UNUSED)
1694{
1695
1696 struct odp_flow_key_parms odp_parms = {
1697 .flow = &match->flow,
1698 .mask = &match->wc.masks,
1699 .support = {
1700 .max_vlan_headers = 1,
1701 },
1702 };
1703 size_t offset;
1704
1705 memset(flow, 0, sizeof *flow);
1706
1707 /* Key */
1708 offset = key_buf->size;
1709 flow->key = ofpbuf_tail(key_buf);
1710 odp_flow_key_from_flow(&odp_parms, key_buf);
1711 flow->key_len = key_buf->size - offset;
1712
1713 /* Mask */
1714 offset = mask_buf->size;
1715 flow->mask = ofpbuf_tail(mask_buf);
1716 odp_parms.key_buf = key_buf;
1717 odp_flow_key_from_mask(&odp_parms, mask_buf);
1718 flow->mask_len = mask_buf->size - offset;
1719
1720 /* Actions */
1721 flow->actions = nl_attr_get(actions);
1722 flow->actions_len = nl_attr_get_size(actions);
1723
1724 /* Stats */
1725 memcpy(&flow->stats, stats, sizeof *stats);
1726
1727 /* UFID */
1728 flow->ufid_present = true;
1729 flow->ufid = *ufid;
1730
1731 flow->pmd_id = PMD_ID_NULL;
4742003c
PB
1732
1733 flow->offloaded = true;
1734
f2280b41
PB
1735 return 0;
1736}
1737
ac64794a 1738static int
93451a0a
AS
1739dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1740 struct dpif_flow *flows, int max_flows)
ac64794a 1741{
93451a0a
AS
1742 struct dpif_netlink_flow_dump_thread *thread
1743 = dpif_netlink_flow_dump_thread_cast(thread_);
1744 struct dpif_netlink_flow_dump *dump = thread->dump;
1745 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
ac64794a
BP
1746 int n_flows;
1747
1748 ofpbuf_delete(thread->nl_actions);
1749 thread->nl_actions = NULL;
1750
1751 n_flows = 0;
f2280b41
PB
1752 max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
1753
1754 while (!thread->netdev_done && n_flows < max_flows) {
1755 struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
1756 struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
1757 struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
1758 struct ofpbuf key, mask, act;
1759 struct dpif_flow *f = &flows[n_flows];
1760 int cur = thread->netdev_dump_idx;
1761 struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
1762 struct match match;
1763 struct nlattr *actions;
1764 struct dpif_flow_stats stats;
1765 ovs_u128 ufid;
1766 bool has_next;
1767
1768 ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
1769 ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
1770 ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
1771 has_next = netdev_flow_dump_next(netdev_dump, &match,
1772 &actions, &stats,
1773 &ufid,
1774 &thread->nl_flows,
1775 &act);
1776 if (has_next) {
1777 dpif_netlink_netdev_match_to_dpif_flow(&match,
1778 &key, &mask,
1779 actions,
1780 &stats,
1781 &ufid,
1782 f,
1783 dump->up.terse);
1784 n_flows++;
1785 } else {
1786 dpif_netlink_advance_netdev_dump(thread);
1787 }
1788 }
1789
7e8b7199
PB
1790 if (!(dump->type & DUMP_OVS_FLOWS)) {
1791 return n_flows;
1792 }
1793
ac64794a 1794 while (!n_flows
6fd6ed71 1795 || (n_flows < max_flows && thread->nl_flows.size)) {
7fe98598 1796 struct dpif_netlink_flow datapath_flow;
ac64794a
BP
1797 struct ofpbuf nl_flow;
1798 int error;
1799
1800 /* Try to grab another flow. */
1801 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1802 break;
feebdea2 1803 }
30053024 1804
ac64794a 1805 /* Convert the flow to our output format. */
7fe98598 1806 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
30053024 1807 if (error) {
7424fc44 1808 atomic_store_relaxed(&dump->status, error);
ac64794a 1809 break;
feebdea2 1810 }
30053024 1811
64bb477f
JS
1812 if (dump->up.terse || datapath_flow.actions) {
1813 /* Common case: we don't want actions, or the flow includes
1814 * actions. */
7af12bd7
JS
1815 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1816 &datapath_flow);
ac64794a
BP
1817 } else {
1818 /* Rare case: the flow does not include actions. Retrieve this
1819 * individual flow again to get the actions. */
70e5ed6f 1820 error = dpif_netlink_flow_get(dpif, &datapath_flow,
7fe98598 1821 &datapath_flow, &thread->nl_actions);
30053024
BP
1822 if (error == ENOENT) {
1823 VLOG_DBG("dumped flow disappeared on get");
ac64794a 1824 continue;
30053024 1825 } else if (error) {
10a89ef0
BP
1826 VLOG_WARN("error fetching dumped flow: %s",
1827 ovs_strerror(error));
7424fc44 1828 atomic_store_relaxed(&dump->status, error);
ac64794a 1829 break;
30053024 1830 }
30053024 1831
ac64794a
BP
1832 /* Save this flow. Then exit, because we only have one buffer to
1833 * handle this case. */
7af12bd7
JS
1834 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1835 &datapath_flow);
ac64794a
BP
1836 break;
1837 }
feebdea2 1838 }
ac64794a 1839 return n_flows;
96fba48f
BP
1840}
1841
eabe7c68 1842static void
93451a0a
AS
1843dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1844 struct ofpbuf *buf)
96fba48f 1845{
89625d1e 1846 struct ovs_header *k_exec;
758c456d 1847 size_t key_ofs;
f7cd0081 1848
eabe7c68 1849 ofpbuf_prealloc_tailroom(buf, (64
cf62fa4c 1850 + dp_packet_size(d_exec->packet)
758c456d 1851 + ODP_KEY_METADATA_SIZE
eabe7c68 1852 + d_exec->actions_len));
f7cd0081 1853
df2c07f4 1854 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
69685a88 1855 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
f7cd0081 1856
89625d1e
BP
1857 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1858 k_exec->dp_ifindex = dp_ifindex;
f7cd0081 1859
89625d1e 1860 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
cf62fa4c
PS
1861 dp_packet_data(d_exec->packet),
1862 dp_packet_size(d_exec->packet));
758c456d
JR
1863
1864 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
beb75a40 1865 odp_key_from_dp_packet(buf, d_exec->packet);
758c456d
JR
1866 nl_msg_end_nested(buf, key_ofs);
1867
89625d1e
BP
1868 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1869 d_exec->actions, d_exec->actions_len);
43f9ac0a 1870 if (d_exec->probe) {
2e460098 1871 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
43f9ac0a 1872 }
27130224
AZ
1873 if (d_exec->mtu) {
1874 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1875 }
6bc60024
BP
1876}
1877
0f3358ea
BP
1878/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1879 * Returns the number actually executed (at least 1, if 'n_ops' is
1880 * positive). */
1881static size_t
93451a0a
AS
1882dpif_netlink_operate__(struct dpif_netlink *dpif,
1883 struct dpif_op **ops, size_t n_ops)
6bc60024 1884{
eabe7c68
BP
1885 struct op_auxdata {
1886 struct nl_transaction txn;
72d32ac0 1887
eabe7c68
BP
1888 struct ofpbuf request;
1889 uint64_t request_stub[1024 / 8];
72d32ac0
BP
1890
1891 struct ofpbuf reply;
1892 uint64_t reply_stub[1024 / 8];
8b668ee3 1893 } auxes[OPERATE_MAX_OPS];
eabe7c68 1894
8b668ee3 1895 struct nl_transaction *txnsp[OPERATE_MAX_OPS];
6bc60024
BP
1896 size_t i;
1897
8b668ee3 1898 n_ops = MIN(n_ops, OPERATE_MAX_OPS);
6bc60024 1899 for (i = 0; i < n_ops; i++) {
eabe7c68 1900 struct op_auxdata *aux = &auxes[i];
c2b565b5 1901 struct dpif_op *op = ops[i];
b99d3cee
BP
1902 struct dpif_flow_put *put;
1903 struct dpif_flow_del *del;
6fe09f8c 1904 struct dpif_flow_get *get;
93451a0a 1905 struct dpif_netlink_flow flow;
eabe7c68
BP
1906
1907 ofpbuf_use_stub(&aux->request,
1908 aux->request_stub, sizeof aux->request_stub);
1909 aux->txn.request = &aux->request;
b99d3cee 1910
72d32ac0
BP
1911 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1912 aux->txn.reply = NULL;
1913
b99d3cee
BP
1914 switch (op->type) {
1915 case DPIF_OP_FLOW_PUT:
1916 put = &op->u.flow_put;
93451a0a 1917 dpif_netlink_init_flow_put(dpif, put, &flow);
6bc60024 1918 if (put->stats) {
eabe7c68 1919 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1920 aux->txn.reply = &aux->reply;
6bc60024 1921 }
93451a0a 1922 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee
BP
1923 break;
1924
1925 case DPIF_OP_FLOW_DEL:
1926 del = &op->u.flow_del;
93451a0a 1927 dpif_netlink_init_flow_del(dpif, del, &flow);
b99d3cee 1928 if (del->stats) {
eabe7c68 1929 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1930 aux->txn.reply = &aux->reply;
b99d3cee 1931 }
93451a0a 1932 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee 1933 break;
6bc60024 1934
b99d3cee 1935 case DPIF_OP_EXECUTE:
0f3358ea
BP
1936 /* Can't execute a packet that won't fit in a Netlink attribute. */
1937 if (OVS_UNLIKELY(nl_attr_oversized(
cf62fa4c 1938 dp_packet_size(op->u.execute.packet)))) {
0f3358ea
BP
1939 /* Report an error immediately if this is the first operation.
1940 * Otherwise the easiest thing to do is to postpone to the next
1941 * call (when this will be the first operation). */
1942 if (i == 0) {
1943 VLOG_ERR_RL(&error_rl,
1944 "dropping oversized %"PRIu32"-byte packet",
cf62fa4c 1945 dp_packet_size(op->u.execute.packet));
0f3358ea
BP
1946 op->error = ENOBUFS;
1947 return 1;
1948 }
1949 n_ops = i;
1950 } else {
1951 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->u.execute,
1952 &aux->request);
1953 }
b99d3cee
BP
1954 break;
1955
6fe09f8c
JS
1956 case DPIF_OP_FLOW_GET:
1957 get = &op->u.flow_get;
70e5ed6f 1958 dpif_netlink_init_flow_get(dpif, get, &flow);
6fe09f8c 1959 aux->txn.reply = get->buffer;
93451a0a 1960 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
6fe09f8c
JS
1961 break;
1962
b99d3cee 1963 default:
428b2edd 1964 OVS_NOT_REACHED();
6bc60024
BP
1965 }
1966 }
1967
6bc60024 1968 for (i = 0; i < n_ops; i++) {
eabe7c68 1969 txnsp[i] = &auxes[i].txn;
6bc60024 1970 }
a88b4e04 1971 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
6bc60024 1972
6bc60024 1973 for (i = 0; i < n_ops; i++) {
72d32ac0 1974 struct op_auxdata *aux = &auxes[i];
eabe7c68 1975 struct nl_transaction *txn = &auxes[i].txn;
c2b565b5 1976 struct dpif_op *op = ops[i];
b99d3cee
BP
1977 struct dpif_flow_put *put;
1978 struct dpif_flow_del *del;
6fe09f8c 1979 struct dpif_flow_get *get;
6bc60024 1980
b99d3cee 1981 op->error = txn->error;
6bc60024 1982
b99d3cee
BP
1983 switch (op->type) {
1984 case DPIF_OP_FLOW_PUT:
1985 put = &op->u.flow_put;
cfceb2b5 1986 if (put->stats) {
b99d3cee 1987 if (!op->error) {
93451a0a 1988 struct dpif_netlink_flow reply;
cfceb2b5 1989
93451a0a
AS
1990 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1991 txn->reply);
cfceb2b5 1992 if (!op->error) {
93451a0a 1993 dpif_netlink_flow_get_stats(&reply, put->stats);
cfceb2b5
BP
1994 }
1995 }
6bc60024 1996 }
b99d3cee
BP
1997 break;
1998
1999 case DPIF_OP_FLOW_DEL:
2000 del = &op->u.flow_del;
cfceb2b5 2001 if (del->stats) {
b99d3cee 2002 if (!op->error) {
93451a0a 2003 struct dpif_netlink_flow reply;
cfceb2b5 2004
93451a0a
AS
2005 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
2006 txn->reply);
cfceb2b5 2007 if (!op->error) {
93451a0a 2008 dpif_netlink_flow_get_stats(&reply, del->stats);
cfceb2b5
BP
2009 }
2010 }
b99d3cee
BP
2011 }
2012 break;
2013
2014 case DPIF_OP_EXECUTE:
2015 break;
2016
6fe09f8c
JS
2017 case DPIF_OP_FLOW_GET:
2018 get = &op->u.flow_get;
2019 if (!op->error) {
93451a0a 2020 struct dpif_netlink_flow reply;
6fe09f8c 2021
93451a0a 2022 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
6fe09f8c 2023 if (!op->error) {
7af12bd7
JS
2024 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
2025 &reply);
6fe09f8c
JS
2026 }
2027 }
2028 break;
2029
b99d3cee 2030 default:
428b2edd 2031 OVS_NOT_REACHED();
6bc60024
BP
2032 }
2033
72d32ac0
BP
2034 ofpbuf_uninit(&aux->request);
2035 ofpbuf_uninit(&aux->reply);
6bc60024 2036 }
0f3358ea
BP
2037
2038 return n_ops;
eabe7c68
BP
2039}
2040
6c343984
PB
2041static int
2042parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
2043{
2044 struct dpif_flow *dpif_flow = get->flow;
2045 struct match match;
2046 struct nlattr *actions;
2047 struct dpif_flow_stats stats;
2048 struct ofpbuf buf;
2049 uint64_t act_buf[1024 / 8];
2050 struct odputil_keybuf maskbuf;
2051 struct odputil_keybuf keybuf;
2052 struct odputil_keybuf actbuf;
2053 struct ofpbuf key, mask, act;
2054 int err;
2055
2056 ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
dfaf79dd 2057 err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
6c343984
PB
2058 &actions, get->ufid, &stats, &buf);
2059 if (err) {
2060 return err;
2061 }
2062
2063 VLOG_DBG("found flow from netdev, translating to dpif flow");
2064
2065 ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
2066 ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
2067 ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
2068 dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
2069 &stats,
2070 (ovs_u128 *) get->ufid,
2071 dpif_flow,
2072 false);
2073 ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
2074 dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
2075 dpif_flow->actions_len = nl_attr_get_size(actions);
2076
2077 return 0;
2078}
2079
8b668ee3
PB
2080static int
2081parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
2082{
2083 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
dfaf79dd 2084 const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
8b668ee3
PB
2085 struct match match;
2086 odp_port_t in_port;
2087 const struct nlattr *nla;
2088 size_t left;
8b668ee3
PB
2089 struct netdev *dev;
2090 struct offload_info info;
2091 ovs_be16 dst_port = 0;
2092 int err;
2093
2094 if (put->flags & DPIF_FP_PROBE) {
2095 return EOPNOTSUPP;
2096 }
2097
2098 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
2099 put->mask_len, &match);
2100 if (err) {
2101 return err;
2102 }
2103
2104 /* When we try to install a dummy flow from a probed feature. */
2105 if (match.flow.dl_type == htons(0x1234)) {
2106 return EOPNOTSUPP;
2107 }
2108
2109 in_port = match.flow.in_port.odp_port;
dfaf79dd 2110 dev = netdev_ports_get(in_port, dpif_class);
8b668ee3
PB
2111 if (!dev) {
2112 return EOPNOTSUPP;
2113 }
2114
00a0a011 2115 /* Get tunnel dst port */
8b668ee3
PB
2116 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
2117 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
2118 const struct netdev_tunnel_config *tnl_cfg;
2119 struct netdev *outdev;
2120 odp_port_t out_port;
2121
8b668ee3 2122 out_port = nl_attr_get_odp_port(nla);
dfaf79dd 2123 outdev = netdev_ports_get(out_port, dpif_class);
8b668ee3
PB
2124 if (!outdev) {
2125 err = EOPNOTSUPP;
2126 goto out;
2127 }
2128 tnl_cfg = netdev_get_tunnel_config(outdev);
2129 if (tnl_cfg && tnl_cfg->dst_port != 0) {
2130 dst_port = tnl_cfg->dst_port;
2131 }
2132 netdev_close(outdev);
2133 }
2134 }
2135
dfaf79dd 2136 info.dpif_class = dpif_class;
8b668ee3
PB
2137 info.tp_dst_port = dst_port;
2138 err = netdev_flow_put(dev, &match,
2139 CONST_CAST(struct nlattr *, put->actions),
2140 put->actions_len,
2141 CONST_CAST(ovs_u128 *, put->ufid),
2142 &info, put->stats);
2143
2144 if (!err) {
2145 if (put->flags & DPIF_FP_MODIFY) {
2146 struct dpif_op *opp;
2147 struct dpif_op op;
2148
2149 op.type = DPIF_OP_FLOW_DEL;
2150 op.u.flow_del.key = put->key;
2151 op.u.flow_del.key_len = put->key_len;
2152 op.u.flow_del.ufid = put->ufid;
2153 op.u.flow_del.pmd_id = put->pmd_id;
2154 op.u.flow_del.stats = NULL;
2155 op.u.flow_del.terse = false;
2156
2157 opp = &op;
2158 dpif_netlink_operate__(dpif, &opp, 1);
2159 }
2160
2161 VLOG_DBG("added flow");
2162 } else if (err != EEXIST) {
2163 VLOG_ERR_RL(&rl, "failed to offload flow: %s", ovs_strerror(err));
2164 }
2165
2166out:
2167 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2168 /* Modified rule can't be offloaded, try and delete from HW */
2169 int del_err = netdev_flow_del(dev, put->ufid, put->stats);
2170
2171 if (!del_err) {
2172 /* Delete from hw success, so old flow was offloaded.
2173 * Change flags to create the flow in kernel */
2174 put->flags &= ~DPIF_FP_MODIFY;
2175 put->flags |= DPIF_FP_CREATE;
2176 } else if (del_err != ENOENT) {
2177 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2178 ovs_strerror(del_err));
2179 /* stop proccesing the flow in kernel */
2180 err = 0;
2181 }
2182 }
2183
2184 netdev_close(dev);
2185
2186 return err;
2187}
2188
8b668ee3
PB
2189static int
2190try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
eabe7c68 2191{
8b668ee3 2192 int err = EOPNOTSUPP;
9b00386b 2193
8b668ee3
PB
2194 switch (op->type) {
2195 case DPIF_OP_FLOW_PUT: {
2196 struct dpif_flow_put *put = &op->u.flow_put;
2197
2198 if (!put->ufid) {
2199 break;
2200 }
3cd99886
RD
2201
2202 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
8b668ee3
PB
2203 err = parse_flow_put(dpif, put);
2204 break;
2205 }
0335a89c
PB
2206 case DPIF_OP_FLOW_DEL: {
2207 struct dpif_flow_del *del = &op->u.flow_del;
2208
2209 if (!del->ufid) {
2210 break;
2211 }
3cd99886
RD
2212
2213 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
dfaf79dd 2214 err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
0335a89c
PB
2215 del->stats);
2216 break;
2217 }
6c343984
PB
2218 case DPIF_OP_FLOW_GET: {
2219 struct dpif_flow_get *get = &op->u.flow_get;
2220
2221 if (!op->u.flow_get.ufid) {
2222 break;
2223 }
3cd99886
RD
2224
2225 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
6c343984
PB
2226 err = parse_flow_get(dpif, get);
2227 break;
2228 }
8b668ee3
PB
2229 case DPIF_OP_EXECUTE:
2230 default:
2231 break;
2232 }
2233
2234 return err;
2235}
2236
/* Runs all 'n_ops' operations in 'ops' against 'dpif' by calling
 * dpif_netlink_operate__() repeatedly, advancing past however many
 * operations each call reports as executed. */
static void
dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
                            size_t n_ops)
{
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
2248
8b668ee3
PB
2249static void
2250dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
2251{
2252 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2253 struct dpif_op *new_ops[OPERATE_MAX_OPS];
2254 int count = 0;
2255 int i = 0;
2256 int err = 0;
2257
2258 if (netdev_is_flow_api_enabled()) {
2259 while (n_ops > 0) {
2260 count = 0;
2261
2262 while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2263 struct dpif_op *op = ops[i++];
2264
2265 err = try_send_to_netdev(dpif, op);
2266 if (err && err != EEXIST) {
2267 new_ops[count++] = op;
2268 } else {
2269 op->error = err;
2270 }
2271
2272 n_ops--;
2273 }
2274
2275 dpif_netlink_operate_chunks(dpif, new_ops, count);
2276 }
2277 } else {
2278 dpif_netlink_operate_chunks(dpif, ops, n_ops);
2279 }
2280}
2281
09cac43f
NR
2282#if _WIN32
2283static void
2284dpif_netlink_handler_uninit(struct dpif_handler *handler)
2285{
2286 vport_delete_sock_pool(handler);
2287}
2288
2289static int
2290dpif_netlink_handler_init(struct dpif_handler *handler)
2291{
2292 return vport_create_sock_pool(handler);
2293}
2294#else
2295
2296static int
2297dpif_netlink_handler_init(struct dpif_handler *handler)
2298{
2299 handler->epoll_fd = epoll_create(10);
2300 return handler->epoll_fd < 0 ? errno : 0;
2301}
2302
2303static void
2304dpif_netlink_handler_uninit(struct dpif_handler *handler)
2305{
2306 close(handler->epoll_fd);
2307}
2308#endif
2309
1579cf67
AW
2310/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2311 * currently in 'dpif' in the kernel, by adding a new set of channels for
2312 * any kernel vport that lacks one and deleting any channels that have no
2313 * backing kernel vports. */
96fba48f 2314static int
93451a0a 2315dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
b90de034 2316 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 2317{
8381a3d3 2318 unsigned long int *keep_channels;
93451a0a 2319 struct dpif_netlink_vport vport;
8381a3d3
BP
2320 size_t keep_channels_nbits;
2321 struct nl_dump dump;
d57695d7
JS
2322 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
2323 struct ofpbuf buf;
8381a3d3
BP
2324 int retval = 0;
2325 size_t i;
982b8810 2326
09cac43f
NR
2327 ovs_assert(!WINDOWS || n_handlers <= 1);
2328 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
2329
1579cf67
AW
2330 if (dpif->n_handlers != n_handlers) {
2331 destroy_all_channels(dpif);
2332 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
2333 for (i = 0; i < n_handlers; i++) {
09cac43f 2334 int error;
1579cf67
AW
2335 struct dpif_handler *handler = &dpif->handlers[i];
2336
09cac43f
NR
2337 error = dpif_netlink_handler_init(handler);
2338 if (error) {
1579cf67
AW
2339 size_t j;
2340
2341 for (j = 0; j < i; j++) {
aa5c0216 2342 struct dpif_handler *tmp = &dpif->handlers[j];
09cac43f 2343 dpif_netlink_handler_uninit(tmp);
1579cf67
AW
2344 }
2345 free(dpif->handlers);
2346 dpif->handlers = NULL;
2347
09cac43f 2348 return error;
1579cf67 2349 }
8381a3d3 2350 }
1579cf67
AW
2351 dpif->n_handlers = n_handlers;
2352 }
2353
2354 for (i = 0; i < n_handlers; i++) {
2355 struct dpif_handler *handler = &dpif->handlers[i];
2356
2357 handler->event_offset = handler->n_events = 0;
17411ecf 2358 }
b063d9f0 2359
8381a3d3
BP
2360 keep_channels_nbits = dpif->uc_array_size;
2361 keep_channels = bitmap_allocate(keep_channels_nbits);
982b8810 2362
d57695d7 2363 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a
AS
2364 dpif_netlink_port_dump_start__(dpif, &dump);
2365 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
8381a3d3 2366 uint32_t port_no = odp_to_u32(vport.port_no);
1579cf67 2367 uint32_t *upcall_pids = NULL;
8381a3d3 2368 int error;
50f80534 2369
1579cf67
AW
2370 if (port_no >= dpif->uc_array_size
2371 || !vport_get_pids(dpif, port_no, &upcall_pids)) {
09cac43f 2372 struct nl_sock **socksp = vport_create_socksp(dpif, &error);
1579cf67
AW
2373
2374 if (!socksp) {
2375 goto error;
2376 }
2377
2378 error = vport_add_channels(dpif, vport.port_no, socksp);
b063d9f0 2379 if (error) {
1579cf67 2380 VLOG_INFO("%s: could not add channels for port %s",
9b00386b 2381 dpif_name(&dpif->dpif), vport.name);
09cac43f 2382 vport_del_socksp(dpif, socksp);
8381a3d3
BP
2383 retval = error;
2384 goto error;
982b8810 2385 }
1579cf67
AW
2386 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
2387 free(socksp);
8381a3d3 2388 }
50f80534 2389
8381a3d3 2390 /* Configure the vport to deliver misses to 'sock'. */
1579cf67
AW
2391 if (vport.upcall_pids[0] == 0
2392 || vport.n_upcall_pids != dpif->n_handlers
2393 || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
2394 *upcall_pids)) {
93451a0a 2395 struct dpif_netlink_vport vport_request;
989fd548 2396
93451a0a 2397 dpif_netlink_vport_init(&vport_request);
989fd548
JP
2398 vport_request.cmd = OVS_VPORT_CMD_SET;
2399 vport_request.dp_ifindex = dpif->dp_ifindex;
8381a3d3 2400 vport_request.port_no = vport.port_no;
1579cf67
AW
2401 vport_request.n_upcall_pids = dpif->n_handlers;
2402 vport_request.upcall_pids = upcall_pids;
93451a0a 2403 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67 2404 if (error) {
989fd548
JP
2405 VLOG_WARN_RL(&error_rl,
2406 "%s: failed to set upcall pid on port: %s",
10a89ef0 2407 dpif_name(&dpif->dpif), ovs_strerror(error));
989fd548 2408
8381a3d3
BP
2409 if (error != ENODEV && error != ENOENT) {
2410 retval = error;
989fd548 2411 } else {
8381a3d3
BP
2412 /* The vport isn't really there, even though the dump says
2413 * it is. Probably we just hit a race after a port
2414 * disappeared. */
989fd548 2415 }
8381a3d3 2416 goto error;
50f80534 2417 }
8381a3d3 2418 }
14b4d2f9 2419
8381a3d3
BP
2420 if (port_no < keep_channels_nbits) {
2421 bitmap_set1(keep_channels, port_no);
2422 }
1579cf67 2423 free(upcall_pids);
8381a3d3
BP
2424 continue;
2425
2426 error:
1579cf67
AW
2427 free(upcall_pids);
2428 vport_del_channels(dpif, vport.port_no);
982b8810 2429 }
8381a3d3 2430 nl_dump_done(&dump);
d57695d7 2431 ofpbuf_uninit(&buf);
b063d9f0 2432
8381a3d3
BP
2433 /* Discard any saved channels that we didn't reuse. */
2434 for (i = 0; i < keep_channels_nbits; i++) {
2435 if (!bitmap_is_set(keep_channels, i)) {
1579cf67 2436 vport_del_channels(dpif, u32_to_odp(i));
8381a3d3
BP
2437 }
2438 }
2439 free(keep_channels);
2440
2441 return retval;
2442}
2443
2444static int
93451a0a 2445dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
b90de034 2446 OVS_REQ_WRLOCK(dpif->upcall_lock)
8381a3d3 2447{
1579cf67 2448 if ((dpif->handlers != NULL) == enable) {
8381a3d3
BP
2449 return 0;
2450 } else if (!enable) {
1579cf67 2451 destroy_all_channels(dpif);
8381a3d3
BP
2452 return 0;
2453 } else {
93451a0a 2454 return dpif_netlink_refresh_channels(dpif, 1);
8381a3d3 2455 }
96fba48f
BP
2456}
2457
9fafa796 2458static int
93451a0a 2459dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
9fafa796 2460{
93451a0a 2461 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2462 int error;
2463
1579cf67 2464 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2465 error = dpif_netlink_recv_set__(dpif, enable);
1579cf67 2466 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2467
2468 return error;
2469}
2470
1954e6bb 2471static int
93451a0a 2472dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
1954e6bb 2473{
93451a0a 2474 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67
AW
2475 int error = 0;
2476
09cac43f
NR
2477#ifdef _WIN32
2478 /* Multiple upcall handlers will be supported once kernel datapath supports
2479 * it. */
2480 if (n_handlers > 1) {
2481 return error;
2482 }
2483#endif
2484
1579cf67
AW
2485 fat_rwlock_wrlock(&dpif->upcall_lock);
2486 if (dpif->handlers) {
93451a0a 2487 error = dpif_netlink_refresh_channels(dpif, n_handlers);
1579cf67
AW
2488 }
2489 fat_rwlock_unlock(&dpif->upcall_lock);
2490
2491 return error;
1954e6bb
AW
2492}
2493
aae51f53 2494static int
93451a0a 2495dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
aae51f53
BP
2496 uint32_t queue_id, uint32_t *priority)
2497{
2498 if (queue_id < 0xf000) {
17ee3c1f 2499 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
aae51f53
BP
2500 return 0;
2501 } else {
2502 return EINVAL;
2503 }
2504}
2505
96fba48f 2506static int
7af12bd7
JS
2507parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
2508 struct dpif_upcall *upcall, int *dp_ifindex)
856081f6 2509{
df2c07f4 2510 static const struct nl_policy ovs_packet_policy[] = {
856081f6 2511 /* Always present. */
df2c07f4 2512 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
856081f6 2513 .min_len = ETH_HEADER_LEN },
df2c07f4 2514 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
856081f6 2515
df2c07f4 2516 /* OVS_PACKET_CMD_ACTION only. */
e995e3df 2517 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
8b7ea2d4 2518 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
7321bda3 2519 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
27130224 2520 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
856081f6
BP
2521 };
2522
0a2869d5
BP
2523 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2524 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2525 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2526 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
982b8810 2527
0a2869d5 2528 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
df2c07f4
JP
2529 if (!nlmsg || !genl || !ovs_header
2530 || nlmsg->nlmsg_type != ovs_packet_family
2531 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2532 ARRAY_SIZE(ovs_packet_policy))) {
856081f6
BP
2533 return EINVAL;
2534 }
2535
0a2869d5
BP
2536 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2537 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2538 : -1);
aaff4b55
BP
2539 if (type < 0) {
2540 return EINVAL;
2541 }
82272ede 2542
877c9270 2543 /* (Re)set ALL fields of '*upcall' on successful return. */
aaff4b55 2544 upcall->type = type;
ebc56baa
BP
2545 upcall->key = CONST_CAST(struct nlattr *,
2546 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
df2c07f4 2547 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
7af12bd7 2548 dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
e995e3df 2549 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
8b7ea2d4 2550 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
7321bda3 2551 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
27130224 2552 upcall->mru = a[OVS_PACKET_ATTR_MRU];
da546e07
JR
2553
2554 /* Allow overwriting the netlink attribute header without reallocating. */
cf62fa4c 2555 dp_packet_use_stub(&upcall->packet,
da546e07
JR
2556 CONST_CAST(struct nlattr *,
2557 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2558 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2559 sizeof(struct nlattr));
cf62fa4c
PS
2560 dp_packet_set_data(&upcall->packet,
2561 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2562 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
da546e07 2563
2482b0b0
JS
2564 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2565 /* Ethernet frame */
2566 upcall->packet.packet_type = htonl(PT_ETH);
2567 } else {
2568 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2569 ovs_be16 ethertype = 0;
2570 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2571 upcall->key_len,
2572 OVS_KEY_ATTR_ETHERTYPE);
2573 if (et_nla) {
2574 ethertype = nl_attr_get_be16(et_nla);
2575 }
2576 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2577 ntohs(ethertype));
2578 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2579 }
2580
df2c07f4 2581 *dp_ifindex = ovs_header->dp_ifindex;
982b8810 2582
856081f6
BP
2583 return 0;
2584}
2585
09cac43f
NR
2586#ifdef _WIN32
2587#define PACKET_RECV_BATCH_SIZE 50
2588static int
2589dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2590 struct dpif_upcall *upcall, struct ofpbuf *buf)
2591 OVS_REQ_RDLOCK(dpif->upcall_lock)
2592{
2593 struct dpif_handler *handler;
2594 int read_tries = 0;
2595 struct dpif_windows_vport_sock *sock_pool;
2596 uint32_t i;
2597
2598 if (!dpif->handlers) {
2599 return EAGAIN;
2600 }
2601
2602 /* Only one handler is supported currently. */
2603 if (handler_id >= 1) {
2604 return EAGAIN;
2605 }
2606
2607 if (handler_id >= dpif->n_handlers) {
2608 return EAGAIN;
2609 }
2610
2611 handler = &dpif->handlers[handler_id];
2612 sock_pool = handler->vport_sock_pool;
2613
2614 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2615 for (;;) {
2616 int dp_ifindex;
2617 int error;
2618
2619 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2620 return EAGAIN;
2621 }
2622
a86bd14e 2623 error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
09cac43f
NR
2624 if (error == ENOBUFS) {
2625 /* ENOBUFS typically means that we've received so many
2626 * packets that the buffer overflowed. Try again
2627 * immediately because there's almost certainly a packet
2628 * waiting for us. */
2629 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2630 continue;
2631 }
2632
2633 /* XXX: ch->last_poll = time_msec(); */
2634 if (error) {
2635 if (error == EAGAIN) {
2636 break;
2637 }
2638 return error;
2639 }
2640
27edb4aa 2641 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
09cac43f
NR
2642 if (!error && dp_ifindex == dpif->dp_ifindex) {
2643 return 0;
2644 } else if (error) {
2645 return error;
2646 }
2647 }
2648 }
2649
2650 return EAGAIN;
2651}
2652#else
856081f6 2653static int
93451a0a
AS
2654dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2655 struct dpif_upcall *upcall, struct ofpbuf *buf)
b90de034 2656 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2657{
1579cf67 2658 struct dpif_handler *handler;
17411ecf 2659 int read_tries = 0;
96fba48f 2660
1579cf67
AW
2661 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2662 return EAGAIN;
982b8810
BP
2663 }
2664
1579cf67
AW
2665 handler = &dpif->handlers[handler_id];
2666 if (handler->event_offset >= handler->n_events) {
8522ba09 2667 int retval;
989fd548 2668
1579cf67 2669 handler->event_offset = handler->n_events = 0;
f6d1465c 2670
8522ba09 2671 do {
1579cf67 2672 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
989fd548 2673 dpif->uc_array_size, 0);
8522ba09 2674 } while (retval < 0 && errno == EINTR);
09cac43f 2675
8522ba09
BP
2676 if (retval < 0) {
2677 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
10a89ef0 2678 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
989fd548 2679 } else if (retval > 0) {
1579cf67 2680 handler->n_events = retval;
8522ba09 2681 }
8522ba09
BP
2682 }
2683
1579cf67
AW
2684 while (handler->event_offset < handler->n_events) {
2685 int idx = handler->epoll_events[handler->event_offset].data.u32;
2686 struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
8522ba09 2687
1579cf67 2688 handler->event_offset++;
17411ecf 2689
f6d1465c 2690 for (;;) {
8522ba09 2691 int dp_ifindex;
f6d1465c 2692 int error;
17411ecf 2693
f6d1465c
BP
2694 if (++read_tries > 50) {
2695 return EAGAIN;
2696 }
17411ecf 2697
a86bd14e 2698 error = nl_sock_recv(ch->sock, buf, NULL, false);
14b4d2f9
BP
2699 if (error == ENOBUFS) {
2700 /* ENOBUFS typically means that we've received so many
2701 * packets that the buffer overflowed. Try again
2702 * immediately because there's almost certainly a packet
2703 * waiting for us. */
9b00386b 2704 report_loss(dpif, ch, idx, handler_id);
14b4d2f9
BP
2705 continue;
2706 }
2707
2708 ch->last_poll = time_msec();
72d32ac0 2709 if (error) {
72d32ac0
BP
2710 if (error == EAGAIN) {
2711 break;
2712 }
f6d1465c
BP
2713 return error;
2714 }
17411ecf 2715
7af12bd7 2716 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
a12b3ead 2717 if (!error && dp_ifindex == dpif->dp_ifindex) {
f6d1465c 2718 return 0;
989fd548 2719 } else if (error) {
f6d1465c 2720 return error;
17411ecf 2721 }
982b8810 2722 }
50f80534 2723 }
982b8810
BP
2724
2725 return EAGAIN;
96fba48f 2726}
09cac43f 2727#endif
96fba48f 2728
9fafa796 2729static int
93451a0a
AS
2730dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2731 struct dpif_upcall *upcall, struct ofpbuf *buf)
9fafa796 2732{
93451a0a 2733 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2734 int error;
2735
1579cf67 2736 fat_rwlock_rdlock(&dpif->upcall_lock);
09cac43f
NR
2737#ifdef _WIN32
2738 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2739#else
93451a0a 2740 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
09cac43f 2741#endif
1579cf67 2742 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2743
2744 return error;
2745}
2746
96fba48f 2747static void
93451a0a 2748dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
b90de034 2749 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2750{
93451a0a 2751#ifdef _WIN32
09cac43f
NR
2752 uint32_t i;
2753 struct dpif_windows_vport_sock *sock_pool =
2754 dpif->handlers[handler_id].vport_sock_pool;
2755
2756 /* Only one handler is supported currently. */
2757 if (handler_id >= 1) {
2758 return;
2759 }
2760
2761 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2762 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2763 }
93451a0a 2764#else
1579cf67
AW
2765 if (dpif->handlers && handler_id < dpif->n_handlers) {
2766 struct dpif_handler *handler = &dpif->handlers[handler_id];
2767
2768 poll_fd_wait(handler->epoll_fd, POLLIN);
17411ecf 2769 }
93451a0a 2770#endif
96fba48f
BP
2771}
2772
1ba530f4 2773static void
93451a0a 2774dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
1ba530f4 2775{
93451a0a 2776 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
17411ecf 2777
b90de034 2778 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 2779 dpif_netlink_recv_wait__(dpif, handler_id);
b90de034
AW
2780 fat_rwlock_unlock(&dpif->upcall_lock);
2781}
2782
2783static void
93451a0a 2784dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
b90de034
AW
2785 OVS_REQ_WRLOCK(dpif->upcall_lock)
2786{
1579cf67
AW
2787 if (dpif->handlers) {
2788 size_t i, j;
2789
2790 for (i = 0; i < dpif->uc_array_size; i++ ) {
2791 if (!dpif->handlers[0].channels[i].sock) {
2792 continue;
2793 }
1ba530f4 2794
1579cf67
AW
2795 for (j = 0; j < dpif->n_handlers; j++) {
2796 nl_sock_drain(dpif->handlers[j].channels[i].sock);
9fafa796 2797 }
989fd548 2798 }
1ba530f4 2799 }
b90de034
AW
2800}
2801
2802static void
93451a0a 2803dpif_netlink_recv_purge(struct dpif *dpif_)
b90de034 2804{
93451a0a 2805 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
2806
2807 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2808 dpif_netlink_recv_purge__(dpif);
1579cf67 2809 fat_rwlock_unlock(&dpif->upcall_lock);
1ba530f4
BP
2810}
2811
/* Returns a newly allocated copy of the first line of
 * /sys/module/openvswitch/version, without its trailing newline.  Returns
 * NULL if the file cannot be opened or read, or when not built for Linux.
 * The string comes from xstrdup(). */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            char *eol = strchr(line, '\n');

            if (eol) {
                *eol = '\0';
            }
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2841
c11c9f4a
DDP
2842struct dpif_netlink_ct_dump_state {
2843 struct ct_dpif_dump_state up;
2844 struct nl_ct_dump_state *nl_ct_dump;
2845};
2846
2847static int
2848dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2849 struct ct_dpif_dump_state **dump_,
ded30c74 2850 const uint16_t *zone, int *ptot_bkts)
c11c9f4a
DDP
2851{
2852 struct dpif_netlink_ct_dump_state *dump;
2853 int err;
2854
2855 dump = xzalloc(sizeof *dump);
ded30c74 2856 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
c11c9f4a
DDP
2857 if (err) {
2858 free(dump);
2859 return err;
2860 }
2861
2862 *dump_ = &dump->up;
2863
2864 return 0;
2865}
2866
2867static int
2868dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2869 struct ct_dpif_dump_state *dump_,
2870 struct ct_dpif_entry *entry)
2871{
2872 struct dpif_netlink_ct_dump_state *dump;
2873
2874 INIT_CONTAINER(dump, dump_, up);
2875
2876 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2877}
2878
2879static int
2880dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2881 struct ct_dpif_dump_state *dump_)
2882{
2883 struct dpif_netlink_ct_dump_state *dump;
2884 int err;
2885
2886 INIT_CONTAINER(dump, dump_, up);
2887
2888 err = nl_ct_dump_done(dump->nl_ct_dump);
2889 free(dump);
2890 return err;
2891}
15eabc97
DDP
2892
2893static int
817a7657
YHW
2894dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2895 const struct ct_dpif_tuple *tuple)
15eabc97 2896{
817a7657
YHW
2897 if (tuple) {
2898 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2899 } else if (zone) {
15eabc97
DDP
2900 return nl_ct_flush_zone(*zone);
2901 } else {
2902 return nl_ct_flush();
2903 }
2904}
c11c9f4a 2905
5dddf960
JR
2906\f
2907/* Meters */
2908static void
2909dpif_netlink_meter_get_features(const struct dpif * dpif OVS_UNUSED,
2910 struct ofputil_meter_features *features)
2911{
2912 features->max_meters = 0;
2913 features->band_types = 0;
2914 features->capabilities = 0;
2915 features->max_bands = 0;
2916 features->max_color = 0;
2917}
2918
2919static int
2920dpif_netlink_meter_set(struct dpif *dpif OVS_UNUSED,
2921 ofproto_meter_id *meter_id OVS_UNUSED,
2922 struct ofputil_meter_config *config OVS_UNUSED)
2923{
2924 return EFBIG; /* meter_id out of range */
2925}
2926
2927static int
2928dpif_netlink_meter_get(const struct dpif *dpif OVS_UNUSED,
2929 ofproto_meter_id meter_id OVS_UNUSED,
2930 struct ofputil_meter_stats *stats OVS_UNUSED,
2931 uint16_t n_bands OVS_UNUSED)
2932{
2933 return EFBIG; /* meter_id out of range */
2934}
2935
2936static int
2937dpif_netlink_meter_del(struct dpif *dpif OVS_UNUSED,
2938 ofproto_meter_id meter_id OVS_UNUSED,
2939 struct ofputil_meter_stats *stats OVS_UNUSED,
2940 uint16_t n_bands OVS_UNUSED)
2941{
2942 return EFBIG; /* meter_id out of range */
2943}
2944
2945\f
/* The dpif provider class implemented by this file, registered under the
 * type name "system".
 *
 * The initializer is positional, so the order of entries must match the
 * member order of struct dpif_class exactly.  A NULL entry means this
 * provider does not supply that operation. */
const struct dpif_class dpif_netlink_class = {
    "system",
    NULL,                       /* init */
    dpif_netlink_enumerate,
    NULL,
    dpif_netlink_open,
    dpif_netlink_close,
    dpif_netlink_destroy,
    dpif_netlink_run,
    NULL,                       /* wait */
    dpif_netlink_get_stats,
    dpif_netlink_port_add,
    dpif_netlink_port_del,
    NULL,                       /* port_set_config */
    dpif_netlink_port_query_by_number,
    dpif_netlink_port_query_by_name,
    dpif_netlink_port_get_pid,
    dpif_netlink_port_dump_start,
    dpif_netlink_port_dump_next,
    dpif_netlink_port_dump_done,
    dpif_netlink_port_poll,
    dpif_netlink_port_poll_wait,
    dpif_netlink_flow_flush,
    dpif_netlink_flow_dump_create,
    dpif_netlink_flow_dump_destroy,
    dpif_netlink_flow_dump_thread_create,
    dpif_netlink_flow_dump_thread_destroy,
    dpif_netlink_flow_dump_next,
    dpif_netlink_operate,
    dpif_netlink_recv_set,
    dpif_netlink_handlers_set,
    NULL,                       /* set_config */
    dpif_netlink_queue_to_priority,
    dpif_netlink_recv,
    dpif_netlink_recv_wait,
    dpif_netlink_recv_purge,
    NULL,                       /* register_dp_purge_cb */
    NULL,                       /* register_upcall_cb */
    NULL,                       /* enable_upcall */
    NULL,                       /* disable_upcall */
    dpif_netlink_get_datapath_version, /* get_datapath_version */
    dpif_netlink_ct_dump_start,
    dpif_netlink_ct_dump_next,
    dpif_netlink_ct_dump_done,
    dpif_netlink_ct_flush,
    NULL,                       /* ct_set_maxconns */
    NULL,                       /* ct_get_maxconns */
    NULL,                       /* ct_get_nconns */
    dpif_netlink_meter_get_features,
    dpif_netlink_meter_set,
    dpif_netlink_meter_get,
    dpif_netlink_meter_del,
};
93451a0a 2999
96fba48f 3000static int
93451a0a 3001dpif_netlink_init(void)
96fba48f 3002{
eb8ed438
BP
3003 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3004 static int error;
982b8810 3005
eb8ed438 3006 if (ovsthread_once_start(&once)) {
df2c07f4
JP
3007 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
3008 &ovs_datapath_family);
37a1300c 3009 if (error) {
e0e2410d 3010 VLOG_INFO("Generic Netlink family '%s' does not exist. "
cae7529c
CL
3011 "The Open vSwitch kernel module is probably not loaded.",
3012 OVS_DATAPATH_FAMILY);
37a1300c 3013 }
f0fef760 3014 if (!error) {
df2c07f4 3015 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
f0fef760 3016 }
37a1300c 3017 if (!error) {
df2c07f4 3018 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
37a1300c 3019 }
aaff4b55 3020 if (!error) {
df2c07f4
JP
3021 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
3022 &ovs_packet_family);
aaff4b55 3023 }
c7178a0b
EJ
3024 if (!error) {
3025 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
b3dcb73c 3026 &ovs_vport_mcgroup);
c7178a0b 3027 }
eb8ed438 3028
921c370a
EG
3029 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
3030
eb8ed438 3031 ovsthread_once_done(&once);
982b8810
BP
3032 }
3033
3034 return error;
96fba48f
BP
3035}
3036
c19e6535 3037bool
93451a0a 3038dpif_netlink_is_internal_device(const char *name)
9fe3b9a2 3039{
93451a0a 3040 struct dpif_netlink_vport reply;
c19e6535 3041 struct ofpbuf *buf;
9fe3b9a2 3042 int error;
96fba48f 3043
93451a0a 3044 error = dpif_netlink_vport_get(name, &reply, &buf);
c19e6535
BP
3045 if (!error) {
3046 ofpbuf_delete(buf);
141d9ce4 3047 } else if (error != ENODEV && error != ENOENT) {
c19e6535 3048 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
10a89ef0 3049 name, ovs_strerror(error));
96fba48f
BP
3050 }
3051
df2c07f4 3052 return reply.type == OVS_VPORT_TYPE_INTERNAL;
96fba48f 3053}
e0467f6d 3054
df2c07f4 3055/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
c19e6535
BP
3056 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
3057 * positive errno value.
3058 *
3059 * 'vport' will contain pointers into 'buf', so the caller should not free
3060 * 'buf' while 'vport' is still in use. */
3061static int
93451a0a 3062dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
c19e6535
BP
3063 const struct ofpbuf *buf)
3064{
df2c07f4
JP
3065 static const struct nl_policy ovs_vport_policy[] = {
3066 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
3067 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
3068 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1579cf67 3069 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
f7df9823 3070 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
c19e6535 3071 .optional = true },
df2c07f4 3072 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
bfda5239 3073 [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
c19e6535
BP
3074 };
3075
93451a0a 3076 dpif_netlink_vport_init(vport);
c19e6535 3077
0a2869d5
BP
3078 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3079 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3080 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3081 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3082
3083 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
df2c07f4
JP
3084 if (!nlmsg || !genl || !ovs_header
3085 || nlmsg->nlmsg_type != ovs_vport_family
3086 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
3087 ARRAY_SIZE(ovs_vport_policy))) {
c19e6535
BP
3088 return EINVAL;
3089 }
c19e6535 3090
f0fef760 3091 vport->cmd = genl->cmd;
df2c07f4 3092 vport->dp_ifindex = ovs_header->dp_ifindex;
4e022ec0 3093 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
df2c07f4
JP
3094 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
3095 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
b063d9f0 3096 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1579cf67
AW
3097 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
3098 / (sizeof *vport->upcall_pids);
3099 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
3100
b063d9f0 3101 }
df2c07f4
JP
3102 if (a[OVS_VPORT_ATTR_STATS]) {
3103 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
3104 }
df2c07f4
JP
3105 if (a[OVS_VPORT_ATTR_OPTIONS]) {
3106 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
3107 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535 3108 }
bfda5239
FL
3109 if (a[OVS_VPORT_ATTR_NETNSID]) {
3110 netnsid_set(&vport->netnsid,
3111 nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
3112 } else {
3113 netnsid_set_local(&vport->netnsid);
3114 }
c19e6535
BP
3115 return 0;
3116}
3117
df2c07f4 3118/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
c19e6535
BP
3119 * followed by Netlink attributes corresponding to 'vport'. */
3120static void
93451a0a
AS
3121dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
3122 struct ofpbuf *buf)
c19e6535 3123{
df2c07f4 3124 struct ovs_header *ovs_header;
f0fef760 3125
df2c07f4 3126 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
69685a88 3127 vport->cmd, OVS_VPORT_VERSION);
c19e6535 3128
df2c07f4
JP
3129 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3130 ovs_header->dp_ifindex = vport->dp_ifindex;
c19e6535 3131
4e022ec0
AW
3132 if (vport->port_no != ODPP_NONE) {
3133 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
c19e6535
BP
3134 }
3135
df2c07f4
JP
3136 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
3137 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
c19e6535
BP
3138 }
3139
3140 if (vport->name) {
df2c07f4 3141 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
c19e6535
BP
3142 }
3143
1579cf67
AW
3144 if (vport->upcall_pids) {
3145 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
3146 vport->upcall_pids,
3147 vport->n_upcall_pids * sizeof *vport->upcall_pids);
a24a6574 3148 }
b063d9f0 3149
c19e6535 3150 if (vport->stats) {
df2c07f4 3151 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
c19e6535
BP
3152 vport->stats, sizeof *vport->stats);
3153 }
3154
c19e6535 3155 if (vport->options) {
df2c07f4 3156 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
c19e6535
BP
3157 vport->options, vport->options_len);
3158 }
c19e6535
BP
3159}
3160
3161/* Clears 'vport' to "empty" values. */
3162void
93451a0a 3163dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
c19e6535
BP
3164{
3165 memset(vport, 0, sizeof *vport);
4e022ec0 3166 vport->port_no = ODPP_NONE;
c19e6535
BP
3167}
3168
3169/* Executes 'request' in the kernel datapath. If the command fails, returns a
3170 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3171 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
df2c07f4 3172 * result of the command is expected to be an ovs_vport also, which is decoded
c19e6535
BP
3173 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3174 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
3175int
93451a0a
AS
3176dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
3177 struct dpif_netlink_vport *reply,
3178 struct ofpbuf **bufp)
c19e6535 3179{
f0fef760 3180 struct ofpbuf *request_buf;
c19e6535
BP
3181 int error;
3182
cb22974d 3183 ovs_assert((reply != NULL) == (bufp != NULL));
c19e6535 3184
93451a0a 3185 error = dpif_netlink_init();
42bb6c72
BP
3186 if (error) {
3187 if (reply) {
3188 *bufp = NULL;
93451a0a 3189 dpif_netlink_vport_init(reply);
42bb6c72
BP
3190 }
3191 return error;
3192 }
3193
f0fef760 3194 request_buf = ofpbuf_new(1024);
93451a0a 3195 dpif_netlink_vport_to_ofpbuf(request, request_buf);
a88b4e04 3196 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
f0fef760 3197 ofpbuf_delete(request_buf);
c19e6535 3198
f0fef760
BP
3199 if (reply) {
3200 if (!error) {
93451a0a 3201 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
f0fef760 3202 }
c19e6535 3203 if (error) {
93451a0a 3204 dpif_netlink_vport_init(reply);
f0fef760
BP
3205 ofpbuf_delete(*bufp);
3206 *bufp = NULL;
c19e6535 3207 }
c19e6535
BP
3208 }
3209 return error;
3210}
3211
3212/* Obtains information about the kernel vport named 'name' and stores it into
3213 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
3214 * longer needed ('reply' will contain pointers into '*bufp'). */
3215int
93451a0a
AS
3216dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
3217 struct ofpbuf **bufp)
c19e6535 3218{
93451a0a 3219 struct dpif_netlink_vport request;
c19e6535 3220
93451a0a 3221 dpif_netlink_vport_init(&request);
df2c07f4 3222 request.cmd = OVS_VPORT_CMD_GET;
c19e6535
BP
3223 request.name = name;
3224
93451a0a 3225 return dpif_netlink_vport_transact(&request, reply, bufp);
c19e6535 3226}
93451a0a 3227
df2c07f4 3228/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
aaff4b55
BP
3229 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
3230 * positive errno value.
d6569377
BP
3231 *
3232 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
3233 * while 'dp' is still in use. */
3234static int
93451a0a 3235dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
d6569377 3236{
df2c07f4
JP
3237 static const struct nl_policy ovs_datapath_policy[] = {
3238 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
f7df9823 3239 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
d6569377 3240 .optional = true },
847108dc
AZ
3241 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
3242 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
3243 .optional = true },
d6569377
BP
3244 };
3245
93451a0a 3246 dpif_netlink_dp_init(dp);
d6569377 3247
0a2869d5
BP
3248 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3249 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3250 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3251 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3252
3253 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
df2c07f4
JP
3254 if (!nlmsg || !genl || !ovs_header
3255 || nlmsg->nlmsg_type != ovs_datapath_family
3256 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
3257 ARRAY_SIZE(ovs_datapath_policy))) {
d6569377
BP
3258 return EINVAL;
3259 }
d6569377 3260
aaff4b55 3261 dp->cmd = genl->cmd;
df2c07f4
JP
3262 dp->dp_ifindex = ovs_header->dp_ifindex;
3263 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
3264 if (a[OVS_DP_ATTR_STATS]) {
6a54dedc 3265 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
d6569377 3266 }
982b8810 3267
847108dc 3268 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
6a54dedc 3269 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
847108dc
AZ
3270 }
3271
d6569377
BP
3272 return 0;
3273}
3274
aaff4b55 3275/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
d6569377 3276static void
93451a0a 3277dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
d6569377 3278{
df2c07f4 3279 struct ovs_header *ovs_header;
d6569377 3280
df2c07f4 3281 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
69685a88
JG
3282 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
3283 OVS_DATAPATH_VERSION);
aaff4b55 3284
df2c07f4
JP
3285 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3286 ovs_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
3287
3288 if (dp->name) {
df2c07f4 3289 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
d6569377
BP
3290 }
3291
a24a6574
BP
3292 if (dp->upcall_pid) {
3293 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
3294 }
b063d9f0 3295
b7fd5e38
TG
3296 if (dp->user_features) {
3297 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
3298 }
3299
df2c07f4 3300 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
d6569377
BP
3301}
3302
3303/* Clears 'dp' to "empty" values. */
d3d8f1f7 3304static void
93451a0a 3305dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
d6569377
BP
3306{
3307 memset(dp, 0, sizeof *dp);
d6569377
BP
3308}
3309
aaff4b55 3310static void
93451a0a 3311dpif_netlink_dp_dump_start(struct nl_dump *dump)
aaff4b55 3312{
93451a0a 3313 struct dpif_netlink_dp request;
aaff4b55
BP
3314 struct ofpbuf *buf;
3315
93451a0a 3316 dpif_netlink_dp_init(&request);
df2c07f4 3317 request.cmd = OVS_DP_CMD_GET;
aaff4b55
BP
3318
3319 buf = ofpbuf_new(1024);
93451a0a 3320 dpif_netlink_dp_to_ofpbuf(&request, buf);
a88b4e04 3321 nl_dump_start(dump, NETLINK_GENERIC, buf);
aaff4b55
BP
3322 ofpbuf_delete(buf);
3323}
3324
d6569377
BP
3325/* Executes 'request' in the kernel datapath. If the command fails, returns a
3326 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3327 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
aaff4b55
BP
3328 * result of the command is expected to be of the same form, which is decoded
3329 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3330 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 3331static int
93451a0a
AS
3332dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
3333 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
d6569377 3334{
aaff4b55 3335 struct ofpbuf *request_buf;
d6569377 3336 int error;
d6569377 3337
cb22974d 3338 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 3339
aaff4b55 3340 request_buf = ofpbuf_new(1024);
93451a0a 3341 dpif_netlink_dp_to_ofpbuf(request, request_buf);
a88b4e04 3342 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
aaff4b55 3343 ofpbuf_delete(request_buf);
d6569377 3344
aaff4b55 3345 if (reply) {
93451a0a 3346 dpif_netlink_dp_init(reply);
aaff4b55 3347 if (!error) {
93451a0a 3348 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
aaff4b55 3349 }
d6569377 3350 if (error) {
aaff4b55
BP
3351 ofpbuf_delete(*bufp);
3352 *bufp = NULL;
d6569377 3353 }
d6569377
BP
3354 }
3355 return error;
3356}
3357
3358/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
3359 * The caller must free '*bufp' when the reply is no longer needed ('reply'
3360 * will contain pointers into '*bufp'). */
d3d8f1f7 3361static int
93451a0a
AS
3362dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
3363 struct ofpbuf **bufp)
d6569377 3364{
93451a0a
AS
3365 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3366 struct dpif_netlink_dp request;
d6569377 3367
93451a0a 3368 dpif_netlink_dp_init(&request);
df2c07f4 3369 request.cmd = OVS_DP_CMD_GET;
254f2dc8 3370 request.dp_ifindex = dpif->dp_ifindex;
d6569377 3371
93451a0a 3372 return dpif_netlink_dp_transact(&request, reply, bufp);
d6569377 3373}
93451a0a 3374
df2c07f4 3375/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
37a1300c 3376 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
d6569377
BP
3377 * positive errno value.
3378 *
3379 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
3380 * while 'flow' is still in use. */
3381static int
93451a0a
AS
3382dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
3383 const struct ofpbuf *buf)
d6569377 3384{
70e5ed6f
JS
3385 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
3386 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
e6cc0bab 3387 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
df2c07f4 3388 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
f7df9823 3389 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
d6569377 3390 .optional = true },
df2c07f4
JP
3391 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
3392 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
ab79d262 3393 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
df2c07f4 3394 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 3395 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
70e5ed6f 3396 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
d6569377
BP
3397 };
3398
93451a0a 3399 dpif_netlink_flow_init(flow);
d6569377 3400
0a2869d5
BP
3401 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
3402 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
3403 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
3404 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
3405
3406 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
df2c07f4
JP
3407 if (!nlmsg || !genl || !ovs_header
3408 || nlmsg->nlmsg_type != ovs_flow_family
3409 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
3410 ARRAY_SIZE(ovs_flow_policy))) {
d6569377
BP
3411 return EINVAL;
3412 }
70e5ed6f
JS
3413 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
3414 return EINVAL;
3415 }
d6569377 3416
37a1300c 3417 flow->nlmsg_flags = nlmsg->nlmsg_flags;
df2c07f4 3418 flow->dp_ifindex = ovs_header->dp_ifindex;
70e5ed6f
JS
3419 if (a[OVS_FLOW_ATTR_KEY]) {
3420 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
3421 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
3422 }
e6cc0bab 3423
70e5ed6f 3424 if (a[OVS_FLOW_ATTR_UFID]) {
ab79d262 3425 flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
70e5ed6f
JS
3426 flow->ufid_present = true;
3427 }
e6cc0bab
AZ
3428 if (a[OVS_FLOW_ATTR_MASK]) {
3429 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
3430 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
3431 }
df2c07f4
JP
3432 if (a[OVS_FLOW_ATTR_ACTIONS]) {
3433 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
3434 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377 3435 }
df2c07f4
JP
3436 if (a[OVS_FLOW_ATTR_STATS]) {
3437 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
d6569377 3438 }
df2c07f4
JP
3439 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
3440 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
d6569377 3441 }
df2c07f4
JP
3442 if (a[OVS_FLOW_ATTR_USED]) {
3443 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
9e980142 3444 }
d6569377
BP
3445 return 0;
3446}
3447
beb75a40
JS
3448
3449/*
a8a3eee4
JS
3450 * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out.
3451 * If the flow is not Ethernet, the OVS_KEY_ATTR_PACKET_TYPE is converted to
3452 * OVS_KEY_ATTR_ETHERTYPE. Puts 'data' to 'buf'.
beb75a40
JS
3453 */
3454static void
3455put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
3456 const struct nlattr *data, uint16_t data_len)
3457{
3458 const struct nlattr *packet_type;
3459
3460 packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
3461
3462 if (packet_type) {
3463 /* exclude PACKET_TYPE Netlink attribute. */
3464 ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
3465 size_t packet_type_len = NL_A_U32_SIZE;
3466 size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
3467 size_t second_chunk_size = data_len - first_chunk_size
3468 - packet_type_len;
beb75a40 3469 struct nlattr *next_attr = nl_attr_next(packet_type);
1ca5b61b 3470 size_t ofs;
beb75a40 3471
1ca5b61b
JS
3472 ofs = nl_msg_start_nested(buf, type);
3473 nl_msg_put(buf, data, first_chunk_size);
3474 nl_msg_put(buf, next_attr, second_chunk_size);
a8a3eee4
JS
3475 if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
3476 ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
3477 const struct nlattr *nla;
3478
3479 nla = nl_attr_find(buf, NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
3480 if (nla) {
3481 ovs_be16 *ethertype;
3482
3483 ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
3484 *ethertype = pt;
3485 } else {
3486 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
3487 }
3488 }
1ca5b61b 3489 nl_msg_end_nested(buf, ofs);
beb75a40
JS
3490 } else {
3491 nl_msg_put_unspec(buf, type, data, data_len);
3492 }
3493}
3494
df2c07f4 3495/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
d6569377
BP
3496 * followed by Netlink attributes corresponding to 'flow'. */
3497static void
93451a0a
AS
3498dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
3499 struct ofpbuf *buf)
d6569377 3500{
df2c07f4 3501 struct ovs_header *ovs_header;
d6569377 3502
df2c07f4 3503 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
30b44744 3504 NLM_F_REQUEST | flow->nlmsg_flags,
69685a88 3505 flow->cmd, OVS_FLOW_VERSION);
37a1300c 3506
df2c07f4
JP
3507 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3508 ovs_header->dp_ifindex = flow->dp_ifindex;
d6569377 3509
70e5ed6f 3510 if (flow->ufid_present) {
ab79d262 3511 nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
70e5ed6f
JS
3512 }
3513 if (flow->ufid_terse) {
3514 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
3515 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
3516 | OVS_UFID_F_OMIT_ACTIONS);
3517 }
64bb477f
JS
3518 if (!flow->ufid_terse || !flow->ufid_present) {
3519 if (flow->key_len) {
beb75a40
JS
3520 put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
3521 flow->key_len);
64bb477f 3522 }
64bb477f 3523 if (flow->mask_len) {
beb75a40
JS
3524 put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
3525 flow->mask_len);
64bb477f
JS
3526 }
3527 if (flow->actions || flow->actions_len) {
3528 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
3529 flow->actions, flow->actions_len);
3530 }
d6569377
BP
3531 }
3532
3533 /* We never need to send these to the kernel. */
cb22974d
BP
3534 ovs_assert(!flow->stats);
3535 ovs_assert(!flow->tcp_flags);
3536 ovs_assert(!flow->used);
d6569377
BP
3537
3538 if (flow->clear) {
df2c07f4 3539 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
d6569377 3540 }
43f9ac0a
JR
3541 if (flow->probe) {
3542 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
3543 }
d6569377
BP
3544}
3545
3546/* Clears 'flow' to "empty" values. */
d3d8f1f7 3547static void
93451a0a 3548dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
d6569377
BP
3549{
3550 memset(flow, 0, sizeof *flow);
3551}
3552
3553/* Executes 'request' in the kernel datapath. If the command fails, returns a
3554 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3555 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
37a1300c
BP
3556 * result of the command is expected to be a flow also, which is decoded and
3557 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
3558 * is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 3559static int
93451a0a
AS
3560dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
3561 struct dpif_netlink_flow *reply,
3562 struct ofpbuf **bufp)
d6569377 3563{
37a1300c 3564 struct ofpbuf *request_buf;
d6569377 3565 int error;
d6569377 3566
cb22974d 3567 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 3568
30b44744
BP
3569 if (reply) {
3570 request->nlmsg_flags |= NLM_F_ECHO;
3571 }
3572
37a1300c 3573 request_buf = ofpbuf_new(1024);
93451a0a 3574 dpif_netlink_flow_to_ofpbuf(request, request_buf);
a88b4e04 3575 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
37a1300c 3576 ofpbuf_delete(request_buf);
d6569377 3577
37a1300c
BP
3578 if (reply) {
3579 if (!error) {
93451a0a 3580 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
37a1300c 3581 }
d6569377 3582 if (error) {
93451a0a 3583 dpif_netlink_flow_init(reply);
37a1300c
BP
3584 ofpbuf_delete(*bufp);
3585 *bufp = NULL;
d6569377 3586 }
d6569377
BP
3587 }
3588 return error;
3589}
3590
3591static void
93451a0a
AS
3592dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
3593 struct dpif_flow_stats *stats)
d6569377
BP
3594{
3595 if (flow->stats) {
6a54dedc
BP
3596 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
3597 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
d6569377
BP
3598 } else {
3599 stats->n_packets = 0;
3600 stats->n_bytes = 0;
3601 }
0e70cdcb 3602 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
d6569377
BP
3603 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
3604}
e0467f6d 3605
14b4d2f9
BP
3606/* Logs information about a packet that was recently lost in 'ch' (in
3607 * 'dpif_'). */
3608static void
93451a0a 3609report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
1579cf67 3610 uint32_t handler_id)
14b4d2f9 3611{
14b4d2f9 3612 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
14b4d2f9
BP
3613 struct ds s;
3614
8d675c5a 3615 if (VLOG_DROP_WARN(&rl)) {
14b4d2f9
BP
3616 return;
3617 }
3618
3619 ds_init(&s);
3620 if (ch->last_poll != LLONG_MIN) {
3621 ds_put_format(&s, " (last polled %lld ms ago)",
3622 time_msec() - ch->last_poll);
3623 }
14b4d2f9 3624
1579cf67 3625 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
9b00386b 3626 dpif_name(&dpif->dpif), ch_idx, handler_id);
14b4d2f9
BP
3627 ds_destroy(&s);
3628}