]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netlink.c
userspace: Add packet_type in dp_packet and flow
[mirror_ovs.git] / lib / dpif-netlink.c
CommitLineData
96fba48f 1/*
0a2869d5 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
96fba48f
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
9fe3b9a2 18
93451a0a 19#include "dpif-netlink.h"
96fba48f 20
96fba48f
BP
21#include <ctype.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <net/if.h>
b90fa799 26#include <linux/types.h>
aae51f53 27#include <linux/pkt_sched.h>
8522ba09 28#include <poll.h>
96fba48f 29#include <stdlib.h>
8522ba09 30#include <strings.h>
50f80534 31#include <sys/epoll.h>
10dcf8de 32#include <sys/stat.h>
96fba48f
BP
33#include <unistd.h>
34
773cd538 35#include "bitmap.h"
96fba48f 36#include "dpif-provider.h"
3e8a2ad1 37#include "openvswitch/dynamic-string.h"
eb8b28e7 38#include "flow.h"
1579cf67 39#include "fat-rwlock.h"
3abc4a1a 40#include "netdev.h"
032aa6a3 41#include "netdev-linux.h"
c3827f61 42#include "netdev-vport.h"
c11c9f4a 43#include "netlink-conntrack.h"
45c8d3a1 44#include "netlink-notifier.h"
982b8810 45#include "netlink-socket.h"
856081f6 46#include "netlink.h"
feebdea2 47#include "odp-util.h"
64c96779 48#include "openvswitch/ofpbuf.h"
856081f6 49#include "packets.h"
96fba48f 50#include "poll-loop.h"
17411ecf 51#include "random.h"
ee89ea7b 52#include "openvswitch/shash.h"
b3c01ed3 53#include "sset.h"
14b4d2f9 54#include "timeval.h"
d6569377 55#include "unaligned.h"
96fba48f 56#include "util.h"
e6211adc 57#include "openvswitch/vlog.h"
2482b0b0 58#include "openvswitch/flow.h"
5136ce49 59
93451a0a 60VLOG_DEFINE_THIS_MODULE(dpif_netlink);
09cac43f 61#ifdef _WIN32
da467899 62#include "wmi.h"
09cac43f 63enum { WINDOWS = 1 };
5517b839
AS
64static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
65 odp_port_t port_no, const char *port_name,
66 struct dpif_port *dpif_port);
09cac43f
NR
67#else
68enum { WINDOWS = 0 };
69#endif
95b1d73a 70enum { MAX_PORTS = USHRT_MAX };
773cd538 71
24b019f8
JP
72/* This ethtool flag was introduced in Linux 2.6.24, so it might be
73 * missing if we have old headers. */
74#define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
75
/* Fields of a datapath Generic Netlink message, grouped by the part of the
 * message each one corresponds to. */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                   /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;         /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;             /* OVS_DP_ATTR_USER_FEATURES */
    const struct ovs_dp_stats *stats;   /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                        /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
};
91
93451a0a
AS
92static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
93static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
94 const struct ofpbuf *);
95static void dpif_netlink_dp_dump_start(struct nl_dump *);
96static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
97 struct dpif_netlink_dp *reply,
98 struct ofpbuf **bufp);
99static int dpif_netlink_dp_get(const struct dpif *,
100 struct dpif_netlink_dp *reply,
101 struct ofpbuf **bufp);
102
103struct dpif_netlink_flow {
37a1300c
BP
104 /* Generic Netlink header. */
105 uint8_t cmd;
d6569377 106
df2c07f4 107 /* struct ovs_header. */
d6569377 108 unsigned int nlmsg_flags;
254f2dc8 109 int dp_ifindex;
d6569377
BP
110
111 /* Attributes.
112 *
0e70cdcb
BP
113 * The 'stats' member points to 64-bit data that might only be aligned on
114 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
115 * values.
d2a23af2 116 *
df2c07f4 117 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
d2a23af2 118 * the Netlink version of the command, even if actions_len is zero. */
df2c07f4 119 const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
d6569377 120 size_t key_len;
e6cc0bab
AZ
121 const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */
122 size_t mask_len;
df2c07f4 123 const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
d6569377 124 size_t actions_len;
70e5ed6f
JS
125 ovs_u128 ufid; /* OVS_FLOW_ATTR_FLOW_ID. */
126 bool ufid_present; /* Is there a UFID? */
127 bool ufid_terse; /* Skip serializing key/mask/acts? */
df2c07f4
JP
128 const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
129 const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
0e70cdcb 130 const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
df2c07f4 131 bool clear; /* OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 132 bool probe; /* OVS_FLOW_ATTR_PROBE. */
d6569377
BP
133};
134
93451a0a
AS
135static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
136static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
137 const struct ofpbuf *);
138static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
139 struct ofpbuf *);
140static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
141 struct dpif_netlink_flow *reply,
142 struct ofpbuf **bufp);
143static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
144 struct dpif_flow_stats *);
7af12bd7 145static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
93451a0a 146 const struct dpif_netlink_flow *);
d6569377 147
/* A single dpif channel between the kernel and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
153
09cac43f
NR
#ifdef _WIN32
#define VPORT_SOCK_POOL_SIZE 1
/* Windows has no native epoll.  Equivalent interfaces exist but are not used
 * currently; for simplicity a pool of netlink sockets is used instead.  Each
 * pool entry is a 'struct dpif_windows_vport_sock'.  Because it is a pool,
 * several OVS ports may share the same socket.  A reference count and similar
 * fields can be added in the future. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* netlink socket. */
};
#endif
166
1579cf67
AW
/* Per-handler upcall state: the handler's channel array and the epoll
 * bookkeeping that goes with it. */
struct dpif_handler {
    struct dpif_channel *channels;  /* Array of channels for each handler. */
    struct epoll_event *epoll_events;
    int epoll_fd;                   /* epoll fd that includes channel socks. */
    int n_events;                   /* Num events returned by epoll_wait(). */
    int event_offset;               /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;      /* Index to aid in allocating a
                                       socket in the pool to a port. */
#endif
};
14b4d2f9 181
96fba48f 182/* Datapath interface for the openvswitch Linux kernel module. */
93451a0a 183struct dpif_netlink {
96fba48f 184 struct dpif dpif;
254f2dc8 185 int dp_ifindex;
e9e28be3 186
b063d9f0 187 /* Upcall messages. */
1579cf67
AW
188 struct fat_rwlock upcall_lock;
189 struct dpif_handler *handlers;
190 uint32_t n_handlers; /* Num of upcall handlers. */
191 int uc_array_size; /* Size of 'handler->channels' and */
192 /* 'handler->epoll_events'. */
982b8810 193
e9e28be3 194 /* Change notification. */
e4516b20 195 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
61eae437 196 bool refresh_channels;
96fba48f
BP
197};
198
93451a0a 199static void report_loss(struct dpif_netlink *, struct dpif_channel *,
9b00386b 200 uint32_t ch_idx, uint32_t handler_id);
1579cf67 201
96fba48f
BP
202static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
203
e4516b20
BP
204/* Generic Netlink family numbers for OVS.
205 *
93451a0a 206 * Initialized by dpif_netlink_init(). */
df2c07f4
JP
207static int ovs_datapath_family;
208static int ovs_vport_family;
209static int ovs_flow_family;
210static int ovs_packet_family;
982b8810 211
e4516b20
BP
212/* Generic Netlink multicast groups for OVS.
213 *
93451a0a 214 * Initialized by dpif_netlink_init(). */
e4516b20 215static unsigned int ovs_vport_mcgroup;
982b8810 216
93451a0a
AS
217static int dpif_netlink_init(void);
218static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
219static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
220 odp_port_t port_no, uint32_t hash);
09cac43f 221static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
93451a0a
AS
222static int dpif_netlink_refresh_channels(struct dpif_netlink *,
223 uint32_t n_handlers);
224static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
225 struct ofpbuf *);
226static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
227 const struct ofpbuf *);
f0fef760 228
93451a0a
AS
229static struct dpif_netlink *
230dpif_netlink_cast(const struct dpif *dpif)
96fba48f 231{
93451a0a
AS
232 dpif_assert_class(dpif, &dpif_netlink_class);
233 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
96fba48f
BP
234}
235
d3d22744 236static int
93451a0a
AS
237dpif_netlink_enumerate(struct sset *all_dps,
238 const struct dpif_class *dpif_class OVS_UNUSED)
d3d22744 239{
aaff4b55 240 struct nl_dump dump;
d57695d7
JS
241 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
242 struct ofpbuf msg, buf;
aaff4b55 243 int error;
982b8810 244
93451a0a 245 error = dpif_netlink_init();
aaff4b55
BP
246 if (error) {
247 return error;
982b8810 248 }
d3d22744 249
d57695d7 250 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a 251 dpif_netlink_dp_dump_start(&dump);
d57695d7 252 while (nl_dump_next(&dump, &msg, &buf)) {
93451a0a 253 struct dpif_netlink_dp dp;
d6569377 254
93451a0a 255 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
d0c23a1a 256 sset_add(all_dps, dp.name);
d3d22744
BP
257 }
258 }
d57695d7 259 ofpbuf_uninit(&buf);
aaff4b55 260 return nl_dump_done(&dump);
d3d22744
BP
261}
262
96fba48f 263static int
93451a0a
AS
264dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
265 bool create, struct dpif **dpifp)
96fba48f 266{
93451a0a 267 struct dpif_netlink_dp dp_request, dp;
c19e6535 268 struct ofpbuf *buf;
ea36840f 269 uint32_t upcall_pid;
c19e6535 270 int error;
96fba48f 271
93451a0a 272 error = dpif_netlink_init();
982b8810
BP
273 if (error) {
274 return error;
275 }
276
982b8810 277 /* Create or look up datapath. */
93451a0a 278 dpif_netlink_dp_init(&dp_request);
ea36840f
BP
279 if (create) {
280 dp_request.cmd = OVS_DP_CMD_NEW;
281 upcall_pid = 0;
282 dp_request.upcall_pid = &upcall_pid;
283 } else {
b7fd5e38
TG
284 /* Use OVS_DP_CMD_SET to report user features */
285 dp_request.cmd = OVS_DP_CMD_SET;
ea36840f 286 }
254f2dc8 287 dp_request.name = name;
b7fd5e38 288 dp_request.user_features |= OVS_DP_F_UNALIGNED;
1579cf67 289 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
93451a0a 290 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
982b8810
BP
291 if (error) {
292 return error;
c19e6535 293 }
254f2dc8 294
e4516b20 295 error = open_dpif(&dp, dpifp);
8f4a4df5 296 ofpbuf_delete(buf);
e4516b20 297 return error;
c19e6535
BP
298}
299
e4516b20 300static int
93451a0a 301open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
c19e6535 302{
93451a0a 303 struct dpif_netlink *dpif;
c19e6535 304
17411ecf 305 dpif = xzalloc(sizeof *dpif);
e4516b20 306 dpif->port_notifier = NULL;
1579cf67 307 fat_rwlock_init(&dpif->upcall_lock);
c19e6535 308
93451a0a 309 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
254f2dc8 310 dp->dp_ifindex, dp->dp_ifindex);
c19e6535 311
254f2dc8 312 dpif->dp_ifindex = dp->dp_ifindex;
c19e6535 313 *dpifp = &dpif->dpif;
e4516b20
BP
314
315 return 0;
96fba48f
BP
316}
317
1579cf67
AW
/* Destroys each of the 'n_socks' netlink sockets that 'socksp' points to,
 * then frees the 'socksp' array itself. */
static void
vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
{
    size_t idx = 0;

    while (idx < n_socks) {
        nl_sock_destroy(socksp[idx]);
        idx++;
    }

    free(socksp);
}
989fd548 331
1579cf67
AW
332/* Creates an array of netlink sockets. Returns an array of the
333 * corresponding pointers. Records the error in 'error'. */
334static struct nl_sock **
09cac43f 335vport_create_socksp__(uint32_t n_socks, int *error)
1579cf67
AW
336{
337 struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
338 size_t i;
339
340 for (i = 0; i < n_socks; i++) {
341 *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
342 if (*error) {
343 goto error;
989fd548 344 }
1579cf67 345 }
989fd548 346
1579cf67 347 return socksp;
9fafa796 348
1579cf67 349error:
09cac43f 350 vport_del_socksp__(socksp, n_socks);
989fd548 351
1579cf67
AW
352 return NULL;
353}
354
09cac43f
NR
355#ifdef _WIN32
356static void
357vport_delete_sock_pool(struct dpif_handler *handler)
358 OVS_REQ_WRLOCK(dpif->upcall_lock)
359{
360 if (handler->vport_sock_pool) {
361 uint32_t i;
362 struct dpif_windows_vport_sock *sock_pool =
363 handler->vport_sock_pool;
364
365 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
366 if (sock_pool[i].nl_sock) {
367 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
368 nl_sock_destroy(sock_pool[i].nl_sock);
369 sock_pool[i].nl_sock = NULL;
370 }
371 }
372
373 free(handler->vport_sock_pool);
374 handler->vport_sock_pool = NULL;
375 }
376}
377
378static int
379vport_create_sock_pool(struct dpif_handler *handler)
380 OVS_REQ_WRLOCK(dpif->upcall_lock)
381{
382 struct dpif_windows_vport_sock *sock_pool;
383 size_t i;
384 int error = 0;
385
386 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
387 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
388 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
389 if (error) {
390 goto error;
391 }
392
393 /* Enable the netlink socket to receive packets. This is equivalent to
394 * calling nl_sock_join_mcgroup() to receive events. */
395 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
396 if (error) {
397 goto error;
398 }
399 }
400
401 handler->vport_sock_pool = sock_pool;
402 handler->last_used_pool_idx = 0;
403 return 0;
404
405error:
406 vport_delete_sock_pool(handler);
407 return error;
408}
409
410/* Returns an array pointers to netlink sockets. The sockets are picked from a
411 * pool. Records the error in 'error'. */
412static struct nl_sock **
413vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
414 OVS_REQ_WRLOCK(dpif->upcall_lock)
415{
416 uint32_t n_socks = dpif->n_handlers;
417 struct nl_sock **socksp;
418 size_t i;
419
420 ovs_assert(n_socks <= 1);
421 socksp = xzalloc(n_socks * sizeof *socksp);
422
423 /* Pick netlink sockets to use in a round-robin fashion from each
424 * handler's pool of sockets. */
425 for (i = 0; i < n_socks; i++) {
426 struct dpif_handler *handler = &dpif->handlers[i];
427 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
428 size_t index = handler->last_used_pool_idx;
429
430 /* A pool of sockets is allocated when the handler is initialized. */
431 if (sock_pool == NULL) {
432 free(socksp);
433 *error = EINVAL;
434 return NULL;
435 }
436
437 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
438 socksp[i] = sock_pool[index].nl_sock;
439 socksp[i] = sock_pool[index].nl_sock;
440 ovs_assert(socksp[i]);
441 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
442 handler->last_used_pool_idx = index;
443 }
444
445 return socksp;
446}
447
/* Frees the 'socksp' pointer array only; the sockets it points to are not
 * destroyed here.  'dpif' is unused. */
static void
vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
    struct nl_sock **sock_array = socksp;

    free(sock_array);
}
453#endif /* _WIN32 */
454
455static struct nl_sock **
456vport_create_socksp(struct dpif_netlink *dpif, int *error)
457{
458#ifdef _WIN32
459 return vport_create_socksp_windows(dpif, error);
460#else
461 return vport_create_socksp__(dpif->n_handlers, error);
462#endif
463}
464
465static void
466vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
467{
468#ifdef _WIN32
469 vport_del_socksp_windows(dpif, socksp);
470#else
471 vport_del_socksp__(socksp, dpif->n_handlers);
472#endif
473}
474
1579cf67
AW
/* Returns a newly allocated array holding the pid of each of the 'n_socks'
 * netlink sockets in 'socksp'.  When 'socksp' is NULL, the result is a
 * single-element array containing 0.  The caller frees the array. */
static uint32_t *
vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t *pid_array;
    size_t idx;

    if (!socksp) {
        return xzalloc(sizeof *pid_array);
    }

    pid_array = xzalloc(n_socks * sizeof *pid_array);
    for (idx = 0; idx < n_socks; idx++) {
        pid_array[idx] = nl_sock_pid(socksp[idx]);
    }

    return pid_array;
}
496
497/* Given the port number 'port_idx', extracts the pids of netlink sockets
498 * associated to the port and assigns it to 'upcall_pids'. */
499static bool
93451a0a 500vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
1579cf67
AW
501 uint32_t **upcall_pids)
502{
503 uint32_t *pids;
504 size_t i;
989fd548 505
1579cf67
AW
506 /* Since the nl_sock can only be assigned in either all
507 * or none "dpif->handlers" channels, the following check
508 * would suffice. */
509 if (!dpif->handlers[0].channels[port_idx].sock) {
510 return false;
511 }
09cac43f 512 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1579cf67
AW
513
514 pids = xzalloc(dpif->n_handlers * sizeof *pids);
515
516 for (i = 0; i < dpif->n_handlers; i++) {
517 pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
518 }
519
520 *upcall_pids = pids;
989fd548 521
1579cf67 522 return true;
989fd548
JP
523}
524
525static int
93451a0a 526vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
1579cf67 527 struct nl_sock **socksp)
989fd548
JP
528{
529 struct epoll_event event;
4e022ec0 530 uint32_t port_idx = odp_to_u32(port_no);
1579cf67
AW
531 size_t i, j;
532 int error;
989fd548 533
1579cf67 534 if (dpif->handlers == NULL) {
989fd548
JP
535 return 0;
536 }
537
1579cf67
AW
538 /* We assume that the datapath densely chooses port numbers, which can
539 * therefore be used as an index into 'channels' and 'epoll_events' of
540 * 'dpif->handler'. */
4e022ec0
AW
541 if (port_idx >= dpif->uc_array_size) {
542 uint32_t new_size = port_idx + 1;
989fd548 543
12d76859 544 if (new_size > MAX_PORTS) {
989fd548
JP
545 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
546 dpif_name(&dpif->dpif), port_no);
547 return EFBIG;
548 }
549
1579cf67
AW
550 for (i = 0; i < dpif->n_handlers; i++) {
551 struct dpif_handler *handler = &dpif->handlers[i];
552
553 handler->channels = xrealloc(handler->channels,
554 new_size * sizeof *handler->channels);
555
556 for (j = dpif->uc_array_size; j < new_size; j++) {
557 handler->channels[j].sock = NULL;
558 }
559
560 handler->epoll_events = xrealloc(handler->epoll_events,
561 new_size * sizeof *handler->epoll_events);
989fd548 562
1579cf67 563 }
989fd548
JP
564 dpif->uc_array_size = new_size;
565 }
566
567 memset(&event, 0, sizeof event);
568 event.events = EPOLLIN;
4e022ec0 569 event.data.u32 = port_idx;
989fd548 570
1579cf67
AW
571 for (i = 0; i < dpif->n_handlers; i++) {
572 struct dpif_handler *handler = &dpif->handlers[i];
573
09cac43f 574#ifndef _WIN32
1579cf67
AW
575 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
576 &event) < 0) {
577 error = errno;
578 goto error;
579 }
93451a0a 580#endif
1579cf67
AW
581 dpif->handlers[i].channels[port_idx].sock = socksp[i];
582 dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
583 }
989fd548
JP
584
585 return 0;
1579cf67
AW
586
587error:
588 for (j = 0; j < i; j++) {
09cac43f 589#ifndef _WIN32
1579cf67
AW
590 epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
591 nl_sock_fd(socksp[j]), NULL);
93451a0a 592#endif
1579cf67
AW
593 dpif->handlers[j].channels[port_idx].sock = NULL;
594 }
595
596 return error;
989fd548
JP
597}
598
599static void
93451a0a 600vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
989fd548 601{
4e022ec0 602 uint32_t port_idx = odp_to_u32(port_no);
1579cf67 603 size_t i;
989fd548 604
1579cf67 605 if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
989fd548
JP
606 return;
607 }
608
1579cf67
AW
609 /* Since the sock can only be assigned in either all or none
610 * of "dpif->handlers" channels, the following check would
611 * suffice. */
612 if (!dpif->handlers[0].channels[port_idx].sock) {
989fd548
JP
613 return;
614 }
615
1579cf67
AW
616 for (i = 0; i < dpif->n_handlers; i++) {
617 struct dpif_handler *handler = &dpif->handlers[i];
09cac43f 618#ifndef _WIN32
1579cf67
AW
619 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
620 nl_sock_fd(handler->channels[port_idx].sock), NULL);
621 nl_sock_destroy(handler->channels[port_idx].sock);
09cac43f 622#endif
1579cf67
AW
623 handler->channels[port_idx].sock = NULL;
624 handler->event_offset = handler->n_events = 0;
625 }
626}
627
628static void
93451a0a
AS
629destroy_all_channels(struct dpif_netlink *dpif)
630 OVS_REQ_WRLOCK(dpif->upcall_lock)
1579cf67
AW
631{
632 unsigned int i;
633
634 if (!dpif->handlers) {
635 return;
636 }
637
638 for (i = 0; i < dpif->uc_array_size; i++ ) {
93451a0a 639 struct dpif_netlink_vport vport_request;
1579cf67
AW
640 uint32_t upcall_pids = 0;
641
642 /* Since the sock can only be assigned in either all or none
643 * of "dpif->handlers" channels, the following check would
644 * suffice. */
645 if (!dpif->handlers[0].channels[i].sock) {
646 continue;
647 }
648
649 /* Turn off upcalls. */
93451a0a 650 dpif_netlink_vport_init(&vport_request);
1579cf67
AW
651 vport_request.cmd = OVS_VPORT_CMD_SET;
652 vport_request.dp_ifindex = dpif->dp_ifindex;
653 vport_request.port_no = u32_to_odp(i);
a78f446a 654 vport_request.n_upcall_pids = 1;
1579cf67 655 vport_request.upcall_pids = &upcall_pids;
93451a0a 656 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67
AW
657
658 vport_del_channels(dpif, u32_to_odp(i));
659 }
660
661 for (i = 0; i < dpif->n_handlers; i++) {
662 struct dpif_handler *handler = &dpif->handlers[i];
663
09cac43f 664 dpif_netlink_handler_uninit(handler);
1579cf67
AW
665 free(handler->epoll_events);
666 free(handler->channels);
667 }
989fd548 668
1579cf67
AW
669 free(dpif->handlers);
670 dpif->handlers = NULL;
671 dpif->n_handlers = 0;
672 dpif->uc_array_size = 0;
17411ecf
JG
673}
674
96fba48f 675static void
93451a0a 676dpif_netlink_close(struct dpif *dpif_)
96fba48f 677{
93451a0a 678 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
c7178a0b 679
e4516b20 680 nl_sock_destroy(dpif->port_notifier);
1579cf67
AW
681
682 fat_rwlock_wrlock(&dpif->upcall_lock);
683 destroy_all_channels(dpif);
684 fat_rwlock_unlock(&dpif->upcall_lock);
685
686 fat_rwlock_destroy(&dpif->upcall_lock);
96fba48f
BP
687 free(dpif);
688}
689
690static int
93451a0a 691dpif_netlink_destroy(struct dpif *dpif_)
96fba48f 692{
93451a0a
AS
693 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
694 struct dpif_netlink_dp dp;
d6569377 695
93451a0a 696 dpif_netlink_dp_init(&dp);
df2c07f4 697 dp.cmd = OVS_DP_CMD_DEL;
254f2dc8 698 dp.dp_ifindex = dpif->dp_ifindex;
93451a0a 699 return dpif_netlink_dp_transact(&dp, NULL, NULL);
96fba48f
BP
700}
701
a36de779 702static bool
93451a0a 703dpif_netlink_run(struct dpif *dpif_)
61eae437 704{
93451a0a 705 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67 706
61eae437
BP
707 if (dpif->refresh_channels) {
708 dpif->refresh_channels = false;
1579cf67 709 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 710 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
1579cf67 711 fat_rwlock_unlock(&dpif->upcall_lock);
61eae437 712 }
a36de779 713 return false;
61eae437
BP
714}
715
96fba48f 716static int
93451a0a 717dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
96fba48f 718{
93451a0a 719 struct dpif_netlink_dp dp;
d6569377
BP
720 struct ofpbuf *buf;
721 int error;
722
93451a0a 723 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
d6569377 724 if (!error) {
6a54dedc
BP
725 memset(stats, 0, sizeof *stats);
726
727 if (dp.stats) {
728 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
729 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
730 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
731 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
732 }
733
734 if (dp.megaflow_stats) {
735 stats->n_masks = dp.megaflow_stats->n_masks;
736 stats->n_mask_hit = get_32aligned_u64(
737 &dp.megaflow_stats->n_mask_hit);
738 } else {
739 stats->n_masks = UINT32_MAX;
740 stats->n_mask_hit = UINT64_MAX;
741 }
d6569377
BP
742 ofpbuf_delete(buf);
743 }
744 return error;
96fba48f
BP
745}
746
b9ad7294 747static const char *
93451a0a 748get_vport_type(const struct dpif_netlink_vport *vport)
b9ad7294
EJ
749{
750 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
751
752 switch (vport->type) {
5ed51209
JS
753 case OVS_VPORT_TYPE_NETDEV: {
754 const char *type = netdev_get_type_from_name(vport->name);
755
756 return type ? type : "system";
757 }
b9ad7294
EJ
758
759 case OVS_VPORT_TYPE_INTERNAL:
760 return "internal";
761
c1fc1411
JG
762 case OVS_VPORT_TYPE_GENEVE:
763 return "geneve";
764
b9ad7294
EJ
765 case OVS_VPORT_TYPE_GRE:
766 return "gre";
767
b9ad7294
EJ
768 case OVS_VPORT_TYPE_VXLAN:
769 return "vxlan";
770
a6ae068b
LJ
771 case OVS_VPORT_TYPE_LISP:
772 return "lisp";
773
4237026e
PS
774 case OVS_VPORT_TYPE_STT:
775 return "stt";
776
b9ad7294
EJ
777 case OVS_VPORT_TYPE_UNSPEC:
778 case __OVS_VPORT_TYPE_MAX:
779 break;
780 }
781
782 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
783 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
784 return "unknown";
785}
786
c060c4cf
EJ
787static enum ovs_vport_type
788netdev_to_ovs_vport_type(const struct netdev *netdev)
789{
790 const char *type = netdev_get_type(netdev);
791
792 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
793 return OVS_VPORT_TYPE_NETDEV;
794 } else if (!strcmp(type, "internal")) {
795 return OVS_VPORT_TYPE_INTERNAL;
4237026e
PS
796 } else if (strstr(type, "stt")) {
797 return OVS_VPORT_TYPE_STT;
c1fc1411
JG
798 } else if (!strcmp(type, "geneve")) {
799 return OVS_VPORT_TYPE_GENEVE;
c060c4cf
EJ
800 } else if (strstr(type, "gre")) {
801 return OVS_VPORT_TYPE_GRE;
c060c4cf
EJ
802 } else if (!strcmp(type, "vxlan")) {
803 return OVS_VPORT_TYPE_VXLAN;
a6ae068b
LJ
804 } else if (!strcmp(type, "lisp")) {
805 return OVS_VPORT_TYPE_LISP;
c060c4cf
EJ
806 } else {
807 return OVS_VPORT_TYPE_UNSPEC;
808 }
809}
810
96fba48f 811static int
93451a0a
AS
812dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev,
813 odp_port_t *port_nop)
b90de034 814 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 815{
26508d9a 816 const struct netdev_tunnel_config *tnl_cfg;
3aa30359
BP
817 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
818 const char *name = netdev_vport_get_dpif_port(netdev,
819 namebuf, sizeof namebuf);
c3827f61 820 const char *type = netdev_get_type(netdev);
93451a0a 821 struct dpif_netlink_vport request, reply;
c19e6535 822 struct ofpbuf *buf;
26508d9a
KM
823 uint64_t options_stub[64 / 8];
824 struct ofpbuf options;
1579cf67
AW
825 struct nl_sock **socksp = NULL;
826 uint32_t *upcall_pids;
827 int error = 0;
96fba48f 828
1579cf67 829 if (dpif->handlers) {
09cac43f 830 socksp = vport_create_socksp(dpif, &error);
1579cf67 831 if (!socksp) {
989fd548
JP
832 return error;
833 }
834 }
835
93451a0a 836 dpif_netlink_vport_init(&request);
df2c07f4 837 request.cmd = OVS_VPORT_CMD_NEW;
254f2dc8 838 request.dp_ifindex = dpif->dp_ifindex;
c060c4cf 839 request.type = netdev_to_ovs_vport_type(netdev);
df2c07f4 840 if (request.type == OVS_VPORT_TYPE_UNSPEC) {
c283069c
BP
841 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
842 "unsupported type `%s'",
9b00386b 843 dpif_name(&dpif->dpif), name, type);
09cac43f 844 vport_del_socksp(dpif, socksp);
c283069c
BP
845 return EINVAL;
846 }
c19e6535 847 request.name = name;
c3827f61 848
24b019f8 849 if (request.type == OVS_VPORT_TYPE_NETDEV) {
93451a0a 850#ifdef _WIN32
09cac43f 851 /* XXX : Map appropiate Windows handle */
93451a0a 852#else
24b019f8 853 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
93451a0a 854#endif
24b019f8
JP
855 }
856
da467899
AS
857#ifdef _WIN32
858 if (request.type == OVS_VPORT_TYPE_INTERNAL) {
859 if (!create_wmi_port(name)){
860 VLOG_ERR("Could not create wmi internal port with name:%s", name);
861 vport_del_socksp(dpif, socksp);
862 return EINVAL;
863 };
864 }
865#endif
866
26508d9a 867 tnl_cfg = netdev_get_tunnel_config(netdev);
526df7d8 868 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
26508d9a 869 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
526df7d8
TG
870 if (tnl_cfg->dst_port) {
871 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
872 ntohs(tnl_cfg->dst_port));
873 }
874 if (tnl_cfg->exts) {
875 size_t ext_ofs;
876 int i;
877
878 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
879 for (i = 0; i < 32; i++) {
880 if (tnl_cfg->exts & (1 << i)) {
881 nl_msg_put_flag(&options, i);
882 }
883 }
884 nl_msg_end_nested(&options, ext_ofs);
885 }
6fd6ed71
PS
886 request.options = options.data;
887 request.options_len = options.size;
26508d9a
KM
888 }
889
78a2d59c 890 request.port_no = *port_nop;
1579cf67 891 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
aeaae11f 892 request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
1579cf67 893 request.upcall_pids = upcall_pids;
95b1d73a 894
93451a0a 895 error = dpif_netlink_vport_transact(&request, &reply, &buf);
78a2d59c
JP
896 if (!error) {
897 *port_nop = reply.port_no;
2510ba7c 898 } else {
4e022ec0 899 if (error == EBUSY && *port_nop != ODPP_NONE) {
2510ba7c 900 VLOG_INFO("%s: requested port %"PRIu32" is in use",
9b00386b 901 dpif_name(&dpif->dpif), *port_nop);
2510ba7c 902 }
1579cf67 903
09cac43f 904 vport_del_socksp(dpif, socksp);
1579cf67 905 goto exit;
78a2d59c 906 }
c3827f61 907
1579cf67
AW
908 if (socksp) {
909 error = vport_add_channels(dpif, *port_nop, socksp);
989fd548
JP
910 if (error) {
911 VLOG_INFO("%s: could not add channel for port %s",
9b00386b 912 dpif_name(&dpif->dpif), name);
989fd548
JP
913
914 /* Delete the port. */
93451a0a 915 dpif_netlink_vport_init(&request);
989fd548
JP
916 request.cmd = OVS_VPORT_CMD_DEL;
917 request.dp_ifindex = dpif->dp_ifindex;
918 request.port_no = *port_nop;
93451a0a 919 dpif_netlink_vport_transact(&request, NULL, NULL);
09cac43f 920 vport_del_socksp(dpif, socksp);
1579cf67 921 goto exit;
989fd548
JP
922 }
923 }
1579cf67 924 free(socksp);
989fd548 925
1579cf67
AW
926exit:
927 ofpbuf_delete(buf);
928 free(upcall_pids);
929
930 return error;
96fba48f
BP
931}
932
933static int
93451a0a
AS
934dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
935 odp_port_t *port_nop)
9fafa796 936{
93451a0a 937 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
938 int error;
939
1579cf67 940 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 941 error = dpif_netlink_port_add__(dpif, netdev, port_nop);
1579cf67 942 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
943
944 return error;
945}
946
947static int
93451a0a 948dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
b90de034 949 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 950{
93451a0a 951 struct dpif_netlink_vport vport;
773cd538 952 int error;
c19e6535 953
93451a0a 954 dpif_netlink_vport_init(&vport);
df2c07f4 955 vport.cmd = OVS_VPORT_CMD_DEL;
254f2dc8 956 vport.dp_ifindex = dpif->dp_ifindex;
c19e6535 957 vport.port_no = port_no;
da467899
AS
958#ifdef _WIN32
959 struct dpif_port temp_dpif_port;
933228b2
EG
960
961 error = dpif_netlink_port_query__(dpif, port_no, NULL, &temp_dpif_port);
962 if (error) {
963 return error;
964 }
da467899
AS
965 if (!strcmp(temp_dpif_port.type, "internal")) {
966 if (!delete_wmi_port(temp_dpif_port.name)){
967 VLOG_ERR("Could not delete wmi port with name: %s",
968 temp_dpif_port.name);
969 };
970 }
de5739e2 971 dpif_port_destroy(&temp_dpif_port);
da467899 972#endif
93451a0a 973 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
773cd538 974
1579cf67 975 vport_del_channels(dpif, port_no);
989fd548 976
773cd538 977 return error;
c3827f61 978}
3abc4a1a 979
9fafa796 980static int
93451a0a 981dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
9fafa796 982{
93451a0a 983 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
984 int error;
985
1579cf67 986 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 987 error = dpif_netlink_port_del__(dpif, port_no);
1579cf67 988 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
989
990 return error;
991}
992
c3827f61 993static int
93451a0a
AS
994dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
995 const char *port_name, struct dpif_port *dpif_port)
c3827f61 996{
93451a0a
AS
997 struct dpif_netlink_vport request;
998 struct dpif_netlink_vport reply;
c19e6535 999 struct ofpbuf *buf;
4c738a8d
BP
1000 int error;
1001
93451a0a 1002 dpif_netlink_vport_init(&request);
df2c07f4 1003 request.cmd = OVS_VPORT_CMD_GET;
9b00386b 1004 request.dp_ifindex = dpif->dp_ifindex;
c19e6535
BP
1005 request.port_no = port_no;
1006 request.name = port_name;
4c738a8d 1007
93451a0a 1008 error = dpif_netlink_vport_transact(&request, &reply, &buf);
c19e6535 1009 if (!error) {
33db1592
BP
1010 if (reply.dp_ifindex != request.dp_ifindex) {
1011 /* A query by name reported that 'port_name' is in some datapath
1012 * other than 'dpif', but the caller wants to know about 'dpif'. */
1013 error = ENODEV;
4afba28d 1014 } else if (dpif_port) {
33db1592 1015 dpif_port->name = xstrdup(reply.name);
b9ad7294 1016 dpif_port->type = xstrdup(get_vport_type(&reply));
33db1592
BP
1017 dpif_port->port_no = reply.port_no;
1018 }
c19e6535 1019 ofpbuf_delete(buf);
3abc4a1a 1020 }
c19e6535 1021 return error;
96fba48f
BP
1022}
1023
1024static int
93451a0a
AS
1025dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1026 struct dpif_port *dpif_port)
96fba48f 1027{
93451a0a 1028 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9b00386b 1029
93451a0a 1030 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
96fba48f
BP
1031}
1032
/* Looks up the port named 'devname' in 'dpif_' by delegating to
 * dpif_netlink_port_query__() with a port number of 0. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    struct dpif_netlink *netlink_dpif = dpif_netlink_cast(dpif_);

    return dpif_netlink_port_query__(netlink_dpif, 0, devname, dpif_port);
}
1041
98403001 1042static uint32_t
93451a0a
AS
1043dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1044 odp_port_t port_no, uint32_t hash)
b90de034 1045 OVS_REQ_RDLOCK(dpif->upcall_lock)
98403001 1046{
4e022ec0 1047 uint32_t port_idx = odp_to_u32(port_no);
9fafa796 1048 uint32_t pid = 0;
98403001 1049
f8fc5489 1050 if (dpif->handlers && dpif->uc_array_size > 0) {
4e022ec0 1051 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
989fd548 1052 * channel, since it is not heavily loaded. */
4e022ec0 1053 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1579cf67
AW
1054 struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
1055
17f2748d
AW
1056 /* Needs to check in case the socket pointer is changed in between
1057 * the holding of upcall_lock. A known case happens when the main
1058 * thread deletes the vport while the handler thread is handling
1059 * the upcall from that port. */
1060 if (h->channels[idx].sock) {
1061 pid = nl_sock_pid(h->channels[idx].sock);
1062 }
98403001 1063 }
9fafa796
BP
1064
1065 return pid;
98403001
BP
1066}
1067
b90de034 1068static uint32_t
93451a0a
AS
1069dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
1070 uint32_t hash)
b90de034 1071{
93451a0a 1072 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
1073 uint32_t ret;
1074
1075 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 1076 ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
b90de034
AW
1077 fat_rwlock_unlock(&dpif->upcall_lock);
1078
1079 return ret;
1080}
1081
96fba48f 1082static int
93451a0a 1083dpif_netlink_flow_flush(struct dpif *dpif_)
96fba48f 1084{
93451a0a
AS
1085 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1086 struct dpif_netlink_flow flow;
37a1300c 1087
93451a0a 1088 dpif_netlink_flow_init(&flow);
df2c07f4 1089 flow.cmd = OVS_FLOW_CMD_DEL;
254f2dc8 1090 flow.dp_ifindex = dpif->dp_ifindex;
93451a0a 1091 return dpif_netlink_flow_transact(&flow, NULL, NULL);
96fba48f
BP
1092}
1093
93451a0a 1094struct dpif_netlink_port_state {
f0fef760 1095 struct nl_dump dump;
d57695d7 1096 struct ofpbuf buf;
c19e6535
BP
1097};
1098
222837c4 1099static void
93451a0a
AS
1100dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1101 struct nl_dump *dump)
96fba48f 1102{
93451a0a 1103 struct dpif_netlink_vport request;
f0fef760
BP
1104 struct ofpbuf *buf;
1105
93451a0a 1106 dpif_netlink_vport_init(&request);
067f1e23 1107 request.cmd = OVS_VPORT_CMD_GET;
254f2dc8 1108 request.dp_ifindex = dpif->dp_ifindex;
f0fef760
BP
1109
1110 buf = ofpbuf_new(1024);
93451a0a 1111 dpif_netlink_vport_to_ofpbuf(&request, buf);
222837c4 1112 nl_dump_start(dump, NETLINK_GENERIC, buf);
f0fef760 1113 ofpbuf_delete(buf);
222837c4
BP
1114}
1115
1116static int
93451a0a 1117dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
222837c4 1118{
93451a0a
AS
1119 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1120 struct dpif_netlink_port_state *state;
222837c4
BP
1121
1122 *statep = state = xmalloc(sizeof *state);
93451a0a 1123 dpif_netlink_port_dump_start__(dpif, &state->dump);
f0fef760 1124
d57695d7 1125 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
b0ec0f27
BP
1126 return 0;
1127}
1128
7c1ef244 1129static int
93451a0a
AS
1130dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1131 struct nl_dump *dump,
1132 struct dpif_netlink_vport *vport,
1133 struct ofpbuf *buffer)
222837c4 1134{
222837c4
BP
1135 struct ofpbuf buf;
1136 int error;
1137
d57695d7 1138 if (!nl_dump_next(dump, &buf, buffer)) {
222837c4
BP
1139 return EOF;
1140 }
1141
93451a0a 1142 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
222837c4
BP
1143 if (error) {
1144 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1145 dpif_name(&dpif->dpif), ovs_strerror(error));
1146 }
1147 return error;
1148}
1149
b0ec0f27 1150static int
93451a0a
AS
1151dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1152 struct dpif_port *dpif_port)
b0ec0f27 1153{
93451a0a
AS
1154 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1155 struct dpif_netlink_port_state *state = state_;
1156 struct dpif_netlink_vport vport;
96fba48f
BP
1157 int error;
1158
93451a0a
AS
1159 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1160 &state->buf);
c3827f61 1161 if (error) {
f0fef760 1162 return error;
c3827f61 1163 }
ebc56baa 1164 dpif_port->name = CONST_CAST(char *, vport.name);
b9ad7294 1165 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
f0fef760
BP
1166 dpif_port->port_no = vport.port_no;
1167 return 0;
b0ec0f27
BP
1168}
1169
1170static int
93451a0a 1171dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
b0ec0f27 1172{
93451a0a 1173 struct dpif_netlink_port_state *state = state_;
f0fef760 1174 int error = nl_dump_done(&state->dump);
8522b383 1175
d57695d7 1176 ofpbuf_uninit(&state->buf);
b0ec0f27 1177 free(state);
f0fef760 1178 return error;
96fba48f
BP
1179}
1180
e9e28be3 1181static int
93451a0a 1182dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
e9e28be3 1183{
93451a0a 1184 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e9e28be3 1185
e4516b20
BP
1186 /* Lazily create the Netlink socket to listen for notifications. */
1187 if (!dpif->port_notifier) {
1188 struct nl_sock *sock;
1189 int error;
1190
1191 error = nl_sock_create(NETLINK_GENERIC, &sock);
1192 if (error) {
1193 return error;
1194 }
1195
1196 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1197 if (error) {
1198 nl_sock_destroy(sock);
1199 return error;
1200 }
1201 dpif->port_notifier = sock;
1202
1203 /* We have no idea of the current state so report that everything
1204 * changed. */
1205 return ENOBUFS;
1206 }
1207
1208 for (;;) {
1209 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1210 uint64_t buf_stub[4096 / 8];
1211 struct ofpbuf buf;
1212 int error;
1213
1214 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
1215 error = nl_sock_recv(dpif->port_notifier, &buf, false);
1216 if (!error) {
93451a0a 1217 struct dpif_netlink_vport vport;
e4516b20 1218
93451a0a 1219 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
e4516b20
BP
1220 if (!error) {
1221 if (vport.dp_ifindex == dpif->dp_ifindex
1222 && (vport.cmd == OVS_VPORT_CMD_NEW
1223 || vport.cmd == OVS_VPORT_CMD_DEL
1224 || vport.cmd == OVS_VPORT_CMD_SET)) {
1225 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1226 dpif->dpif.full_name, vport.name, vport.cmd);
1579cf67 1227 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
61eae437
BP
1228 dpif->refresh_channels = true;
1229 }
e4516b20 1230 *devnamep = xstrdup(vport.name);
59e0c910 1231 ofpbuf_uninit(&buf);
e4516b20 1232 return 0;
e4516b20
BP
1233 }
1234 }
59e0c910
BP
1235 } else if (error != EAGAIN) {
1236 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1237 ovs_strerror(error));
1238 nl_sock_drain(dpif->port_notifier);
1239 error = ENOBUFS;
e4516b20
BP
1240 }
1241
59e0c910
BP
1242 ofpbuf_uninit(&buf);
1243 if (error) {
1244 return error;
1245 }
e9e28be3 1246 }
e9e28be3
BP
1247}
1248
1249static void
93451a0a 1250dpif_netlink_port_poll_wait(const struct dpif *dpif_)
e9e28be3 1251{
93451a0a 1252 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
e4516b20
BP
1253
1254 if (dpif->port_notifier) {
1255 nl_sock_wait(dpif->port_notifier, POLLIN);
1256 } else {
e9e28be3 1257 poll_immediate_wake();
e9e28be3
BP
1258 }
1259}
1260
6fe09f8c 1261static void
70e5ed6f
JS
1262dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1263 const ovs_u128 *ufid, bool terse)
1264{
1265 if (ufid) {
1266 request->ufid = *ufid;
1267 request->ufid_present = true;
1268 } else {
1269 request->ufid_present = false;
1270 }
1271 request->ufid_terse = terse;
1272}
1273
1274static void
1275dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1276 const struct nlattr *key, size_t key_len,
1277 const ovs_u128 *ufid, bool terse,
1278 struct dpif_netlink_flow *request)
96fba48f 1279{
93451a0a 1280 dpif_netlink_flow_init(request);
6fe09f8c
JS
1281 request->cmd = OVS_FLOW_CMD_GET;
1282 request->dp_ifindex = dpif->dp_ifindex;
1283 request->key = key;
1284 request->key_len = key_len;
70e5ed6f
JS
1285 dpif_netlink_flow_init_ufid(request, ufid, terse);
1286}
1287
1288static void
1289dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1290 const struct dpif_flow_get *get,
1291 struct dpif_netlink_flow *request)
1292{
1293 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1294 false, request);
30053024
BP
1295}
1296
1297static int
70e5ed6f
JS
1298dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1299 const struct nlattr *key, size_t key_len,
1300 const ovs_u128 *ufid, bool terse,
1301 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
30053024 1302{
93451a0a 1303 struct dpif_netlink_flow request;
30053024 1304
70e5ed6f 1305 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
93451a0a 1306 return dpif_netlink_flow_transact(&request, reply, bufp);
96fba48f
BP
1307}
1308
70e5ed6f
JS
1309static int
1310dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1311 const struct dpif_netlink_flow *flow,
1312 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1313{
1314 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1315 flow->ufid_present ? &flow->ufid : NULL,
1316 false, reply, bufp);
1317}
1318
6bc60024 1319static void
93451a0a
AS
1320dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1321 const struct dpif_flow_put *put,
1322 struct dpif_netlink_flow *request)
6bc60024 1323{
d64e176c 1324 static const struct nlattr dummy_action;
6bc60024 1325
93451a0a 1326 dpif_netlink_flow_init(request);
89625d1e 1327 request->cmd = (put->flags & DPIF_FP_CREATE
6bc60024
BP
1328 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1329 request->dp_ifindex = dpif->dp_ifindex;
89625d1e
BP
1330 request->key = put->key;
1331 request->key_len = put->key_len;
e6cc0bab
AZ
1332 request->mask = put->mask;
1333 request->mask_len = put->mask_len;
70e5ed6f
JS
1334 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1335
6bc60024 1336 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
d64e176c
BP
1337 request->actions = (put->actions
1338 ? put->actions
1339 : CONST_CAST(struct nlattr *, &dummy_action));
89625d1e
BP
1340 request->actions_len = put->actions_len;
1341 if (put->flags & DPIF_FP_ZERO_STATS) {
6bc60024
BP
1342 request->clear = true;
1343 }
43f9ac0a
JR
1344 if (put->flags & DPIF_FP_PROBE) {
1345 request->probe = true;
1346 }
89625d1e 1347 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
6bc60024
BP
1348}
1349
b99d3cee 1350static void
70e5ed6f
JS
1351dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1352 const struct nlattr *key, size_t key_len,
1353 const ovs_u128 *ufid, bool terse,
1354 struct dpif_netlink_flow *request)
96fba48f 1355{
93451a0a 1356 dpif_netlink_flow_init(request);
b99d3cee
BP
1357 request->cmd = OVS_FLOW_CMD_DEL;
1358 request->dp_ifindex = dpif->dp_ifindex;
70e5ed6f
JS
1359 request->key = key;
1360 request->key_len = key_len;
1361 dpif_netlink_flow_init_ufid(request, ufid, terse);
1362}
1363
1364static void
1365dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1366 const struct dpif_flow_del *del,
1367 struct dpif_netlink_flow *request)
1368{
37382aa6
AS
1369 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1370 del->ufid, del->terse, request);
70e5ed6f
JS
1371}
1372
93451a0a 1373struct dpif_netlink_flow_dump {
ac64794a
BP
1374 struct dpif_flow_dump up;
1375 struct nl_dump nl_dump;
d2ad7ef1 1376 atomic_int status;
e723fd32
JS
1377};
1378
93451a0a
AS
1379static struct dpif_netlink_flow_dump *
1380dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
e723fd32 1381{
93451a0a 1382 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
e723fd32
JS
1383}
1384
ac64794a 1385static struct dpif_flow_dump *
64bb477f 1386dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
96fba48f 1387{
93451a0a
AS
1388 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1389 struct dpif_netlink_flow_dump *dump;
1390 struct dpif_netlink_flow request;
37a1300c
BP
1391 struct ofpbuf *buf;
1392
ac64794a
BP
1393 dump = xmalloc(sizeof *dump);
1394 dpif_flow_dump_init(&dump->up, dpif_);
37a1300c 1395
93451a0a 1396 dpif_netlink_flow_init(&request);
067f1e23 1397 request.cmd = OVS_FLOW_CMD_GET;
254f2dc8 1398 request.dp_ifindex = dpif->dp_ifindex;
64bb477f
JS
1399 request.ufid_present = false;
1400 request.ufid_terse = terse;
37a1300c
BP
1401
1402 buf = ofpbuf_new(1024);
93451a0a 1403 dpif_netlink_flow_to_ofpbuf(&request, buf);
ac64794a 1404 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
37a1300c 1405 ofpbuf_delete(buf);
ac64794a 1406 atomic_init(&dump->status, 0);
64bb477f 1407 dump->up.terse = terse;
30053024 1408
ac64794a 1409 return &dump->up;
704a1e09
BP
1410}
1411
1412static int
93451a0a 1413dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
704a1e09 1414{
93451a0a 1415 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
ac64794a
BP
1416 unsigned int nl_status = nl_dump_done(&dump->nl_dump);
1417 int dump_status;
96fba48f 1418
7424fc44
JR
1419 /* No other thread has access to 'dump' at this point. */
1420 atomic_read_relaxed(&dump->status, &dump_status);
ac64794a
BP
1421 free(dump);
1422 return dump_status ? dump_status : nl_status;
1423}
feebdea2 1424
93451a0a 1425struct dpif_netlink_flow_dump_thread {
ac64794a 1426 struct dpif_flow_dump_thread up;
93451a0a
AS
1427 struct dpif_netlink_flow_dump *dump;
1428 struct dpif_netlink_flow flow;
ac64794a
BP
1429 struct dpif_flow_stats stats;
1430 struct ofpbuf nl_flows; /* Always used to store flows. */
1431 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
1432};
1433
93451a0a
AS
1434static struct dpif_netlink_flow_dump_thread *
1435dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
ac64794a 1436{
93451a0a 1437 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
ac64794a
BP
1438}
1439
1440static struct dpif_flow_dump_thread *
93451a0a 1441dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
ac64794a 1442{
93451a0a
AS
1443 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1444 struct dpif_netlink_flow_dump_thread *thread;
ac64794a
BP
1445
1446 thread = xmalloc(sizeof *thread);
1447 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1448 thread->dump = dump;
1449 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1450 thread->nl_actions = NULL;
1451
1452 return &thread->up;
1453}
1454
1455static void
93451a0a 1456dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
ac64794a 1457{
93451a0a
AS
1458 struct dpif_netlink_flow_dump_thread *thread
1459 = dpif_netlink_flow_dump_thread_cast(thread_);
ac64794a
BP
1460
1461 ofpbuf_uninit(&thread->nl_flows);
1462 ofpbuf_delete(thread->nl_actions);
1463 free(thread);
1464}
1465
1466static void
7af12bd7 1467dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
7fe98598 1468 const struct dpif_netlink_flow *datapath_flow)
ac64794a 1469{
7fe98598
NR
1470 dpif_flow->key = datapath_flow->key;
1471 dpif_flow->key_len = datapath_flow->key_len;
1472 dpif_flow->mask = datapath_flow->mask;
1473 dpif_flow->mask_len = datapath_flow->mask_len;
1474 dpif_flow->actions = datapath_flow->actions;
1475 dpif_flow->actions_len = datapath_flow->actions_len;
70e5ed6f 1476 dpif_flow->ufid_present = datapath_flow->ufid_present;
ec97c2df 1477 dpif_flow->pmd_id = PMD_ID_NULL;
70e5ed6f
JS
1478 if (datapath_flow->ufid_present) {
1479 dpif_flow->ufid = datapath_flow->ufid;
1480 } else {
1481 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1482 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1483 &dpif_flow->ufid);
1484 }
7fe98598 1485 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
ac64794a
BP
1486}
1487
1488static int
93451a0a
AS
1489dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1490 struct dpif_flow *flows, int max_flows)
ac64794a 1491{
93451a0a
AS
1492 struct dpif_netlink_flow_dump_thread *thread
1493 = dpif_netlink_flow_dump_thread_cast(thread_);
1494 struct dpif_netlink_flow_dump *dump = thread->dump;
1495 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
ac64794a
BP
1496 int n_flows;
1497
1498 ofpbuf_delete(thread->nl_actions);
1499 thread->nl_actions = NULL;
1500
1501 n_flows = 0;
1502 while (!n_flows
6fd6ed71 1503 || (n_flows < max_flows && thread->nl_flows.size)) {
7fe98598 1504 struct dpif_netlink_flow datapath_flow;
ac64794a
BP
1505 struct ofpbuf nl_flow;
1506 int error;
1507
1508 /* Try to grab another flow. */
1509 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1510 break;
feebdea2 1511 }
30053024 1512
ac64794a 1513 /* Convert the flow to our output format. */
7fe98598 1514 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
30053024 1515 if (error) {
7424fc44 1516 atomic_store_relaxed(&dump->status, error);
ac64794a 1517 break;
feebdea2 1518 }
30053024 1519
64bb477f
JS
1520 if (dump->up.terse || datapath_flow.actions) {
1521 /* Common case: we don't want actions, or the flow includes
1522 * actions. */
7af12bd7
JS
1523 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1524 &datapath_flow);
ac64794a
BP
1525 } else {
1526 /* Rare case: the flow does not include actions. Retrieve this
1527 * individual flow again to get the actions. */
70e5ed6f 1528 error = dpif_netlink_flow_get(dpif, &datapath_flow,
7fe98598 1529 &datapath_flow, &thread->nl_actions);
30053024
BP
1530 if (error == ENOENT) {
1531 VLOG_DBG("dumped flow disappeared on get");
ac64794a 1532 continue;
30053024 1533 } else if (error) {
10a89ef0
BP
1534 VLOG_WARN("error fetching dumped flow: %s",
1535 ovs_strerror(error));
7424fc44 1536 atomic_store_relaxed(&dump->status, error);
ac64794a 1537 break;
30053024 1538 }
30053024 1539
ac64794a
BP
1540 /* Save this flow. Then exit, because we only have one buffer to
1541 * handle this case. */
7af12bd7
JS
1542 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1543 &datapath_flow);
ac64794a
BP
1544 break;
1545 }
feebdea2 1546 }
ac64794a 1547 return n_flows;
96fba48f
BP
1548}
1549
eabe7c68 1550static void
93451a0a
AS
1551dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1552 struct ofpbuf *buf)
96fba48f 1553{
89625d1e 1554 struct ovs_header *k_exec;
758c456d 1555 size_t key_ofs;
f7cd0081 1556
eabe7c68 1557 ofpbuf_prealloc_tailroom(buf, (64
cf62fa4c 1558 + dp_packet_size(d_exec->packet)
758c456d 1559 + ODP_KEY_METADATA_SIZE
eabe7c68 1560 + d_exec->actions_len));
f7cd0081 1561
df2c07f4 1562 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
69685a88 1563 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
f7cd0081 1564
89625d1e
BP
1565 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1566 k_exec->dp_ifindex = dp_ifindex;
f7cd0081 1567
89625d1e 1568 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
cf62fa4c
PS
1569 dp_packet_data(d_exec->packet),
1570 dp_packet_size(d_exec->packet));
758c456d
JR
1571
1572 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
cf62fa4c 1573 odp_key_from_pkt_metadata(buf, &d_exec->packet->md);
758c456d
JR
1574 nl_msg_end_nested(buf, key_ofs);
1575
89625d1e
BP
1576 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1577 d_exec->actions, d_exec->actions_len);
43f9ac0a 1578 if (d_exec->probe) {
2e460098 1579 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
43f9ac0a 1580 }
27130224
AZ
1581 if (d_exec->mtu) {
1582 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1583 }
6bc60024
BP
1584}
1585
0f3358ea
BP
1586/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1587 * Returns the number actually executed (at least 1, if 'n_ops' is
1588 * positive). */
1589static size_t
93451a0a
AS
1590dpif_netlink_operate__(struct dpif_netlink *dpif,
1591 struct dpif_op **ops, size_t n_ops)
6bc60024 1592{
0f3358ea
BP
1593 enum { MAX_OPS = 50 };
1594
eabe7c68
BP
1595 struct op_auxdata {
1596 struct nl_transaction txn;
72d32ac0 1597
eabe7c68
BP
1598 struct ofpbuf request;
1599 uint64_t request_stub[1024 / 8];
72d32ac0
BP
1600
1601 struct ofpbuf reply;
1602 uint64_t reply_stub[1024 / 8];
eabe7c68
BP
1603 } auxes[MAX_OPS];
1604
1605 struct nl_transaction *txnsp[MAX_OPS];
6bc60024
BP
1606 size_t i;
1607
0f3358ea 1608 n_ops = MIN(n_ops, MAX_OPS);
6bc60024 1609 for (i = 0; i < n_ops; i++) {
eabe7c68 1610 struct op_auxdata *aux = &auxes[i];
c2b565b5 1611 struct dpif_op *op = ops[i];
b99d3cee
BP
1612 struct dpif_flow_put *put;
1613 struct dpif_flow_del *del;
6fe09f8c 1614 struct dpif_flow_get *get;
93451a0a 1615 struct dpif_netlink_flow flow;
eabe7c68
BP
1616
1617 ofpbuf_use_stub(&aux->request,
1618 aux->request_stub, sizeof aux->request_stub);
1619 aux->txn.request = &aux->request;
b99d3cee 1620
72d32ac0
BP
1621 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1622 aux->txn.reply = NULL;
1623
b99d3cee
BP
1624 switch (op->type) {
1625 case DPIF_OP_FLOW_PUT:
1626 put = &op->u.flow_put;
93451a0a 1627 dpif_netlink_init_flow_put(dpif, put, &flow);
6bc60024 1628 if (put->stats) {
eabe7c68 1629 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1630 aux->txn.reply = &aux->reply;
6bc60024 1631 }
93451a0a 1632 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee
BP
1633 break;
1634
1635 case DPIF_OP_FLOW_DEL:
1636 del = &op->u.flow_del;
93451a0a 1637 dpif_netlink_init_flow_del(dpif, del, &flow);
b99d3cee 1638 if (del->stats) {
eabe7c68 1639 flow.nlmsg_flags |= NLM_F_ECHO;
72d32ac0 1640 aux->txn.reply = &aux->reply;
b99d3cee 1641 }
93451a0a 1642 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
b99d3cee 1643 break;
6bc60024 1644
b99d3cee 1645 case DPIF_OP_EXECUTE:
0f3358ea
BP
1646 /* Can't execute a packet that won't fit in a Netlink attribute. */
1647 if (OVS_UNLIKELY(nl_attr_oversized(
cf62fa4c 1648 dp_packet_size(op->u.execute.packet)))) {
0f3358ea
BP
1649 /* Report an error immediately if this is the first operation.
1650 * Otherwise the easiest thing to do is to postpone to the next
1651 * call (when this will be the first operation). */
1652 if (i == 0) {
1653 VLOG_ERR_RL(&error_rl,
1654 "dropping oversized %"PRIu32"-byte packet",
cf62fa4c 1655 dp_packet_size(op->u.execute.packet));
0f3358ea
BP
1656 op->error = ENOBUFS;
1657 return 1;
1658 }
1659 n_ops = i;
1660 } else {
1661 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->u.execute,
1662 &aux->request);
1663 }
b99d3cee
BP
1664 break;
1665
6fe09f8c
JS
1666 case DPIF_OP_FLOW_GET:
1667 get = &op->u.flow_get;
70e5ed6f 1668 dpif_netlink_init_flow_get(dpif, get, &flow);
6fe09f8c 1669 aux->txn.reply = get->buffer;
93451a0a 1670 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
6fe09f8c
JS
1671 break;
1672
b99d3cee 1673 default:
428b2edd 1674 OVS_NOT_REACHED();
6bc60024
BP
1675 }
1676 }
1677
6bc60024 1678 for (i = 0; i < n_ops; i++) {
eabe7c68 1679 txnsp[i] = &auxes[i].txn;
6bc60024 1680 }
a88b4e04 1681 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
6bc60024 1682
6bc60024 1683 for (i = 0; i < n_ops; i++) {
72d32ac0 1684 struct op_auxdata *aux = &auxes[i];
eabe7c68 1685 struct nl_transaction *txn = &auxes[i].txn;
c2b565b5 1686 struct dpif_op *op = ops[i];
b99d3cee
BP
1687 struct dpif_flow_put *put;
1688 struct dpif_flow_del *del;
6fe09f8c 1689 struct dpif_flow_get *get;
6bc60024 1690
b99d3cee 1691 op->error = txn->error;
6bc60024 1692
b99d3cee
BP
1693 switch (op->type) {
1694 case DPIF_OP_FLOW_PUT:
1695 put = &op->u.flow_put;
cfceb2b5 1696 if (put->stats) {
b99d3cee 1697 if (!op->error) {
93451a0a 1698 struct dpif_netlink_flow reply;
cfceb2b5 1699
93451a0a
AS
1700 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1701 txn->reply);
cfceb2b5 1702 if (!op->error) {
93451a0a 1703 dpif_netlink_flow_get_stats(&reply, put->stats);
cfceb2b5
BP
1704 }
1705 }
6bc60024 1706 }
b99d3cee
BP
1707 break;
1708
1709 case DPIF_OP_FLOW_DEL:
1710 del = &op->u.flow_del;
cfceb2b5 1711 if (del->stats) {
b99d3cee 1712 if (!op->error) {
93451a0a 1713 struct dpif_netlink_flow reply;
cfceb2b5 1714
93451a0a
AS
1715 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1716 txn->reply);
cfceb2b5 1717 if (!op->error) {
93451a0a 1718 dpif_netlink_flow_get_stats(&reply, del->stats);
cfceb2b5
BP
1719 }
1720 }
b99d3cee
BP
1721 }
1722 break;
1723
1724 case DPIF_OP_EXECUTE:
1725 break;
1726
6fe09f8c
JS
1727 case DPIF_OP_FLOW_GET:
1728 get = &op->u.flow_get;
1729 if (!op->error) {
93451a0a 1730 struct dpif_netlink_flow reply;
6fe09f8c 1731
93451a0a 1732 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
6fe09f8c 1733 if (!op->error) {
7af12bd7
JS
1734 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
1735 &reply);
6fe09f8c
JS
1736 }
1737 }
1738 break;
1739
b99d3cee 1740 default:
428b2edd 1741 OVS_NOT_REACHED();
6bc60024
BP
1742 }
1743
72d32ac0
BP
1744 ofpbuf_uninit(&aux->request);
1745 ofpbuf_uninit(&aux->reply);
6bc60024 1746 }
0f3358ea
BP
1747
1748 return n_ops;
eabe7c68
BP
1749}
1750
/* Executes all 'n_ops' operations in 'ops' against 'dpif_', in as many
 * batches as dpif_netlink_operate__() needs. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t n_done;

    for (; n_ops > 0; ops += n_done, n_ops -= n_done) {
        n_done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
1762
09cac43f
NR
1763#if _WIN32
1764static void
1765dpif_netlink_handler_uninit(struct dpif_handler *handler)
1766{
1767 vport_delete_sock_pool(handler);
1768}
1769
1770static int
1771dpif_netlink_handler_init(struct dpif_handler *handler)
1772{
1773 return vport_create_sock_pool(handler);
1774}
1775#else
1776
1777static int
1778dpif_netlink_handler_init(struct dpif_handler *handler)
1779{
1780 handler->epoll_fd = epoll_create(10);
1781 return handler->epoll_fd < 0 ? errno : 0;
1782}
1783
1784static void
1785dpif_netlink_handler_uninit(struct dpif_handler *handler)
1786{
1787 close(handler->epoll_fd);
1788}
1789#endif
1790
1579cf67
AW
1791/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
1792 * currently in 'dpif' in the kernel, by adding a new set of channels for
1793 * any kernel vport that lacks one and deleting any channels that have no
1794 * backing kernel vports. */
96fba48f 1795static int
93451a0a 1796dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
b90de034 1797 OVS_REQ_WRLOCK(dpif->upcall_lock)
96fba48f 1798{
8381a3d3 1799 unsigned long int *keep_channels;
93451a0a 1800 struct dpif_netlink_vport vport;
8381a3d3
BP
1801 size_t keep_channels_nbits;
1802 struct nl_dump dump;
d57695d7
JS
1803 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
1804 struct ofpbuf buf;
8381a3d3
BP
1805 int retval = 0;
1806 size_t i;
982b8810 1807
09cac43f
NR
1808 ovs_assert(!WINDOWS || n_handlers <= 1);
1809 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1810
1579cf67
AW
1811 if (dpif->n_handlers != n_handlers) {
1812 destroy_all_channels(dpif);
1813 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
1814 for (i = 0; i < n_handlers; i++) {
09cac43f 1815 int error;
1579cf67
AW
1816 struct dpif_handler *handler = &dpif->handlers[i];
1817
09cac43f
NR
1818 error = dpif_netlink_handler_init(handler);
1819 if (error) {
1579cf67 1820 size_t j;
09cac43f
NR
1821 struct dpif_handler *tmp = &dpif->handlers[i];
1822
1579cf67
AW
1823
1824 for (j = 0; j < i; j++) {
09cac43f 1825 dpif_netlink_handler_uninit(tmp);
1579cf67
AW
1826 }
1827 free(dpif->handlers);
1828 dpif->handlers = NULL;
1829
09cac43f 1830 return error;
1579cf67 1831 }
8381a3d3 1832 }
1579cf67
AW
1833 dpif->n_handlers = n_handlers;
1834 }
1835
1836 for (i = 0; i < n_handlers; i++) {
1837 struct dpif_handler *handler = &dpif->handlers[i];
1838
1839 handler->event_offset = handler->n_events = 0;
17411ecf 1840 }
b063d9f0 1841
8381a3d3
BP
1842 keep_channels_nbits = dpif->uc_array_size;
1843 keep_channels = bitmap_allocate(keep_channels_nbits);
982b8810 1844
d57695d7 1845 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
93451a0a
AS
1846 dpif_netlink_port_dump_start__(dpif, &dump);
1847 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
8381a3d3 1848 uint32_t port_no = odp_to_u32(vport.port_no);
1579cf67 1849 uint32_t *upcall_pids = NULL;
8381a3d3 1850 int error;
50f80534 1851
1579cf67
AW
1852 if (port_no >= dpif->uc_array_size
1853 || !vport_get_pids(dpif, port_no, &upcall_pids)) {
09cac43f 1854 struct nl_sock **socksp = vport_create_socksp(dpif, &error);
1579cf67
AW
1855
1856 if (!socksp) {
1857 goto error;
1858 }
1859
1860 error = vport_add_channels(dpif, vport.port_no, socksp);
b063d9f0 1861 if (error) {
1579cf67 1862 VLOG_INFO("%s: could not add channels for port %s",
9b00386b 1863 dpif_name(&dpif->dpif), vport.name);
09cac43f 1864 vport_del_socksp(dpif, socksp);
8381a3d3
BP
1865 retval = error;
1866 goto error;
982b8810 1867 }
1579cf67
AW
1868 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
1869 free(socksp);
8381a3d3 1870 }
50f80534 1871
8381a3d3 1872 /* Configure the vport to deliver misses to 'sock'. */
1579cf67
AW
1873 if (vport.upcall_pids[0] == 0
1874 || vport.n_upcall_pids != dpif->n_handlers
1875 || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
1876 *upcall_pids)) {
93451a0a 1877 struct dpif_netlink_vport vport_request;
989fd548 1878
93451a0a 1879 dpif_netlink_vport_init(&vport_request);
989fd548
JP
1880 vport_request.cmd = OVS_VPORT_CMD_SET;
1881 vport_request.dp_ifindex = dpif->dp_ifindex;
8381a3d3 1882 vport_request.port_no = vport.port_no;
1579cf67
AW
1883 vport_request.n_upcall_pids = dpif->n_handlers;
1884 vport_request.upcall_pids = upcall_pids;
93451a0a 1885 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1579cf67 1886 if (error) {
989fd548
JP
1887 VLOG_WARN_RL(&error_rl,
1888 "%s: failed to set upcall pid on port: %s",
10a89ef0 1889 dpif_name(&dpif->dpif), ovs_strerror(error));
989fd548 1890
8381a3d3
BP
1891 if (error != ENODEV && error != ENOENT) {
1892 retval = error;
989fd548 1893 } else {
8381a3d3
BP
1894 /* The vport isn't really there, even though the dump says
1895 * it is. Probably we just hit a race after a port
1896 * disappeared. */
989fd548 1897 }
8381a3d3 1898 goto error;
50f80534 1899 }
8381a3d3 1900 }
14b4d2f9 1901
8381a3d3
BP
1902 if (port_no < keep_channels_nbits) {
1903 bitmap_set1(keep_channels, port_no);
1904 }
1579cf67 1905 free(upcall_pids);
8381a3d3
BP
1906 continue;
1907
1908 error:
1579cf67
AW
1909 free(upcall_pids);
1910 vport_del_channels(dpif, vport.port_no);
982b8810 1911 }
8381a3d3 1912 nl_dump_done(&dump);
d57695d7 1913 ofpbuf_uninit(&buf);
b063d9f0 1914
8381a3d3
BP
1915 /* Discard any saved channels that we didn't reuse. */
1916 for (i = 0; i < keep_channels_nbits; i++) {
1917 if (!bitmap_is_set(keep_channels, i)) {
1579cf67 1918 vport_del_channels(dpif, u32_to_odp(i));
8381a3d3
BP
1919 }
1920 }
1921 free(keep_channels);
1922
1923 return retval;
1924}
1925
1926static int
93451a0a 1927dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
b90de034 1928 OVS_REQ_WRLOCK(dpif->upcall_lock)
8381a3d3 1929{
1579cf67 1930 if ((dpif->handlers != NULL) == enable) {
8381a3d3
BP
1931 return 0;
1932 } else if (!enable) {
1579cf67 1933 destroy_all_channels(dpif);
8381a3d3
BP
1934 return 0;
1935 } else {
93451a0a 1936 return dpif_netlink_refresh_channels(dpif, 1);
8381a3d3 1937 }
96fba48f
BP
1938}
1939
9fafa796 1940static int
93451a0a 1941dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
9fafa796 1942{
93451a0a 1943 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
1944 int error;
1945
1579cf67 1946 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 1947 error = dpif_netlink_recv_set__(dpif, enable);
1579cf67 1948 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
1949
1950 return error;
1951}
1952
1954e6bb 1953static int
93451a0a 1954dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
1954e6bb 1955{
93451a0a 1956 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1579cf67
AW
1957 int error = 0;
1958
09cac43f
NR
1959#ifdef _WIN32
1960 /* Multiple upcall handlers will be supported once kernel datapath supports
1961 * it. */
1962 if (n_handlers > 1) {
1963 return error;
1964 }
1965#endif
1966
1579cf67
AW
1967 fat_rwlock_wrlock(&dpif->upcall_lock);
1968 if (dpif->handlers) {
93451a0a 1969 error = dpif_netlink_refresh_channels(dpif, n_handlers);
1579cf67
AW
1970 }
1971 fat_rwlock_unlock(&dpif->upcall_lock);
1972
1973 return error;
1954e6bb
AW
1974}
1975
aae51f53 1976static int
93451a0a 1977dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
aae51f53
BP
1978 uint32_t queue_id, uint32_t *priority)
1979{
1980 if (queue_id < 0xf000) {
17ee3c1f 1981 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
aae51f53
BP
1982 return 0;
1983 } else {
1984 return EINVAL;
1985 }
1986}
1987
96fba48f 1988static int
7af12bd7
JS
1989parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
1990 struct dpif_upcall *upcall, int *dp_ifindex)
856081f6 1991{
df2c07f4 1992 static const struct nl_policy ovs_packet_policy[] = {
856081f6 1993 /* Always present. */
df2c07f4 1994 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
856081f6 1995 .min_len = ETH_HEADER_LEN },
df2c07f4 1996 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
856081f6 1997
df2c07f4 1998 /* OVS_PACKET_CMD_ACTION only. */
e995e3df 1999 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
8b7ea2d4 2000 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
7321bda3 2001 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
27130224 2002 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
856081f6
BP
2003 };
2004
0a2869d5
BP
2005 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2006 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2007 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2008 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
982b8810 2009
0a2869d5 2010 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
df2c07f4
JP
2011 if (!nlmsg || !genl || !ovs_header
2012 || nlmsg->nlmsg_type != ovs_packet_family
2013 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2014 ARRAY_SIZE(ovs_packet_policy))) {
856081f6
BP
2015 return EINVAL;
2016 }
2017
0a2869d5
BP
2018 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2019 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2020 : -1);
aaff4b55
BP
2021 if (type < 0) {
2022 return EINVAL;
2023 }
82272ede 2024
877c9270 2025 /* (Re)set ALL fields of '*upcall' on successful return. */
aaff4b55 2026 upcall->type = type;
ebc56baa
BP
2027 upcall->key = CONST_CAST(struct nlattr *,
2028 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
df2c07f4 2029 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
7af12bd7 2030 dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
e995e3df 2031 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
8b7ea2d4 2032 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
7321bda3 2033 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
27130224 2034 upcall->mru = a[OVS_PACKET_ATTR_MRU];
da546e07
JR
2035
2036 /* Allow overwriting the netlink attribute header without reallocating. */
cf62fa4c 2037 dp_packet_use_stub(&upcall->packet,
da546e07
JR
2038 CONST_CAST(struct nlattr *,
2039 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2040 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2041 sizeof(struct nlattr));
cf62fa4c
PS
2042 dp_packet_set_data(&upcall->packet,
2043 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2044 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
da546e07 2045
2482b0b0
JS
2046 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2047 /* Ethernet frame */
2048 upcall->packet.packet_type = htonl(PT_ETH);
2049 } else {
2050 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2051 ovs_be16 ethertype = 0;
2052 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2053 upcall->key_len,
2054 OVS_KEY_ATTR_ETHERTYPE);
2055 if (et_nla) {
2056 ethertype = nl_attr_get_be16(et_nla);
2057 }
2058 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2059 ntohs(ethertype));
2060 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2061 }
2062
df2c07f4 2063 *dp_ifindex = ovs_header->dp_ifindex;
982b8810 2064
856081f6
BP
2065 return 0;
2066}
2067
09cac43f
NR
2068#ifdef _WIN32
2069#define PACKET_RECV_BATCH_SIZE 50
2070static int
2071dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2072 struct dpif_upcall *upcall, struct ofpbuf *buf)
2073 OVS_REQ_RDLOCK(dpif->upcall_lock)
2074{
2075 struct dpif_handler *handler;
2076 int read_tries = 0;
2077 struct dpif_windows_vport_sock *sock_pool;
2078 uint32_t i;
2079
2080 if (!dpif->handlers) {
2081 return EAGAIN;
2082 }
2083
2084 /* Only one handler is supported currently. */
2085 if (handler_id >= 1) {
2086 return EAGAIN;
2087 }
2088
2089 if (handler_id >= dpif->n_handlers) {
2090 return EAGAIN;
2091 }
2092
2093 handler = &dpif->handlers[handler_id];
2094 sock_pool = handler->vport_sock_pool;
2095
2096 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2097 for (;;) {
2098 int dp_ifindex;
2099 int error;
2100
2101 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2102 return EAGAIN;
2103 }
2104
2105 error = nl_sock_recv(sock_pool[i].nl_sock, buf, false);
2106 if (error == ENOBUFS) {
2107 /* ENOBUFS typically means that we've received so many
2108 * packets that the buffer overflowed. Try again
2109 * immediately because there's almost certainly a packet
2110 * waiting for us. */
2111 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2112 continue;
2113 }
2114
2115 /* XXX: ch->last_poll = time_msec(); */
2116 if (error) {
2117 if (error == EAGAIN) {
2118 break;
2119 }
2120 return error;
2121 }
2122
27edb4aa 2123 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
09cac43f
NR
2124 if (!error && dp_ifindex == dpif->dp_ifindex) {
2125 return 0;
2126 } else if (error) {
2127 return error;
2128 }
2129 }
2130 }
2131
2132 return EAGAIN;
2133}
2134#else
856081f6 2135static int
93451a0a
AS
2136dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2137 struct dpif_upcall *upcall, struct ofpbuf *buf)
b90de034 2138 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2139{
1579cf67 2140 struct dpif_handler *handler;
17411ecf 2141 int read_tries = 0;
96fba48f 2142
1579cf67
AW
2143 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2144 return EAGAIN;
982b8810
BP
2145 }
2146
1579cf67
AW
2147 handler = &dpif->handlers[handler_id];
2148 if (handler->event_offset >= handler->n_events) {
8522ba09 2149 int retval;
989fd548 2150
1579cf67 2151 handler->event_offset = handler->n_events = 0;
f6d1465c 2152
8522ba09 2153 do {
1579cf67 2154 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
989fd548 2155 dpif->uc_array_size, 0);
8522ba09 2156 } while (retval < 0 && errno == EINTR);
09cac43f 2157
8522ba09
BP
2158 if (retval < 0) {
2159 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
10a89ef0 2160 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
989fd548 2161 } else if (retval > 0) {
1579cf67 2162 handler->n_events = retval;
8522ba09 2163 }
8522ba09
BP
2164 }
2165
1579cf67
AW
2166 while (handler->event_offset < handler->n_events) {
2167 int idx = handler->epoll_events[handler->event_offset].data.u32;
2168 struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
8522ba09 2169
1579cf67 2170 handler->event_offset++;
17411ecf 2171
f6d1465c 2172 for (;;) {
8522ba09 2173 int dp_ifindex;
f6d1465c 2174 int error;
17411ecf 2175
f6d1465c
BP
2176 if (++read_tries > 50) {
2177 return EAGAIN;
2178 }
17411ecf 2179
fe3d61b3 2180 error = nl_sock_recv(ch->sock, buf, false);
14b4d2f9
BP
2181 if (error == ENOBUFS) {
2182 /* ENOBUFS typically means that we've received so many
2183 * packets that the buffer overflowed. Try again
2184 * immediately because there's almost certainly a packet
2185 * waiting for us. */
9b00386b 2186 report_loss(dpif, ch, idx, handler_id);
14b4d2f9
BP
2187 continue;
2188 }
2189
2190 ch->last_poll = time_msec();
72d32ac0 2191 if (error) {
72d32ac0
BP
2192 if (error == EAGAIN) {
2193 break;
2194 }
f6d1465c
BP
2195 return error;
2196 }
17411ecf 2197
7af12bd7 2198 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
a12b3ead 2199 if (!error && dp_ifindex == dpif->dp_ifindex) {
f6d1465c 2200 return 0;
989fd548 2201 } else if (error) {
f6d1465c 2202 return error;
17411ecf 2203 }
982b8810 2204 }
50f80534 2205 }
982b8810
BP
2206
2207 return EAGAIN;
96fba48f 2208}
09cac43f 2209#endif
96fba48f 2210
9fafa796 2211static int
93451a0a
AS
2212dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2213 struct dpif_upcall *upcall, struct ofpbuf *buf)
9fafa796 2214{
93451a0a 2215 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
9fafa796
BP
2216 int error;
2217
1579cf67 2218 fat_rwlock_rdlock(&dpif->upcall_lock);
09cac43f
NR
2219#ifdef _WIN32
2220 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2221#else
93451a0a 2222 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
09cac43f 2223#endif
1579cf67 2224 fat_rwlock_unlock(&dpif->upcall_lock);
9fafa796
BP
2225
2226 return error;
2227}
2228
96fba48f 2229static void
93451a0a 2230dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
b90de034 2231 OVS_REQ_RDLOCK(dpif->upcall_lock)
96fba48f 2232{
93451a0a 2233#ifdef _WIN32
09cac43f
NR
2234 uint32_t i;
2235 struct dpif_windows_vport_sock *sock_pool =
2236 dpif->handlers[handler_id].vport_sock_pool;
2237
2238 /* Only one handler is supported currently. */
2239 if (handler_id >= 1) {
2240 return;
2241 }
2242
2243 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2244 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2245 }
93451a0a 2246#else
1579cf67
AW
2247 if (dpif->handlers && handler_id < dpif->n_handlers) {
2248 struct dpif_handler *handler = &dpif->handlers[handler_id];
2249
2250 poll_fd_wait(handler->epoll_fd, POLLIN);
17411ecf 2251 }
93451a0a 2252#endif
96fba48f
BP
2253}
2254
1ba530f4 2255static void
93451a0a 2256dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
1ba530f4 2257{
93451a0a 2258 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
17411ecf 2259
b90de034 2260 fat_rwlock_rdlock(&dpif->upcall_lock);
93451a0a 2261 dpif_netlink_recv_wait__(dpif, handler_id);
b90de034
AW
2262 fat_rwlock_unlock(&dpif->upcall_lock);
2263}
2264
2265static void
93451a0a 2266dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
b90de034
AW
2267 OVS_REQ_WRLOCK(dpif->upcall_lock)
2268{
1579cf67
AW
2269 if (dpif->handlers) {
2270 size_t i, j;
2271
2272 for (i = 0; i < dpif->uc_array_size; i++ ) {
2273 if (!dpif->handlers[0].channels[i].sock) {
2274 continue;
2275 }
1ba530f4 2276
1579cf67
AW
2277 for (j = 0; j < dpif->n_handlers; j++) {
2278 nl_sock_drain(dpif->handlers[j].channels[i].sock);
9fafa796 2279 }
989fd548 2280 }
1ba530f4 2281 }
b90de034
AW
2282}
2283
2284static void
93451a0a 2285dpif_netlink_recv_purge(struct dpif *dpif_)
b90de034 2286{
93451a0a 2287 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
b90de034
AW
2288
2289 fat_rwlock_wrlock(&dpif->upcall_lock);
93451a0a 2290 dpif_netlink_recv_purge__(dpif);
1579cf67 2291 fat_rwlock_unlock(&dpif->upcall_lock);
1ba530f4
BP
2292}
2293
b5cbbcf6
AZ
/* Returns a newly allocated copy of the first line of
 * /sys/module/openvswitch/version, without its trailing newline.
 *
 * Returns NULL when not built for Linux, when the file cannot be opened, or
 * when nothing can be read from it.  The string comes from xstrdup(). */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (stream) {
        char line[MAX_VERSION_STR_SIZE];

        if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
            /* Cut the line at its first newline, if there is one. */
            line[strcspn(line, "\n")] = '\0';
            version_str = xstrdup(line);
        }
        fclose(stream);
    }
#endif

    return version_str;
}
2323
c11c9f4a
DDP
2324struct dpif_netlink_ct_dump_state {
2325 struct ct_dpif_dump_state up;
2326 struct nl_ct_dump_state *nl_ct_dump;
2327};
2328
2329static int
2330dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2331 struct ct_dpif_dump_state **dump_,
2332 const uint16_t *zone)
2333{
2334 struct dpif_netlink_ct_dump_state *dump;
2335 int err;
2336
2337 dump = xzalloc(sizeof *dump);
2338 err = nl_ct_dump_start(&dump->nl_ct_dump, zone);
2339 if (err) {
2340 free(dump);
2341 return err;
2342 }
2343
2344 *dump_ = &dump->up;
2345
2346 return 0;
2347}
2348
2349static int
2350dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2351 struct ct_dpif_dump_state *dump_,
2352 struct ct_dpif_entry *entry)
2353{
2354 struct dpif_netlink_ct_dump_state *dump;
2355
2356 INIT_CONTAINER(dump, dump_, up);
2357
2358 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2359}
2360
2361static int
2362dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2363 struct ct_dpif_dump_state *dump_)
2364{
2365 struct dpif_netlink_ct_dump_state *dump;
2366 int err;
2367
2368 INIT_CONTAINER(dump, dump_, up);
2369
2370 err = nl_ct_dump_done(dump->nl_ct_dump);
2371 free(dump);
2372 return err;
2373}
15eabc97
DDP
2374
2375static int
2376dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
2377{
2378 if (zone) {
2379 return nl_ct_flush_zone(*zone);
2380 } else {
2381 return nl_ct_flush();
2382 }
2383}
c11c9f4a 2384
5dddf960
JR
2385\f
2386/* Meters */
2387static void
2388dpif_netlink_meter_get_features(const struct dpif * dpif OVS_UNUSED,
2389 struct ofputil_meter_features *features)
2390{
2391 features->max_meters = 0;
2392 features->band_types = 0;
2393 features->capabilities = 0;
2394 features->max_bands = 0;
2395 features->max_color = 0;
2396}
2397
2398static int
2399dpif_netlink_meter_set(struct dpif *dpif OVS_UNUSED,
2400 ofproto_meter_id *meter_id OVS_UNUSED,
2401 struct ofputil_meter_config *config OVS_UNUSED)
2402{
2403 return EFBIG; /* meter_id out of range */
2404}
2405
2406static int
2407dpif_netlink_meter_get(const struct dpif *dpif OVS_UNUSED,
2408 ofproto_meter_id meter_id OVS_UNUSED,
2409 struct ofputil_meter_stats *stats OVS_UNUSED,
2410 uint16_t n_bands OVS_UNUSED)
2411{
2412 return EFBIG; /* meter_id out of range */
2413}
2414
2415static int
2416dpif_netlink_meter_del(struct dpif *dpif OVS_UNUSED,
2417 ofproto_meter_id meter_id OVS_UNUSED,
2418 struct ofputil_meter_stats *stats OVS_UNUSED,
2419 uint16_t n_bands OVS_UNUSED)
2420{
2421 return EFBIG; /* meter_id out of range */
2422}
2423
2424\f
93451a0a 2425const struct dpif_class dpif_netlink_class = {
1a6f1e2a 2426 "system",
c8973eb6 2427 NULL, /* init */
93451a0a 2428 dpif_netlink_enumerate,
0aeaabc8 2429 NULL,
93451a0a
AS
2430 dpif_netlink_open,
2431 dpif_netlink_close,
2432 dpif_netlink_destroy,
2433 dpif_netlink_run,
e4516b20 2434 NULL, /* wait */
93451a0a
AS
2435 dpif_netlink_get_stats,
2436 dpif_netlink_port_add,
2437 dpif_netlink_port_del,
91364d18 2438 NULL, /* port_set_config */
93451a0a
AS
2439 dpif_netlink_port_query_by_number,
2440 dpif_netlink_port_query_by_name,
2441 dpif_netlink_port_get_pid,
2442 dpif_netlink_port_dump_start,
2443 dpif_netlink_port_dump_next,
2444 dpif_netlink_port_dump_done,
2445 dpif_netlink_port_poll,
2446 dpif_netlink_port_poll_wait,
2447 dpif_netlink_flow_flush,
2448 dpif_netlink_flow_dump_create,
2449 dpif_netlink_flow_dump_destroy,
2450 dpif_netlink_flow_dump_thread_create,
2451 dpif_netlink_flow_dump_thread_destroy,
2452 dpif_netlink_flow_dump_next,
2453 dpif_netlink_operate,
2454 dpif_netlink_recv_set,
2455 dpif_netlink_handlers_set,
d4f6865c 2456 NULL, /* set_config */
93451a0a
AS
2457 dpif_netlink_queue_to_priority,
2458 dpif_netlink_recv,
2459 dpif_netlink_recv_wait,
2460 dpif_netlink_recv_purge,
e4e74c3a 2461 NULL, /* register_dp_purge_cb */
6b31e073
RW
2462 NULL, /* register_upcall_cb */
2463 NULL, /* enable_upcall */
2464 NULL, /* disable_upcall */
b5cbbcf6 2465 dpif_netlink_get_datapath_version, /* get_datapath_version */
c11c9f4a
DDP
2466 dpif_netlink_ct_dump_start,
2467 dpif_netlink_ct_dump_next,
2468 dpif_netlink_ct_dump_done,
5dddf960
JR
2469 dpif_netlink_ct_flush,
2470 dpif_netlink_meter_get_features,
2471 dpif_netlink_meter_set,
2472 dpif_netlink_meter_get,
2473 dpif_netlink_meter_del,
96fba48f 2474};
93451a0a 2475
96fba48f 2476static int
93451a0a 2477dpif_netlink_init(void)
96fba48f 2478{
eb8ed438
BP
2479 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
2480 static int error;
982b8810 2481
eb8ed438 2482 if (ovsthread_once_start(&once)) {
df2c07f4
JP
2483 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
2484 &ovs_datapath_family);
37a1300c 2485 if (error) {
cae7529c
CL
2486 VLOG_WARN("Generic Netlink family '%s' does not exist. "
2487 "The Open vSwitch kernel module is probably not loaded.",
2488 OVS_DATAPATH_FAMILY);
37a1300c 2489 }
f0fef760 2490 if (!error) {
df2c07f4 2491 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
f0fef760 2492 }
37a1300c 2493 if (!error) {
df2c07f4 2494 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
37a1300c 2495 }
aaff4b55 2496 if (!error) {
df2c07f4
JP
2497 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
2498 &ovs_packet_family);
aaff4b55 2499 }
c7178a0b
EJ
2500 if (!error) {
2501 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
b3dcb73c 2502 &ovs_vport_mcgroup);
c7178a0b 2503 }
eb8ed438
BP
2504
2505 ovsthread_once_done(&once);
982b8810
BP
2506 }
2507
2508 return error;
96fba48f
BP
2509}
2510
c19e6535 2511bool
93451a0a 2512dpif_netlink_is_internal_device(const char *name)
9fe3b9a2 2513{
93451a0a 2514 struct dpif_netlink_vport reply;
c19e6535 2515 struct ofpbuf *buf;
9fe3b9a2 2516 int error;
96fba48f 2517
93451a0a 2518 error = dpif_netlink_vport_get(name, &reply, &buf);
c19e6535
BP
2519 if (!error) {
2520 ofpbuf_delete(buf);
141d9ce4 2521 } else if (error != ENODEV && error != ENOENT) {
c19e6535 2522 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
10a89ef0 2523 name, ovs_strerror(error));
96fba48f
BP
2524 }
2525
df2c07f4 2526 return reply.type == OVS_VPORT_TYPE_INTERNAL;
96fba48f 2527}
e0467f6d 2528
df2c07f4 2529/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
c19e6535
BP
2530 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
2531 * positive errno value.
2532 *
2533 * 'vport' will contain pointers into 'buf', so the caller should not free
2534 * 'buf' while 'vport' is still in use. */
2535static int
93451a0a 2536dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
c19e6535
BP
2537 const struct ofpbuf *buf)
2538{
df2c07f4
JP
2539 static const struct nl_policy ovs_vport_policy[] = {
2540 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
2541 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
2542 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1579cf67 2543 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
f7df9823 2544 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
c19e6535 2545 .optional = true },
df2c07f4 2546 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
c19e6535
BP
2547 };
2548
93451a0a 2549 dpif_netlink_vport_init(vport);
c19e6535 2550
0a2869d5
BP
2551 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2552 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2553 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2554 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2555
2556 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
df2c07f4
JP
2557 if (!nlmsg || !genl || !ovs_header
2558 || nlmsg->nlmsg_type != ovs_vport_family
2559 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
2560 ARRAY_SIZE(ovs_vport_policy))) {
c19e6535
BP
2561 return EINVAL;
2562 }
c19e6535 2563
f0fef760 2564 vport->cmd = genl->cmd;
df2c07f4 2565 vport->dp_ifindex = ovs_header->dp_ifindex;
4e022ec0 2566 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
df2c07f4
JP
2567 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2568 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
b063d9f0 2569 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
1579cf67
AW
2570 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
2571 / (sizeof *vport->upcall_pids);
2572 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
2573
b063d9f0 2574 }
df2c07f4
JP
2575 if (a[OVS_VPORT_ATTR_STATS]) {
2576 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
2577 }
df2c07f4
JP
2578 if (a[OVS_VPORT_ATTR_OPTIONS]) {
2579 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
2580 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
c19e6535 2581 }
c19e6535
BP
2582 return 0;
2583}
2584
df2c07f4 2585/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
c19e6535
BP
2586 * followed by Netlink attributes corresponding to 'vport'. */
2587static void
93451a0a
AS
2588dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
2589 struct ofpbuf *buf)
c19e6535 2590{
df2c07f4 2591 struct ovs_header *ovs_header;
f0fef760 2592
df2c07f4 2593 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
69685a88 2594 vport->cmd, OVS_VPORT_VERSION);
c19e6535 2595
df2c07f4
JP
2596 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2597 ovs_header->dp_ifindex = vport->dp_ifindex;
c19e6535 2598
4e022ec0
AW
2599 if (vport->port_no != ODPP_NONE) {
2600 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
c19e6535
BP
2601 }
2602
df2c07f4
JP
2603 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
2604 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
c19e6535
BP
2605 }
2606
2607 if (vport->name) {
df2c07f4 2608 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
c19e6535
BP
2609 }
2610
1579cf67
AW
2611 if (vport->upcall_pids) {
2612 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
2613 vport->upcall_pids,
2614 vport->n_upcall_pids * sizeof *vport->upcall_pids);
a24a6574 2615 }
b063d9f0 2616
c19e6535 2617 if (vport->stats) {
df2c07f4 2618 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
c19e6535
BP
2619 vport->stats, sizeof *vport->stats);
2620 }
2621
c19e6535 2622 if (vport->options) {
df2c07f4 2623 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
c19e6535
BP
2624 vport->options, vport->options_len);
2625 }
c19e6535
BP
2626}
2627
2628/* Clears 'vport' to "empty" values. */
2629void
93451a0a 2630dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
c19e6535
BP
2631{
2632 memset(vport, 0, sizeof *vport);
4e022ec0 2633 vport->port_no = ODPP_NONE;
c19e6535
BP
2634}
2635
2636/* Executes 'request' in the kernel datapath. If the command fails, returns a
2637 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2638 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
df2c07f4 2639 * result of the command is expected to be an ovs_vport also, which is decoded
c19e6535
BP
2640 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2641 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2642int
93451a0a
AS
2643dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
2644 struct dpif_netlink_vport *reply,
2645 struct ofpbuf **bufp)
c19e6535 2646{
f0fef760 2647 struct ofpbuf *request_buf;
c19e6535
BP
2648 int error;
2649
cb22974d 2650 ovs_assert((reply != NULL) == (bufp != NULL));
c19e6535 2651
93451a0a 2652 error = dpif_netlink_init();
42bb6c72
BP
2653 if (error) {
2654 if (reply) {
2655 *bufp = NULL;
93451a0a 2656 dpif_netlink_vport_init(reply);
42bb6c72
BP
2657 }
2658 return error;
2659 }
2660
f0fef760 2661 request_buf = ofpbuf_new(1024);
93451a0a 2662 dpif_netlink_vport_to_ofpbuf(request, request_buf);
a88b4e04 2663 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
f0fef760 2664 ofpbuf_delete(request_buf);
c19e6535 2665
f0fef760
BP
2666 if (reply) {
2667 if (!error) {
93451a0a 2668 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
f0fef760 2669 }
c19e6535 2670 if (error) {
93451a0a 2671 dpif_netlink_vport_init(reply);
f0fef760
BP
2672 ofpbuf_delete(*bufp);
2673 *bufp = NULL;
c19e6535 2674 }
c19e6535
BP
2675 }
2676 return error;
2677}
2678
2679/* Obtains information about the kernel vport named 'name' and stores it into
2680 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
2681 * longer needed ('reply' will contain pointers into '*bufp'). */
2682int
93451a0a
AS
2683dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
2684 struct ofpbuf **bufp)
c19e6535 2685{
93451a0a 2686 struct dpif_netlink_vport request;
c19e6535 2687
93451a0a 2688 dpif_netlink_vport_init(&request);
df2c07f4 2689 request.cmd = OVS_VPORT_CMD_GET;
c19e6535
BP
2690 request.name = name;
2691
93451a0a 2692 return dpif_netlink_vport_transact(&request, reply, bufp);
c19e6535 2693}
93451a0a 2694
df2c07f4 2695/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
aaff4b55
BP
2696 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
2697 * positive errno value.
d6569377
BP
2698 *
2699 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
2700 * while 'dp' is still in use. */
2701static int
93451a0a 2702dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
d6569377 2703{
df2c07f4
JP
2704 static const struct nl_policy ovs_datapath_policy[] = {
2705 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
f7df9823 2706 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
d6569377 2707 .optional = true },
847108dc
AZ
2708 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
2709 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
2710 .optional = true },
d6569377
BP
2711 };
2712
93451a0a 2713 dpif_netlink_dp_init(dp);
d6569377 2714
0a2869d5
BP
2715 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2716 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2717 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2718 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2719
2720 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
df2c07f4
JP
2721 if (!nlmsg || !genl || !ovs_header
2722 || nlmsg->nlmsg_type != ovs_datapath_family
2723 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
2724 ARRAY_SIZE(ovs_datapath_policy))) {
d6569377
BP
2725 return EINVAL;
2726 }
d6569377 2727
aaff4b55 2728 dp->cmd = genl->cmd;
df2c07f4
JP
2729 dp->dp_ifindex = ovs_header->dp_ifindex;
2730 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
2731 if (a[OVS_DP_ATTR_STATS]) {
6a54dedc 2732 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
d6569377 2733 }
982b8810 2734
847108dc 2735 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
6a54dedc 2736 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
847108dc
AZ
2737 }
2738
d6569377
BP
2739 return 0;
2740}
2741
aaff4b55 2742/* Appends to 'buf' the Generic Netlink message described by 'dp'. */
d6569377 2743static void
93451a0a 2744dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
d6569377 2745{
df2c07f4 2746 struct ovs_header *ovs_header;
d6569377 2747
df2c07f4 2748 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
69685a88
JG
2749 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
2750 OVS_DATAPATH_VERSION);
aaff4b55 2751
df2c07f4
JP
2752 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2753 ovs_header->dp_ifindex = dp->dp_ifindex;
d6569377
BP
2754
2755 if (dp->name) {
df2c07f4 2756 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
d6569377
BP
2757 }
2758
a24a6574
BP
2759 if (dp->upcall_pid) {
2760 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
2761 }
b063d9f0 2762
b7fd5e38
TG
2763 if (dp->user_features) {
2764 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
2765 }
2766
df2c07f4 2767 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
d6569377
BP
2768}
2769
2770/* Clears 'dp' to "empty" values. */
d3d8f1f7 2771static void
93451a0a 2772dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
d6569377
BP
2773{
2774 memset(dp, 0, sizeof *dp);
d6569377
BP
2775}
2776
aaff4b55 2777static void
93451a0a 2778dpif_netlink_dp_dump_start(struct nl_dump *dump)
aaff4b55 2779{
93451a0a 2780 struct dpif_netlink_dp request;
aaff4b55
BP
2781 struct ofpbuf *buf;
2782
93451a0a 2783 dpif_netlink_dp_init(&request);
df2c07f4 2784 request.cmd = OVS_DP_CMD_GET;
aaff4b55
BP
2785
2786 buf = ofpbuf_new(1024);
93451a0a 2787 dpif_netlink_dp_to_ofpbuf(&request, buf);
a88b4e04 2788 nl_dump_start(dump, NETLINK_GENERIC, buf);
aaff4b55
BP
2789 ofpbuf_delete(buf);
2790}
2791
d6569377
BP
2792/* Executes 'request' in the kernel datapath. If the command fails, returns a
2793 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2794 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
aaff4b55
BP
2795 * result of the command is expected to be of the same form, which is decoded
2796 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2797 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 2798static int
93451a0a
AS
2799dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
2800 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
d6569377 2801{
aaff4b55 2802 struct ofpbuf *request_buf;
d6569377 2803 int error;
d6569377 2804
cb22974d 2805 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 2806
aaff4b55 2807 request_buf = ofpbuf_new(1024);
93451a0a 2808 dpif_netlink_dp_to_ofpbuf(request, request_buf);
a88b4e04 2809 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
aaff4b55 2810 ofpbuf_delete(request_buf);
d6569377 2811
aaff4b55 2812 if (reply) {
93451a0a 2813 dpif_netlink_dp_init(reply);
aaff4b55 2814 if (!error) {
93451a0a 2815 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
aaff4b55 2816 }
d6569377 2817 if (error) {
aaff4b55
BP
2818 ofpbuf_delete(*bufp);
2819 *bufp = NULL;
d6569377 2820 }
d6569377
BP
2821 }
2822 return error;
2823}
2824
2825/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
2826 * The caller must free '*bufp' when the reply is no longer needed ('reply'
2827 * will contain pointers into '*bufp'). */
d3d8f1f7 2828static int
93451a0a
AS
2829dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
2830 struct ofpbuf **bufp)
d6569377 2831{
93451a0a
AS
2832 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2833 struct dpif_netlink_dp request;
d6569377 2834
93451a0a 2835 dpif_netlink_dp_init(&request);
df2c07f4 2836 request.cmd = OVS_DP_CMD_GET;
254f2dc8 2837 request.dp_ifindex = dpif->dp_ifindex;
d6569377 2838
93451a0a 2839 return dpif_netlink_dp_transact(&request, reply, bufp);
d6569377 2840}
93451a0a 2841
df2c07f4 2842/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
37a1300c 2843 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
d6569377
BP
2844 * positive errno value.
2845 *
2846 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
2847 * while 'flow' is still in use. */
2848static int
93451a0a
AS
2849dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
2850 const struct ofpbuf *buf)
d6569377 2851{
70e5ed6f
JS
2852 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
2853 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
e6cc0bab 2854 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
df2c07f4 2855 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
f7df9823 2856 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
d6569377 2857 .optional = true },
df2c07f4
JP
2858 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
2859 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
70e5ed6f
JS
2860 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_UNSPEC, .optional = true,
2861 .min_len = sizeof(ovs_u128) },
df2c07f4 2862 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
43f9ac0a 2863 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
70e5ed6f 2864 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
d6569377
BP
2865 };
2866
93451a0a 2867 dpif_netlink_flow_init(flow);
d6569377 2868
0a2869d5
BP
2869 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2870 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2871 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2872 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2873
2874 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
df2c07f4
JP
2875 if (!nlmsg || !genl || !ovs_header
2876 || nlmsg->nlmsg_type != ovs_flow_family
2877 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
2878 ARRAY_SIZE(ovs_flow_policy))) {
d6569377
BP
2879 return EINVAL;
2880 }
70e5ed6f
JS
2881 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
2882 return EINVAL;
2883 }
d6569377 2884
37a1300c 2885 flow->nlmsg_flags = nlmsg->nlmsg_flags;
df2c07f4 2886 flow->dp_ifindex = ovs_header->dp_ifindex;
70e5ed6f
JS
2887 if (a[OVS_FLOW_ATTR_KEY]) {
2888 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
2889 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
2890 }
e6cc0bab 2891
70e5ed6f
JS
2892 if (a[OVS_FLOW_ATTR_UFID]) {
2893 const ovs_u128 *ufid;
2894
2895 ufid = nl_attr_get_unspec(a[OVS_FLOW_ATTR_UFID],
2896 nl_attr_get_size(a[OVS_FLOW_ATTR_UFID]));
2897 flow->ufid = *ufid;
2898 flow->ufid_present = true;
2899 }
e6cc0bab
AZ
2900 if (a[OVS_FLOW_ATTR_MASK]) {
2901 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
2902 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
2903 }
df2c07f4
JP
2904 if (a[OVS_FLOW_ATTR_ACTIONS]) {
2905 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
2906 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
d6569377 2907 }
df2c07f4
JP
2908 if (a[OVS_FLOW_ATTR_STATS]) {
2909 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
d6569377 2910 }
df2c07f4
JP
2911 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
2912 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
d6569377 2913 }
df2c07f4
JP
2914 if (a[OVS_FLOW_ATTR_USED]) {
2915 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
9e980142 2916 }
d6569377
BP
2917 return 0;
2918}
2919
df2c07f4 2920/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
d6569377
BP
2921 * followed by Netlink attributes corresponding to 'flow'. */
2922static void
93451a0a
AS
2923dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
2924 struct ofpbuf *buf)
d6569377 2925{
df2c07f4 2926 struct ovs_header *ovs_header;
d6569377 2927
df2c07f4 2928 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
30b44744 2929 NLM_F_REQUEST | flow->nlmsg_flags,
69685a88 2930 flow->cmd, OVS_FLOW_VERSION);
37a1300c 2931
df2c07f4
JP
2932 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2933 ovs_header->dp_ifindex = flow->dp_ifindex;
d6569377 2934
70e5ed6f
JS
2935 if (flow->ufid_present) {
2936 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_UFID, &flow->ufid,
2937 sizeof flow->ufid);
2938 }
2939 if (flow->ufid_terse) {
2940 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
2941 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
2942 | OVS_UFID_F_OMIT_ACTIONS);
2943 }
64bb477f
JS
2944 if (!flow->ufid_terse || !flow->ufid_present) {
2945 if (flow->key_len) {
2946 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY,
2947 flow->key, flow->key_len);
2948 }
e6cc0bab 2949
64bb477f
JS
2950 if (flow->mask_len) {
2951 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK,
2952 flow->mask, flow->mask_len);
2953 }
2954 if (flow->actions || flow->actions_len) {
2955 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
2956 flow->actions, flow->actions_len);
2957 }
d6569377
BP
2958 }
2959
2960 /* We never need to send these to the kernel. */
cb22974d
BP
2961 ovs_assert(!flow->stats);
2962 ovs_assert(!flow->tcp_flags);
2963 ovs_assert(!flow->used);
d6569377
BP
2964
2965 if (flow->clear) {
df2c07f4 2966 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
d6569377 2967 }
43f9ac0a
JR
2968 if (flow->probe) {
2969 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
2970 }
d6569377
BP
2971}
2972
2973/* Clears 'flow' to "empty" values. */
d3d8f1f7 2974static void
93451a0a 2975dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
d6569377
BP
2976{
2977 memset(flow, 0, sizeof *flow);
2978}
2979
2980/* Executes 'request' in the kernel datapath. If the command fails, returns a
2981 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2982 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
37a1300c
BP
2983 * result of the command is expected to be a flow also, which is decoded and
2984 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
2985 * is no longer needed ('reply' will contain pointers into '*bufp'). */
d3d8f1f7 2986static int
93451a0a
AS
2987dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
2988 struct dpif_netlink_flow *reply,
2989 struct ofpbuf **bufp)
d6569377 2990{
37a1300c 2991 struct ofpbuf *request_buf;
d6569377 2992 int error;
d6569377 2993
cb22974d 2994 ovs_assert((reply != NULL) == (bufp != NULL));
d6569377 2995
30b44744
BP
2996 if (reply) {
2997 request->nlmsg_flags |= NLM_F_ECHO;
2998 }
2999
37a1300c 3000 request_buf = ofpbuf_new(1024);
93451a0a 3001 dpif_netlink_flow_to_ofpbuf(request, request_buf);
a88b4e04 3002 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
37a1300c 3003 ofpbuf_delete(request_buf);
d6569377 3004
37a1300c
BP
3005 if (reply) {
3006 if (!error) {
93451a0a 3007 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
37a1300c 3008 }
d6569377 3009 if (error) {
93451a0a 3010 dpif_netlink_flow_init(reply);
37a1300c
BP
3011 ofpbuf_delete(*bufp);
3012 *bufp = NULL;
d6569377 3013 }
d6569377
BP
3014 }
3015 return error;
3016}
3017
3018static void
93451a0a
AS
3019dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
3020 struct dpif_flow_stats *stats)
d6569377
BP
3021{
3022 if (flow->stats) {
6a54dedc
BP
3023 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
3024 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
d6569377
BP
3025 } else {
3026 stats->n_packets = 0;
3027 stats->n_bytes = 0;
3028 }
0e70cdcb 3029 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
d6569377
BP
3030 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
3031}
e0467f6d 3032
14b4d2f9
BP
3033/* Logs information about a packet that was recently lost in 'ch' (in
3034 * 'dpif_'). */
3035static void
93451a0a 3036report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
1579cf67 3037 uint32_t handler_id)
14b4d2f9 3038{
14b4d2f9 3039 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
14b4d2f9
BP
3040 struct ds s;
3041
8d675c5a 3042 if (VLOG_DROP_WARN(&rl)) {
14b4d2f9
BP
3043 return;
3044 }
3045
3046 ds_init(&s);
3047 if (ch->last_poll != LLONG_MIN) {
3048 ds_put_format(&s, " (last polled %lld ms ago)",
3049 time_msec() - ch->last_poll);
3050 }
14b4d2f9 3051
1579cf67 3052 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
9b00386b 3053 dpif_name(&dpif->dpif), ch_idx, handler_id);
14b4d2f9
BP
3054 ds_destroy(&s);
3055}