]> git.proxmox.com Git - ovs.git/blob - lib/dpif-netlink.c
bb9e95df7ad3f9b0410fecf64366f4a871b8fa65
[ovs.git] / lib / dpif-netlink.c
1 /*
2 * Copyright (c) 2008-2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "dpif-netlink.h"
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <net/if.h>
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
28 #include <poll.h>
29 #include <stdlib.h>
30 #include <strings.h>
31 #include <sys/epoll.h>
32 #include <sys/stat.h>
33 #include <unistd.h>
34
35 #include "bitmap.h"
36 #include "dpif-netlink-rtnl.h"
37 #include "dpif-provider.h"
38 #include "fat-rwlock.h"
39 #include "flow.h"
40 #include "netdev-linux.h"
41 #include "netdev-provider.h"
42 #include "netdev-vport.h"
43 #include "netdev.h"
44 #include "netlink-conntrack.h"
45 #include "netlink-notifier.h"
46 #include "netlink-socket.h"
47 #include "netlink.h"
48 #include "netnsid.h"
49 #include "odp-util.h"
50 #include "openvswitch/dynamic-string.h"
51 #include "openvswitch/flow.h"
52 #include "openvswitch/match.h"
53 #include "openvswitch/ofpbuf.h"
54 #include "openvswitch/poll-loop.h"
55 #include "openvswitch/shash.h"
56 #include "openvswitch/vlog.h"
57 #include "packets.h"
58 #include "random.h"
59 #include "sset.h"
60 #include "timeval.h"
61 #include "unaligned.h"
62 #include "util.h"
63
64 VLOG_DEFINE_THIS_MODULE(dpif_netlink);
65 #ifdef _WIN32
66 #include "wmi.h"
67 enum { WINDOWS = 1 };
68 #else
69 enum { WINDOWS = 0 };
70 #endif
71 enum { MAX_PORTS = USHRT_MAX };
72
73 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
74 * missing if we have old headers. */
75 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
76
77 #define FLOW_DUMP_MAX_BATCH 50
78 #define OPERATE_MAX_OPS 50
79
/* Parsed form of an OVS_DP_* Generic Netlink message describing one kernel
 * datapath.  Pointer members point into a received ofpbuf and are not owned
 * by this struct; presumably they are NULL when the corresponding attribute
 * was absent — confirm in dpif_netlink_dp_from_ofpbuf(). */
struct dpif_netlink_dp {
    /* Generic Netlink header. */
    uint8_t cmd;                        /* One of OVS_DP_CMD_*. */

    /* struct ovs_header. */
    int dp_ifindex;                     /* Kernel ifindex of the datapath. */

    /* Attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
};
95
96 static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
97 static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
98 const struct ofpbuf *);
99 static void dpif_netlink_dp_dump_start(struct nl_dump *);
100 static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
101 struct dpif_netlink_dp *reply,
102 struct ofpbuf **bufp);
103 static int dpif_netlink_dp_get(const struct dpif *,
104 struct dpif_netlink_dp *reply,
105 struct ofpbuf **bufp);
106
/* Parsed form of an OVS_FLOW_* Generic Netlink message describing one
 * datapath flow.  Pointer members point into a received ofpbuf (or into
 * caller-provided data for requests) and are not owned by this struct. */
struct dpif_netlink_flow {
    /* Generic Netlink header. */
    uint8_t cmd;                        /* One of OVS_FLOW_CMD_*. */

    /* struct ovs_header. */
    unsigned int nlmsg_flags;           /* e.g. NLM_F_* flags for requests. */
    int dp_ifindex;                     /* Datapath the flow belongs to. */

    /* Attributes.
     *
     * The 'stats' member points to 64-bit data that might only be aligned on
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
     * values.
     *
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
     * the Netlink version of the command, even if actions_len is zero. */
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
    size_t mask_len;
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
    bool ufid_present;                  /* Is there a UFID? */
    bool ufid_terse;                    /* Skip serializing key/mask/acts? */
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
    bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
};
138
139 static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
140 static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
141 const struct ofpbuf *);
142 static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
143 struct ofpbuf *);
144 static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
145 struct dpif_netlink_flow *reply,
146 struct ofpbuf **bufp);
147 static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
148 struct dpif_flow_stats *);
149 static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
150 const struct dpif_netlink_flow *);
151
/* One of the dpif channels between the kernel and userspace.  Each channel
 * carries upcalls for one datapath port to one handler thread. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
157
#ifdef _WIN32
#define VPORT_SOCK_POOL_SIZE 1
/* On Windows, there is no native support for epoll. There are equivalent
 * interfaces though, that are not used currently. For simplicity, a pool of
 * netlink sockets is used. Each socket is represented by 'struct
 * dpif_windows_vport_sock'. Since it is a pool, multiple OVS ports may be
 * sharing the same socket. In the future, we can add a reference count and
 * such fields. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* netlink socket. */
};
#endif
170
/* Per-thread upcall receiver state.  'channels' and 'epoll_events' are
 * arrays indexed by datapath port number; their common size is
 * 'dpif_netlink.uc_array_size'. */
struct dpif_handler {
    struct dpif_channel *channels;/* Array of channels for each handler. */
    struct epoll_event *epoll_events;
    int epoll_fd;                 /* epoll fd that includes channel socks. */
    int n_events;                 /* Num events returned by epoll_wait(). */
    int event_offset;             /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx; /* Index to aid in allocating a
                                  socket in the pool to a port. */
#endif
};
185
/* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_netlink {
    struct dpif dpif;               /* Base class; must be first member. */
    int dp_ifindex;                 /* Kernel ifindex of the datapath. */

    /* Upcall messages.  'upcall_lock' protects 'handlers', 'n_handlers'
     * and 'uc_array_size' (see the OVS_REQ_*LOCK annotations below). */
    struct fat_rwlock upcall_lock;
    struct dpif_handler *handlers;
    uint32_t n_handlers;            /* Num of upcall handlers. */
    int uc_array_size;              /* Size of 'handler->channels' and */
                                    /* 'handler->epoll_events'. */

    /* Change notification. */
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
    bool refresh_channels;         /* Re-create channels on next run()? */
};
202
203 static void report_loss(struct dpif_netlink *, struct dpif_channel *,
204 uint32_t ch_idx, uint32_t handler_id);
205
206 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
207
208 /* Generic Netlink family numbers for OVS.
209 *
210 * Initialized by dpif_netlink_init(). */
211 static int ovs_datapath_family;
212 static int ovs_vport_family;
213 static int ovs_flow_family;
214 static int ovs_packet_family;
215
216 /* Generic Netlink multicast groups for OVS.
217 *
218 * Initialized by dpif_netlink_init(). */
219 static unsigned int ovs_vport_mcgroup;
220
221 /* If true, tunnel devices are created using OVS compat/genetlink.
222 * If false, tunnel devices are created with rtnetlink and using light weight
223 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
224 * to using the compat interface. */
225 static bool ovs_tunnels_out_of_tree = true;
226
227 static int dpif_netlink_init(void);
228 static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
229 static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
230 odp_port_t port_no, uint32_t hash);
231 static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
232 static int dpif_netlink_refresh_channels(struct dpif_netlink *,
233 uint32_t n_handlers);
234 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
235 struct ofpbuf *);
236 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
237 const struct ofpbuf *);
238 static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
239 odp_port_t port_no, const char *port_name,
240 struct dpif_port *dpif_port);
241
/* Casts a generic 'dpif' to its dpif-netlink representation, asserting that
 * it really belongs to dpif_netlink_class. */
static struct dpif_netlink *
dpif_netlink_cast(const struct dpif *dpif)
{
    dpif_assert_class(dpif, &dpif_netlink_class);
    return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
}
248
249 static int
250 dpif_netlink_enumerate(struct sset *all_dps,
251 const struct dpif_class *dpif_class OVS_UNUSED)
252 {
253 struct nl_dump dump;
254 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
255 struct ofpbuf msg, buf;
256 int error;
257
258 error = dpif_netlink_init();
259 if (error) {
260 return error;
261 }
262
263 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
264 dpif_netlink_dp_dump_start(&dump);
265 while (nl_dump_next(&dump, &msg, &buf)) {
266 struct dpif_netlink_dp dp;
267
268 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
269 sset_add(all_dps, dp.name);
270 }
271 }
272 ofpbuf_uninit(&buf);
273 return nl_dump_done(&dump);
274 }
275
276 static int
277 dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
278 bool create, struct dpif **dpifp)
279 {
280 struct dpif_netlink_dp dp_request, dp;
281 struct ofpbuf *buf;
282 uint32_t upcall_pid;
283 int error;
284
285 error = dpif_netlink_init();
286 if (error) {
287 return error;
288 }
289
290 /* Create or look up datapath. */
291 dpif_netlink_dp_init(&dp_request);
292 if (create) {
293 dp_request.cmd = OVS_DP_CMD_NEW;
294 upcall_pid = 0;
295 dp_request.upcall_pid = &upcall_pid;
296 } else {
297 /* Use OVS_DP_CMD_SET to report user features */
298 dp_request.cmd = OVS_DP_CMD_SET;
299 }
300 dp_request.name = name;
301 dp_request.user_features |= OVS_DP_F_UNALIGNED;
302 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
303 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
304 if (error) {
305 return error;
306 }
307
308 error = open_dpif(&dp, dpifp);
309 ofpbuf_delete(buf);
310 return error;
311 }
312
313 static int
314 open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
315 {
316 struct dpif_netlink *dpif;
317
318 dpif = xzalloc(sizeof *dpif);
319 dpif->port_notifier = NULL;
320 fat_rwlock_init(&dpif->upcall_lock);
321
322 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
323 dp->dp_ifindex, dp->dp_ifindex);
324
325 dpif->dp_ifindex = dp->dp_ifindex;
326 *dpifp = &dpif->dpif;
327
328 return 0;
329 }
330
/* Destroys each of the 'n_socks' netlink sockets in 'socksp' and then frees
 * the array itself. */
static void
vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
{
    size_t idx;

    for (idx = 0; idx < n_socks; idx++) {
        nl_sock_destroy(socksp[idx]);
    }
    free(socksp);
}
344
345 /* Creates an array of netlink sockets. Returns an array of the
346 * corresponding pointers. Records the error in 'error'. */
347 static struct nl_sock **
348 vport_create_socksp__(uint32_t n_socks, int *error)
349 {
350 struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
351 size_t i;
352
353 for (i = 0; i < n_socks; i++) {
354 *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
355 if (*error) {
356 goto error;
357 }
358 }
359
360 return socksp;
361
362 error:
363 vport_del_socksp__(socksp, n_socks);
364
365 return NULL;
366 }
367
368 #ifdef _WIN32
/* Windows only: unsubscribes and destroys every socket in 'handler''s vport
 * socket pool, then frees and clears the pool itself.  No-op if the pool
 * was never allocated. */
static void
vport_delete_sock_pool(struct dpif_handler *handler)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    if (handler->vport_sock_pool) {
        uint32_t i;
        struct dpif_windows_vport_sock *sock_pool =
            handler->vport_sock_pool;

        for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
            if (sock_pool[i].nl_sock) {
                nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
                nl_sock_destroy(sock_pool[i].nl_sock);
                sock_pool[i].nl_sock = NULL;
            }
        }

        free(handler->vport_sock_pool);
        handler->vport_sock_pool = NULL;
    }
}
390
/* Windows only: allocates 'handler''s pool of VPORT_SOCK_POOL_SIZE netlink
 * sockets, each subscribed to receive packets.  Returns 0 on success; on
 * failure tears down any partially created pool and returns a positive
 * errno value. */
static int
vport_create_sock_pool(struct dpif_handler *handler)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_windows_vport_sock *sock_pool;
    size_t i;
    int error = 0;

    sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
        error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
        if (error) {
            goto error;
        }

        /* Enable the netlink socket to receive packets. This is equivalent to
         * calling nl_sock_join_mcgroup() to receive events. */
        error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
        if (error) {
            goto error;
        }
    }

    handler->vport_sock_pool = sock_pool;
    handler->last_used_pool_idx = 0;
    return 0;

error:
    /* Assign the partial pool so vport_delete_sock_pool() can find it. */
    vport_delete_sock_pool(handler);
    return error;
}
422
423 /* Returns an array pointers to netlink sockets. The sockets are picked from a
424 * pool. Records the error in 'error'. */
425 static struct nl_sock **
426 vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
427 OVS_REQ_WRLOCK(dpif->upcall_lock)
428 {
429 uint32_t n_socks = dpif->n_handlers;
430 struct nl_sock **socksp;
431 size_t i;
432
433 ovs_assert(n_socks <= 1);
434 socksp = xzalloc(n_socks * sizeof *socksp);
435
436 /* Pick netlink sockets to use in a round-robin fashion from each
437 * handler's pool of sockets. */
438 for (i = 0; i < n_socks; i++) {
439 struct dpif_handler *handler = &dpif->handlers[i];
440 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
441 size_t index = handler->last_used_pool_idx;
442
443 /* A pool of sockets is allocated when the handler is initialized. */
444 if (sock_pool == NULL) {
445 free(socksp);
446 *error = EINVAL;
447 return NULL;
448 }
449
450 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
451 socksp[i] = sock_pool[index].nl_sock;
452 socksp[i] = sock_pool[index].nl_sock;
453 ovs_assert(socksp[i]);
454 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
455 handler->last_used_pool_idx = index;
456 }
457
458 return socksp;
459 }
460
/* Windows only: releases a socket array obtained from
 * vport_create_socksp_windows().  The sockets themselves belong to the
 * handlers' pools, so only the array is freed here. */
static void
vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
    free(socksp);
}
466 #endif /* _WIN32 */
467
/* Creates one netlink socket per handler (drawn from the per-handler pools
 * on Windows) and returns the array, or NULL on failure with '*error' set
 * to a positive errno value.  Release with vport_del_socksp(). */
static struct nl_sock **
vport_create_socksp(struct dpif_netlink *dpif, int *error)
{
#ifdef _WIN32
    return vport_create_socksp_windows(dpif, error);
#else
    return vport_create_socksp__(dpif->n_handlers, error);
#endif
}
477
/* Releases a socket array obtained from vport_create_socksp().  On Linux
 * this destroys the sockets as well; on Windows the sockets belong to the
 * shared pools and only the array is freed. */
static void
vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
#ifdef _WIN32
    vport_del_socksp_windows(dpif, socksp);
#else
    vport_del_socksp__(socksp, dpif->n_handlers);
#endif
}
487
488 /* Given the array of pointers to netlink sockets 'socksp', returns
489 * the array of corresponding pids. If the 'socksp' is NULL, returns
490 * a single-element array of value 0. */
491 static uint32_t *
492 vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
493 {
494 uint32_t *pids;
495
496 if (!socksp) {
497 pids = xzalloc(sizeof *pids);
498 } else {
499 size_t i;
500
501 pids = xzalloc(n_socks * sizeof *pids);
502 for (i = 0; i < n_socks; i++) {
503 pids[i] = nl_sock_pid(socksp[i]);
504 }
505 }
506
507 return pids;
508 }
509
510 /* Given the port number 'port_idx', extracts the pids of netlink sockets
511 * associated to the port and assigns it to 'upcall_pids'. */
512 static bool
513 vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
514 uint32_t **upcall_pids)
515 {
516 uint32_t *pids;
517 size_t i;
518
519 /* Since the nl_sock can only be assigned in either all
520 * or none "dpif->handlers" channels, the following check
521 * would suffice. */
522 if (!dpif->handlers[0].channels[port_idx].sock) {
523 return false;
524 }
525 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
526
527 pids = xzalloc(dpif->n_handlers * sizeof *pids);
528
529 for (i = 0; i < dpif->n_handlers; i++) {
530 pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
531 }
532
533 *upcall_pids = pids;
534
535 return true;
536 }
537
/* Creates an upcall channel for 'port_no' in every handler, using one
 * socket from 'socksp' per handler, and registers each socket with the
 * handler's epoll set (except on Windows, which has no epoll).  Grows the
 * per-handler 'channels' and 'epoll_events' arrays when 'port_no' is beyond
 * their current size.  Returns 0 on success or a positive errno value on
 * failure; on failure no channel is left registered, but the sockets are
 * not destroyed (the caller retains ownership). */
static int
vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
                   struct nl_sock **socksp)
{
    struct epoll_event event;
    uint32_t port_idx = odp_to_u32(port_no);
    size_t i, j;
    int error;

    if (dpif->handlers == NULL) {
        /* No upcall receivers configured; nothing to wire up. */
        return 0;
    }

    /* We assume that the datapath densely chooses port numbers, which can
     * therefore be used as an index into 'channels' and 'epoll_events' of
     * 'dpif->handler'. */
    if (port_idx >= dpif->uc_array_size) {
        uint32_t new_size = port_idx + 1;

        if (new_size > MAX_PORTS) {
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
                         dpif_name(&dpif->dpif), port_no);
            return EFBIG;
        }

        for (i = 0; i < dpif->n_handlers; i++) {
            struct dpif_handler *handler = &dpif->handlers[i];

            handler->channels = xrealloc(handler->channels,
                                         new_size * sizeof *handler->channels);

            /* NULL out the new slots so "does this port have a channel?"
             * checks work before the assignment below. */
            for (j = dpif->uc_array_size; j < new_size; j++) {
                handler->channels[j].sock = NULL;
            }

            handler->epoll_events = xrealloc(handler->epoll_events,
                new_size * sizeof *handler->epoll_events);

        }
        dpif->uc_array_size = new_size;
    }

    memset(&event, 0, sizeof event);
    event.events = EPOLLIN;
    event.data.u32 = port_idx;  /* Lets epoll_wait() map events to ports. */

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

#ifndef _WIN32
        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
                      &event) < 0) {
            error = errno;
            goto error;
        }
#endif
        dpif->handlers[i].channels[port_idx].sock = socksp[i];
        dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
    }

    return 0;

error:
    /* Unwind: deregister the sockets added before the failure. */
    for (j = 0; j < i; j++) {
#ifndef _WIN32
        epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
                  nl_sock_fd(socksp[j]), NULL);
#endif
        dpif->handlers[j].channels[port_idx].sock = NULL;
    }

    return error;
}
611
/* Removes 'port_no''s upcall channels from every handler: deregisters each
 * socket from the handler's epoll set and destroys it.  (On Windows the
 * sockets belong to the shared pool, so they are neither deregistered nor
 * destroyed here.)  No-op if the port has no channels. */
static void
vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
{
    uint32_t port_idx = odp_to_u32(port_no);
    size_t i;

    if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
        return;
    }

    /* Since the sock can only be assigned in either all or none
     * of "dpif->handlers" channels, the following check would
     * suffice. */
    if (!dpif->handlers[0].channels[port_idx].sock) {
        return;
    }

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];
#ifndef _WIN32
        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
                  nl_sock_fd(handler->channels[port_idx].sock), NULL);
        nl_sock_destroy(handler->channels[port_idx].sock);
#endif
        handler->channels[port_idx].sock = NULL;
        /* Discard pending epoll results, which may reference the socket
         * just destroyed. */
        handler->event_offset = handler->n_events = 0;
    }
}
640
/* Tears down all upcall state: tells the kernel to stop sending upcalls for
 * every channeled port (by setting its upcall pid to 0), deletes every
 * channel, and frees all handler state.  Caller must hold 'upcall_lock' for
 * writing. */
static void
destroy_all_channels(struct dpif_netlink *dpif)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    unsigned int i;

    if (!dpif->handlers) {
        return;
    }

    for (i = 0; i < dpif->uc_array_size; i++ ) {
        struct dpif_netlink_vport vport_request;
        uint32_t upcall_pids = 0;

        /* Since the sock can only be assigned in either all or none
         * of "dpif->handlers" channels, the following check would
         * suffice. */
        if (!dpif->handlers[0].channels[i].sock) {
            continue;
        }

        /* Turn off upcalls. */
        dpif_netlink_vport_init(&vport_request);
        vport_request.cmd = OVS_VPORT_CMD_SET;
        vport_request.dp_ifindex = dpif->dp_ifindex;
        vport_request.port_no = u32_to_odp(i);
        vport_request.n_upcall_pids = 1;
        vport_request.upcall_pids = &upcall_pids;
        /* Best-effort: the reply (and any error) is ignored. */
        dpif_netlink_vport_transact(&vport_request, NULL, NULL);

        vport_del_channels(dpif, u32_to_odp(i));
    }

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

        dpif_netlink_handler_uninit(handler);
        free(handler->epoll_events);
        free(handler->channels);
    }

    free(dpif->handlers);
    dpif->handlers = NULL;
    dpif->n_handlers = 0;
    dpif->uc_array_size = 0;
}
687
/* Implements the dpif_class 'close' callback: tears down upcall channels
 * and the port notifier, then frees the dpif itself.  Does not destroy the
 * kernel datapath (that is dpif_netlink_destroy()). */
static void
dpif_netlink_close(struct dpif *dpif_)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    nl_sock_destroy(dpif->port_notifier);

    /* destroy_all_channels() requires 'upcall_lock' held for writing. */
    fat_rwlock_wrlock(&dpif->upcall_lock);
    destroy_all_channels(dpif);
    fat_rwlock_unlock(&dpif->upcall_lock);

    fat_rwlock_destroy(&dpif->upcall_lock);
    free(dpif);
}
702
703 static int
704 dpif_netlink_destroy(struct dpif *dpif_)
705 {
706 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
707 struct dpif_netlink_dp dp;
708
709 dpif_netlink_dp_init(&dp);
710 dp.cmd = OVS_DP_CMD_DEL;
711 dp.dp_ifindex = dpif->dp_ifindex;
712 return dpif_netlink_dp_transact(&dp, NULL, NULL);
713 }
714
715 static bool
716 dpif_netlink_run(struct dpif *dpif_)
717 {
718 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
719
720 if (dpif->refresh_channels) {
721 dpif->refresh_channels = false;
722 fat_rwlock_wrlock(&dpif->upcall_lock);
723 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
724 fat_rwlock_unlock(&dpif->upcall_lock);
725 }
726 return false;
727 }
728
729 static int
730 dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
731 {
732 struct dpif_netlink_dp dp;
733 struct ofpbuf *buf;
734 int error;
735
736 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
737 if (!error) {
738 memset(stats, 0, sizeof *stats);
739
740 if (dp.stats) {
741 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
742 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
743 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
744 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
745 }
746
747 if (dp.megaflow_stats) {
748 stats->n_masks = dp.megaflow_stats->n_masks;
749 stats->n_mask_hit = get_32aligned_u64(
750 &dp.megaflow_stats->n_mask_hit);
751 } else {
752 stats->n_masks = UINT32_MAX;
753 stats->n_mask_hit = UINT64_MAX;
754 }
755 ofpbuf_delete(buf);
756 }
757 return error;
758 }
759
/* Maps 'vport''s kernel vport type to a netdev type string.  For
 * OVS_VPORT_TYPE_NETDEV the type comes from the netdev with the same name,
 * defaulting to "system" when no such netdev exists.  Returns "unknown"
 * (with a rate-limited warning) for unrecognized types.  The switch
 * intentionally has no 'default' so that the compiler warns if a new
 * OVS_VPORT_TYPE_* value is left unhandled. */
static const char *
get_vport_type(const struct dpif_netlink_vport *vport)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

    switch (vport->type) {
    case OVS_VPORT_TYPE_NETDEV: {
        const char *type = netdev_get_type_from_name(vport->name);

        return type ? type : "system";
    }

    case OVS_VPORT_TYPE_INTERNAL:
        return "internal";

    case OVS_VPORT_TYPE_GENEVE:
        return "geneve";

    case OVS_VPORT_TYPE_GRE:
        return "gre";

    case OVS_VPORT_TYPE_VXLAN:
        return "vxlan";

    case OVS_VPORT_TYPE_LISP:
        return "lisp";

    case OVS_VPORT_TYPE_STT:
        return "stt";

    case OVS_VPORT_TYPE_UNSPEC:
    case __OVS_VPORT_TYPE_MAX:
        break;
    }

    VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
                 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
    return "unknown";
}
799
800 enum ovs_vport_type
801 netdev_to_ovs_vport_type(const char *type)
802 {
803 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
804 return OVS_VPORT_TYPE_NETDEV;
805 } else if (!strcmp(type, "internal")) {
806 return OVS_VPORT_TYPE_INTERNAL;
807 } else if (strstr(type, "stt")) {
808 return OVS_VPORT_TYPE_STT;
809 } else if (!strcmp(type, "geneve")) {
810 return OVS_VPORT_TYPE_GENEVE;
811 } else if (strstr(type, "gre")) {
812 return OVS_VPORT_TYPE_GRE;
813 } else if (!strcmp(type, "vxlan")) {
814 return OVS_VPORT_TYPE_VXLAN;
815 } else if (!strcmp(type, "lisp")) {
816 return OVS_VPORT_TYPE_LISP;
817 } else {
818 return OVS_VPORT_TYPE_UNSPEC;
819 }
820 }
821
/* Creates a vport named 'name' of the given 'type' in the kernel datapath,
 * with optional OVS_VPORT_ATTR_OPTIONS from 'options'.  '*port_nop' carries
 * the requested port number in (or ODPP_NONE for "any") and the assigned
 * port number out.  If upcall handlers exist, also creates one upcall
 * socket per handler and wires them up as channels for the new port.
 * Returns 0 on success, a positive errno value on failure.  Caller must
 * hold 'upcall_lock' for writing. */
static int
dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
                        enum ovs_vport_type type,
                        struct ofpbuf *options,
                        odp_port_t *port_nop)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_netlink_vport request, reply;
    struct ofpbuf *buf;
    struct nl_sock **socksp = NULL;
    uint32_t *upcall_pids;
    int error = 0;

    if (dpif->handlers) {
        socksp = vport_create_socksp(dpif, &error);
        if (!socksp) {
            return error;
        }
    }

    dpif_netlink_vport_init(&request);
    request.cmd = OVS_VPORT_CMD_NEW;
    request.dp_ifindex = dpif->dp_ifindex;
    request.type = type;
    request.name = name;

    request.port_no = *port_nop;
    /* With no sockets this yields a single pid of 0 (upcalls disabled). */
    upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
    request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
    request.upcall_pids = upcall_pids;

    if (options) {
        request.options = options->data;
        request.options_len = options->size;
    }

    error = dpif_netlink_vport_transact(&request, &reply, &buf);
    if (!error) {
        *port_nop = reply.port_no;
    } else {
        if (error == EBUSY && *port_nop != ODPP_NONE) {
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
                      dpif_name(&dpif->dpif), *port_nop);
        }

        vport_del_socksp(dpif, socksp);
        goto exit;
    }

    if (socksp) {
        error = vport_add_channels(dpif, *port_nop, socksp);
        if (error) {
            VLOG_INFO("%s: could not add channel for port %s",
                      dpif_name(&dpif->dpif), name);

            /* Delete the port. */
            dpif_netlink_vport_init(&request);
            request.cmd = OVS_VPORT_CMD_DEL;
            request.dp_ifindex = dpif->dp_ifindex;
            request.port_no = *port_nop;
            dpif_netlink_vport_transact(&request, NULL, NULL);
            vport_del_socksp(dpif, socksp);
            goto exit;
        }
    }
    /* Success: the channels now own the sockets, so free only the array
     * (not vport_del_socksp(), which would destroy the sockets too). */
    free(socksp);

exit:
    ofpbuf_delete(buf);
    free(upcall_pids);

    return error;
}
895
/* Adds 'netdev' to the datapath through the OVS compat/genetlink interface,
 * serializing any tunnel configuration (destination port, extensions) into
 * OVS_TUNNEL_ATTR_* options.  '*port_nop' carries the requested port number
 * in and the assigned one out.  Returns 0 on success, a positive errno
 * value on failure.  Caller must hold 'upcall_lock' for writing. */
static int
dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
                             odp_port_t *port_nop)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    const struct netdev_tunnel_config *tnl_cfg;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *type = netdev_get_type(netdev);
    uint64_t options_stub[64 / 8];
    enum ovs_vport_type ovs_type;
    struct ofpbuf options;
    const char *name;

    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);

    ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
    if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(&dpif->dpif), name, type);
        return EINVAL;
    }

    if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
#ifdef _WIN32
        /* XXX : Map appropiate Windows handle */
#else
        /* LRO interferes with upcalled packets; best-effort disable. */
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
#endif
    }

#ifdef _WIN32
    if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
        if (!create_wmi_port(name)){
            VLOG_ERR("Could not create wmi internal port with name:%s", name);
            return EINVAL;
        };
    }
#endif

    tnl_cfg = netdev_get_tunnel_config(netdev);
    if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
        if (tnl_cfg->dst_port) {
            nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
                           ntohs(tnl_cfg->dst_port));
        }
        if (tnl_cfg->exts) {
            size_t ext_ofs;
            int i;

            /* 'exts' is a bitmap; emit one flag attribute per set bit. */
            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
            for (i = 0; i < 32; i++) {
                if (tnl_cfg->exts & (1 << i)) {
                    nl_msg_put_flag(&options, i);
                }
            }
            nl_msg_end_nested(&options, ext_ofs);
        }
        return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
                                       port_nop);
    } else {
        return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
    }

}
962
963 static int
964 dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
965 struct netdev *netdev,
966 odp_port_t *port_nop)
967 OVS_REQ_WRLOCK(dpif->upcall_lock)
968 {
969 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
970 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
971 const char *name;
972 int error;
973
974 error = dpif_netlink_rtnl_port_create(netdev);
975 if (error) {
976 if (error != EOPNOTSUPP) {
977 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
978 netdev_get_name(netdev), ovs_strerror(error));
979 }
980 return error;
981 }
982
983 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
984 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
985 port_nop);
986 if (error) {
987 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
988 }
989 return error;
990 }
991
/* Implements the dpif_class 'port_add' callback.  When in-tree
 * (rtnetlink + lightweight tunnel) support is enabled, tries that path
 * first; on any failure — including the initial EOPNOTSUPP when rtnetlink
 * is disabled — falls back to the compat genetlink interface. */
static int
dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
                      odp_port_t *port_nop)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    int error = EOPNOTSUPP;

    fat_rwlock_wrlock(&dpif->upcall_lock);
    if (!ovs_tunnels_out_of_tree) {
        error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
    }
    if (error) {
        error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
    }
    fat_rwlock_unlock(&dpif->upcall_lock);

    return error;
}
1010
/* Deletes port 'port_no' from the kernel datapath, removes its upcall
 * channels, and (when in-tree tunnels are enabled) destroys the backing
 * rtnetlink device.  Returns 0 on success, a positive errno value on
 * failure.  Caller must hold 'upcall_lock' for writing. */
static int
dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_netlink_vport vport;
    struct dpif_port dpif_port;
    int error;

    /* Query first: the port's name and type are needed below, after the
     * kernel port is gone. */
    error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
    if (error) {
        return error;
    }

    dpif_netlink_vport_init(&vport);
    vport.cmd = OVS_VPORT_CMD_DEL;
    vport.dp_ifindex = dpif->dp_ifindex;
    vport.port_no = port_no;
#ifdef _WIN32
    if (!strcmp(dpif_port.type, "internal")) {
        if (!delete_wmi_port(dpif_port.name)) {
            VLOG_ERR("Could not delete wmi port with name: %s",
                     dpif_port.name);
        };
    }
#endif
    error = dpif_netlink_vport_transact(&vport, NULL, NULL);

    vport_del_channels(dpif, port_no);

    if (!error && !ovs_tunnels_out_of_tree) {
        error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
        /* EOPNOTSUPP means the port was not rtnetlink-managed; not an
         * error. */
        if (error == EOPNOTSUPP) {
            error = 0;
        }
    }

    dpif_port_destroy(&dpif_port);

    return error;
}
1051
/* Implements the dpif_class 'port_del' callback: takes 'upcall_lock' and
 * delegates to dpif_netlink_port_del__(). */
static int
dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    int error;

    fat_rwlock_wrlock(&dpif->upcall_lock);
    error = dpif_netlink_port_del__(dpif, port_no);
    fat_rwlock_unlock(&dpif->upcall_lock);

    return error;
}
1064
1065 static int
1066 dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1067 const char *port_name, struct dpif_port *dpif_port)
1068 {
1069 struct dpif_netlink_vport request;
1070 struct dpif_netlink_vport reply;
1071 struct ofpbuf *buf;
1072 int error;
1073
1074 dpif_netlink_vport_init(&request);
1075 request.cmd = OVS_VPORT_CMD_GET;
1076 request.dp_ifindex = dpif->dp_ifindex;
1077 request.port_no = port_no;
1078 request.name = port_name;
1079
1080 error = dpif_netlink_vport_transact(&request, &reply, &buf);
1081 if (!error) {
1082 if (reply.dp_ifindex != request.dp_ifindex) {
1083 /* A query by name reported that 'port_name' is in some datapath
1084 * other than 'dpif', but the caller wants to know about 'dpif'. */
1085 error = ENODEV;
1086 } else if (dpif_port) {
1087 dpif_port->name = xstrdup(reply.name);
1088 dpif_port->type = xstrdup(get_vport_type(&reply));
1089 dpif_port->port_no = reply.port_no;
1090 }
1091 ofpbuf_delete(buf);
1092 }
1093 return error;
1094 }
1095
/* Implements the dpif_class 'port_query_by_number' callback. */
static int
dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
                                  struct dpif_port *dpif_port)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
}
1104
/* Looks up the port named 'devname' in 'dpif_'.  See
 * dpif_netlink_port_query__() for the 'dpif_port' contract. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
1113
1114 static uint32_t
1115 dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1116 odp_port_t port_no, uint32_t hash)
1117 OVS_REQ_RDLOCK(dpif->upcall_lock)
1118 {
1119 uint32_t port_idx = odp_to_u32(port_no);
1120 uint32_t pid = 0;
1121
1122 if (dpif->handlers && dpif->uc_array_size > 0) {
1123 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1124 * channel, since it is not heavily loaded. */
1125 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1126 struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
1127
1128 /* Needs to check in case the socket pointer is changed in between
1129 * the holding of upcall_lock. A known case happens when the main
1130 * thread deletes the vport while the handler thread is handling
1131 * the upcall from that port. */
1132 if (h->channels[idx].sock) {
1133 pid = nl_sock_pid(h->channels[idx].sock);
1134 }
1135 }
1136
1137 return pid;
1138 }
1139
1140 static uint32_t
1141 dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
1142 uint32_t hash)
1143 {
1144 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1145 uint32_t ret;
1146
1147 fat_rwlock_rdlock(&dpif->upcall_lock);
1148 ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
1149 fat_rwlock_unlock(&dpif->upcall_lock);
1150
1151 return ret;
1152 }
1153
1154 static int
1155 dpif_netlink_flow_flush(struct dpif *dpif_)
1156 {
1157 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1158 struct dpif_netlink_flow flow;
1159
1160 dpif_netlink_flow_init(&flow);
1161 flow.cmd = OVS_FLOW_CMD_DEL;
1162 flow.dp_ifindex = dpif->dp_ifindex;
1163
1164 if (netdev_is_flow_api_enabled()) {
1165 netdev_ports_flow_flush(dpif_->dpif_class);
1166 }
1167
1168 return dpif_netlink_flow_transact(&flow, NULL, NULL);
1169 }
1170
/* State carried across the port dump callbacks: created by
 * dpif_netlink_port_dump_start(), consumed by _next(), freed by _done(). */
struct dpif_netlink_port_state {
    struct nl_dump dump;    /* In-progress OVS_VPORT_CMD_GET Netlink dump. */
    struct ofpbuf buf;      /* Backing storage for the current vport record. */
};
1175
1176 static void
1177 dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1178 struct nl_dump *dump)
1179 {
1180 struct dpif_netlink_vport request;
1181 struct ofpbuf *buf;
1182
1183 dpif_netlink_vport_init(&request);
1184 request.cmd = OVS_VPORT_CMD_GET;
1185 request.dp_ifindex = dpif->dp_ifindex;
1186
1187 buf = ofpbuf_new(1024);
1188 dpif_netlink_vport_to_ofpbuf(&request, buf);
1189 nl_dump_start(dump, NETLINK_GENERIC, buf);
1190 ofpbuf_delete(buf);
1191 }
1192
1193 static int
1194 dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1195 {
1196 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1197 struct dpif_netlink_port_state *state;
1198
1199 *statep = state = xmalloc(sizeof *state);
1200 dpif_netlink_port_dump_start__(dpif, &state->dump);
1201
1202 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1203 return 0;
1204 }
1205
1206 static int
1207 dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1208 struct nl_dump *dump,
1209 struct dpif_netlink_vport *vport,
1210 struct ofpbuf *buffer)
1211 {
1212 struct ofpbuf buf;
1213 int error;
1214
1215 if (!nl_dump_next(dump, &buf, buffer)) {
1216 return EOF;
1217 }
1218
1219 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1220 if (error) {
1221 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1222 dpif_name(&dpif->dpif), ovs_strerror(error));
1223 }
1224 return error;
1225 }
1226
1227 static int
1228 dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1229 struct dpif_port *dpif_port)
1230 {
1231 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1232 struct dpif_netlink_port_state *state = state_;
1233 struct dpif_netlink_vport vport;
1234 int error;
1235
1236 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1237 &state->buf);
1238 if (error) {
1239 return error;
1240 }
1241 dpif_port->name = CONST_CAST(char *, vport.name);
1242 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1243 dpif_port->port_no = vport.port_no;
1244 return 0;
1245 }
1246
1247 static int
1248 dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1249 {
1250 struct dpif_netlink_port_state *state = state_;
1251 int error = nl_dump_done(&state->dump);
1252
1253 ofpbuf_uninit(&state->buf);
1254 free(state);
1255 return error;
1256 }
1257
/* Polls for a port change notification on 'dpif_'.  On a relevant vport
 * event, stores the (caller-owned) device name in '*devnamep' and returns 0.
 * Returns ENOBUFS when the caller should assume every port changed (initial
 * call, or after the notification socket overflowed), EAGAIN when nothing is
 * pending, or another positive errno value on error. */
static int
dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    /* Lazily create the Netlink socket to listen for notifications. */
    if (!dpif->port_notifier) {
        struct nl_sock *sock;
        int error;

        error = nl_sock_create(NETLINK_GENERIC, &sock);
        if (error) {
            return error;
        }

        error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
        if (error) {
            nl_sock_destroy(sock);
            return error;
        }
        dpif->port_notifier = sock;

        /* We have no idea of the current state so report that everything
         * changed. */
        return ENOBUFS;
    }

    /* Drain notifications until one matches this datapath, the socket runs
     * dry (EAGAIN), or an error occurs. */
    for (;;) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        uint64_t buf_stub[4096 / 8];
        struct ofpbuf buf;
        int error;

        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
        if (!error) {
            struct dpif_netlink_vport vport;

            error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
            if (!error) {
                /* Only report events for vports in this datapath; ignore
                 * notifications from other datapaths and other commands. */
                if (vport.dp_ifindex == dpif->dp_ifindex
                    && (vport.cmd == OVS_VPORT_CMD_NEW
                        || vport.cmd == OVS_VPORT_CMD_DEL
                        || vport.cmd == OVS_VPORT_CMD_SET)) {
                    VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
                             dpif->dpif.full_name, vport.name, vport.cmd);
                    /* A deleted vport leaves a stale upcall channel behind;
                     * ask the handler machinery to rebuild its channels. */
                    if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
                        dpif->refresh_channels = true;
                    }
                    *devnamep = xstrdup(vport.name);
                    ofpbuf_uninit(&buf);
                    return 0;
                }
            }
        } else if (error != EAGAIN) {
            /* Receive failure (e.g. socket overflow): drain the socket and
             * tell the caller to re-examine all ports. */
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
                         ovs_strerror(error));
            nl_sock_drain(dpif->port_notifier);
            error = ENOBUFS;
        }

        ofpbuf_uninit(&buf);
        if (error) {
            return error;
        }
    }
}
1325
1326 static void
1327 dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1328 {
1329 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1330
1331 if (dpif->port_notifier) {
1332 nl_sock_wait(dpif->port_notifier, POLLIN);
1333 } else {
1334 poll_immediate_wake();
1335 }
1336 }
1337
1338 static void
1339 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1340 const ovs_u128 *ufid, bool terse)
1341 {
1342 if (ufid) {
1343 request->ufid = *ufid;
1344 request->ufid_present = true;
1345 } else {
1346 request->ufid_present = false;
1347 }
1348 request->ufid_terse = terse;
1349 }
1350
1351 static void
1352 dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1353 const struct nlattr *key, size_t key_len,
1354 const ovs_u128 *ufid, bool terse,
1355 struct dpif_netlink_flow *request)
1356 {
1357 dpif_netlink_flow_init(request);
1358 request->cmd = OVS_FLOW_CMD_GET;
1359 request->dp_ifindex = dpif->dp_ifindex;
1360 request->key = key;
1361 request->key_len = key_len;
1362 dpif_netlink_flow_init_ufid(request, ufid, terse);
1363 }
1364
1365 static void
1366 dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1367 const struct dpif_flow_get *get,
1368 struct dpif_netlink_flow *request)
1369 {
1370 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1371 false, request);
1372 }
1373
1374 static int
1375 dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1376 const struct nlattr *key, size_t key_len,
1377 const ovs_u128 *ufid, bool terse,
1378 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1379 {
1380 struct dpif_netlink_flow request;
1381
1382 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
1383 return dpif_netlink_flow_transact(&request, reply, bufp);
1384 }
1385
1386 static int
1387 dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1388 const struct dpif_netlink_flow *flow,
1389 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1390 {
1391 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1392 flow->ufid_present ? &flow->ufid : NULL,
1393 false, reply, bufp);
1394 }
1395
1396 static void
1397 dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1398 const struct dpif_flow_put *put,
1399 struct dpif_netlink_flow *request)
1400 {
1401 static const struct nlattr dummy_action;
1402
1403 dpif_netlink_flow_init(request);
1404 request->cmd = (put->flags & DPIF_FP_CREATE
1405 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1406 request->dp_ifindex = dpif->dp_ifindex;
1407 request->key = put->key;
1408 request->key_len = put->key_len;
1409 request->mask = put->mask;
1410 request->mask_len = put->mask_len;
1411 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1412
1413 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1414 request->actions = (put->actions
1415 ? put->actions
1416 : CONST_CAST(struct nlattr *, &dummy_action));
1417 request->actions_len = put->actions_len;
1418 if (put->flags & DPIF_FP_ZERO_STATS) {
1419 request->clear = true;
1420 }
1421 if (put->flags & DPIF_FP_PROBE) {
1422 request->probe = true;
1423 }
1424 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1425 }
1426
1427 static void
1428 dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1429 const struct nlattr *key, size_t key_len,
1430 const ovs_u128 *ufid, bool terse,
1431 struct dpif_netlink_flow *request)
1432 {
1433 dpif_netlink_flow_init(request);
1434 request->cmd = OVS_FLOW_CMD_DEL;
1435 request->dp_ifindex = dpif->dp_ifindex;
1436 request->key = key;
1437 request->key_len = key_len;
1438 dpif_netlink_flow_init_ufid(request, ufid, terse);
1439 }
1440
1441 static void
1442 dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1443 const struct dpif_flow_del *del,
1444 struct dpif_netlink_flow *request)
1445 {
1446 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1447 del->ufid, del->terse, request);
1448 }
1449
/* Bit positions identifying which sources a flow dump covers. */
enum {
    DUMP_OVS_FLOWS_BIT = 0,         /* Flows in the kernel datapath. */
    DUMP_OFFLOADED_FLOWS_BIT = 1,   /* Flows dumped via netdev offload
                                     * providers. */
};

/* Dump-type masks built from the bits above; computed by
 * dpif_netlink_get_dump_type() and stored in dpif_netlink_flow_dump's
 * 'type'. */
enum {
    DUMP_OVS_FLOWS = (1 << DUMP_OVS_FLOWS_BIT),
    DUMP_OFFLOADED_FLOWS = (1 << DUMP_OFFLOADED_FLOWS_BIT),
};
1459
/* A flow dump in progress.  One instance is shared by all of the threads
 * assisting with the dump; see dpif_netlink_flow_dump_thread for per-thread
 * state. */
struct dpif_netlink_flow_dump {
    struct dpif_flow_dump up;      /* Common dpif flow-dump state. */
    struct nl_dump nl_dump;        /* Kernel dump; valid only when 'type' has
                                    * DUMP_OVS_FLOWS set. */
    atomic_int status;             /* First error recorded by any thread. */
    struct netdev_flow_dump **netdev_dumps; /* One dump per offload netdev. */
    int netdev_dumps_num;                    /* Number of netdev_flow_dumps */
    struct ovs_mutex netdev_lock;            /* Guards the following. */
    int netdev_current_dump OVS_GUARDED;     /* Shared current dump */
    int type;                                /* Type of dump */
};
1470
/* Returns the dpif_netlink_flow_dump that embeds 'dump'. */
static struct dpif_netlink_flow_dump *
dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
{
    return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
}
1476
1477 static void
1478 start_netdev_dump(const struct dpif *dpif_,
1479 struct dpif_netlink_flow_dump *dump)
1480 {
1481 ovs_mutex_init(&dump->netdev_lock);
1482
1483 if (!(dump->type & DUMP_OFFLOADED_FLOWS)) {
1484 dump->netdev_dumps_num = 0;
1485 dump->netdev_dumps = NULL;
1486 return;
1487 }
1488
1489 ovs_mutex_lock(&dump->netdev_lock);
1490 dump->netdev_current_dump = 0;
1491 dump->netdev_dumps
1492 = netdev_ports_flow_dump_create(dpif_->dpif_class,
1493 &dump->netdev_dumps_num);
1494 ovs_mutex_unlock(&dump->netdev_lock);
1495 }
1496
1497 static int
1498 dpif_netlink_get_dump_type(char *str) {
1499 int type = 0;
1500
1501 if (!str || !strcmp(str, "ovs") || !strcmp(str, "dpctl")) {
1502 type |= DUMP_OVS_FLOWS;
1503 }
1504 if ((netdev_is_flow_api_enabled() && !str)
1505 || (str && (!strcmp(str, "offloaded") || !strcmp(str, "dpctl")))) {
1506 type |= DUMP_OFFLOADED_FLOWS;
1507 }
1508
1509 return type;
1510 }
1511
1512 static struct dpif_flow_dump *
1513 dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
1514 char *type)
1515 {
1516 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1517 struct dpif_netlink_flow_dump *dump;
1518 struct dpif_netlink_flow request;
1519 struct ofpbuf *buf;
1520
1521 dump = xmalloc(sizeof *dump);
1522 dpif_flow_dump_init(&dump->up, dpif_);
1523
1524 dump->type = dpif_netlink_get_dump_type(type);
1525
1526 if (dump->type & DUMP_OVS_FLOWS) {
1527 dpif_netlink_flow_init(&request);
1528 request.cmd = OVS_FLOW_CMD_GET;
1529 request.dp_ifindex = dpif->dp_ifindex;
1530 request.ufid_present = false;
1531 request.ufid_terse = terse;
1532
1533 buf = ofpbuf_new(1024);
1534 dpif_netlink_flow_to_ofpbuf(&request, buf);
1535 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1536 ofpbuf_delete(buf);
1537 }
1538 atomic_init(&dump->status, 0);
1539 dump->up.terse = terse;
1540
1541 start_netdev_dump(dpif_, dump);
1542
1543 return &dump->up;
1544 }
1545
1546 static int
1547 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1548 {
1549 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1550 unsigned int nl_status = 0;
1551 int dump_status;
1552
1553 if (dump->type & DUMP_OVS_FLOWS) {
1554 nl_status = nl_dump_done(&dump->nl_dump);
1555 }
1556
1557 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1558 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1559
1560 if (err != 0 && err != EOPNOTSUPP) {
1561 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1562 }
1563 }
1564
1565 free(dump->netdev_dumps);
1566 ovs_mutex_destroy(&dump->netdev_lock);
1567
1568 /* No other thread has access to 'dump' at this point. */
1569 atomic_read_relaxed(&dump->status, &dump_status);
1570 free(dump);
1571 return dump_status ? dump_status : nl_status;
1572 }
1573
/* Per-thread state for one shared dpif_netlink_flow_dump. */
struct dpif_netlink_flow_dump_thread {
    struct dpif_flow_dump_thread up;    /* Common dpif per-thread state. */
    struct dpif_netlink_flow_dump *dump; /* The shared dump being served. */
    struct dpif_netlink_flow flow;
    struct dpif_flow_stats stats;
    struct ofpbuf nl_flows;     /* Always used to store flows. */
    struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
    int netdev_dump_idx;        /* This thread current netdev dump index */
    bool netdev_done;           /* If we are finished dumping netdevs */

    /* (Key/Mask/Actions) Buffers for netdev dumping */
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
};
1589
/* Returns the dpif_netlink_flow_dump_thread that embeds 'thread'. */
static struct dpif_netlink_flow_dump_thread *
dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
{
    return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
}
1595
1596 static struct dpif_flow_dump_thread *
1597 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1598 {
1599 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1600 struct dpif_netlink_flow_dump_thread *thread;
1601
1602 thread = xmalloc(sizeof *thread);
1603 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1604 thread->dump = dump;
1605 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1606 thread->nl_actions = NULL;
1607 thread->netdev_dump_idx = 0;
1608 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
1609
1610 return &thread->up;
1611 }
1612
1613 static void
1614 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1615 {
1616 struct dpif_netlink_flow_dump_thread *thread
1617 = dpif_netlink_flow_dump_thread_cast(thread_);
1618
1619 ofpbuf_uninit(&thread->nl_flows);
1620 ofpbuf_delete(thread->nl_actions);
1621 free(thread);
1622 }
1623
1624 static void
1625 dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
1626 const struct dpif_netlink_flow *datapath_flow)
1627 {
1628 dpif_flow->key = datapath_flow->key;
1629 dpif_flow->key_len = datapath_flow->key_len;
1630 dpif_flow->mask = datapath_flow->mask;
1631 dpif_flow->mask_len = datapath_flow->mask_len;
1632 dpif_flow->actions = datapath_flow->actions;
1633 dpif_flow->actions_len = datapath_flow->actions_len;
1634 dpif_flow->ufid_present = datapath_flow->ufid_present;
1635 dpif_flow->pmd_id = PMD_ID_NULL;
1636 if (datapath_flow->ufid_present) {
1637 dpif_flow->ufid = datapath_flow->ufid;
1638 } else {
1639 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1640 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1641 &dpif_flow->ufid);
1642 }
1643 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1644 dpif_flow->offloaded = false;
1645 }
1646
1647 /* The design is such that all threads are working together on the first dump
1648 * to the last, in order (at first they all on dump 0).
1649 * When the first thread finds that the given dump is finished,
1650 * they all move to the next. If two or more threads find the same dump
1651 * is finished at the same time, the first one will advance the shared
1652 * netdev_current_dump and the others will catch up. */
1653 static void
1654 dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
1655 {
1656 struct dpif_netlink_flow_dump *dump = thread->dump;
1657
1658 ovs_mutex_lock(&dump->netdev_lock);
1659 /* if we haven't finished (dumped everything) */
1660 if (dump->netdev_current_dump < dump->netdev_dumps_num) {
1661 /* if we are the first to find that current dump is finished
1662 * advance it. */
1663 if (thread->netdev_dump_idx == dump->netdev_current_dump) {
1664 thread->netdev_dump_idx = ++dump->netdev_current_dump;
1665 /* did we just finish the last dump? done. */
1666 if (dump->netdev_current_dump == dump->netdev_dumps_num) {
1667 thread->netdev_done = true;
1668 }
1669 } else {
1670 /* otherwise, we are behind, catch up */
1671 thread->netdev_dump_idx = dump->netdev_current_dump;
1672 }
1673 } else {
1674 /* some other thread finished */
1675 thread->netdev_done = true;
1676 }
1677 ovs_mutex_unlock(&dump->netdev_lock);
1678 }
1679
/* Converts a netdev-offload flow ('match', 'actions', 'stats', 'ufid') into
 * the generic 'flow' representation.  The key and mask are serialized into
 * 'key_buf' and 'mask_buf', which must outlive 'flow' since 'flow' points
 * into them ('flow->actions' likewise aliases 'actions').  Always returns
 * 0. */
static int
dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
                                       struct ofpbuf *key_buf,
                                       struct ofpbuf *mask_buf,
                                       struct nlattr *actions,
                                       struct dpif_flow_stats *stats,
                                       ovs_u128 *ufid,
                                       struct dpif_flow *flow,
                                       bool terse OVS_UNUSED)
{

    struct odp_flow_key_parms odp_parms = {
        .flow = &match->flow,
        .mask = &match->wc.masks,
        .support = {
            .max_vlan_headers = 1,
        },
    };
    size_t offset;

    memset(flow, 0, sizeof *flow);

    /* Key: serialize into 'key_buf' and record the appended span. */
    offset = key_buf->size;
    flow->key = ofpbuf_tail(key_buf);
    odp_flow_key_from_flow(&odp_parms, key_buf);
    flow->key_len = key_buf->size - offset;

    /* Mask: must be serialized after the key, since the mask serialization
     * reads the key back through 'odp_parms.key_buf'. */
    offset = mask_buf->size;
    flow->mask = ofpbuf_tail(mask_buf);
    odp_parms.key_buf = key_buf;
    odp_flow_key_from_mask(&odp_parms, mask_buf);
    flow->mask_len = mask_buf->size - offset;

    /* Actions: alias the caller-provided attribute payload. */
    flow->actions = nl_attr_get(actions);
    flow->actions_len = nl_attr_get_size(actions);

    /* Stats */
    memcpy(&flow->stats, stats, sizeof *stats);

    /* UFID */
    flow->ufid_present = true;
    flow->ufid = *ufid;

    flow->pmd_id = PMD_ID_NULL;

    flow->offloaded = true;

    return 0;
}
1732
/* Fills 'flows' with up to 'max_flows' flows from 'thread_''s dump and
 * returns how many were stored.  Offloaded netdev flows are produced first;
 * once those are exhausted (or were not requested), kernel flows follow.
 * Returned flows point into per-thread buffers that remain valid only until
 * the next call on the same thread. */
static int
dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                            struct dpif_flow *flows, int max_flows)
{
    struct dpif_netlink_flow_dump_thread *thread
        = dpif_netlink_flow_dump_thread_cast(thread_);
    struct dpif_netlink_flow_dump *dump = thread->dump;
    struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
    int n_flows;

    /* Free the actions buffer possibly left over from the previous batch's
     * "flow without actions" case. */
    ofpbuf_delete(thread->nl_actions);
    thread->nl_actions = NULL;

    n_flows = 0;
    max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);

    /* Phase 1: flows from the netdev offload dumps. */
    while (!thread->netdev_done && n_flows < max_flows) {
        struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
        struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
        struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
        struct ofpbuf key, mask, act;
        struct dpif_flow *f = &flows[n_flows];
        int cur = thread->netdev_dump_idx;
        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
        struct match match;
        struct nlattr *actions;
        struct dpif_flow_stats stats;
        ovs_u128 ufid;
        bool has_next;

        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
        has_next = netdev_flow_dump_next(netdev_dump, &match,
                                        &actions, &stats,
                                        &ufid,
                                        &thread->nl_flows,
                                        &act);
        if (has_next) {
            dpif_netlink_netdev_match_to_dpif_flow(&match,
                                                   &key, &mask,
                                                   actions,
                                                   &stats,
                                                   &ufid,
                                                   f,
                                                   dump->up.terse);
            n_flows++;
        } else {
            /* Current netdev dump exhausted; move to the next one (or mark
             * this thread done). */
            dpif_netlink_advance_netdev_dump(thread);
        }
    }

    if (!(dump->type & DUMP_OVS_FLOWS)) {
        return n_flows;
    }

    /* Phase 2: flows from the kernel datapath dump. */
    while (!n_flows
           || (n_flows < max_flows && thread->nl_flows.size)) {
        struct dpif_netlink_flow datapath_flow;
        struct ofpbuf nl_flow;
        int error;

        /* Try to grab another flow. */
        if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
            break;
        }

        /* Convert the flow to our output format. */
        error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
        if (error) {
            atomic_store_relaxed(&dump->status, error);
            break;
        }

        if (dump->up.terse || datapath_flow.actions) {
            /* Common case: we don't want actions, or the flow includes
             * actions. */
            dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
                                           &datapath_flow);
        } else {
            /* Rare case: the flow does not include actions.  Retrieve this
             * individual flow again to get the actions. */
            error = dpif_netlink_flow_get(dpif, &datapath_flow,
                                          &datapath_flow, &thread->nl_actions);
            if (error == ENOENT) {
                VLOG_DBG("dumped flow disappeared on get");
                continue;
            } else if (error) {
                VLOG_WARN("error fetching dumped flow: %s",
                          ovs_strerror(error));
                atomic_store_relaxed(&dump->status, error);
                break;
            }

            /* Save this flow.  Then exit, because we only have one buffer to
             * handle this case. */
            dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
                                           &datapath_flow);
            break;
        }
    }
    return n_flows;
}
1836
/* Appends an OVS_PACKET_CMD_EXECUTE Generic Netlink message for 'd_exec'
 * (packet, flow key metadata, and actions) to 'buf', targeting the datapath
 * identified by 'dp_ifindex'. */
static void
dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
                            struct ofpbuf *buf)
{
    struct ovs_header *k_exec;
    size_t key_ofs;

    /* Reserve enough room up front for the header, the packet, the key
     * metadata, and the actions, to avoid reallocation mid-encode. */
    ofpbuf_prealloc_tailroom(buf, (64
                                   + dp_packet_size(d_exec->packet)
                                   + ODP_KEY_METADATA_SIZE
                                   + d_exec->actions_len));

    nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
                          OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);

    /* The ovs_header immediately follows the genlmsghdr. */
    k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
    k_exec->dp_ifindex = dp_ifindex;

    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
                      dp_packet_data(d_exec->packet),
                      dp_packet_size(d_exec->packet));

    /* The flow key is a nested attribute derived from the packet's
     * metadata. */
    key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
    odp_key_from_dp_packet(buf, d_exec->packet);
    nl_msg_end_nested(buf, key_ofs);

    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
                      d_exec->actions, d_exec->actions_len);
    if (d_exec->probe) {
        nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
    }
    if (d_exec->mtu) {
        nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
    }
}
1872
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
 * Returns the number actually executed (at least 1, if 'n_ops' is
 * positive).
 *
 * Works in two passes: first each operation is encoded into its own Netlink
 * request, then all requests are issued as one nl_transact_multiple() batch
 * and the replies are decoded back into each op's result fields. */
static size_t
dpif_netlink_operate__(struct dpif_netlink *dpif,
                       struct dpif_op **ops, size_t n_ops)
{
    /* Per-operation scratch: a transaction plus stub-backed request and
     * reply buffers, so the common case needs no heap allocation. */
    struct op_auxdata {
        struct nl_transaction txn;

        struct ofpbuf request;
        uint64_t request_stub[1024 / 8];

        struct ofpbuf reply;
        uint64_t reply_stub[1024 / 8];
    } auxes[OPERATE_MAX_OPS];

    struct nl_transaction *txnsp[OPERATE_MAX_OPS];
    size_t i;

    n_ops = MIN(n_ops, OPERATE_MAX_OPS);

    /* Pass 1: encode each operation into a Netlink request. */
    for (i = 0; i < n_ops; i++) {
        struct op_auxdata *aux = &auxes[i];
        struct dpif_op *op = ops[i];
        struct dpif_flow_put *put;
        struct dpif_flow_del *del;
        struct dpif_flow_get *get;
        struct dpif_netlink_flow flow;

        ofpbuf_use_stub(&aux->request,
                        aux->request_stub, sizeof aux->request_stub);
        aux->txn.request = &aux->request;

        ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
        aux->txn.reply = NULL;

        switch (op->type) {
        case DPIF_OP_FLOW_PUT:
            put = &op->u.flow_put;
            dpif_netlink_init_flow_put(dpif, put, &flow);
            if (put->stats) {
                /* NLM_F_ECHO makes the kernel echo the flow back so its
                 * stats can be extracted in pass 2. */
                flow.nlmsg_flags |= NLM_F_ECHO;
                aux->txn.reply = &aux->reply;
            }
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        case DPIF_OP_FLOW_DEL:
            del = &op->u.flow_del;
            dpif_netlink_init_flow_del(dpif, del, &flow);
            if (del->stats) {
                flow.nlmsg_flags |= NLM_F_ECHO;
                aux->txn.reply = &aux->reply;
            }
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        case DPIF_OP_EXECUTE:
            /* Can't execute a packet that won't fit in a Netlink attribute. */
            if (OVS_UNLIKELY(nl_attr_oversized(
                                 dp_packet_size(op->u.execute.packet)))) {
                /* Report an error immediately if this is the first operation.
                 * Otherwise the easiest thing to do is to postpone to the next
                 * call (when this will be the first operation). */
                if (i == 0) {
                    VLOG_ERR_RL(&error_rl,
                                "dropping oversized %"PRIu32"-byte packet",
                                dp_packet_size(op->u.execute.packet));
                    op->error = ENOBUFS;
                    return 1;
                }
                n_ops = i;
            } else {
                dpif_netlink_encode_execute(dpif->dp_ifindex, &op->u.execute,
                                            &aux->request);
            }
            break;

        case DPIF_OP_FLOW_GET:
            get = &op->u.flow_get;
            dpif_netlink_init_flow_get(dpif, get, &flow);
            aux->txn.reply = get->buffer;
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        default:
            OVS_NOT_REACHED();
        }
    }

    /* Issue all requests as a single batch. */
    for (i = 0; i < n_ops; i++) {
        txnsp[i] = &auxes[i].txn;
    }
    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);

    /* Pass 2: decode each reply into the corresponding op's results. */
    for (i = 0; i < n_ops; i++) {
        struct op_auxdata *aux = &auxes[i];
        struct nl_transaction *txn = &auxes[i].txn;
        struct dpif_op *op = ops[i];
        struct dpif_flow_put *put;
        struct dpif_flow_del *del;
        struct dpif_flow_get *get;

        op->error = txn->error;

        switch (op->type) {
        case DPIF_OP_FLOW_PUT:
            put = &op->u.flow_put;
            if (put->stats) {
                if (!op->error) {
                    struct dpif_netlink_flow reply;

                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
                                                              txn->reply);
                    if (!op->error) {
                        dpif_netlink_flow_get_stats(&reply, put->stats);
                    }
                }
            }
            break;

        case DPIF_OP_FLOW_DEL:
            del = &op->u.flow_del;
            if (del->stats) {
                if (!op->error) {
                    struct dpif_netlink_flow reply;

                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
                                                              txn->reply);
                    if (!op->error) {
                        dpif_netlink_flow_get_stats(&reply, del->stats);
                    }
                }
            }
            break;

        case DPIF_OP_EXECUTE:
            break;

        case DPIF_OP_FLOW_GET:
            get = &op->u.flow_get;
            if (!op->error) {
                struct dpif_netlink_flow reply;

                op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
                if (!op->error) {
                    dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
                                                   &reply);
                }
            }
            break;

        default:
            OVS_NOT_REACHED();
        }

        ofpbuf_uninit(&aux->request);
        ofpbuf_uninit(&aux->reply);
    }

    return n_ops;
}
2035
/* Attempts to satisfy the flow-get 'get' from the netdev offload layer
 * instead of the kernel datapath.  On success fills in 'get->flow' (actions
 * are copied into 'get->buffer' so they survive this function) and returns
 * 0; returns a positive errno value if the flow is not found offloaded. */
static int
parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
{
    struct dpif_flow *dpif_flow = get->flow;
    struct match match;
    struct nlattr *actions;
    struct dpif_flow_stats stats;
    struct ofpbuf buf;
    uint64_t act_buf[1024 / 8];
    struct odputil_keybuf maskbuf;
    struct odputil_keybuf keybuf;
    struct odputil_keybuf actbuf;
    struct ofpbuf key, mask, act;
    int err;

    /* Look the flow up by UFID among the offloaded flows; 'buf' receives
     * the actions. */
    ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
    err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
                                &actions, get->ufid, &stats, &buf);
    if (err) {
        return err;
    }

    VLOG_DBG("found flow from netdev, translating to dpif flow");

    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
    ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
    dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
                                           &stats,
                                           (ovs_u128 *) get->ufid,
                                           dpif_flow,
                                           false);
    /* The stack buffers above die with this frame, so copy the actions into
     * the caller-provided buffer and point the flow at that copy. */
    ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
    dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
    dpif_flow->actions_len = nl_attr_get_size(actions);

    return 0;
}
2074
2075 static int
2076 parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
2077 {
2078 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
2079 const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
2080 struct match match;
2081 odp_port_t in_port;
2082 const struct nlattr *nla;
2083 size_t left;
2084 struct netdev *dev;
2085 struct offload_info info;
2086 ovs_be16 dst_port = 0;
2087 int err;
2088
2089 if (put->flags & DPIF_FP_PROBE) {
2090 return EOPNOTSUPP;
2091 }
2092
2093 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
2094 put->mask_len, &match);
2095 if (err) {
2096 return err;
2097 }
2098
2099 /* When we try to install a dummy flow from a probed feature. */
2100 if (match.flow.dl_type == htons(0x1234)) {
2101 return EOPNOTSUPP;
2102 }
2103
2104 in_port = match.flow.in_port.odp_port;
2105 dev = netdev_ports_get(in_port, dpif_class);
2106 if (!dev) {
2107 return EOPNOTSUPP;
2108 }
2109
2110 /* Get tunnel dst port */
2111 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
2112 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
2113 const struct netdev_tunnel_config *tnl_cfg;
2114 struct netdev *outdev;
2115 odp_port_t out_port;
2116
2117 out_port = nl_attr_get_odp_port(nla);
2118 outdev = netdev_ports_get(out_port, dpif_class);
2119 if (!outdev) {
2120 err = EOPNOTSUPP;
2121 goto out;
2122 }
2123 tnl_cfg = netdev_get_tunnel_config(outdev);
2124 if (tnl_cfg && tnl_cfg->dst_port != 0) {
2125 dst_port = tnl_cfg->dst_port;
2126 }
2127 netdev_close(outdev);
2128 }
2129 }
2130
2131 info.dpif_class = dpif_class;
2132 info.tp_dst_port = dst_port;
2133 err = netdev_flow_put(dev, &match,
2134 CONST_CAST(struct nlattr *, put->actions),
2135 put->actions_len,
2136 CONST_CAST(ovs_u128 *, put->ufid),
2137 &info, put->stats);
2138
2139 if (!err) {
2140 if (put->flags & DPIF_FP_MODIFY) {
2141 struct dpif_op *opp;
2142 struct dpif_op op;
2143
2144 op.type = DPIF_OP_FLOW_DEL;
2145 op.u.flow_del.key = put->key;
2146 op.u.flow_del.key_len = put->key_len;
2147 op.u.flow_del.ufid = put->ufid;
2148 op.u.flow_del.pmd_id = put->pmd_id;
2149 op.u.flow_del.stats = NULL;
2150 op.u.flow_del.terse = false;
2151
2152 opp = &op;
2153 dpif_netlink_operate__(dpif, &opp, 1);
2154 }
2155
2156 VLOG_DBG("added flow");
2157 } else if (err != EEXIST) {
2158 VLOG_ERR_RL(&rl, "failed to offload flow: %s", ovs_strerror(err));
2159 }
2160
2161 out:
2162 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2163 /* Modified rule can't be offloaded, try and delete from HW */
2164 int del_err = netdev_flow_del(dev, put->ufid, put->stats);
2165
2166 if (!del_err) {
2167 /* Delete from hw success, so old flow was offloaded.
2168 * Change flags to create the flow in kernel */
2169 put->flags &= ~DPIF_FP_MODIFY;
2170 put->flags |= DPIF_FP_CREATE;
2171 } else if (del_err != ENOENT) {
2172 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2173 ovs_strerror(del_err));
/* Stop processing the flow in the kernel. */
2175 err = 0;
2176 }
2177 }
2178
2179 netdev_close(dev);
2180
2181 return err;
2182 }
2183
2184 static int
2185 try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
2186 {
2187 int err = EOPNOTSUPP;
2188
2189 switch (op->type) {
2190 case DPIF_OP_FLOW_PUT: {
2191 struct dpif_flow_put *put = &op->u.flow_put;
2192
2193 if (!put->ufid) {
2194 break;
2195 }
2196
2197 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
2198 err = parse_flow_put(dpif, put);
2199 break;
2200 }
2201 case DPIF_OP_FLOW_DEL: {
2202 struct dpif_flow_del *del = &op->u.flow_del;
2203
2204 if (!del->ufid) {
2205 break;
2206 }
2207
2208 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
2209 err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
2210 del->stats);
2211 break;
2212 }
2213 case DPIF_OP_FLOW_GET: {
2214 struct dpif_flow_get *get = &op->u.flow_get;
2215
2216 if (!op->u.flow_get.ufid) {
2217 break;
2218 }
2219
2220 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
2221 err = parse_flow_get(dpif, get);
2222 break;
2223 }
2224 case DPIF_OP_EXECUTE:
2225 default:
2226 break;
2227 }
2228
2229 return err;
2230 }
2231
2232 static void
2233 dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
2234 size_t n_ops)
2235 {
2236 while (n_ops > 0) {
2237 size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
2238
2239 ops += chunk;
2240 n_ops -= chunk;
2241 }
2242 }
2243
2244 static void
2245 dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
2246 {
2247 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2248 struct dpif_op *new_ops[OPERATE_MAX_OPS];
2249 int count = 0;
2250 int i = 0;
2251 int err = 0;
2252
2253 if (netdev_is_flow_api_enabled()) {
2254 while (n_ops > 0) {
2255 count = 0;
2256
2257 while (n_ops > 0 && count < OPERATE_MAX_OPS) {
2258 struct dpif_op *op = ops[i++];
2259
2260 err = try_send_to_netdev(dpif, op);
2261 if (err && err != EEXIST) {
2262 new_ops[count++] = op;
2263 } else {
2264 op->error = err;
2265 }
2266
2267 n_ops--;
2268 }
2269
2270 dpif_netlink_operate_chunks(dpif, new_ops, count);
2271 }
2272 } else {
2273 dpif_netlink_operate_chunks(dpif, ops, n_ops);
2274 }
2275 }
2276
2277 #if _WIN32
2278 static void
2279 dpif_netlink_handler_uninit(struct dpif_handler *handler)
2280 {
2281 vport_delete_sock_pool(handler);
2282 }
2283
2284 static int
2285 dpif_netlink_handler_init(struct dpif_handler *handler)
2286 {
2287 return vport_create_sock_pool(handler);
2288 }
2289 #else
2290
2291 static int
2292 dpif_netlink_handler_init(struct dpif_handler *handler)
2293 {
2294 handler->epoll_fd = epoll_create(10);
2295 return handler->epoll_fd < 0 ? errno : 0;
2296 }
2297
2298 static void
2299 dpif_netlink_handler_uninit(struct dpif_handler *handler)
2300 {
2301 close(handler->epoll_fd);
2302 }
2303 #endif
2304
2305 /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
2306 * currently in 'dpif' in the kernel, by adding a new set of channels for
2307 * any kernel vport that lacks one and deleting any channels that have no
2308 * backing kernel vports. */
static int
dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    unsigned long int *keep_channels;
    struct dpif_netlink_vport vport;
    size_t keep_channels_nbits;
    struct nl_dump dump;
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
    struct ofpbuf buf;
    int retval = 0;
    size_t i;

    /* Windows supports at most one handler (see dpif_netlink_handlers_set). */
    ovs_assert(!WINDOWS || n_handlers <= 1);
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);

    /* If the handler count changed, tear down all existing channels and
     * rebuild the handler array from scratch. */
    if (dpif->n_handlers != n_handlers) {
        destroy_all_channels(dpif);
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
        for (i = 0; i < n_handlers; i++) {
            int error;
            struct dpif_handler *handler = &dpif->handlers[i];

            error = dpif_netlink_handler_init(handler);
            if (error) {
                size_t j;

                /* Unwind the handlers initialized so far, then fail. */
                for (j = 0; j < i; j++) {
                    struct dpif_handler *tmp = &dpif->handlers[j];
                    dpif_netlink_handler_uninit(tmp);
                }
                free(dpif->handlers);
                dpif->handlers = NULL;

                return error;
            }
        }
        dpif->n_handlers = n_handlers;
    }

    /* Discard any stale, already-fetched epoll events. */
    for (i = 0; i < n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

        handler->event_offset = handler->n_events = 0;
    }

    /* 'keep_channels' records which existing channels are still backed by a
     * kernel vport; the rest are deleted at the end. */
    keep_channels_nbits = dpif->uc_array_size;
    keep_channels = bitmap_allocate(keep_channels_nbits);

    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
    dpif_netlink_port_dump_start__(dpif, &dump);
    while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
        uint32_t port_no = odp_to_u32(vport.port_no);
        uint32_t *upcall_pids = NULL;
        int error;

        /* Create channels for any kernel vport that lacks them. */
        if (port_no >= dpif->uc_array_size
            || !vport_get_pids(dpif, port_no, &upcall_pids)) {
            struct nl_sock **socksp = vport_create_socksp(dpif, &error);

            if (!socksp) {
                goto error;
            }

            error = vport_add_channels(dpif, vport.port_no, socksp);
            if (error) {
                VLOG_INFO("%s: could not add channels for port %s",
                          dpif_name(&dpif->dpif), vport.name);
                vport_del_socksp(dpif, socksp);
                retval = error;
                goto error;
            }
            upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
            free(socksp);
        }

        /* Configure the vport to deliver misses to 'sock'.  Skipped when the
         * kernel already has the exact same pid set. */
        if (vport.upcall_pids[0] == 0
            || vport.n_upcall_pids != dpif->n_handlers
            || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
                      *upcall_pids)) {
            struct dpif_netlink_vport vport_request;

            dpif_netlink_vport_init(&vport_request);
            vport_request.cmd = OVS_VPORT_CMD_SET;
            vport_request.dp_ifindex = dpif->dp_ifindex;
            vport_request.port_no = vport.port_no;
            vport_request.n_upcall_pids = dpif->n_handlers;
            vport_request.upcall_pids = upcall_pids;
            error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
            if (error) {
                VLOG_WARN_RL(&error_rl,
                             "%s: failed to set upcall pid on port: %s",
                             dpif_name(&dpif->dpif), ovs_strerror(error));

                if (error != ENODEV && error != ENOENT) {
                    retval = error;
                } else {
                    /* The vport isn't really there, even though the dump says
                     * it is.  Probably we just hit a race after a port
                     * disappeared. */
                }
                goto error;
            }
        }

        if (port_no < keep_channels_nbits) {
            bitmap_set1(keep_channels, port_no);
        }
        free(upcall_pids);
        continue;

    error:
        /* Per-port failure: drop this port's channels and keep dumping. */
        free(upcall_pids);
        vport_del_channels(dpif, vport.port_no);
    }
    nl_dump_done(&dump);
    ofpbuf_uninit(&buf);

    /* Discard any saved channels that we didn't reuse. */
    for (i = 0; i < keep_channels_nbits; i++) {
        if (!bitmap_is_set(keep_channels, i)) {
            vport_del_channels(dpif, u32_to_odp(i));
        }
    }
    free(keep_channels);

    return retval;
}
2438
2439 static int
2440 dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
2441 OVS_REQ_WRLOCK(dpif->upcall_lock)
2442 {
2443 if ((dpif->handlers != NULL) == enable) {
2444 return 0;
2445 } else if (!enable) {
2446 destroy_all_channels(dpif);
2447 return 0;
2448 } else {
2449 return dpif_netlink_refresh_channels(dpif, 1);
2450 }
2451 }
2452
2453 static int
2454 dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2455 {
2456 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2457 int error;
2458
2459 fat_rwlock_wrlock(&dpif->upcall_lock);
2460 error = dpif_netlink_recv_set__(dpif, enable);
2461 fat_rwlock_unlock(&dpif->upcall_lock);
2462
2463 return error;
2464 }
2465
2466 static int
2467 dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2468 {
2469 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2470 int error = 0;
2471
2472 #ifdef _WIN32
2473 /* Multiple upcall handlers will be supported once kernel datapath supports
2474 * it. */
2475 if (n_handlers > 1) {
2476 return error;
2477 }
2478 #endif
2479
2480 fat_rwlock_wrlock(&dpif->upcall_lock);
2481 if (dpif->handlers) {
2482 error = dpif_netlink_refresh_channels(dpif, n_handlers);
2483 }
2484 fat_rwlock_unlock(&dpif->upcall_lock);
2485
2486 return error;
2487 }
2488
2489 static int
2490 dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2491 uint32_t queue_id, uint32_t *priority)
2492 {
2493 if (queue_id < 0xf000) {
2494 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2495 return 0;
2496 } else {
2497 return EINVAL;
2498 }
2499 }
2500
/* Parses the OVS_PACKET Netlink message in 'buf' into '*upcall' and stores
 * the originating datapath ifindex in '*dp_ifindex'.  Returns 0 on success,
 * EINVAL if the message is malformed or not a miss/action upcall.
 *
 * On success '*upcall' borrows pointers into 'buf', so the caller must keep
 * 'buf' alive while '*upcall' is in use. */
static int
parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
                 struct dpif_upcall *upcall, int *dp_ifindex)
{
    static const struct nl_policy ovs_packet_policy[] = {
        /* Always present. */
        [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
                                     .min_len = ETH_HEADER_LEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },

        /* OVS_PACKET_CMD_ACTION only. */
        [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
    };

    /* Pull the netlink, genetlink, and OVS headers off a read-only view of
     * 'buf', then policy-check the remaining attributes. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_packet_family
        || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
                            ARRAY_SIZE(ovs_packet_policy))) {
        return EINVAL;
    }

    /* Only flow-miss and action upcalls are expected here. */
    int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
                : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
                : -1);
    if (type < 0) {
        return EINVAL;
    }

    /* (Re)set ALL fields of '*upcall' on successful return. */
    upcall->type = type;
    upcall->key = CONST_CAST(struct nlattr *,
                             nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
    upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
    dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
    upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
    upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
    upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
    upcall->mru = a[OVS_PACKET_ATTR_MRU];

    /* Allow overwriting the netlink attribute header without reallocating:
     * the stub starts one nlattr before the payload, and data/size are then
     * adjusted to expose just the packet bytes. */
    dp_packet_use_stub(&upcall->packet,
                       CONST_CAST(struct nlattr *,
                                  nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
                       nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
                       sizeof(struct nlattr));
    dp_packet_set_data(&upcall->packet,
                       (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
    dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));

    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
        /* Ethernet frame */
        upcall->packet.packet_type = htonl(PT_ETH);
    } else {
        /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
        ovs_be16 ethertype = 0;
        const struct nlattr *et_nla = nl_attr_find__(upcall->key,
                                                     upcall->key_len,
                                                     OVS_KEY_ATTR_ETHERTYPE);
        if (et_nla) {
            ethertype = nl_attr_get_be16(et_nla);
        }
        upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
                                                    ntohs(ethertype));
        dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
    }

    *dp_ifindex = ovs_header->dp_ifindex;

    return 0;
}
2580
2581 #ifdef _WIN32
2582 #define PACKET_RECV_BATCH_SIZE 50
/* Windows variant of upcall reception: polls each socket in the handler's
 * vport socket pool for an upcall destined for this datapath.  Returns 0 and
 * fills '*upcall' on success; EAGAIN when nothing is pending or the per-call
 * read budget (PACKET_RECV_BATCH_SIZE) is exhausted; otherwise a positive
 * errno value. */
static int
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler;
    int read_tries = 0;
    struct dpif_windows_vport_sock *sock_pool;
    uint32_t i;

    if (!dpif->handlers) {
        return EAGAIN;
    }

    /* Only one handler is supported currently. */
    if (handler_id >= 1) {
        return EAGAIN;
    }

    if (handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    sock_pool = handler->vport_sock_pool;

    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
        for (;;) {
            int dp_ifindex;
            int error;

            /* Bound the work done per call so one busy socket cannot starve
             * the caller. */
            if (++read_tries > PACKET_RECV_BATCH_SIZE) {
                return EAGAIN;
            }

            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
                continue;
            }

            /* XXX: ch->last_poll = time_msec(); */
            if (error) {
                if (error == EAGAIN) {
                    /* This socket is drained; move on to the next one. */
                    break;
                }
                return error;
            }

            /* Upcalls for other datapaths are silently discarded. */
            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (!error && dp_ifindex == dpif->dp_ifindex) {
                return 0;
            } else if (error) {
                return error;
            }
        }
    }

    return EAGAIN;
}
2647 #else
/* Receives one upcall for 'handler_id' into '*upcall'/'buf'.  Uses a batch
 * of epoll events cached in the handler ('event_offset'/'n_events'),
 * refilling it with a non-blocking epoll_wait() when exhausted.  Returns 0
 * on success; EAGAIN when nothing is pending or the 50-read budget is spent;
 * otherwise a positive errno value. */
static int
dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
                    struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler;
    int read_tries = 0;

    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    if (handler->event_offset >= handler->n_events) {
        int retval;

        handler->event_offset = handler->n_events = 0;

        /* Non-blocking poll (timeout 0); retry only on signal interrupt. */
        do {
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
                                dpif->uc_array_size, 0);
        } while (retval < 0 && errno == EINTR);

        if (retval < 0) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
        } else if (retval > 0) {
            handler->n_events = retval;
        }
    }

    while (handler->event_offset < handler->n_events) {
        /* Each epoll event's data.u32 is the channel index for that port. */
        int idx = handler->epoll_events[handler->event_offset].data.u32;
        struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];

        handler->event_offset++;

        for (;;) {
            int dp_ifindex;
            int error;

            /* Bound the work done per call. */
            if (++read_tries > 50) {
                return EAGAIN;
            }

            error = nl_sock_recv(ch->sock, buf, NULL, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                report_loss(dpif, ch, idx, handler_id);
                continue;
            }

            ch->last_poll = time_msec();
            if (error) {
                if (error == EAGAIN) {
                    /* This channel is drained; move on to the next event. */
                    break;
                }
                return error;
            }

            /* Upcalls for other datapaths are silently discarded. */
            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (!error && dp_ifindex == dpif->dp_ifindex) {
                return 0;
            } else if (error) {
                return error;
            }
        }
    }

    return EAGAIN;
}
2722 #endif
2723
2724 static int
2725 dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2726 struct dpif_upcall *upcall, struct ofpbuf *buf)
2727 {
2728 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2729 int error;
2730
2731 fat_rwlock_rdlock(&dpif->upcall_lock);
2732 #ifdef _WIN32
2733 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2734 #else
2735 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
2736 #endif
2737 fat_rwlock_unlock(&dpif->upcall_lock);
2738
2739 return error;
2740 }
2741
2742 static void
2743 dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
2744 OVS_REQ_RDLOCK(dpif->upcall_lock)
2745 {
2746 #ifdef _WIN32
2747 uint32_t i;
2748 struct dpif_windows_vport_sock *sock_pool =
2749 dpif->handlers[handler_id].vport_sock_pool;
2750
2751 /* Only one handler is supported currently. */
2752 if (handler_id >= 1) {
2753 return;
2754 }
2755
2756 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2757 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2758 }
2759 #else
2760 if (dpif->handlers && handler_id < dpif->n_handlers) {
2761 struct dpif_handler *handler = &dpif->handlers[handler_id];
2762
2763 poll_fd_wait(handler->epoll_fd, POLLIN);
2764 }
2765 #endif
2766 }
2767
2768 static void
2769 dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
2770 {
2771 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2772
2773 fat_rwlock_rdlock(&dpif->upcall_lock);
2774 dpif_netlink_recv_wait__(dpif, handler_id);
2775 fat_rwlock_unlock(&dpif->upcall_lock);
2776 }
2777
2778 static void
2779 dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
2780 OVS_REQ_WRLOCK(dpif->upcall_lock)
2781 {
2782 if (dpif->handlers) {
2783 size_t i, j;
2784
2785 for (i = 0; i < dpif->uc_array_size; i++ ) {
2786 if (!dpif->handlers[0].channels[i].sock) {
2787 continue;
2788 }
2789
2790 for (j = 0; j < dpif->n_handlers; j++) {
2791 nl_sock_drain(dpif->handlers[j].channels[i].sock);
2792 }
2793 }
2794 }
2795 }
2796
2797 static void
2798 dpif_netlink_recv_purge(struct dpif *dpif_)
2799 {
2800 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2801
2802 fat_rwlock_wrlock(&dpif->upcall_lock);
2803 dpif_netlink_recv_purge__(dpif);
2804 fat_rwlock_unlock(&dpif->upcall_lock);
2805 }
2806
/* Returns the kernel module's version string read from sysfs (caller must
 * free), or NULL if it cannot be determined (non-Linux, module not loaded,
 * or unreadable file). */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *f = fopen(LINUX_DATAPATH_VERSION_FILE, "r");
    if (f) {
        char version[MAX_VERSION_STR_SIZE];

        if (fgets(version, sizeof version, f)) {
            /* Strip the trailing newline, if any. */
            char *newline = strchr(version, '\n');
            if (newline) {
                *newline = '\0';
            }
            version_str = xstrdup(version);
        }
        fclose(f);
    }
#endif

    return version_str;
}
2836
/* State for an in-progress conntrack dump: wraps the generic ct_dpif dump
 * state around the netlink-specific dump handle. */
struct dpif_netlink_ct_dump_state {
    struct ct_dpif_dump_state up;        /* Generic dump state (embedded). */
    struct nl_ct_dump_state *nl_ct_dump; /* Underlying netlink dump. */
};
2841
2842 static int
2843 dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2844 struct ct_dpif_dump_state **dump_,
2845 const uint16_t *zone, int *ptot_bkts)
2846 {
2847 struct dpif_netlink_ct_dump_state *dump;
2848 int err;
2849
2850 dump = xzalloc(sizeof *dump);
2851 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
2852 if (err) {
2853 free(dump);
2854 return err;
2855 }
2856
2857 *dump_ = &dump->up;
2858
2859 return 0;
2860 }
2861
2862 static int
2863 dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2864 struct ct_dpif_dump_state *dump_,
2865 struct ct_dpif_entry *entry)
2866 {
2867 struct dpif_netlink_ct_dump_state *dump;
2868
2869 INIT_CONTAINER(dump, dump_, up);
2870
2871 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2872 }
2873
2874 static int
2875 dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2876 struct ct_dpif_dump_state *dump_)
2877 {
2878 struct dpif_netlink_ct_dump_state *dump;
2879 int err;
2880
2881 INIT_CONTAINER(dump, dump_, up);
2882
2883 err = nl_ct_dump_done(dump->nl_ct_dump);
2884 free(dump);
2885 return err;
2886 }
2887
2888 static int
2889 dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2890 const struct ct_dpif_tuple *tuple)
2891 {
2892 if (tuple) {
2893 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2894 } else if (zone) {
2895 return nl_ct_flush_zone(*zone);
2896 } else {
2897 return nl_ct_flush();
2898 }
2899 }
2900
2901 \f
2902 /* Meters */
2903 static void
2904 dpif_netlink_meter_get_features(const struct dpif * dpif OVS_UNUSED,
2905 struct ofputil_meter_features *features)
2906 {
2907 features->max_meters = 0;
2908 features->band_types = 0;
2909 features->capabilities = 0;
2910 features->max_bands = 0;
2911 features->max_color = 0;
2912 }
2913
2914 static int
2915 dpif_netlink_meter_set(struct dpif *dpif OVS_UNUSED,
2916 ofproto_meter_id *meter_id OVS_UNUSED,
2917 struct ofputil_meter_config *config OVS_UNUSED)
2918 {
2919 return EFBIG; /* meter_id out of range */
2920 }
2921
2922 static int
2923 dpif_netlink_meter_get(const struct dpif *dpif OVS_UNUSED,
2924 ofproto_meter_id meter_id OVS_UNUSED,
2925 struct ofputil_meter_stats *stats OVS_UNUSED,
2926 uint16_t n_bands OVS_UNUSED)
2927 {
2928 return EFBIG; /* meter_id out of range */
2929 }
2930
2931 static int
2932 dpif_netlink_meter_del(struct dpif *dpif OVS_UNUSED,
2933 ofproto_meter_id meter_id OVS_UNUSED,
2934 struct ofputil_meter_stats *stats OVS_UNUSED,
2935 uint16_t n_bands OVS_UNUSED)
2936 {
2937 return EFBIG; /* meter_id out of range */
2938 }
2939
2940 \f
/* Datapath interface class for the Linux kernel "system" datapath.
 * Entries are positional, following the member order of struct dpif_class;
 * NULL marks an operation this datapath does not implement. */
const struct dpif_class dpif_netlink_class = {
    "system",
    NULL,                       /* init */
    dpif_netlink_enumerate,
    NULL,
    dpif_netlink_open,
    dpif_netlink_close,
    dpif_netlink_destroy,
    dpif_netlink_run,
    NULL,                       /* wait */
    dpif_netlink_get_stats,
    dpif_netlink_port_add,
    dpif_netlink_port_del,
    NULL,                       /* port_set_config */
    dpif_netlink_port_query_by_number,
    dpif_netlink_port_query_by_name,
    dpif_netlink_port_get_pid,
    dpif_netlink_port_dump_start,
    dpif_netlink_port_dump_next,
    dpif_netlink_port_dump_done,
    dpif_netlink_port_poll,
    dpif_netlink_port_poll_wait,
    dpif_netlink_flow_flush,
    dpif_netlink_flow_dump_create,
    dpif_netlink_flow_dump_destroy,
    dpif_netlink_flow_dump_thread_create,
    dpif_netlink_flow_dump_thread_destroy,
    dpif_netlink_flow_dump_next,
    dpif_netlink_operate,
    dpif_netlink_recv_set,
    dpif_netlink_handlers_set,
    NULL,                       /* set_config */
    dpif_netlink_queue_to_priority,
    dpif_netlink_recv,
    dpif_netlink_recv_wait,
    dpif_netlink_recv_purge,
    NULL,                       /* register_dp_purge_cb */
    NULL,                       /* register_upcall_cb */
    NULL,                       /* enable_upcall */
    NULL,                       /* disable_upcall */
    dpif_netlink_get_datapath_version, /* get_datapath_version */
    dpif_netlink_ct_dump_start,
    dpif_netlink_ct_dump_next,
    dpif_netlink_ct_dump_done,
    dpif_netlink_ct_flush,
    NULL,                       /* ct_set_maxconns */
    NULL,                       /* ct_get_maxconns */
    NULL,                       /* ct_get_nconns */
    dpif_netlink_meter_get_features,
    dpif_netlink_meter_set,
    dpif_netlink_meter_get,
    dpif_netlink_meter_del,
};
2994
/* One-time initialization: looks up the Generic Netlink family and multicast
 * group numbers for the OVS kernel interface.  Thread-safe; the result of
 * the first call is cached in a static and returned to all callers.
 * Returns 0 on success, otherwise a positive errno value. */
static int
dpif_netlink_init(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static int error;               /* Sticky result of the first attempt. */

    if (ovsthread_once_start(&once)) {
        error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
                                      &ovs_datapath_family);
        if (error) {
            VLOG_INFO("Generic Netlink family '%s' does not exist. "
                      "The Open vSwitch kernel module is probably not loaded.",
                      OVS_DATAPATH_FAMILY);
        }
        /* Remaining lookups run only while everything before succeeded. */
        if (!error) {
            error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
        }
        if (!error) {
            error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
        }
        if (!error) {
            error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
                                          &ovs_packet_family);
        }
        if (!error) {
            error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
                                           &ovs_vport_mcgroup);
        }

        /* Probed regardless of errors above. */
        ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();

        ovsthread_once_done(&once);
    }

    return error;
}
3031
/* Returns true if 'name' is the name of a kernel vport of type "internal".
 * On any error, dpif_netlink_vport_get() leaves 'reply' cleared (type 0),
 * so the final comparison is safe and yields false. */
bool
dpif_netlink_is_internal_device(const char *name)
{
    struct dpif_netlink_vport reply;
    struct ofpbuf *buf;
    int error;

    error = dpif_netlink_vport_get(name, &reply, &buf);
    if (!error) {
        ofpbuf_delete(buf);
    } else if (error != ENODEV && error != ENOENT) {
        /* ENODEV/ENOENT simply mean "no such vport"; anything else is
         * unexpected and worth logging. */
        VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
                     name, ovs_strerror(error));
    }

    return reply.type == OVS_VPORT_TYPE_INTERNAL;
}
3049
3050 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
3051 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
3052 * positive errno value.
3053 *
3054 * 'vport' will contain pointers into 'buf', so the caller should not free
3055 * 'buf' while 'vport' is still in use. */
static int
dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
                               const struct ofpbuf *buf)
{
    static const struct nl_policy ovs_vport_policy[] = {
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
                                   .optional = true },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
    };

    dpif_netlink_vport_init(vport);

    /* Pull the netlink, genetlink, and OVS headers off a read-only view of
     * 'buf', then policy-check the remaining attributes. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_vport_family
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
                            ARRAY_SIZE(ovs_vport_policy))) {
        return EINVAL;
    }

    /* Mandatory fields. */
    vport->cmd = genl->cmd;
    vport->dp_ifindex = ovs_header->dp_ifindex;
    vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
        /* The attribute payload is an array of 32-bit pids. */
        vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
                               / (sizeof *vport->upcall_pids);
        vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);

    }
    if (a[OVS_VPORT_ATTR_STATS]) {
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
    }
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
    }
    if (a[OVS_VPORT_ATTR_NETNSID]) {
        netnsid_set(&vport->netnsid,
                    nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
    } else {
        /* No netns attribute means the vport is in the local namespace. */
        netnsid_set_local(&vport->netnsid);
    }
    return 0;
}
3112
3113 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
3114 * followed by Netlink attributes corresponding to 'vport'. */
static void
dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
                             struct ofpbuf *buf)
{
    struct ovs_header *ovs_header;

    /* Genetlink header: request with echo so the kernel sends a reply. */
    nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
                          vport->cmd, OVS_VPORT_VERSION);

    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
    ovs_header->dp_ifindex = vport->dp_ifindex;

    /* Each attribute is emitted only when set in 'vport'. */
    if (vport->port_no != ODPP_NONE) {
        nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
    }

    if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
        nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
    }

    if (vport->name) {
        nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
    }

    if (vport->upcall_pids) {
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
                          vport->upcall_pids,
                          vport->n_upcall_pids * sizeof *vport->upcall_pids);
    }

    if (vport->stats) {
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
                          vport->stats, sizeof *vport->stats);
    }

    if (vport->options) {
        nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
                          vport->options, vport->options_len);
    }
}
3155
3156 /* Clears 'vport' to "empty" values. */
3157 void
3158 dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
3159 {
3160 memset(vport, 0, sizeof *vport);
3161 vport->port_no = ODPP_NONE;
3162 }
3163
3164 /* Executes 'request' in the kernel datapath. If the command fails, returns a
3165 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3166 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
3167 * result of the command is expected to be an ovs_vport also, which is decoded
3168 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3169 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
int
dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
                            struct dpif_netlink_vport *reply,
                            struct ofpbuf **bufp)
{
    struct ofpbuf *request_buf;
    int error;

    /* 'reply' and 'bufp' must be supplied together or not at all. */
    ovs_assert((reply != NULL) == (bufp != NULL));

    error = dpif_netlink_init();
    if (error) {
        /* Even on failure, leave 'reply' in a defined (empty) state so
         * callers may safely inspect it. */
        if (reply) {
            *bufp = NULL;
            dpif_netlink_vport_init(reply);
        }
        return error;
    }

    /* Serialize the request, run the transaction, then parse any reply. */
    request_buf = ofpbuf_new(1024);
    dpif_netlink_vport_to_ofpbuf(request, request_buf);
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
    ofpbuf_delete(request_buf);

    if (reply) {
        if (!error) {
            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
        }
        if (error) {
            /* Parse failure: reset the outputs so nothing dangles. */
            dpif_netlink_vport_init(reply);
            ofpbuf_delete(*bufp);
            *bufp = NULL;
        }
    }
    return error;
}
3206
3207 /* Obtains information about the kernel vport named 'name' and stores it into
3208 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
3209 * longer needed ('reply' will contain pointers into '*bufp'). */
3210 int
3211 dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
3212 struct ofpbuf **bufp)
3213 {
3214 struct dpif_netlink_vport request;
3215
3216 dpif_netlink_vport_init(&request);
3217 request.cmd = OVS_VPORT_CMD_GET;
3218 request.name = name;
3219
3220 return dpif_netlink_vport_transact(&request, reply, bufp);
3221 }
3222
3223 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
3224 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
3225 * positive errno value.
3226 *
3227 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
3228 * while 'dp' is still in use. */
static int
dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
{
    static const struct nl_policy ovs_datapath_policy[] = {
        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
                                .optional = true },
        [OVS_DP_ATTR_MEGAFLOW_STATS] = {
                                NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
                                .optional = true },
    };

    dpif_netlink_dp_init(dp);

    /* Pull the netlink, genetlink, and OVS headers off a read-only view of
     * 'buf', then policy-check the remaining attributes. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_datapath_family
        || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
                            ARRAY_SIZE(ovs_datapath_policy))) {
        return EINVAL;
    }

    /* Mandatory fields. */
    dp->cmd = genl->cmd;
    dp->dp_ifindex = ovs_header->dp_ifindex;
    dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
    if (a[OVS_DP_ATTR_STATS]) {
        dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
    }

    if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
        dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
    }

    return 0;
}
3269
3270 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
3271 static void
3272 dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
3273 {
3274 struct ovs_header *ovs_header;
3275
3276 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
3277 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
3278 OVS_DATAPATH_VERSION);
3279
3280 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3281 ovs_header->dp_ifindex = dp->dp_ifindex;
3282
3283 if (dp->name) {
3284 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
3285 }
3286
3287 if (dp->upcall_pid) {
3288 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
3289 }
3290
3291 if (dp->user_features) {
3292 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
3293 }
3294
3295 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
3296 }
3297
/* Clears 'dp' to "empty" values.  memset zeroes every byte, including any
 * padding, so optional pointer fields read as null and counters as zero. */
static void
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
{
    memset(dp, 0, sizeof *dp);
}
3304
3305 static void
3306 dpif_netlink_dp_dump_start(struct nl_dump *dump)
3307 {
3308 struct dpif_netlink_dp request;
3309 struct ofpbuf *buf;
3310
3311 dpif_netlink_dp_init(&request);
3312 request.cmd = OVS_DP_CMD_GET;
3313
3314 buf = ofpbuf_new(1024);
3315 dpif_netlink_dp_to_ofpbuf(&request, buf);
3316 nl_dump_start(dump, NETLINK_GENERIC, buf);
3317 ofpbuf_delete(buf);
3318 }
3319
3320 /* Executes 'request' in the kernel datapath. If the command fails, returns a
3321 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3322 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
3323 * result of the command is expected to be of the same form, which is decoded
3324 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
3325 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
3326 static int
3327 dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
3328 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
3329 {
3330 struct ofpbuf *request_buf;
3331 int error;
3332
3333 ovs_assert((reply != NULL) == (bufp != NULL));
3334
3335 request_buf = ofpbuf_new(1024);
3336 dpif_netlink_dp_to_ofpbuf(request, request_buf);
3337 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
3338 ofpbuf_delete(request_buf);
3339
3340 if (reply) {
3341 dpif_netlink_dp_init(reply);
3342 if (!error) {
3343 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
3344 }
3345 if (error) {
3346 ofpbuf_delete(*bufp);
3347 *bufp = NULL;
3348 }
3349 }
3350 return error;
3351 }
3352
3353 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
3354 * The caller must free '*bufp' when the reply is no longer needed ('reply'
3355 * will contain pointers into '*bufp'). */
3356 static int
3357 dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
3358 struct ofpbuf **bufp)
3359 {
3360 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3361 struct dpif_netlink_dp request;
3362
3363 dpif_netlink_dp_init(&request);
3364 request.cmd = OVS_DP_CMD_GET;
3365 request.dp_ifindex = dpif->dp_ifindex;
3366
3367 return dpif_netlink_dp_transact(&request, reply, bufp);
3368 }
3369
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
 * by Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
 * positive errno value (EINVAL for a malformed message or one that carries
 * neither OVS_FLOW_ATTR_KEY nor OVS_FLOW_ATTR_UFID -- a flow must be
 * identifiable by at least one of the two).
 *
 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
 * while 'flow' is still in use. */
static int
dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
                              const struct ofpbuf *buf)
{
    /* Explicitly sized to __OVS_FLOW_ATTR_MAX so the 'a' array below has a
     * slot for every defined attribute type, including the ones left out of
     * the policy. */
    static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
                                  .optional = true },
        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
        [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
        /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
        /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
    };

    /* Start from all-zero so fields for absent attributes stay null/0. */
    dpif_netlink_flow_init(flow);

    /* Read-only view over 'buf': pull the fixed headers, then parse the
     * remaining bytes as attributes. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_flow_family
        || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
                            ARRAY_SIZE(ovs_flow_policy))) {
        return EINVAL;
    }
    /* A flow without both key and UFID cannot be identified. */
    if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
        return EINVAL;
    }

    flow->nlmsg_flags = nlmsg->nlmsg_flags;
    flow->dp_ifindex = ovs_header->dp_ifindex;
    if (a[OVS_FLOW_ATTR_KEY]) {
        flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
        flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
    }

    if (a[OVS_FLOW_ATTR_UFID]) {
        flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
        flow->ufid_present = true;
    }
    if (a[OVS_FLOW_ATTR_MASK]) {
        flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
        flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
    }
    if (a[OVS_FLOW_ATTR_ACTIONS]) {
        flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
        flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
    }
    if (a[OVS_FLOW_ATTR_STATS]) {
        flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
    }
    if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
        flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
    }
    if (a[OVS_FLOW_ATTR_USED]) {
        flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
    }
    return 0;
}
3442
3443
/*
 * Copies the Netlink attributes in 'data' (of 'data_len' bytes) into 'buf'
 * as a nested attribute of type 'type', filtering any OVS_KEY_ATTR_PACKET_TYPE
 * attribute out.  If the flow is not Ethernet (no OVS_KEY_ATTR_ETHERNET
 * attribute), the packet type's namespace type is carried instead as
 * OVS_KEY_ATTR_ETHERTYPE, either overwriting an existing ETHERTYPE attribute
 * in place or appending a new one.  Without a PACKET_TYPE attribute, 'data'
 * is copied through unchanged.
 */
static void
put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
                        const struct nlattr *data, uint16_t data_len)
{
    const struct nlattr *packet_type;

    packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);

    if (packet_type) {
        /* exclude PACKET_TYPE Netlink attribute: copy the bytes before it
         * and the bytes after it, skipping the attribute itself (a u32
         * payload, so NL_A_U32_SIZE bytes including header and padding). */
        ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
        size_t packet_type_len = NL_A_U32_SIZE;
        size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
        size_t second_chunk_size = data_len - first_chunk_size
                                   - packet_type_len;
        struct nlattr *next_attr = nl_attr_next(packet_type);
        size_t ofs;

        ofs = nl_msg_start_nested(buf, type);
        nl_msg_put(buf, data, first_chunk_size);
        nl_msg_put(buf, next_attr, second_chunk_size);
        if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
            /* Non-Ethernet flow: translate the packet type's namespace type
             * into an EtherType attribute. */
            ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
            const struct nlattr *nla;

            nla = nl_attr_find(buf, NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
            if (nla) {
                /* Rewrite the already-copied ETHERTYPE value in place. */
                ovs_be16 *ethertype;

                ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
                *ethertype = pt;
            } else {
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
            }
        }
        nl_msg_end_nested(buf, ofs);
    } else {
        /* No PACKET_TYPE attribute present: pass 'data' through verbatim. */
        nl_msg_put_unspec(buf, type, data, data_len);
    }
}
3489
3490 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
3491 * followed by Netlink attributes corresponding to 'flow'. */
3492 static void
3493 dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
3494 struct ofpbuf *buf)
3495 {
3496 struct ovs_header *ovs_header;
3497
3498 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
3499 NLM_F_REQUEST | flow->nlmsg_flags,
3500 flow->cmd, OVS_FLOW_VERSION);
3501
3502 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3503 ovs_header->dp_ifindex = flow->dp_ifindex;
3504
3505 if (flow->ufid_present) {
3506 nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
3507 }
3508 if (flow->ufid_terse) {
3509 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
3510 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
3511 | OVS_UFID_F_OMIT_ACTIONS);
3512 }
3513 if (!flow->ufid_terse || !flow->ufid_present) {
3514 if (flow->key_len) {
3515 put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
3516 flow->key_len);
3517 }
3518 if (flow->mask_len) {
3519 put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
3520 flow->mask_len);
3521 }
3522 if (flow->actions || flow->actions_len) {
3523 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
3524 flow->actions, flow->actions_len);
3525 }
3526 }
3527
3528 /* We never need to send these to the kernel. */
3529 ovs_assert(!flow->stats);
3530 ovs_assert(!flow->tcp_flags);
3531 ovs_assert(!flow->used);
3532
3533 if (flow->clear) {
3534 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
3535 }
3536 if (flow->probe) {
3537 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
3538 }
3539 }
3540
/* Clears 'flow' to "empty" values.  memset zeroes every byte, including any
 * padding, so optional pointer fields read as null and lengths as zero. */
static void
dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
{
    memset(flow, 0, sizeof *flow);
}
3547
3548 /* Executes 'request' in the kernel datapath. If the command fails, returns a
3549 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3550 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
3551 * result of the command is expected to be a flow also, which is decoded and
3552 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
3553 * is no longer needed ('reply' will contain pointers into '*bufp'). */
3554 static int
3555 dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
3556 struct dpif_netlink_flow *reply,
3557 struct ofpbuf **bufp)
3558 {
3559 struct ofpbuf *request_buf;
3560 int error;
3561
3562 ovs_assert((reply != NULL) == (bufp != NULL));
3563
3564 if (reply) {
3565 request->nlmsg_flags |= NLM_F_ECHO;
3566 }
3567
3568 request_buf = ofpbuf_new(1024);
3569 dpif_netlink_flow_to_ofpbuf(request, request_buf);
3570 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
3571 ofpbuf_delete(request_buf);
3572
3573 if (reply) {
3574 if (!error) {
3575 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
3576 }
3577 if (error) {
3578 dpif_netlink_flow_init(reply);
3579 ofpbuf_delete(*bufp);
3580 *bufp = NULL;
3581 }
3582 }
3583 return error;
3584 }
3585
3586 static void
3587 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
3588 struct dpif_flow_stats *stats)
3589 {
3590 if (flow->stats) {
3591 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
3592 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
3593 } else {
3594 stats->n_packets = 0;
3595 stats->n_bytes = 0;
3596 }
3597 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
3598 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
3599 }
3600
3601 /* Logs information about a packet that was recently lost in 'ch' (in
3602 * 'dpif_'). */
3603 static void
3604 report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
3605 uint32_t handler_id)
3606 {
3607 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
3608 struct ds s;
3609
3610 if (VLOG_DROP_WARN(&rl)) {
3611 return;
3612 }
3613
3614 ds_init(&s);
3615 if (ch->last_poll != LLONG_MIN) {
3616 ds_put_format(&s, " (last polled %lld ms ago)",
3617 time_msec() - ch->last_poll);
3618 }
3619
3620 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
3621 dpif_name(&dpif->dpif), ch_idx, handler_id);
3622 ds_destroy(&s);
3623 }