]> git.proxmox.com Git - mirror_ovs.git/blob - lib/dpif-netlink.c
5a2ba2d5b6336b024db7bdc8cefb4b986184552d
[mirror_ovs.git] / lib / dpif-netlink.c
1 /*
2 * Copyright (c) 2008-2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "dpif-netlink.h"
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <net/if.h>
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
28 #include <poll.h>
29 #include <stdlib.h>
30 #include <strings.h>
31 #include <sys/epoll.h>
32 #include <sys/stat.h>
33 #include <unistd.h>
34
35 #include "bitmap.h"
36 #include "dpif-netlink-rtnl.h"
37 #include "dpif-provider.h"
38 #include "fat-rwlock.h"
39 #include "flow.h"
40 #include "netdev-linux.h"
41 #include "netdev-provider.h"
42 #include "netdev-vport.h"
43 #include "netdev.h"
44 #include "netlink-conntrack.h"
45 #include "netlink-notifier.h"
46 #include "netlink-socket.h"
47 #include "netlink.h"
48 #include "netnsid.h"
49 #include "odp-util.h"
50 #include "openvswitch/dynamic-string.h"
51 #include "openvswitch/flow.h"
52 #include "openvswitch/match.h"
53 #include "openvswitch/ofpbuf.h"
54 #include "openvswitch/poll-loop.h"
55 #include "openvswitch/shash.h"
56 #include "openvswitch/thread.h"
57 #include "openvswitch/vlog.h"
58 #include "packets.h"
59 #include "random.h"
60 #include "sset.h"
61 #include "timeval.h"
62 #include "unaligned.h"
63 #include "util.h"
64
65 VLOG_DEFINE_THIS_MODULE(dpif_netlink);
66 #ifdef _WIN32
67 #include "wmi.h"
68 enum { WINDOWS = 1 };
69 #else
70 enum { WINDOWS = 0 };
71 #endif
72 enum { MAX_PORTS = USHRT_MAX };
73
74 /* This ethtool flag was introduced in Linux 2.6.24, so it might be
75 * missing if we have old headers. */
76 #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */
77
78 #define FLOW_DUMP_MAX_BATCH 50
79 #define OPERATE_MAX_OPS 50
80
81 #ifndef EPOLLEXCLUSIVE
82 #define EPOLLEXCLUSIVE (1u << 28)
83 #endif
84
85 /* In-memory form of an OVS_DP_* Generic Netlink message.  Requests fill in
86  * the members to serialize; replies parsed from a received buffer point the
87  * attribute members into that buffer (see dpif_netlink_dp_from_ofpbuf()). */
85 struct dpif_netlink_dp {
86     /* Generic Netlink header. */
87     uint8_t cmd;
88
89     /* struct ovs_header. */
90     int dp_ifindex;
91
92     /* Attributes. */
93     const char *name;                  /* OVS_DP_ATTR_NAME. */
94     const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
95     uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
96     const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
97     const struct ovs_dp_megaflow_stats *megaflow_stats;
98                                        /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
99 };
100
101 static void dpif_netlink_dp_init(struct dpif_netlink_dp *);
102 static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *,
103 const struct ofpbuf *);
104 static void dpif_netlink_dp_dump_start(struct nl_dump *);
105 static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
106 struct dpif_netlink_dp *reply,
107 struct ofpbuf **bufp);
108 static int dpif_netlink_dp_get(const struct dpif *,
109 struct dpif_netlink_dp *reply,
110 struct ofpbuf **bufp);
111
112 /* In-memory form of an OVS_FLOW_* Generic Netlink message, used both to
113  * build requests and to hold parsed replies. */
112 struct dpif_netlink_flow {
113     /* Generic Netlink header. */
114     uint8_t cmd;
115
116     /* struct ovs_header. */
117     unsigned int nlmsg_flags;
118     int dp_ifindex;
119
120     /* Attributes.
121      *
122      * The 'stats' member points to 64-bit data that might only be aligned on
123      * 32-bit boundaries, so get_unaligned_u64() should be used to access its
124      * values.
125      *
126      * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
127      * the Netlink version of the command, even if actions_len is zero. */
128     const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
129     size_t key_len;
130     const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
131     size_t mask_len;
132     const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
133     size_t actions_len;
134     ovs_u128 ufid;                      /* OVS_FLOW_ATTR_FLOW_ID. */
135     bool ufid_present;                  /* Is there a UFID? */
136     bool ufid_terse;                    /* Skip serializing key/mask/acts? */
137     const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
138     const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
139     const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
140     bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
141     bool probe;                         /* OVS_FLOW_ATTR_PROBE. */
142 };
143
144 static void dpif_netlink_flow_init(struct dpif_netlink_flow *);
145 static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *,
146 const struct ofpbuf *);
147 static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *,
148 struct ofpbuf *);
149 static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
150 struct dpif_netlink_flow *reply,
151 struct ofpbuf **bufp);
152 static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *,
153 struct dpif_flow_stats *);
154 static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
155 const struct dpif_netlink_flow *);
156
157 /* One of the dpif channels between the kernel and userspace.  There is one
158  * channel per datapath port (see 'channels' in struct dpif_netlink). */
158 struct dpif_channel {
159     struct nl_sock *sock;       /* Netlink socket. */
160     long long int last_poll;    /* Last time this channel was polled. */
161 };
162
163 #ifdef _WIN32
164 #define VPORT_SOCK_POOL_SIZE 1
165 /* On Windows, there is no native support for epoll.  There are equivalent
166  * interfaces though, that are not used currently.  For simpicity, a pool of
167  * netlink sockets is used.  Each socket is represented by 'struct
168  * dpif_windows_vport_sock'.  Since it is a pool, multiple OVS ports may be
169  * sharing the same socket.  In the future, we can add a reference count and
170  * such fields. */
171 struct dpif_windows_vport_sock {
172     struct nl_sock *nl_sock;    /* netlink socket. */
173 };
174 #endif
175
176 /* Per-handler-thread upcall state: each handler owns an epoll set that
177  * watches every port's channel socket. */
176 struct dpif_handler {
177     struct epoll_event *epoll_events;
178     int epoll_fd;                 /* epoll fd that includes channel socks. */
179     int n_events;                 /* Num events returned by epoll_wait(). */
180     int event_offset;             /* Offset into 'epoll_events'. */
181
182 #ifdef _WIN32
183     /* Pool of sockets. */
184     struct dpif_windows_vport_sock *vport_sock_pool;
185     size_t last_used_pool_idx;    /* Index to aid in allocating a
186                                      socket in the pool to a port. */
187 #endif
188 };
189
190 /* Datapath interface for the openvswitch Linux kernel module. */
191 struct dpif_netlink {
192     struct dpif dpif;             /* Embedded base class; must be first. */
193     int dp_ifindex;               /* Kernel datapath ifindex. */
194
195     /* Upcall messages. */
196     struct fat_rwlock upcall_lock;  /* Protects the members below. */
197     struct dpif_handler *handlers;
198     uint32_t n_handlers;          /* Num of upcall handlers. */
199     struct dpif_channel *channels; /* Array of channels for each port. */
200     int uc_array_size;            /* Size of 'handler->channels' and */
201                                   /* 'handler->epoll_events'. */
202
203     /* Change notification. */
204     struct nl_sock *port_notifier; /* vport multicast group subscriber. */
205     bool refresh_channels;         /* Set to trigger channel refresh in
206                                       dpif_netlink_run(). */
207 };
207
208 static void report_loss(struct dpif_netlink *, struct dpif_channel *,
209 uint32_t ch_idx, uint32_t handler_id);
210
211 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
212
213 /* Generic Netlink family numbers for OVS.
214 *
215 * Initialized by dpif_netlink_init(). */
216 static int ovs_datapath_family;
217 static int ovs_vport_family;
218 static int ovs_flow_family;
219 static int ovs_packet_family;
220 static int ovs_meter_family;
221 static int ovs_ct_limit_family;
222
223 /* Generic Netlink multicast groups for OVS.
224 *
225 * Initialized by dpif_netlink_init(). */
226 static unsigned int ovs_vport_mcgroup;
227
228 /* If true, tunnel devices are created using OVS compat/genetlink.
229 * If false, tunnel devices are created with rtnetlink and using light weight
230 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
231 * to using the compat interface. */
232 static bool ovs_tunnels_out_of_tree = true;
233
234 static int dpif_netlink_init(void);
235 static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
236 static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
237 odp_port_t port_no);
238 static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
239 static int dpif_netlink_refresh_channels(struct dpif_netlink *,
240 uint32_t n_handlers);
241 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
242 struct ofpbuf *);
243 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
244 const struct ofpbuf *);
245 static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
246 odp_port_t port_no, const char *port_name,
247 struct dpif_port *dpif_port);
248
249 static struct dpif_netlink *
250 dpif_netlink_cast(const struct dpif *dpif)
251 {
252 dpif_assert_class(dpif, &dpif_netlink_class);
253 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
254 }
255
256 static int
257 dpif_netlink_enumerate(struct sset *all_dps,
258 const struct dpif_class *dpif_class OVS_UNUSED)
259 {
260 struct nl_dump dump;
261 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
262 struct ofpbuf msg, buf;
263 int error;
264
265 error = dpif_netlink_init();
266 if (error) {
267 return error;
268 }
269
270 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
271 dpif_netlink_dp_dump_start(&dump);
272 while (nl_dump_next(&dump, &msg, &buf)) {
273 struct dpif_netlink_dp dp;
274
275 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
276 sset_add(all_dps, dp.name);
277 }
278 }
279 ofpbuf_uninit(&buf);
280 return nl_dump_done(&dump);
281 }
282
283 static int
284 dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
285 bool create, struct dpif **dpifp)
286 {
287 struct dpif_netlink_dp dp_request, dp;
288 struct ofpbuf *buf;
289 uint32_t upcall_pid;
290 int error;
291
292 error = dpif_netlink_init();
293 if (error) {
294 return error;
295 }
296
297 /* Create or look up datapath. */
298 dpif_netlink_dp_init(&dp_request);
299 if (create) {
300 dp_request.cmd = OVS_DP_CMD_NEW;
301 upcall_pid = 0;
302 dp_request.upcall_pid = &upcall_pid;
303 } else {
304 /* Use OVS_DP_CMD_SET to report user features */
305 dp_request.cmd = OVS_DP_CMD_SET;
306 }
307 dp_request.name = name;
308 dp_request.user_features |= OVS_DP_F_UNALIGNED;
309 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
310 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
311 if (error) {
312 return error;
313 }
314
315 error = open_dpif(&dp, dpifp);
316 ofpbuf_delete(buf);
317 return error;
318 }
319
320 static int
321 open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
322 {
323 struct dpif_netlink *dpif;
324
325 dpif = xzalloc(sizeof *dpif);
326 dpif->port_notifier = NULL;
327 fat_rwlock_init(&dpif->upcall_lock);
328
329 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
330 dp->dp_ifindex, dp->dp_ifindex);
331
332 dpif->dp_ifindex = dp->dp_ifindex;
333 *dpifp = &dpif->dpif;
334
335 return 0;
336 }
337
#ifdef _WIN32
/* Unsubscribes and destroys every socket in 'handler''s vport socket pool,
 * then frees the pool itself.  Safe to call when no pool was allocated. */
static void
vport_delete_sock_pool(struct dpif_handler *handler)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    if (handler->vport_sock_pool) {
        uint32_t i;
        struct dpif_windows_vport_sock *sock_pool =
            handler->vport_sock_pool;

        for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
            if (sock_pool[i].nl_sock) {
                /* Stop packet delivery before tearing the socket down. */
                nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
                nl_sock_destroy(sock_pool[i].nl_sock);
                sock_pool[i].nl_sock = NULL;
            }
        }

        free(handler->vport_sock_pool);
        handler->vport_sock_pool = NULL;
    }
}
360
/* Allocates 'handler''s pool of VPORT_SOCK_POOL_SIZE Generic Netlink sockets
 * and subscribes each one to packet delivery.  On failure, tears down any
 * sockets already created and returns a positive errno value. */
static int
vport_create_sock_pool(struct dpif_handler *handler)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_windows_vport_sock *sock_pool;
    size_t i;
    int error = 0;

    sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
        error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
        if (error) {
            goto error;
        }

        /* Enable the netlink socket to receive packets.  This is equivalent to
         * calling nl_sock_join_mcgroup() to receive events. */
        error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
        if (error) {
            goto error;
        }
    }

    handler->vport_sock_pool = sock_pool;
    handler->last_used_pool_idx = 0;
    return 0;

error:
    /* Cleans up the partially built pool, including 'sock_pool' itself. */
    vport_delete_sock_pool(handler);
    return error;
}
#endif /* _WIN32 */
393
394 /* Given the port number 'port_idx', extracts the pid of netlink socket
395 * associated to the port and assigns it to 'upcall_pid'. */
396 static bool
397 vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
398 uint32_t *upcall_pid)
399 {
400 /* Since the nl_sock can only be assigned in either all
401 * or none "dpif" channels, the following check
402 * would suffice. */
403 if (!dpif->channels[port_idx].sock) {
404 return false;
405 }
406 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
407
408 *upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
409
410 return true;
411 }
412
/* Registers 'socksp' as the upcall channel for 'port_no' and adds its fd to
 * every handler's epoll set.  Grows 'dpif->channels' and each handler's
 * 'epoll_events' array when 'port_no' exceeds the current capacity.  Returns
 * 0 on success (including when upcalls are disabled), otherwise a positive
 * errno value; on failure the channel slot is left unset and any epoll
 * registrations already made are rolled back. */
static int
vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
                  struct nl_sock *socksp)
{
    struct epoll_event event;
    uint32_t port_idx = odp_to_u32(port_no);
    size_t i;
    int error;

    /* No handlers means upcalls are off; nothing to register. */
    if (dpif->handlers == NULL) {
        return 0;
    }

    /* We assume that the datapath densely chooses port numbers, which can
     * therefore be used as an index into 'channels' and 'epoll_events' of
     * 'dpif'. */
    if (port_idx >= dpif->uc_array_size) {
        uint32_t new_size = port_idx + 1;

        if (new_size > MAX_PORTS) {
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
                         dpif_name(&dpif->dpif), port_no);
            return EFBIG;
        }

        dpif->channels = xrealloc(dpif->channels,
                                  new_size * sizeof *dpif->channels);

        /* Mark the freshly grown slots as unused. */
        for (i = dpif->uc_array_size; i < new_size; i++) {
            dpif->channels[i].sock = NULL;
        }

        for (i = 0; i < dpif->n_handlers; i++) {
            struct dpif_handler *handler = &dpif->handlers[i];

            handler->epoll_events = xrealloc(handler->epoll_events,
                new_size * sizeof *handler->epoll_events);

        }
        dpif->uc_array_size = new_size;
    }

    memset(&event, 0, sizeof event);
    /* EPOLLEXCLUSIVE: only one of the handlers sharing this fd is woken per
     * event (defined locally above for old kernel headers). */
    event.events = EPOLLIN | EPOLLEXCLUSIVE;
    event.data.u32 = port_idx;

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

#ifndef _WIN32
        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp),
                      &event) < 0) {
            error = errno;
            goto error;
        }
#endif
    }
    dpif->channels[port_idx].sock = socksp;
    dpif->channels[port_idx].last_poll = LLONG_MIN;

    return 0;

error:
#ifndef _WIN32
    /* Roll back: 'i' is the index of the handler that failed, so walk
     * backwards over the handlers that were successfully updated. */
    while (i--) {
        epoll_ctl(dpif->handlers[i].epoll_fd, EPOLL_CTL_DEL,
                  nl_sock_fd(socksp), NULL);
    }
#endif
    dpif->channels[port_idx].sock = NULL;

    return error;
}
486
/* Removes the upcall channel for 'port_no': deregisters its socket from every
 * handler's epoll set, resets each handler's pending-event state, and
 * destroys the socket.  A no-op if the port has no channel. */
static void
vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
{
    uint32_t port_idx = odp_to_u32(port_no);
    size_t i;

    if (!dpif->handlers || port_idx >= dpif->uc_array_size
        || !dpif->channels[port_idx].sock) {
        return;
    }

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];
#ifndef _WIN32
        epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
                  nl_sock_fd(dpif->channels[port_idx].sock), NULL);
#endif
        /* Discard any events already fetched by epoll_wait(); they may
         * reference the channel being deleted. */
        handler->event_offset = handler->n_events = 0;
    }
#ifndef _WIN32
    nl_sock_destroy(dpif->channels[port_idx].sock);
#endif
    dpif->channels[port_idx].sock = NULL;
}
511
/* Tears down all upcall state: disables upcalls on every port in the kernel,
 * deletes every channel, uninitializes and frees all handlers.  Requires
 * 'dpif->upcall_lock' held for writing. */
static void
destroy_all_channels(struct dpif_netlink *dpif)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    unsigned int i;

    if (!dpif->handlers) {
        return;
    }

    for (i = 0; i < dpif->uc_array_size; i++ ) {
        struct dpif_netlink_vport vport_request;
        uint32_t upcall_pids = 0;

        if (!dpif->channels[i].sock) {
            continue;
        }

        /* Turn off upcalls.  Setting an upcall PID of 0 tells the kernel to
         * stop sending upcalls for this port; the transaction result is
         * ignored since this is best-effort teardown. */
        dpif_netlink_vport_init(&vport_request);
        vport_request.cmd = OVS_VPORT_CMD_SET;
        vport_request.dp_ifindex = dpif->dp_ifindex;
        vport_request.port_no = u32_to_odp(i);
        vport_request.n_upcall_pids = 1;
        vport_request.upcall_pids = &upcall_pids;
        dpif_netlink_vport_transact(&vport_request, NULL, NULL);

        vport_del_channels(dpif, u32_to_odp(i));
    }

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

        dpif_netlink_handler_uninit(handler);
        free(handler->epoll_events);
    }
    free(dpif->channels);
    free(dpif->handlers);
    dpif->handlers = NULL;
    dpif->channels = NULL;
    dpif->n_handlers = 0;
    dpif->uc_array_size = 0;
}
555
556 static void
557 dpif_netlink_close(struct dpif *dpif_)
558 {
559 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
560
561 nl_sock_destroy(dpif->port_notifier);
562
563 fat_rwlock_wrlock(&dpif->upcall_lock);
564 destroy_all_channels(dpif);
565 fat_rwlock_unlock(&dpif->upcall_lock);
566
567 fat_rwlock_destroy(&dpif->upcall_lock);
568 free(dpif);
569 }
570
571 static int
572 dpif_netlink_destroy(struct dpif *dpif_)
573 {
574 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
575 struct dpif_netlink_dp dp;
576
577 dpif_netlink_dp_init(&dp);
578 dp.cmd = OVS_DP_CMD_DEL;
579 dp.dp_ifindex = dpif->dp_ifindex;
580 return dpif_netlink_dp_transact(&dp, NULL, NULL);
581 }
582
583 static bool
584 dpif_netlink_run(struct dpif *dpif_)
585 {
586 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
587
588 if (dpif->refresh_channels) {
589 dpif->refresh_channels = false;
590 fat_rwlock_wrlock(&dpif->upcall_lock);
591 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
592 fat_rwlock_unlock(&dpif->upcall_lock);
593 }
594 return false;
595 }
596
597 static int
598 dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
599 {
600 struct dpif_netlink_dp dp;
601 struct ofpbuf *buf;
602 int error;
603
604 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
605 if (!error) {
606 memset(stats, 0, sizeof *stats);
607
608 if (dp.stats) {
609 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
610 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
611 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
612 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
613 }
614
615 if (dp.megaflow_stats) {
616 stats->n_masks = dp.megaflow_stats->n_masks;
617 stats->n_mask_hit = get_32aligned_u64(
618 &dp.megaflow_stats->n_mask_hit);
619 } else {
620 stats->n_masks = UINT32_MAX;
621 stats->n_mask_hit = UINT64_MAX;
622 }
623 ofpbuf_delete(buf);
624 }
625 return error;
626 }
627
628 static const char *
629 get_vport_type(const struct dpif_netlink_vport *vport)
630 {
631 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
632
633 switch (vport->type) {
634 case OVS_VPORT_TYPE_NETDEV: {
635 const char *type = netdev_get_type_from_name(vport->name);
636
637 return type ? type : "system";
638 }
639
640 case OVS_VPORT_TYPE_INTERNAL:
641 return "internal";
642
643 case OVS_VPORT_TYPE_GENEVE:
644 return "geneve";
645
646 case OVS_VPORT_TYPE_GRE:
647 return "gre";
648
649 case OVS_VPORT_TYPE_VXLAN:
650 return "vxlan";
651
652 case OVS_VPORT_TYPE_LISP:
653 return "lisp";
654
655 case OVS_VPORT_TYPE_STT:
656 return "stt";
657
658 case OVS_VPORT_TYPE_ERSPAN:
659 return "erspan";
660
661 case OVS_VPORT_TYPE_IP6ERSPAN:
662 return "ip6erspan";
663
664 case OVS_VPORT_TYPE_IP6GRE:
665 return "ip6gre";
666
667 case OVS_VPORT_TYPE_UNSPEC:
668 case __OVS_VPORT_TYPE_MAX:
669 break;
670 }
671
672 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
673 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
674 return "unknown";
675 }
676
677 enum ovs_vport_type
678 netdev_to_ovs_vport_type(const char *type)
679 {
680 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
681 return OVS_VPORT_TYPE_NETDEV;
682 } else if (!strcmp(type, "internal")) {
683 return OVS_VPORT_TYPE_INTERNAL;
684 } else if (strstr(type, "stt")) {
685 return OVS_VPORT_TYPE_STT;
686 } else if (!strcmp(type, "geneve")) {
687 return OVS_VPORT_TYPE_GENEVE;
688 } else if (!strcmp(type, "vxlan")) {
689 return OVS_VPORT_TYPE_VXLAN;
690 } else if (!strcmp(type, "lisp")) {
691 return OVS_VPORT_TYPE_LISP;
692 } else if (!strcmp(type, "erspan")) {
693 return OVS_VPORT_TYPE_ERSPAN;
694 } else if (!strcmp(type, "ip6erspan")) {
695 return OVS_VPORT_TYPE_IP6ERSPAN;
696 } else if (!strcmp(type, "ip6gre")) {
697 return OVS_VPORT_TYPE_IP6GRE;
698 } else if (!strcmp(type, "gre")) {
699 return OVS_VPORT_TYPE_GRE;
700 } else {
701 return OVS_VPORT_TYPE_UNSPEC;
702 }
703 }
704
705 static int
706 dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
707 enum ovs_vport_type type,
708 struct ofpbuf *options,
709 odp_port_t *port_nop)
710 OVS_REQ_WRLOCK(dpif->upcall_lock)
711 {
712 struct dpif_netlink_vport request, reply;
713 struct ofpbuf *buf;
714 struct nl_sock *socksp = NULL;
715 uint32_t upcall_pids = 0;
716 int error = 0;
717
718 if (dpif->handlers) {
719 if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
720 return error;
721 }
722 }
723
724 dpif_netlink_vport_init(&request);
725 request.cmd = OVS_VPORT_CMD_NEW;
726 request.dp_ifindex = dpif->dp_ifindex;
727 request.type = type;
728 request.name = name;
729
730 request.port_no = *port_nop;
731 if (socksp) {
732 upcall_pids = nl_sock_pid(socksp);
733 }
734 request.n_upcall_pids = 1;
735 request.upcall_pids = &upcall_pids;
736
737 if (options) {
738 request.options = options->data;
739 request.options_len = options->size;
740 }
741
742 error = dpif_netlink_vport_transact(&request, &reply, &buf);
743 if (!error) {
744 *port_nop = reply.port_no;
745 } else {
746 if (error == EBUSY && *port_nop != ODPP_NONE) {
747 VLOG_INFO("%s: requested port %"PRIu32" is in use",
748 dpif_name(&dpif->dpif), *port_nop);
749 }
750
751 nl_sock_destroy(socksp);
752 goto exit;
753 }
754
755 error = vport_add_channel(dpif, *port_nop, socksp);
756 if (error) {
757 VLOG_INFO("%s: could not add channel for port %s",
758 dpif_name(&dpif->dpif), name);
759
760 /* Delete the port. */
761 dpif_netlink_vport_init(&request);
762 request.cmd = OVS_VPORT_CMD_DEL;
763 request.dp_ifindex = dpif->dp_ifindex;
764 request.port_no = *port_nop;
765 dpif_netlink_vport_transact(&request, NULL, NULL);
766 nl_sock_destroy(socksp);
767 goto exit;
768 }
769
770 exit:
771 ofpbuf_delete(buf);
772
773 return error;
774 }
775
/* Adds 'netdev' to the datapath via the OVS compat (Generic Netlink) vport
 * interface, encoding tunnel configuration (destination port, extensions) as
 * OVS_TUNNEL_ATTR_* options when present.  Returns 0 on success, otherwise a
 * positive errno value.  Requires 'dpif->upcall_lock' held for writing. */
static int
dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
                             odp_port_t *port_nop)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    const struct netdev_tunnel_config *tnl_cfg;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *type = netdev_get_type(netdev);
    uint64_t options_stub[64 / 8];
    enum ovs_vport_type ovs_type;
    struct ofpbuf options;
    const char *name;

    name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);

    ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
    if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(&dpif->dpif), name, type);
        return EINVAL;
    }

    if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
#ifdef _WIN32
        /* XXX : Map appropiate Windows handle */
#else
        /* Disable LRO on ports attached to the datapath; see ETH_FLAG_LRO
         * definition above. */
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
#endif
    }

#ifdef _WIN32
    if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
        if (!create_wmi_port(name)){
            VLOG_ERR("Could not create wmi internal port with name:%s", name);
            return EINVAL;
        };
    }
#endif

    tnl_cfg = netdev_get_tunnel_config(netdev);
    if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
        if (tnl_cfg->dst_port) {
            nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
                           ntohs(tnl_cfg->dst_port));
        }
        if (tnl_cfg->exts) {
            size_t ext_ofs;
            int i;

            /* 'exts' is a bitmap; each set bit becomes a flag attribute
             * inside the OVS_TUNNEL_ATTR_EXTENSION nest. */
            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
            for (i = 0; i < 32; i++) {
                if (tnl_cfg->exts & (1 << i)) {
                    nl_msg_put_flag(&options, i);
                }
            }
            nl_msg_end_nested(&options, ext_ofs);
        }
        return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
                                       port_nop);
    } else {
        return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
    }

}
842
843 static int
844 dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
845 struct netdev *netdev,
846 odp_port_t *port_nop)
847 OVS_REQ_WRLOCK(dpif->upcall_lock)
848 {
849 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
850 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
851 const char *name;
852 int error;
853
854 error = dpif_netlink_rtnl_port_create(netdev);
855 if (error) {
856 if (error != EOPNOTSUPP) {
857 VLOG_WARN_RL(&rl, "Failed to create %s with rtnetlink: %s",
858 netdev_get_name(netdev), ovs_strerror(error));
859 }
860 return error;
861 }
862
863 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
864 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
865 port_nop);
866 if (error) {
867 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
868 }
869 return error;
870 }
871
872 static int
873 dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
874 odp_port_t *port_nop)
875 {
876 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
877 int error = EOPNOTSUPP;
878
879 fat_rwlock_wrlock(&dpif->upcall_lock);
880 if (!ovs_tunnels_out_of_tree) {
881 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
882 }
883 if (error) {
884 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
885 }
886 fat_rwlock_unlock(&dpif->upcall_lock);
887
888 return error;
889 }
890
/* Deletes port 'port_no' from the datapath: removes the kernel vport, tears
 * down its upcall channel, and (when in-tree tunnels are in use) destroys the
 * corresponding rtnetlink device.  Returns 0 on success, otherwise a positive
 * errno value.  Requires 'dpif->upcall_lock' held for writing. */
static int
dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    struct dpif_netlink_vport vport;
    struct dpif_port dpif_port;
    int error;

    /* Query first: the port's name and type are needed below, after the
     * kernel vport is gone. */
    error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
    if (error) {
        return error;
    }

    dpif_netlink_vport_init(&vport);
    vport.cmd = OVS_VPORT_CMD_DEL;
    vport.dp_ifindex = dpif->dp_ifindex;
    vport.port_no = port_no;
#ifdef _WIN32
    if (!strcmp(dpif_port.type, "internal")) {
        if (!delete_wmi_port(dpif_port.name)) {
            VLOG_ERR("Could not delete wmi port with name: %s",
                     dpif_port.name);
        };
    }
#endif
    error = dpif_netlink_vport_transact(&vport, NULL, NULL);

    vport_del_channels(dpif, port_no);

    if (!error && !ovs_tunnels_out_of_tree) {
        error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
        /* EOPNOTSUPP means the device was not rtnetlink-managed; that is not
         * an error for the caller. */
        if (error == EOPNOTSUPP) {
            error = 0;
        }
    }

    dpif_port_destroy(&dpif_port);

    return error;
}
931
932 static int
933 dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
934 {
935 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
936 int error;
937
938 fat_rwlock_wrlock(&dpif->upcall_lock);
939 error = dpif_netlink_port_del__(dpif, port_no);
940 fat_rwlock_unlock(&dpif->upcall_lock);
941
942 return error;
943 }
944
945 static int
946 dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
947 const char *port_name, struct dpif_port *dpif_port)
948 {
949 struct dpif_netlink_vport request;
950 struct dpif_netlink_vport reply;
951 struct ofpbuf *buf;
952 int error;
953
954 dpif_netlink_vport_init(&request);
955 request.cmd = OVS_VPORT_CMD_GET;
956 request.dp_ifindex = dpif->dp_ifindex;
957 request.port_no = port_no;
958 request.name = port_name;
959
960 error = dpif_netlink_vport_transact(&request, &reply, &buf);
961 if (!error) {
962 if (reply.dp_ifindex != request.dp_ifindex) {
963 /* A query by name reported that 'port_name' is in some datapath
964 * other than 'dpif', but the caller wants to know about 'dpif'. */
965 error = ENODEV;
966 } else if (dpif_port) {
967 dpif_port->name = xstrdup(reply.name);
968 dpif_port->type = xstrdup(get_vport_type(&reply));
969 dpif_port->port_no = reply.port_no;
970 }
971 ofpbuf_delete(buf);
972 }
973 return error;
974 }
975
976 static int
977 dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
978 struct dpif_port *dpif_port)
979 {
980 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
981
982 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
983 }
984
/* dpif_class 'port_query_by_name' implementation: looks the port up by device
 * name; the port number argument is unused for by-name queries. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
993
/* Returns the Netlink PID to which the kernel should send upcalls for
 * 'port_no', or 0 when upcalls are disabled or no channel exists.  Requires
 * 'dpif->upcall_lock' held at least for reading. */
static uint32_t
dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
                            odp_port_t port_no)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    uint32_t port_idx = odp_to_u32(port_no);
    uint32_t pid = 0;

    if (dpif->handlers && dpif->uc_array_size > 0) {
        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
         * channel, since it is not heavily loaded. */
        uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;

        /* Needs to check in case the socket pointer is changed in between
         * the holding of upcall_lock.  A known case happens when the main
         * thread deletes the vport while the handler thread is handling
         * the upcall from that port. */
        if (dpif->channels[idx].sock) {
            pid = nl_sock_pid(dpif->channels[idx].sock);
        }
    }

    return pid;
}
1018
1019 static uint32_t
1020 dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
1021 {
1022 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1023 uint32_t ret;
1024
1025 fat_rwlock_rdlock(&dpif->upcall_lock);
1026 ret = dpif_netlink_port_get_pid__(dpif, port_no);
1027 fat_rwlock_unlock(&dpif->upcall_lock);
1028
1029 return ret;
1030 }
1031
1032 static int
1033 dpif_netlink_flow_flush(struct dpif *dpif_)
1034 {
1035 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1036 struct dpif_netlink_flow flow;
1037
1038 dpif_netlink_flow_init(&flow);
1039 flow.cmd = OVS_FLOW_CMD_DEL;
1040 flow.dp_ifindex = dpif->dp_ifindex;
1041
1042 if (netdev_is_flow_api_enabled()) {
1043 netdev_ports_flow_flush(dpif_->dpif_class);
1044 }
1045
1046 return dpif_netlink_flow_transact(&flow, NULL, NULL);
1047 }
1048
1049 /* Iteration state for the port_dump_start/next/done callbacks. */
1049 struct dpif_netlink_port_state {
1050     struct nl_dump dump;    /* In-progress OVS_VPORT_CMD_GET dump. */
1051     struct ofpbuf buf;      /* Reply buffer reused across next() calls. */
1052 };
1053
1054 static void
1055 dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1056 struct nl_dump *dump)
1057 {
1058 struct dpif_netlink_vport request;
1059 struct ofpbuf *buf;
1060
1061 dpif_netlink_vport_init(&request);
1062 request.cmd = OVS_VPORT_CMD_GET;
1063 request.dp_ifindex = dpif->dp_ifindex;
1064
1065 buf = ofpbuf_new(1024);
1066 dpif_netlink_vport_to_ofpbuf(&request, buf);
1067 nl_dump_start(dump, NETLINK_GENERIC, buf);
1068 ofpbuf_delete(buf);
1069 }
1070
1071 static int
1072 dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1073 {
1074 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1075 struct dpif_netlink_port_state *state;
1076
1077 *statep = state = xmalloc(sizeof *state);
1078 dpif_netlink_port_dump_start__(dpif, &state->dump);
1079
1080 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1081 return 0;
1082 }
1083
1084 static int
1085 dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1086 struct nl_dump *dump,
1087 struct dpif_netlink_vport *vport,
1088 struct ofpbuf *buffer)
1089 {
1090 struct ofpbuf buf;
1091 int error;
1092
1093 if (!nl_dump_next(dump, &buf, buffer)) {
1094 return EOF;
1095 }
1096
1097 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1098 if (error) {
1099 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1100 dpif_name(&dpif->dpif), ovs_strerror(error));
1101 }
1102 return error;
1103 }
1104
1105 static int
1106 dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1107 struct dpif_port *dpif_port)
1108 {
1109 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1110 struct dpif_netlink_port_state *state = state_;
1111 struct dpif_netlink_vport vport;
1112 int error;
1113
1114 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1115 &state->buf);
1116 if (error) {
1117 return error;
1118 }
1119 dpif_port->name = CONST_CAST(char *, vport.name);
1120 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1121 dpif_port->port_no = vport.port_no;
1122 return 0;
1123 }
1124
1125 static int
1126 dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1127 {
1128 struct dpif_netlink_port_state *state = state_;
1129 int error = nl_dump_done(&state->dump);
1130
1131 ofpbuf_uninit(&state->buf);
1132 free(state);
1133 return error;
1134 }
1135
/* Reports the name of a port added to, removed from, or modified in 'dpif_'
 * since the previous call, storing a malloc()'d copy in '*devnamep'.
 * Returns 0 on success, EAGAIN if no notification is pending, ENOBUFS if
 * notifications may have been lost (the caller should assume everything
 * changed), or another positive errno value on error. */
static int
dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);

    /* Lazily create the Netlink socket to listen for notifications. */
    if (!dpif->port_notifier) {
        struct nl_sock *sock;
        int error;

        error = nl_sock_create(NETLINK_GENERIC, &sock);
        if (error) {
            return error;
        }

        error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
        if (error) {
            nl_sock_destroy(sock);
            return error;
        }
        dpif->port_notifier = sock;

        /* We have no idea of the current state so report that everything
         * changed. */
        return ENOBUFS;
    }

    for (;;) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        uint64_t buf_stub[4096 / 8];
        struct ofpbuf buf;
        int error;

        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
        if (!error) {
            struct dpif_netlink_vport vport;

            /* Skip notifications that fail to parse, that concern another
             * datapath, or that are not NEW/DEL/SET vport commands. */
            error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
            if (!error) {
                if (vport.dp_ifindex == dpif->dp_ifindex
                    && (vport.cmd == OVS_VPORT_CMD_NEW
                        || vport.cmd == OVS_VPORT_CMD_DEL
                        || vport.cmd == OVS_VPORT_CMD_SET)) {
                    VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
                             dpif->dpif.full_name, vport.name, vport.cmd);
                    if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
                        /* A deleted vport invalidates its upcall channel;
                         * request that the channels be rebuilt. */
                        dpif->refresh_channels = true;
                    }
                    *devnamep = xstrdup(vport.name);
                    ofpbuf_uninit(&buf);
                    return 0;
                }
            }
        } else if (error != EAGAIN) {
            /* Receive failure (e.g. socket overflow): discard whatever is
             * queued and report that anything may have changed. */
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
                         ovs_strerror(error));
            nl_sock_drain(dpif->port_notifier);
            error = ENOBUFS;
        }

        ofpbuf_uninit(&buf);
        if (error) {
            return error;
        }
    }
}
1203
1204 static void
1205 dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1206 {
1207 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1208
1209 if (dpif->port_notifier) {
1210 nl_sock_wait(dpif->port_notifier, POLLIN);
1211 } else {
1212 poll_immediate_wake();
1213 }
1214 }
1215
1216 static void
1217 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1218 const ovs_u128 *ufid, bool terse)
1219 {
1220 if (ufid) {
1221 request->ufid = *ufid;
1222 request->ufid_present = true;
1223 } else {
1224 request->ufid_present = false;
1225 }
1226 request->ufid_terse = terse;
1227 }
1228
1229 static void
1230 dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1231 const struct nlattr *key, size_t key_len,
1232 const ovs_u128 *ufid, bool terse,
1233 struct dpif_netlink_flow *request)
1234 {
1235 dpif_netlink_flow_init(request);
1236 request->cmd = OVS_FLOW_CMD_GET;
1237 request->dp_ifindex = dpif->dp_ifindex;
1238 request->key = key;
1239 request->key_len = key_len;
1240 dpif_netlink_flow_init_ufid(request, ufid, terse);
1241 }
1242
/* Initializes 'request' as an OVS_FLOW_CMD_GET for the flow described by
 * 'get'.  Never requests a terse reply: the caller wants the full flow. */
static void
dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
                           const struct dpif_flow_get *get,
                           struct dpif_netlink_flow *request)
{
    dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
                                 false, request);
}
1251
/* Fetches from the kernel the flow identified by 'key'/'key_len' (and
 * 'ufid', when nonnull).  On success stores the parsed flow in '*reply' and
 * the message that backs it in '*bufp', which the caller owns. */
static int
dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
                        const struct nlattr *key, size_t key_len,
                        const ovs_u128 *ufid, bool terse,
                        struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
{
    struct dpif_netlink_flow request;

    dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
    return dpif_netlink_flow_transact(&request, reply, bufp);
}
1263
/* Re-fetches 'flow' from the kernel (by UFID when present, otherwise by
 * key), requesting a full, non-terse reply into '*reply'/'*bufp'. */
static int
dpif_netlink_flow_get(const struct dpif_netlink *dpif,
                      const struct dpif_netlink_flow *flow,
                      struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
{
    return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
                                   flow->ufid_present ? &flow->ufid : NULL,
                                   false, reply, bufp);
}
1273
1274 static void
1275 dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1276 const struct dpif_flow_put *put,
1277 struct dpif_netlink_flow *request)
1278 {
1279 static const struct nlattr dummy_action;
1280
1281 dpif_netlink_flow_init(request);
1282 request->cmd = (put->flags & DPIF_FP_CREATE
1283 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1284 request->dp_ifindex = dpif->dp_ifindex;
1285 request->key = put->key;
1286 request->key_len = put->key_len;
1287 request->mask = put->mask;
1288 request->mask_len = put->mask_len;
1289 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1290
1291 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1292 request->actions = (put->actions
1293 ? put->actions
1294 : CONST_CAST(struct nlattr *, &dummy_action));
1295 request->actions_len = put->actions_len;
1296 if (put->flags & DPIF_FP_ZERO_STATS) {
1297 request->clear = true;
1298 }
1299 if (put->flags & DPIF_FP_PROBE) {
1300 request->probe = true;
1301 }
1302 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1303 }
1304
1305 static void
1306 dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1307 const struct nlattr *key, size_t key_len,
1308 const ovs_u128 *ufid, bool terse,
1309 struct dpif_netlink_flow *request)
1310 {
1311 dpif_netlink_flow_init(request);
1312 request->cmd = OVS_FLOW_CMD_DEL;
1313 request->dp_ifindex = dpif->dp_ifindex;
1314 request->key = key;
1315 request->key_len = key_len;
1316 dpif_netlink_flow_init_ufid(request, ufid, terse);
1317 }
1318
/* Initializes 'request' as an OVS_FLOW_CMD_DEL for the flow described by
 * 'del', honoring its terse-reply preference. */
static void
dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
                           const struct dpif_flow_del *del,
                           struct dpif_netlink_flow *request)
{
    dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
                                 del->ufid, del->terse, request);
}
1327
/* Per-dump state shared by every thread that takes part in a flow dump. */
struct dpif_netlink_flow_dump {
    struct dpif_flow_dump up;                /* Generic dpif dump state. */
    struct nl_dump nl_dump;                  /* Kernel-datapath flow dump. */
    atomic_int status;                       /* First error seen, or 0. */
    struct netdev_flow_dump **netdev_dumps;  /* Offloaded-flow dumps. */
    int netdev_dumps_num;                    /* Number of netdev_flow_dumps */
    struct ovs_mutex netdev_lock;            /* Guards the following. */
    int netdev_current_dump OVS_GUARDED;     /* Shared current dump */
    struct dpif_flow_dump_types types;       /* Type of dump */
};
1338
/* Downcasts the generic 'dump' to its dpif-netlink representation. */
static struct dpif_netlink_flow_dump *
dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
{
    return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
}
1344
1345 static void
1346 start_netdev_dump(const struct dpif *dpif_,
1347 struct dpif_netlink_flow_dump *dump)
1348 {
1349 ovs_mutex_init(&dump->netdev_lock);
1350
1351 if (!(dump->types.netdev_flows)) {
1352 dump->netdev_dumps_num = 0;
1353 dump->netdev_dumps = NULL;
1354 return;
1355 }
1356
1357 ovs_mutex_lock(&dump->netdev_lock);
1358 dump->netdev_current_dump = 0;
1359 dump->netdev_dumps
1360 = netdev_ports_flow_dump_create(dpif_->dpif_class,
1361 &dump->netdev_dumps_num);
1362 ovs_mutex_unlock(&dump->netdev_lock);
1363 }
1364
1365 static void
1366 dpif_netlink_populate_flow_dump_types(struct dpif_netlink_flow_dump *dump,
1367 struct dpif_flow_dump_types *types)
1368 {
1369 if (!types) {
1370 dump->types.ovs_flows = true;
1371 dump->types.netdev_flows = true;
1372 } else {
1373 memcpy(&dump->types, types, sizeof *types);
1374 }
1375 }
1376
/* Starts a flow dump on 'dpif_'.  Depending on 'types' this prepares a
 * kernel-datapath dump, a set of per-netdev offloaded-flow dumps, or both.
 * 'terse' asks the kernel for abbreviated flow replies. */
static struct dpif_flow_dump *
dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse,
                              struct dpif_flow_dump_types *types)
{
    const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    struct dpif_netlink_flow_dump *dump;
    struct dpif_netlink_flow request;
    struct ofpbuf *buf;

    dump = xmalloc(sizeof *dump);
    dpif_flow_dump_init(&dump->up, dpif_);

    dpif_netlink_populate_flow_dump_types(dump, types);

    if (dump->types.ovs_flows) {
        /* Kick off an OVS_FLOW_CMD_GET dump of the kernel datapath. */
        dpif_netlink_flow_init(&request);
        request.cmd = OVS_FLOW_CMD_GET;
        request.dp_ifindex = dpif->dp_ifindex;
        request.ufid_present = false;
        request.ufid_terse = terse;

        buf = ofpbuf_new(1024);
        dpif_netlink_flow_to_ofpbuf(&request, buf);
        nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
        /* nl_dump_start() does not keep a reference to 'buf'. */
        ofpbuf_delete(buf);
    }
    atomic_init(&dump->status, 0);
    dump->up.terse = terse;

    start_netdev_dump(dpif_, dump);

    return &dump->up;
}
1410
1411 static int
1412 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1413 {
1414 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1415 unsigned int nl_status = 0;
1416 int dump_status;
1417
1418 if (dump->types.ovs_flows) {
1419 nl_status = nl_dump_done(&dump->nl_dump);
1420 }
1421
1422 for (int i = 0; i < dump->netdev_dumps_num; i++) {
1423 int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
1424
1425 if (err != 0 && err != EOPNOTSUPP) {
1426 VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
1427 }
1428 }
1429
1430 free(dump->netdev_dumps);
1431 ovs_mutex_destroy(&dump->netdev_lock);
1432
1433 /* No other thread has access to 'dump' at this point. */
1434 atomic_read_relaxed(&dump->status, &dump_status);
1435 free(dump);
1436 return dump_status ? dump_status : nl_status;
1437 }
1438
/* Per-thread state for one participant in a flow dump. */
struct dpif_netlink_flow_dump_thread {
    struct dpif_flow_dump_thread up;    /* Generic per-thread dump state. */
    struct dpif_netlink_flow_dump *dump; /* The shared dump this belongs to. */
    struct dpif_netlink_flow flow;
    struct dpif_flow_stats stats;
    struct ofpbuf nl_flows;     /* Always used to store flows. */
    struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
    int netdev_dump_idx;        /* This thread current netdev dump index */
    bool netdev_done;           /* If we are finished dumping netdevs */

    /* (Key/Mask/Actions) Buffers for netdev dumping */
    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
};
1454
/* Downcasts the generic 'thread' to its dpif-netlink representation. */
static struct dpif_netlink_flow_dump_thread *
dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
{
    return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
}
1460
1461 static struct dpif_flow_dump_thread *
1462 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1463 {
1464 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1465 struct dpif_netlink_flow_dump_thread *thread;
1466
1467 thread = xmalloc(sizeof *thread);
1468 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1469 thread->dump = dump;
1470 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1471 thread->nl_actions = NULL;
1472 thread->netdev_dump_idx = 0;
1473 thread->netdev_done = !(thread->netdev_dump_idx < dump->netdev_dumps_num);
1474
1475 return &thread->up;
1476 }
1477
1478 static void
1479 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1480 {
1481 struct dpif_netlink_flow_dump_thread *thread
1482 = dpif_netlink_flow_dump_thread_cast(thread_);
1483
1484 ofpbuf_uninit(&thread->nl_flows);
1485 ofpbuf_delete(thread->nl_actions);
1486 free(thread);
1487 }
1488
1489 static void
1490 dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
1491 const struct dpif_netlink_flow *datapath_flow)
1492 {
1493 dpif_flow->key = datapath_flow->key;
1494 dpif_flow->key_len = datapath_flow->key_len;
1495 dpif_flow->mask = datapath_flow->mask;
1496 dpif_flow->mask_len = datapath_flow->mask_len;
1497 dpif_flow->actions = datapath_flow->actions;
1498 dpif_flow->actions_len = datapath_flow->actions_len;
1499 dpif_flow->ufid_present = datapath_flow->ufid_present;
1500 dpif_flow->pmd_id = PMD_ID_NULL;
1501 if (datapath_flow->ufid_present) {
1502 dpif_flow->ufid = datapath_flow->ufid;
1503 } else {
1504 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1505 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1506 &dpif_flow->ufid);
1507 }
1508 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1509 dpif_flow->attrs.offloaded = false;
1510 dpif_flow->attrs.dp_layer = "ovs";
1511 }
1512
/* The design is such that all threads work together on the dumps in order,
 * from the first to the last (at first they are all on dump 0).
 * When the first thread finds that the given dump is finished,
 * they all move to the next.  If two or more threads find the same dump
 * is finished at the same time, the first one will advance the shared
 * netdev_current_dump and the others will catch up. */
static void
dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
{
    struct dpif_netlink_flow_dump *dump = thread->dump;

    ovs_mutex_lock(&dump->netdev_lock);
    /* if we haven't finished (dumped everything) */
    if (dump->netdev_current_dump < dump->netdev_dumps_num) {
        /* if we are the first to find that current dump is finished
         * advance it. */
        if (thread->netdev_dump_idx == dump->netdev_current_dump) {
            thread->netdev_dump_idx = ++dump->netdev_current_dump;
            /* did we just finish the last dump? done. */
            if (dump->netdev_current_dump == dump->netdev_dumps_num) {
                thread->netdev_done = true;
            }
        } else {
            /* otherwise, we are behind, catch up */
            thread->netdev_dump_idx = dump->netdev_current_dump;
        }
    } else {
        /* some other thread finished */
        thread->netdev_done = true;
    }
    ovs_mutex_unlock(&dump->netdev_lock);
}
1545
/* Converts the netdev-offload representation of a flow ('match', 'actions',
 * 'stats', 'attrs', 'ufid') into dpif representation 'flow', serializing the
 * key and mask as netlink attributes appended to 'key_buf' and 'mask_buf'.
 * 'flow' points into those buffers and into 'actions', so it is only valid
 * while they are.  Always returns 0. */
static int
dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
                                       struct ofpbuf *key_buf,
                                       struct ofpbuf *mask_buf,
                                       struct nlattr *actions,
                                       struct dpif_flow_stats *stats,
                                       struct dpif_flow_attrs *attrs,
                                       ovs_u128 *ufid,
                                       struct dpif_flow *flow,
                                       bool terse OVS_UNUSED)
{

    struct odp_flow_key_parms odp_parms = {
        .flow = &match->flow,
        .mask = &match->wc.masks,
        .support = {
            .max_vlan_headers = 2,
        },
    };
    size_t offset;

    memset(flow, 0, sizeof *flow);

    /* Key: serialize starting at the buffer's current tail, so that the
     * key's offset and length can be recorded. */
    offset = key_buf->size;
    flow->key = ofpbuf_tail(key_buf);
    odp_flow_key_from_flow(&odp_parms, key_buf);
    flow->key_len = key_buf->size - offset;

    /* Mask, serialized the same way. */
    offset = mask_buf->size;
    flow->mask = ofpbuf_tail(mask_buf);
    odp_parms.key_buf = key_buf;
    odp_flow_key_from_mask(&odp_parms, mask_buf);
    flow->mask_len = mask_buf->size - offset;

    /* Actions */
    flow->actions = nl_attr_get(actions);
    flow->actions_len = nl_attr_get_size(actions);

    /* Stats */
    memcpy(&flow->stats, stats, sizeof *stats);

    /* UFID */
    flow->ufid_present = true;
    flow->ufid = *ufid;

    flow->pmd_id = PMD_ID_NULL;

    memcpy(&flow->attrs, attrs, sizeof *attrs);

    return 0;
}
1599
/* Stores up to 'max_flows' flows from the dump shared by 'thread_' into
 * 'flows'.  Offloaded (netdev) flows are emitted first; once those are
 * exhausted, flows from the kernel-datapath dump follow.  Returns the number
 * of flows stored. */
static int
dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                            struct dpif_flow *flows, int max_flows)
{
    struct dpif_netlink_flow_dump_thread *thread
        = dpif_netlink_flow_dump_thread_cast(thread_);
    struct dpif_netlink_flow_dump *dump = thread->dump;
    struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
    int n_flows;

    /* Free the actions buffer retrieved for the previous batch, if any (see
     * the "rare case" below). */
    ofpbuf_delete(thread->nl_actions);
    thread->nl_actions = NULL;

    n_flows = 0;
    max_flows = MIN(max_flows, FLOW_DUMP_MAX_BATCH);

    /* Phase 1: drain the per-netdev offloaded-flow dumps.  Each emitted flow
     * is serialized into this thread's per-slot key/mask/action buffers. */
    while (!thread->netdev_done && n_flows < max_flows) {
        struct odputil_keybuf *maskbuf = &thread->maskbuf[n_flows];
        struct odputil_keybuf *keybuf = &thread->keybuf[n_flows];
        struct odputil_keybuf *actbuf = &thread->actbuf[n_flows];
        struct ofpbuf key, mask, act;
        struct dpif_flow *f = &flows[n_flows];
        int cur = thread->netdev_dump_idx;
        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
        struct match match;
        struct nlattr *actions;
        struct dpif_flow_stats stats;
        struct dpif_flow_attrs attrs;
        ovs_u128 ufid;
        bool has_next;

        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
        has_next = netdev_flow_dump_next(netdev_dump, &match,
                                         &actions, &stats, &attrs,
                                         &ufid,
                                         &thread->nl_flows,
                                         &act);
        if (has_next) {
            dpif_netlink_netdev_match_to_dpif_flow(&match,
                                                   &key, &mask,
                                                   actions,
                                                   &stats,
                                                   &attrs,
                                                   &ufid,
                                                   f,
                                                   dump->up.terse);
            n_flows++;
        } else {
            /* The current netdev dump is exhausted: move on (together with
             * the other threads) to the next one. */
            dpif_netlink_advance_netdev_dump(thread);
        }
    }

    if (!(dump->types.ovs_flows)) {
        return n_flows;
    }

    /* Phase 2: flows from the kernel datapath dump. */
    while (!n_flows
           || (n_flows < max_flows && thread->nl_flows.size)) {
        struct dpif_netlink_flow datapath_flow;
        struct ofpbuf nl_flow;
        int error;

        /* Try to grab another flow. */
        if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
            break;
        }

        /* Convert the flow to our output format. */
        error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
        if (error) {
            atomic_store_relaxed(&dump->status, error);
            break;
        }

        if (dump->up.terse || datapath_flow.actions) {
            /* Common case: we don't want actions, or the flow includes
             * actions. */
            dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
                                           &datapath_flow);
        } else {
            /* Rare case: the flow does not include actions.  Retrieve this
             * individual flow again to get the actions. */
            error = dpif_netlink_flow_get(dpif, &datapath_flow,
                                          &datapath_flow, &thread->nl_actions);
            if (error == ENOENT) {
                VLOG_DBG("dumped flow disappeared on get");
                continue;
            } else if (error) {
                VLOG_WARN("error fetching dumped flow: %s",
                          ovs_strerror(error));
                atomic_store_relaxed(&dump->status, error);
                break;
            }

            /* Save this flow.  Then exit, because we only have one buffer to
             * handle this case. */
            dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
                                           &datapath_flow);
            break;
        }
    }
    return n_flows;
}
1705
/* Encodes 'd_exec' as an OVS_PACKET_CMD_EXECUTE request addressed to
 * datapath 'dp_ifindex', appending it to 'buf'. */
static void
dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
                            struct ofpbuf *buf)
{
    struct ovs_header *k_exec;
    size_t key_ofs;

    /* Reserve room up front: message headers plus the packet, metadata key,
     * and actions attributes. */
    ofpbuf_prealloc_tailroom(buf, (64
                                   + dp_packet_size(d_exec->packet)
                                   + ODP_KEY_METADATA_SIZE
                                   + d_exec->actions_len));

    nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
                          OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);

    k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
    k_exec->dp_ifindex = dp_ifindex;

    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
                      dp_packet_data(d_exec->packet),
                      dp_packet_size(d_exec->packet));

    /* The packet metadata travels as a nested OVS_PACKET_ATTR_KEY. */
    key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
    odp_key_from_dp_packet(buf, d_exec->packet);
    nl_msg_end_nested(buf, key_ofs);

    nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
                      d_exec->actions, d_exec->actions_len);
    if (d_exec->probe) {
        nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
    }
    if (d_exec->mtu) {
        nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
    }
}
1741
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
 * Returns the number actually executed (at least 1, if 'n_ops' is
 * positive). */
static size_t
dpif_netlink_operate__(struct dpif_netlink *dpif,
                       struct dpif_op **ops, size_t n_ops)
{
    /* Per-operation bookkeeping: one Netlink transaction plus stack-backed
     * request and reply buffers. */
    struct op_auxdata {
        struct nl_transaction txn;

        struct ofpbuf request;
        uint64_t request_stub[1024 / 8];

        struct ofpbuf reply;
        uint64_t reply_stub[1024 / 8];
    } auxes[OPERATE_MAX_OPS];

    struct nl_transaction *txnsp[OPERATE_MAX_OPS];
    size_t i;

    n_ops = MIN(n_ops, OPERATE_MAX_OPS);

    /* First pass: encode each operation into a Netlink request. */
    for (i = 0; i < n_ops; i++) {
        struct op_auxdata *aux = &auxes[i];
        struct dpif_op *op = ops[i];
        struct dpif_flow_put *put;
        struct dpif_flow_del *del;
        struct dpif_flow_get *get;
        struct dpif_netlink_flow flow;

        ofpbuf_use_stub(&aux->request,
                        aux->request_stub, sizeof aux->request_stub);
        aux->txn.request = &aux->request;

        ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
        aux->txn.reply = NULL;

        switch (op->type) {
        case DPIF_OP_FLOW_PUT:
            put = &op->flow_put;
            dpif_netlink_init_flow_put(dpif, put, &flow);
            if (put->stats) {
                /* NLM_F_ECHO makes the kernel send the flow back, which
                 * carries the statistics the caller asked for. */
                flow.nlmsg_flags |= NLM_F_ECHO;
                aux->txn.reply = &aux->reply;
            }
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        case DPIF_OP_FLOW_DEL:
            del = &op->flow_del;
            dpif_netlink_init_flow_del(dpif, del, &flow);
            if (del->stats) {
                flow.nlmsg_flags |= NLM_F_ECHO;
                aux->txn.reply = &aux->reply;
            }
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        case DPIF_OP_EXECUTE:
            /* Can't execute a packet that won't fit in a Netlink attribute. */
            if (OVS_UNLIKELY(nl_attr_oversized(
                                 dp_packet_size(op->execute.packet)))) {
                /* Report an error immediately if this is the first operation.
                 * Otherwise the easiest thing to do is to postpone to the next
                 * call (when this will be the first operation). */
                if (i == 0) {
                    VLOG_ERR_RL(&error_rl,
                                "dropping oversized %"PRIu32"-byte packet",
                                dp_packet_size(op->execute.packet));
                    op->error = ENOBUFS;
                    return 1;
                }
                n_ops = i;
            } else {
                dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
                                            &aux->request);
            }
            break;

        case DPIF_OP_FLOW_GET:
            get = &op->flow_get;
            dpif_netlink_init_flow_get(dpif, get, &flow);
            /* The caller's buffer receives the reply directly. */
            aux->txn.reply = get->buffer;
            dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
            break;

        default:
            OVS_NOT_REACHED();
        }
    }

    /* Issue all the requests as one transaction batch. */
    for (i = 0; i < n_ops; i++) {
        txnsp[i] = &auxes[i].txn;
    }
    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);

    /* Second pass: decode each reply back into its operation. */
    for (i = 0; i < n_ops; i++) {
        struct op_auxdata *aux = &auxes[i];
        struct nl_transaction *txn = &auxes[i].txn;
        struct dpif_op *op = ops[i];
        struct dpif_flow_put *put;
        struct dpif_flow_del *del;
        struct dpif_flow_get *get;

        op->error = txn->error;

        switch (op->type) {
        case DPIF_OP_FLOW_PUT:
            put = &op->flow_put;
            if (put->stats) {
                if (!op->error) {
                    struct dpif_netlink_flow reply;

                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
                                                              txn->reply);
                    if (!op->error) {
                        dpif_netlink_flow_get_stats(&reply, put->stats);
                    }
                }
            }
            break;

        case DPIF_OP_FLOW_DEL:
            del = &op->flow_del;
            if (del->stats) {
                if (!op->error) {
                    struct dpif_netlink_flow reply;

                    op->error = dpif_netlink_flow_from_ofpbuf(&reply,
                                                              txn->reply);
                    if (!op->error) {
                        dpif_netlink_flow_get_stats(&reply, del->stats);
                    }
                }
            }
            break;

        case DPIF_OP_EXECUTE:
            break;

        case DPIF_OP_FLOW_GET:
            get = &op->flow_get;
            if (!op->error) {
                struct dpif_netlink_flow reply;

                op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
                if (!op->error) {
                    dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
                                                   &reply);
                }
            }
            break;

        default:
            OVS_NOT_REACHED();
        }

        ofpbuf_uninit(&aux->request);
        ofpbuf_uninit(&aux->reply);
    }

    return n_ops;
}
1904
/* Handles a DPIF_OP_FLOW_GET via the netdev flow offload API: looks up the
 * flow by 'get->ufid' and converts the result into 'get->flow'.  Returns 0
 * on success or a positive errno value on failure, in which case the caller
 * falls back to the kernel datapath. */
static int
parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
{
    struct dpif_flow *dpif_flow = get->flow;
    struct match match;
    struct nlattr *actions;
    struct dpif_flow_stats stats;
    struct dpif_flow_attrs attrs;
    struct ofpbuf buf;
    uint64_t act_buf[1024 / 8];
    struct odputil_keybuf maskbuf;
    struct odputil_keybuf keybuf;
    struct odputil_keybuf actbuf;
    struct ofpbuf key, mask, act;
    int err;

    ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
    err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
                                &actions, get->ufid, &stats, &attrs, &buf);
    if (err) {
        return err;
    }

    VLOG_DBG("found flow from netdev, translating to dpif flow");

    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
    ofpbuf_use_stack(&act, &actbuf, sizeof actbuf);
    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
    dpif_netlink_netdev_match_to_dpif_flow(&match, &key, &mask, actions,
                                           &stats, &attrs,
                                           (ovs_u128 *) get->ufid,
                                           dpif_flow,
                                           false);
    /* The actions produced above live in this function's stack frame, so
     * copy them into the caller-provided 'get->buffer' and repoint the flow
     * at that copy.
     * NOTE(review): 'key' and 'mask' also point into this stack frame —
     * confirm that callers only use 'actions' after this returns. */
    ofpbuf_put(get->buffer, nl_attr_get(actions), nl_attr_get_size(actions));
    dpif_flow->actions = ofpbuf_at(get->buffer, 0, 0);
    dpif_flow->actions_len = nl_attr_get_size(actions);

    return 0;
}
1944
1945 static int
1946 parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
1947 {
1948 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
1949 const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
1950 struct match match;
1951 odp_port_t in_port;
1952 const struct nlattr *nla;
1953 size_t left;
1954 struct netdev *dev;
1955 struct offload_info info;
1956 ovs_be16 dst_port = 0;
1957 uint8_t csum_on = false;
1958 int err;
1959
1960 if (put->flags & DPIF_FP_PROBE) {
1961 return EOPNOTSUPP;
1962 }
1963
1964 err = parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
1965 put->mask_len, &match);
1966 if (err) {
1967 return err;
1968 }
1969
1970 /* When we try to install a dummy flow from a probed feature. */
1971 if (match.flow.dl_type == htons(0x1234)) {
1972 return EOPNOTSUPP;
1973 }
1974
1975 in_port = match.flow.in_port.odp_port;
1976 dev = netdev_ports_get(in_port, dpif_class);
1977 if (!dev) {
1978 return EOPNOTSUPP;
1979 }
1980
1981 /* Get tunnel dst port */
1982 NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
1983 if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
1984 const struct netdev_tunnel_config *tnl_cfg;
1985 struct netdev *outdev;
1986 odp_port_t out_port;
1987
1988 out_port = nl_attr_get_odp_port(nla);
1989 outdev = netdev_ports_get(out_port, dpif_class);
1990 if (!outdev) {
1991 err = EOPNOTSUPP;
1992 goto out;
1993 }
1994 tnl_cfg = netdev_get_tunnel_config(outdev);
1995 if (tnl_cfg && tnl_cfg->dst_port != 0) {
1996 dst_port = tnl_cfg->dst_port;
1997 }
1998 if (tnl_cfg) {
1999 csum_on = tnl_cfg->csum;
2000 }
2001 netdev_close(outdev);
2002 }
2003 }
2004
2005 info.dpif_class = dpif_class;
2006 info.tp_dst_port = dst_port;
2007 info.tunnel_csum_on = csum_on;
2008 err = netdev_flow_put(dev, &match,
2009 CONST_CAST(struct nlattr *, put->actions),
2010 put->actions_len,
2011 CONST_CAST(ovs_u128 *, put->ufid),
2012 &info, put->stats);
2013
2014 if (!err) {
2015 if (put->flags & DPIF_FP_MODIFY) {
2016 struct dpif_op *opp;
2017 struct dpif_op op;
2018
2019 op.type = DPIF_OP_FLOW_DEL;
2020 op.flow_del.key = put->key;
2021 op.flow_del.key_len = put->key_len;
2022 op.flow_del.ufid = put->ufid;
2023 op.flow_del.pmd_id = put->pmd_id;
2024 op.flow_del.stats = NULL;
2025 op.flow_del.terse = false;
2026
2027 opp = &op;
2028 dpif_netlink_operate__(dpif, &opp, 1);
2029 }
2030
2031 VLOG_DBG("added flow");
2032 } else if (err != EEXIST) {
2033 struct netdev *oor_netdev = NULL;
2034 if (err == ENOSPC && netdev_is_offload_rebalance_policy_enabled()) {
2035 /*
2036 * We need to set OOR on the input netdev (i.e, 'dev') for the
2037 * flow. But if the flow has a tunnel attribute (i.e, decap action,
2038 * with a virtual device like a VxLAN interface as its in-port),
2039 * then lookup and set OOR on the underlying tunnel (real) netdev.
2040 */
2041 oor_netdev = flow_get_tunnel_netdev(&match.flow.tunnel);
2042 if (!oor_netdev) {
2043 /* Not a 'tunnel' flow */
2044 oor_netdev = dev;
2045 }
2046 netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true);
2047 }
2048 VLOG_ERR_RL(&rl, "failed to offload flow: %s: %s", ovs_strerror(err),
2049 (oor_netdev ? oor_netdev->name : dev->name));
2050 }
2051
2052 out:
2053 if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
2054 /* Modified rule can't be offloaded, try and delete from HW */
2055 int del_err = netdev_flow_del(dev, put->ufid, put->stats);
2056
2057 if (!del_err) {
2058 /* Delete from hw success, so old flow was offloaded.
2059 * Change flags to create the flow in kernel */
2060 put->flags &= ~DPIF_FP_MODIFY;
2061 put->flags |= DPIF_FP_CREATE;
2062 } else if (del_err != ENOENT) {
2063 VLOG_ERR_RL(&rl, "failed to delete offloaded flow: %s",
2064 ovs_strerror(del_err));
2065 /* stop proccesing the flow in kernel */
2066 err = 0;
2067 }
2068 }
2069
2070 netdev_close(dev);
2071
2072 return err;
2073 }
2074
2075 static int
2076 try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
2077 {
2078 int err = EOPNOTSUPP;
2079
2080 switch (op->type) {
2081 case DPIF_OP_FLOW_PUT: {
2082 struct dpif_flow_put *put = &op->flow_put;
2083
2084 if (!put->ufid) {
2085 break;
2086 }
2087
2088 log_flow_put_message(&dpif->dpif, &this_module, put, 0);
2089 err = parse_flow_put(dpif, put);
2090 break;
2091 }
2092 case DPIF_OP_FLOW_DEL: {
2093 struct dpif_flow_del *del = &op->flow_del;
2094
2095 if (!del->ufid) {
2096 break;
2097 }
2098
2099 log_flow_del_message(&dpif->dpif, &this_module, del, 0);
2100 err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
2101 del->stats);
2102 break;
2103 }
2104 case DPIF_OP_FLOW_GET: {
2105 struct dpif_flow_get *get = &op->flow_get;
2106
2107 if (!op->flow_get.ufid) {
2108 break;
2109 }
2110
2111 log_flow_get_message(&dpif->dpif, &this_module, get, 0);
2112 err = parse_flow_get(dpif, get);
2113 break;
2114 }
2115 case DPIF_OP_EXECUTE:
2116 default:
2117 break;
2118 }
2119
2120 return err;
2121 }
2122
2123 static void
2124 dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops,
2125 size_t n_ops)
2126 {
2127 while (n_ops > 0) {
2128 size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
2129
2130 ops += chunk;
2131 n_ops -= chunk;
2132 }
2133 }
2134
/* dpif 'operate' implementation.  When hardware offload is enabled, each
 * operation is first offered to the netdev flow API; the ones it cannot
 * handle fall back to the kernel datapath in batches. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    struct dpif_op *new_ops[OPERATE_MAX_OPS];
    int count = 0;
    int i = 0;
    int err = 0;

    if (netdev_is_flow_api_enabled()) {
        while (n_ops > 0) {
            count = 0;

            /* Collect up to OPERATE_MAX_OPS operations that the netdev
             * layer rejected, preserving their original order.  EEXIST is
             * treated as success here (the flow is already offloaded). */
            while (n_ops > 0 && count < OPERATE_MAX_OPS) {
                struct dpif_op *op = ops[i++];

                err = try_send_to_netdev(dpif, op);
                if (err && err != EEXIST) {
                    new_ops[count++] = op;
                } else {
                    op->error = err;
                }

                n_ops--;
            }

            dpif_netlink_operate_chunks(dpif, new_ops, count);
        }
    } else {
        dpif_netlink_operate_chunks(dpif, ops, n_ops);
    }
}
2167
#if _WIN32
/* Releases the per-handler upcall resources (Windows: the socket pool). */
static void
dpif_netlink_handler_uninit(struct dpif_handler *handler)
{
    vport_delete_sock_pool(handler);
}

/* Prepares 'handler' to receive upcalls (Windows: via a pool of sockets). */
static int
dpif_netlink_handler_init(struct dpif_handler *handler)
{
    return vport_create_sock_pool(handler);
}
#else

/* Prepares 'handler' to receive upcalls: creates the epoll set with which
 * the handler polls its per-vport Netlink sockets.  Returns 0 on success or
 * a positive errno value on failure. */
static int
dpif_netlink_handler_init(struct dpif_handler *handler)
{
    handler->epoll_fd = epoll_create(10);
    return handler->epoll_fd < 0 ? errno : 0;
}

/* Releases the per-handler upcall resources (the epoll fd). */
static void
dpif_netlink_handler_uninit(struct dpif_handler *handler)
{
    close(handler->epoll_fd);
}
#endif
2195
/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
 * currently in 'dpif' in the kernel, by adding a new set of channels for
 * any kernel vport that lacks one and deleting any channels that have no
 * backing kernel vports.
 *
 * If the number of handlers changes, all existing channels are destroyed
 * and 'dpif->handlers' is rebuilt from scratch.  Returns 0 on success,
 * otherwise a positive errno value (on partial failure the channels that
 * could not be set up are removed but the rest remain usable). */
static int
dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    unsigned long int *keep_channels;
    struct dpif_netlink_vport vport;
    size_t keep_channels_nbits;
    struct nl_dump dump;
    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
    struct ofpbuf buf;
    int retval = 0;
    size_t i;

    ovs_assert(!WINDOWS || n_handlers <= 1);
    ovs_assert(!WINDOWS || dpif->n_handlers <= 1);

    if (dpif->n_handlers != n_handlers) {
        /* Handler count changed: tear everything down and rebuild. */
        destroy_all_channels(dpif);
        dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
        for (i = 0; i < n_handlers; i++) {
            int error;
            struct dpif_handler *handler = &dpif->handlers[i];

            error = dpif_netlink_handler_init(handler);
            if (error) {
                size_t j;

                /* Unwind the handlers initialized so far. */
                for (j = 0; j < i; j++) {
                    struct dpif_handler *tmp = &dpif->handlers[j];
                    dpif_netlink_handler_uninit(tmp);
                }
                free(dpif->handlers);
                dpif->handlers = NULL;

                return error;
            }
        }
        dpif->n_handlers = n_handlers;
    }

    /* Discard any buffered-but-unprocessed events. */
    for (i = 0; i < n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

        handler->event_offset = handler->n_events = 0;
    }

    /* Track which existing channels are still backed by a kernel vport. */
    keep_channels_nbits = dpif->uc_array_size;
    keep_channels = bitmap_allocate(keep_channels_nbits);

    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
    dpif_netlink_port_dump_start__(dpif, &dump);
    while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
        uint32_t port_no = odp_to_u32(vport.port_no);
        uint32_t upcall_pid;
        int error;

        if (port_no >= dpif->uc_array_size
            || !vport_get_pid(dpif, port_no, &upcall_pid)) {
            /* No channel yet for this vport: create one. */
            struct nl_sock *socksp;

            if (nl_sock_create(NETLINK_GENERIC, &socksp)) {
                goto error;
            }

            error = vport_add_channel(dpif, vport.port_no, socksp);
            if (error) {
                VLOG_INFO("%s: could not add channels for port %s",
                          dpif_name(&dpif->dpif), vport.name);
                nl_sock_destroy(socksp);
                retval = error;
                goto error;
            }
            upcall_pid = nl_sock_pid(socksp);
        }

        /* Configure the vport to deliver misses to 'sock'. */
        if (vport.upcall_pids[0] == 0
            || vport.n_upcall_pids != 1
            || upcall_pid != vport.upcall_pids[0]) {
            struct dpif_netlink_vport vport_request;

            dpif_netlink_vport_init(&vport_request);
            vport_request.cmd = OVS_VPORT_CMD_SET;
            vport_request.dp_ifindex = dpif->dp_ifindex;
            vport_request.port_no = vport.port_no;
            vport_request.n_upcall_pids = 1;
            vport_request.upcall_pids = &upcall_pid;
            error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
            if (error) {
                VLOG_WARN_RL(&error_rl,
                             "%s: failed to set upcall pid on port: %s",
                             dpif_name(&dpif->dpif), ovs_strerror(error));

                if (error != ENODEV && error != ENOENT) {
                    retval = error;
                } else {
                    /* The vport isn't really there, even though the dump says
                     * it is.  Probably we just hit a race after a port
                     * disappeared. */
                }
                goto error;
            }
        }

        if (port_no < keep_channels_nbits) {
            bitmap_set1(keep_channels, port_no);
        }
        continue;

    error:
        /* Per-port failure: drop this port's channels and keep dumping. */
        vport_del_channels(dpif, vport.port_no);
    }
    nl_dump_done(&dump);
    ofpbuf_uninit(&buf);

    /* Discard any saved channels that we didn't reuse. */
    for (i = 0; i < keep_channels_nbits; i++) {
        if (!bitmap_is_set(keep_channels, i)) {
            vport_del_channels(dpif, u32_to_odp(i));
        }
    }
    free(keep_channels);

    return retval;
}
2325
2326 static int
2327 dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
2328 OVS_REQ_WRLOCK(dpif->upcall_lock)
2329 {
2330 if ((dpif->handlers != NULL) == enable) {
2331 return 0;
2332 } else if (!enable) {
2333 destroy_all_channels(dpif);
2334 return 0;
2335 } else {
2336 return dpif_netlink_refresh_channels(dpif, 1);
2337 }
2338 }
2339
2340 static int
2341 dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2342 {
2343 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2344 int error;
2345
2346 fat_rwlock_wrlock(&dpif->upcall_lock);
2347 error = dpif_netlink_recv_set__(dpif, enable);
2348 fat_rwlock_unlock(&dpif->upcall_lock);
2349
2350 return error;
2351 }
2352
2353 static int
2354 dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2355 {
2356 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2357 int error = 0;
2358
2359 #ifdef _WIN32
2360 /* Multiple upcall handlers will be supported once kernel datapath supports
2361 * it. */
2362 if (n_handlers > 1) {
2363 return error;
2364 }
2365 #endif
2366
2367 fat_rwlock_wrlock(&dpif->upcall_lock);
2368 if (dpif->handlers) {
2369 error = dpif_netlink_refresh_channels(dpif, n_handlers);
2370 }
2371 fat_rwlock_unlock(&dpif->upcall_lock);
2372
2373 return error;
2374 }
2375
2376 static int
2377 dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2378 uint32_t queue_id, uint32_t *priority)
2379 {
2380 if (queue_id < 0xf000) {
2381 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2382 return 0;
2383 } else {
2384 return EINVAL;
2385 }
2386 }
2387
/* Parses the Netlink message in 'buf' as an OVS_PACKET_CMD_MISS or
 * OVS_PACKET_CMD_ACTION upcall.  On success, fills in every field of
 * '*upcall' (with pointers into 'buf'), stores the datapath ifindex in
 * '*dp_ifindex', and returns 0.  Returns EINVAL if the message is
 * malformed or is not an upcall. */
static int
parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
                 struct dpif_upcall *upcall, int *dp_ifindex)
{
    static const struct nl_policy ovs_packet_policy[] = {
        /* Always present. */
        [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
                                     .min_len = ETH_HEADER_LEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },

        /* OVS_PACKET_CMD_ACTION only. */
        [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
    };

    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_packet_family
        || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
                            ARRAY_SIZE(ovs_packet_policy))) {
        return EINVAL;
    }

    /* Map the generic netlink command onto the dpif upcall type. */
    int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
                : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
                : -1);
    if (type < 0) {
        return EINVAL;
    }

    /* (Re)set ALL fields of '*upcall' on successful return. */
    upcall->type = type;
    upcall->key = CONST_CAST(struct nlattr *,
                             nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
    upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
    dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
    upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
    upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
    upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
    upcall->mru = a[OVS_PACKET_ATTR_MRU];

    /* Allow overwriting the netlink attribute header without reallocating.
     * The stub starts one nlattr before the payload so callers may push
     * headers into that space; data/size are then narrowed back to the
     * packet payload itself. */
    dp_packet_use_stub(&upcall->packet,
                       CONST_CAST(struct nlattr *,
                                  nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
                       nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
                       sizeof(struct nlattr));
    dp_packet_set_data(&upcall->packet,
                       (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
    dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));

    if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
        /* Ethernet frame */
        upcall->packet.packet_type = htonl(PT_ETH);
    } else {
        /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
        ovs_be16 ethertype = 0;
        const struct nlattr *et_nla = nl_attr_find__(upcall->key,
                                                     upcall->key_len,
                                                     OVS_KEY_ATTR_ETHERTYPE);
        if (et_nla) {
            ethertype = nl_attr_get_be16(et_nla);
        }
        upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
                                                    ntohs(ethertype));
        dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
    }

    *dp_ifindex = ovs_header->dp_ifindex;

    return 0;
}
2467
#ifdef _WIN32
/* Upper bound on socket reads per call, so one receive attempt cannot
 * monopolize the handler thread. */
#define PACKET_RECV_BATCH_SIZE 50
/* Attempts to receive one upcall for 'handler_id' from the handler's vport
 * socket pool.  On success fills in '*upcall' (with pointers into 'buf')
 * and returns 0; returns EAGAIN if nothing is pending, or another
 * positive errno value on error. */
static int
dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                          struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler;
    int read_tries = 0;
    struct dpif_windows_vport_sock *sock_pool;
    uint32_t i;

    if (!dpif->handlers) {
        return EAGAIN;
    }

    /* Only one handler is supported currently. */
    if (handler_id >= 1) {
        return EAGAIN;
    }

    if (handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    sock_pool = handler->vport_sock_pool;

    for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
        for (;;) {
            int dp_ifindex;
            int error;

            /* Bound the work done per call. */
            if (++read_tries > PACKET_RECV_BATCH_SIZE) {
                return EAGAIN;
            }

            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                /* XXX: report_loss(dpif, ch, idx, handler_id); */
                continue;
            }

            /* XXX: ch->last_poll = time_msec(); */
            if (error) {
                if (error == EAGAIN) {
                    /* This socket is drained; move to the next one. */
                    break;
                }
                return error;
            }

            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (!error && dp_ifindex == dpif->dp_ifindex) {
                return 0;
            } else if (error) {
                return error;
            }
        }
    }

    return EAGAIN;
}
2534 #else
/* Attempts to receive one upcall for 'handler_id' from the channels its
 * epoll instance reports as readable.  On success fills in '*upcall'
 * (with pointers into 'buf') and returns 0; returns EAGAIN if nothing is
 * pending, or another positive errno value on error. */
static int
dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
                    struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler;
    int read_tries = 0;

    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    if (handler->event_offset >= handler->n_events) {
        int retval;

        /* All previously reported events have been consumed: poll for a
         * new batch without blocking (timeout 0). */
        handler->event_offset = handler->n_events = 0;

        do {
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
                                dpif->uc_array_size, 0);
        } while (retval < 0 && errno == EINTR);

        if (retval < 0) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
        } else if (retval > 0) {
            handler->n_events = retval;
        }
    }

    while (handler->event_offset < handler->n_events) {
        /* epoll event data carries the channel index. */
        int idx = handler->epoll_events[handler->event_offset].data.u32;
        struct dpif_channel *ch = &dpif->channels[idx];

        handler->event_offset++;

        for (;;) {
            int dp_ifindex;
            int error;

            /* Bound the work done per call so one busy channel cannot
             * starve the rest. */
            if (++read_tries > 50) {
                return EAGAIN;
            }

            error = nl_sock_recv(ch->sock, buf, NULL, false);
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                report_loss(dpif, ch, idx, handler_id);
                continue;
            }

            ch->last_poll = time_msec();
            if (error) {
                if (error == EAGAIN) {
                    /* This channel is drained; move to the next event. */
                    break;
                }
                return error;
            }

            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (!error && dp_ifindex == dpif->dp_ifindex) {
                return 0;
            } else if (error) {
                return error;
            }
        }
    }

    return EAGAIN;
}
2609 #endif
2610
2611 static int
2612 dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2613 struct dpif_upcall *upcall, struct ofpbuf *buf)
2614 {
2615 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2616 int error;
2617
2618 fat_rwlock_rdlock(&dpif->upcall_lock);
2619 #ifdef _WIN32
2620 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2621 #else
2622 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
2623 #endif
2624 fat_rwlock_unlock(&dpif->upcall_lock);
2625
2626 return error;
2627 }
2628
2629 static void
2630 dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
2631 OVS_REQ_RDLOCK(dpif->upcall_lock)
2632 {
2633 #ifdef _WIN32
2634 uint32_t i;
2635 struct dpif_windows_vport_sock *sock_pool =
2636 dpif->handlers[handler_id].vport_sock_pool;
2637
2638 /* Only one handler is supported currently. */
2639 if (handler_id >= 1) {
2640 return;
2641 }
2642
2643 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2644 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2645 }
2646 #else
2647 if (dpif->handlers && handler_id < dpif->n_handlers) {
2648 struct dpif_handler *handler = &dpif->handlers[handler_id];
2649
2650 poll_fd_wait(handler->epoll_fd, POLLIN);
2651 }
2652 #endif
2653 }
2654
2655 static void
2656 dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
2657 {
2658 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2659
2660 fat_rwlock_rdlock(&dpif->upcall_lock);
2661 dpif_netlink_recv_wait__(dpif, handler_id);
2662 fat_rwlock_unlock(&dpif->upcall_lock);
2663 }
2664
2665 static void
2666 dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
2667 OVS_REQ_WRLOCK(dpif->upcall_lock)
2668 {
2669 if (dpif->handlers) {
2670 size_t i;
2671
2672 if (!dpif->channels[0].sock) {
2673 return;
2674 }
2675 for (i = 0; i < dpif->uc_array_size; i++ ) {
2676
2677 nl_sock_drain(dpif->channels[i].sock);
2678 }
2679 }
2680 }
2681
2682 static void
2683 dpif_netlink_recv_purge(struct dpif *dpif_)
2684 {
2685 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2686
2687 fat_rwlock_wrlock(&dpif->upcall_lock);
2688 dpif_netlink_recv_purge__(dpif);
2689 fat_rwlock_unlock(&dpif->upcall_lock);
2690 }
2691
/* Returns a malloc'd string containing the version of the running
 * openvswitch kernel module (read from sysfs), or NULL if it cannot be
 * determined.  The caller owns the returned string. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    FILE *f = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (f) {
        char version[MAX_VERSION_STR_SIZE];

        if (fgets(version, sizeof version, f)) {
            /* Trim the trailing newline, if any. */
            version[strcspn(version, "\n")] = '\0';
            version_str = xstrdup(version);
        }
        fclose(f);
    }
#endif

    return version_str;
}
2721
/* State carried across a conntrack dump.  Embeds the generic ct_dpif dump
 * state so the two can be converted with INIT_CONTAINER(). */
struct dpif_netlink_ct_dump_state {
    struct ct_dpif_dump_state up;        /* Generic dump state (base). */
    struct nl_ct_dump_state *nl_ct_dump; /* Underlying netlink dump. */
};
2726
2727 static int
2728 dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2729 struct ct_dpif_dump_state **dump_,
2730 const uint16_t *zone, int *ptot_bkts)
2731 {
2732 struct dpif_netlink_ct_dump_state *dump;
2733 int err;
2734
2735 dump = xzalloc(sizeof *dump);
2736 err = nl_ct_dump_start(&dump->nl_ct_dump, zone, ptot_bkts);
2737 if (err) {
2738 free(dump);
2739 return err;
2740 }
2741
2742 *dump_ = &dump->up;
2743
2744 return 0;
2745 }
2746
2747 static int
2748 dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2749 struct ct_dpif_dump_state *dump_,
2750 struct ct_dpif_entry *entry)
2751 {
2752 struct dpif_netlink_ct_dump_state *dump;
2753
2754 INIT_CONTAINER(dump, dump_, up);
2755
2756 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2757 }
2758
2759 static int
2760 dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2761 struct ct_dpif_dump_state *dump_)
2762 {
2763 struct dpif_netlink_ct_dump_state *dump;
2764 int err;
2765
2766 INIT_CONTAINER(dump, dump_, up);
2767
2768 err = nl_ct_dump_done(dump->nl_ct_dump);
2769 free(dump);
2770 return err;
2771 }
2772
2773 static int
2774 dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
2775 const struct ct_dpif_tuple *tuple)
2776 {
2777 if (tuple) {
2778 return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
2779 } else if (zone) {
2780 return nl_ct_flush_zone(*zone);
2781 } else {
2782 return nl_ct_flush();
2783 }
2784 }
2785
2786 static int
2787 dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED,
2788 const uint32_t *default_limits,
2789 const struct ovs_list *zone_limits)
2790 {
2791 struct ovs_zone_limit req_zone_limit;
2792
2793 if (ovs_ct_limit_family < 0) {
2794 return EOPNOTSUPP;
2795 }
2796
2797 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2798 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2799 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_SET,
2800 OVS_CT_LIMIT_VERSION);
2801
2802 struct ovs_header *ovs_header;
2803 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2804 ovs_header->dp_ifindex = 0;
2805
2806 size_t opt_offset;
2807 opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2808 if (default_limits) {
2809 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2810 req_zone_limit.limit = *default_limits;
2811 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2812 }
2813
2814 if (!ovs_list_is_empty(zone_limits)) {
2815 struct ct_dpif_zone_limit *zone_limit;
2816
2817 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2818 req_zone_limit.zone_id = zone_limit->zone;
2819 req_zone_limit.limit = zone_limit->limit;
2820 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2821 }
2822 }
2823 nl_msg_end_nested(request, opt_offset);
2824
2825 int err = nl_transact(NETLINK_GENERIC, request, NULL);
2826 ofpbuf_uninit(request);
2827 return err;
2828 }
2829
/* Parses an OVS_CT_LIMIT_CMD_GET reply in 'buf'.  The default zone's
 * limit is stored in '*default_limit'; each valid non-default zone is
 * appended to 'zone_limits'.  Returns 0 on success, EINVAL if the message
 * is malformed. */
static int
dpif_netlink_zone_limits_from_ofpbuf(const struct ofpbuf *buf,
                                     uint32_t *default_limit,
                                     struct ovs_list *zone_limits)
{
    static const struct nl_policy ovs_ct_limit_policy[] = {
        [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED,
                                           .optional = true },
    };

    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *attr[ARRAY_SIZE(ovs_ct_limit_policy)];

    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_ct_limit_family
        || !nl_policy_parse(&b, 0, ovs_ct_limit_policy, attr,
                            ARRAY_SIZE(ovs_ct_limit_policy))) {
        return EINVAL;
    }


    if (!attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
        return EINVAL;
    }

    /* The attribute payload is a packed array of struct ovs_zone_limit;
     * walk it in NLA_ALIGN-sized steps. */
    int rem = NLA_ALIGN(
        nl_attr_get_size(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]));
    const struct ovs_zone_limit *zone_limit =
        nl_attr_get(attr[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);

    while (rem >= sizeof *zone_limit) {
        if (zone_limit->zone_id == OVS_ZONE_LIMIT_DEFAULT_ZONE) {
            *default_limit = zone_limit->limit;
        } else if (zone_limit->zone_id < OVS_ZONE_LIMIT_DEFAULT_ZONE ||
                   zone_limit->zone_id > UINT16_MAX) {
            /* Zone id out of range: silently skip this entry. */
        } else {
            ct_dpif_push_zone_limit(zone_limits, zone_limit->zone_id,
                                    zone_limit->limit, zone_limit->count);
        }
        rem -= NLA_ALIGN(sizeof *zone_limit);
        zone_limit = ALIGNED_CAST(struct ovs_zone_limit *,
            (unsigned char *) zone_limit + NLA_ALIGN(sizeof *zone_limit));
    }
    return 0;
}
2879
2880 static int
2881 dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED,
2882 uint32_t *default_limit,
2883 const struct ovs_list *zone_limits_request,
2884 struct ovs_list *zone_limits_reply)
2885 {
2886 if (ovs_ct_limit_family < 0) {
2887 return EOPNOTSUPP;
2888 }
2889
2890 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2891 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2892 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_GET,
2893 OVS_CT_LIMIT_VERSION);
2894
2895 struct ovs_header *ovs_header;
2896 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2897 ovs_header->dp_ifindex = 0;
2898
2899 if (!ovs_list_is_empty(zone_limits_request)) {
2900 size_t opt_offset = nl_msg_start_nested(request,
2901 OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2902
2903 struct ovs_zone_limit req_zone_limit;
2904 req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
2905 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2906
2907 struct ct_dpif_zone_limit *zone_limit;
2908 LIST_FOR_EACH (zone_limit, node, zone_limits_request) {
2909 req_zone_limit.zone_id = zone_limit->zone;
2910 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2911 }
2912
2913 nl_msg_end_nested(request, opt_offset);
2914 }
2915
2916 struct ofpbuf *reply;
2917 int err = nl_transact(NETLINK_GENERIC, request, &reply);
2918 if (err) {
2919 goto out;
2920 }
2921
2922 err = dpif_netlink_zone_limits_from_ofpbuf(reply, default_limit,
2923 zone_limits_reply);
2924
2925 out:
2926 ofpbuf_uninit(request);
2927 ofpbuf_uninit(reply);
2928 return err;
2929 }
2930
2931 static int
2932 dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED,
2933 const struct ovs_list *zone_limits)
2934 {
2935 if (ovs_ct_limit_family < 0) {
2936 return EOPNOTSUPP;
2937 }
2938
2939 struct ofpbuf *request = ofpbuf_new(NL_DUMP_BUFSIZE);
2940 nl_msg_put_genlmsghdr(request, 0, ovs_ct_limit_family,
2941 NLM_F_REQUEST | NLM_F_ECHO, OVS_CT_LIMIT_CMD_DEL,
2942 OVS_CT_LIMIT_VERSION);
2943
2944 struct ovs_header *ovs_header;
2945 ovs_header = ofpbuf_put_uninit(request, sizeof *ovs_header);
2946 ovs_header->dp_ifindex = 0;
2947
2948 if (!ovs_list_is_empty(zone_limits)) {
2949 size_t opt_offset =
2950 nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
2951
2952 struct ct_dpif_zone_limit *zone_limit;
2953 LIST_FOR_EACH (zone_limit, node, zone_limits) {
2954 struct ovs_zone_limit req_zone_limit;
2955 req_zone_limit.zone_id = zone_limit->zone;
2956 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit);
2957 }
2958 nl_msg_end_nested(request, opt_offset);
2959 }
2960
2961 int err = nl_transact(NETLINK_GENERIC, request, NULL);
2962
2963 ofpbuf_uninit(request);
2964 return err;
2965 }
2966 \f
2967 /* Meters */
2968
2969 /* Set of supported meter flags */
2970 #define DP_SUPPORTED_METER_FLAGS_MASK \
2971 (OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST)
2972
2973 /* Meter support was introduced in Linux 4.15. In some versions of
2974 * Linux 4.15, 4.16, and 4.17, there was a bug that never set the id
2975 * when the meter was created, so all meters essentially had an id of
2976 * zero. Check for that condition and disable meters on those kernels. */
2977 static bool probe_broken_meters(struct dpif *);
2978
2979 static void
2980 dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
2981 void *stub, size_t size, uint32_t command)
2982 {
2983 ofpbuf_use_stub(buf, stub, size);
2984
2985 nl_msg_put_genlmsghdr(buf, 0, ovs_meter_family, NLM_F_REQUEST | NLM_F_ECHO,
2986 command, OVS_METER_VERSION);
2987
2988 struct ovs_header *ovs_header;
2989 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2990 ovs_header->dp_ifindex = dpif->dp_ifindex;
2991 }
2992
2993 /* Execute meter 'request' in the kernel datapath. If the command
2994 * fails, returns a positive errno value. Otherwise, stores the reply
2995 * in '*replyp', parses the policy according to 'reply_policy' into the
2996 * array of Netlink attribute in 'a', and returns 0. On success, the
2997 * caller is responsible for calling ofpbuf_delete() on '*replyp'
2998 * ('replyp' will contain pointers into 'a'). */
2999 static int
3000 dpif_netlink_meter_transact(struct ofpbuf *request, struct ofpbuf **replyp,
3001 const struct nl_policy *reply_policy,
3002 struct nlattr **a, size_t size_a)
3003 {
3004 int error = nl_transact(NETLINK_GENERIC, request, replyp);
3005 ofpbuf_uninit(request);
3006
3007 if (error) {
3008 return error;
3009 }
3010
3011 struct nlmsghdr *nlmsg = ofpbuf_try_pull(*replyp, sizeof *nlmsg);
3012 struct genlmsghdr *genl = ofpbuf_try_pull(*replyp, sizeof *genl);
3013 struct ovs_header *ovs_header = ofpbuf_try_pull(*replyp,
3014 sizeof *ovs_header);
3015 if (!nlmsg || !genl || !ovs_header
3016 || nlmsg->nlmsg_type != ovs_meter_family
3017 || !nl_policy_parse(*replyp, 0, reply_policy, a, size_a)) {
3018 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3019 VLOG_DBG_RL(&rl,
3020 "Kernel module response to meter tranaction is invalid");
3021 return EINVAL;
3022 }
3023 return 0;
3024 }
3025
/* dpif 'meter_get_features' callback: queries the datapath for its maximum
 * meter/band counts and supported band types.  On failure '*features' is
 * left untouched (presumably the caller pre-zeroes it — confirm in
 * dpif.c). */
static void
dpif_netlink_meter_get_features(const struct dpif *dpif_,
                                struct ofputil_meter_features *features)
{
    if (probe_broken_meters(CONST_CAST(struct dpif *, dpif_))) {
        /* NOTE(review): this assignment changes only the local copy of the
         * pointer, not the caller's structure, so it appears to be a
         * no-op; confirm whether zeroing '*features' was intended. */
        features = NULL;
        return;
    }

    struct ofpbuf buf, *msg;
    uint64_t stub[1024 / 8];

    static const struct nl_policy ovs_meter_features_policy[] = {
        [OVS_METER_ATTR_MAX_METERS] = { .type = NL_A_U32 },
        [OVS_METER_ATTR_MAX_BANDS] = { .type = NL_A_U32 },
        [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
    };
    struct nlattr *a[ARRAY_SIZE(ovs_meter_features_policy)];

    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub,
                            OVS_METER_CMD_FEATURES);
    if (dpif_netlink_meter_transact(&buf, &msg, ovs_meter_features_policy, a,
                                    ARRAY_SIZE(ovs_meter_features_policy))) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_INFO_RL(&rl,
                  "dpif_netlink_meter_transact OVS_METER_CMD_FEATURES failed");
        return;
    }

    features->max_meters = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_METERS]);
    features->max_bands = nl_attr_get_u32(a[OVS_METER_ATTR_MAX_BANDS]);

    /* Bands is a nested attribute of zero or more nested
     * band attributes. */
    if (a[OVS_METER_ATTR_BANDS]) {
        const struct nlattr *nla;
        size_t left;

        NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
            const struct nlattr *band_nla;
            size_t band_left;

            NL_NESTED_FOR_EACH (band_nla, band_left, nla) {
                if (nl_attr_type(band_nla) == OVS_BAND_ATTR_TYPE) {
                    if (nl_attr_get_size(band_nla) == sizeof(uint32_t)) {
                        switch (nl_attr_get_u32(band_nla)) {
                        case OVS_METER_BAND_TYPE_DROP:
                            features->band_types |= 1 << OFPMBT13_DROP;
                            break;
                        }
                    }
                }
            }
        }
    }
    features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;

    ofpbuf_delete(msg);
}
3086
/* Installs or modifies kernel meter 'meter_id' according to 'config'.
 * Returns 0 on success, otherwise a positive errno value:
 *   - EBADF if 'config' requests flags outside
 *     DP_SUPPORTED_METER_FLAGS_MASK,
 *   - ENODEV if it contains a band type other than drop,
 *   - another positive errno value on transaction failure. */
static int
dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
                         struct ofputil_meter_config *config)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    struct ofpbuf buf, *msg;
    uint64_t stub[1024 / 8];

    static const struct nl_policy ovs_meter_set_response_policy[] = {
        [OVS_METER_ATTR_ID] = { .type = NL_A_U32 },
    };
    struct nlattr *a[ARRAY_SIZE(ovs_meter_set_response_policy)];

    if (config->flags & ~DP_SUPPORTED_METER_FLAGS_MASK) {
        return EBADF; /* Unsupported flags set */
    }

    /* Validate all bands before building the request. */
    for (size_t i = 0; i < config->n_bands; i++) {
        switch (config->bands[i].type) {
        case OFPMBT13_DROP:
            break;
        default:
            return ENODEV; /* Unsupported band type */
        }
    }

    dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);

    nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);

    if (config->flags & OFPMF13_KBPS) {
        nl_msg_put_flag(&buf, OVS_METER_ATTR_KBPS);
    }

    size_t bands_offset = nl_msg_start_nested(&buf, OVS_METER_ATTR_BANDS);
    /* Bands */
    for (size_t i = 0; i < config->n_bands; ++i) {
        struct ofputil_meter_band * band = &config->bands[i];
        uint32_t band_type;

        /* NOTE(review): each band is nested under attribute type
         * OVS_BAND_ATTR_UNSPEC; presumably the kernel iterates bands by
         * position rather than by attribute type — confirm. */
        size_t band_offset = nl_msg_start_nested(&buf, OVS_BAND_ATTR_UNSPEC);

        switch (band->type) {
        case OFPMBT13_DROP:
            band_type = OVS_METER_BAND_TYPE_DROP;
            break;
        default:
            band_type = OVS_METER_BAND_TYPE_UNSPEC;
        }
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_TYPE, band_type);
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_RATE, band->rate);
        /* Without OFPMF13_BURST the burst size defaults to the rate. */
        nl_msg_put_u32(&buf, OVS_BAND_ATTR_BURST,
                       config->flags & OFPMF13_BURST ?
                       band->burst_size : band->rate);
        nl_msg_end_nested(&buf, band_offset);
    }
    nl_msg_end_nested(&buf, bands_offset);

    int error = dpif_netlink_meter_transact(&buf, &msg,
                                            ovs_meter_set_response_policy, a,
                                    ARRAY_SIZE(ovs_meter_set_response_policy));
    if (error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_INFO_RL(&rl,
                     "dpif_netlink_meter_transact OVS_METER_CMD_SET failed");
        return error;
    }

    if (nl_attr_get_u32(a[OVS_METER_ATTR_ID]) != meter_id.uint32) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_INFO_RL(&rl,
                     "Kernel returned a different meter id than requested");
    }
    ofpbuf_delete(msg);
    return 0;
}
3163
3164 static int
3165 dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
3166 struct ofputil_meter_config *config)
3167 {
3168 if (probe_broken_meters(dpif_)) {
3169 return ENOMEM;
3170 }
3171
3172 return dpif_netlink_meter_set__(dpif_, meter_id, config);
3173 }
3174
3175 /* Retrieve statistics and/or delete meter 'meter_id'. Statistics are
3176 * stored in 'stats', if it is not null. If 'command' is
3177 * OVS_METER_CMD_DEL, the meter is deleted and statistics are optionally
3178 * retrieved. If 'command' is OVS_METER_CMD_GET, then statistics are
3179 * simply retrieved. */
3180 static int
3181 dpif_netlink_meter_get_stats(const struct dpif *dpif_,
3182 ofproto_meter_id meter_id,
3183 struct ofputil_meter_stats *stats,
3184 uint16_t max_bands,
3185 enum ovs_meter_cmd command)
3186 {
3187 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3188 struct ofpbuf buf, *msg;
3189 uint64_t stub[1024 / 8];
3190
3191 static const struct nl_policy ovs_meter_stats_policy[] = {
3192 [OVS_METER_ATTR_ID] = { .type = NL_A_U32, .optional = true},
3193 [OVS_METER_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
3194 .optional = true},
3195 [OVS_METER_ATTR_BANDS] = { .type = NL_A_NESTED, .optional = true },
3196 };
3197 struct nlattr *a[ARRAY_SIZE(ovs_meter_stats_policy)];
3198
3199 dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, command);
3200
3201 nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
3202
3203 int error = dpif_netlink_meter_transact(&buf, &msg,
3204 ovs_meter_stats_policy, a,
3205 ARRAY_SIZE(ovs_meter_stats_policy));
3206 if (error) {
3207 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3208 VLOG_INFO_RL(&rl, "dpif_netlink_meter_transact %s failed",
3209 command == OVS_METER_CMD_GET ? "get" : "del");
3210 return error;
3211 }
3212
3213 if (stats
3214 && a[OVS_METER_ATTR_ID]
3215 && a[OVS_METER_ATTR_STATS]
3216 && nl_attr_get_u32(a[OVS_METER_ATTR_ID]) == meter_id.uint32) {
3217 /* return stats */
3218 const struct ovs_flow_stats *stat;
3219 const struct nlattr *nla;
3220 size_t left;
3221
3222 stat = nl_attr_get(a[OVS_METER_ATTR_STATS]);
3223 stats->packet_in_count = get_32aligned_u64(&stat->n_packets);
3224 stats->byte_in_count = get_32aligned_u64(&stat->n_bytes);
3225
3226 if (a[OVS_METER_ATTR_BANDS]) {
3227 size_t n_bands = 0;
3228 NL_NESTED_FOR_EACH (nla, left, a[OVS_METER_ATTR_BANDS]) {
3229 const struct nlattr *band_nla;
3230 band_nla = nl_attr_find_nested(nla, OVS_BAND_ATTR_STATS);
3231 if (band_nla && nl_attr_get_size(band_nla) \
3232 == sizeof(struct ovs_flow_stats)) {
3233 stat = nl_attr_get(band_nla);
3234
3235 if (n_bands < max_bands) {
3236 stats->bands[n_bands].packet_count
3237 = get_32aligned_u64(&stat->n_packets);
3238 stats->bands[n_bands].byte_count
3239 = get_32aligned_u64(&stat->n_bytes);
3240 ++n_bands;
3241 }
3242 } else {
3243 stats->bands[n_bands].packet_count = 0;
3244 stats->bands[n_bands].byte_count = 0;
3245 ++n_bands;
3246 }
3247 }
3248 stats->n_bands = n_bands;
3249 } else {
3250 /* For a non-existent meter, return 0 stats. */
3251 stats->n_bands = 0;
3252 }
3253 }
3254
3255 ofpbuf_delete(msg);
3256 return error;
3257 }
3258
3259 static int
3260 dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
3261 struct ofputil_meter_stats *stats, uint16_t max_bands)
3262 {
3263 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3264 OVS_METER_CMD_GET);
3265 }
3266
3267 static int
3268 dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
3269 struct ofputil_meter_stats *stats, uint16_t max_bands)
3270 {
3271 return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
3272 OVS_METER_CMD_DEL);
3273 }
3274
/* Probes whether the kernel datapath's meter implementation is broken by
 * adding two meters and checking that both can be read back.  Returns true
 * if the implementation appears broken, false otherwise. */
static bool
probe_broken_meters__(struct dpif *dpif)
{
    /* This test is destructive if a probe occurs while ovs-vswitchd is
     * running (e.g., an ovs-dpctl meter command is called), so choose a
     * random high meter id to make this less likely to occur. */
    ofproto_meter_id id1 = { 54545401 };
    ofproto_meter_id id2 = { 54545402 };
    /* One drop band; two configs that differ only in meter_id so that a
     * broken kernel that conflates meters is detectable. */
    struct ofputil_meter_band band = {OFPMBT13_DROP, 0, 1, 0};
    struct ofputil_meter_config config1 = { 1, OFPMF13_KBPS, 1, &band};
    struct ofputil_meter_config config2 = { 2, OFPMF13_KBPS, 1, &band};

    /* Try adding two meters and make sure that they both come back with
     * the proper meter id.  Use the "__" version so that we don't cause
     * a recursive deadlock (dpif_netlink_meter_set() itself calls
     * probe_broken_meters()). */
    dpif_netlink_meter_set__(dpif, id1, &config1);
    dpif_netlink_meter_set__(dpif, id2, &config2);

    if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
        || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
        VLOG_INFO("The kernel module has a broken meter implementation.");
        return true;
    }

    /* Clean up the probe meters on the healthy path.  (On the broken path
     * above there is nothing useful to delete.) */
    dpif_netlink_meter_del(dpif, id1, NULL, 0);
    dpif_netlink_meter_del(dpif, id2, NULL, 0);

    return false;
}
3304
/* Returns true if the kernel datapath's meter implementation is known to be
 * broken, probing it on the first call.  Subsequent calls return the cached
 * result; 'dpif' is only used by the first caller to win the race. */
static bool
probe_broken_meters(struct dpif *dpif)
{
    /* This is a once-only test because currently OVS only has at most a single
     * Netlink capable datapath on any given platform. */
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    /* Written only inside the once-block, so reads after
     * ovsthread_once_start() returns false are safe. */
    static bool broken_meters = false;
    if (ovsthread_once_start(&once)) {
        broken_meters = probe_broken_meters__(dpif);
        ovsthread_once_done(&once);
    }
    return broken_meters;
}
3319 \f
/* Datapath interface class for the Linux kernel ("system") datapath.
 * Entries follow "struct dpif_class" member order; a NULL entry means the
 * operation is unsupported or unneeded for this datapath type. */
const struct dpif_class dpif_netlink_class = {
    "system",
    NULL,                       /* init */
    dpif_netlink_enumerate,
    NULL,
    dpif_netlink_open,
    dpif_netlink_close,
    dpif_netlink_destroy,
    dpif_netlink_run,
    NULL,                       /* wait */
    dpif_netlink_get_stats,
    dpif_netlink_port_add,
    dpif_netlink_port_del,
    NULL,                       /* port_set_config */
    dpif_netlink_port_query_by_number,
    dpif_netlink_port_query_by_name,
    dpif_netlink_port_get_pid,
    dpif_netlink_port_dump_start,
    dpif_netlink_port_dump_next,
    dpif_netlink_port_dump_done,
    dpif_netlink_port_poll,
    dpif_netlink_port_poll_wait,
    dpif_netlink_flow_flush,
    dpif_netlink_flow_dump_create,
    dpif_netlink_flow_dump_destroy,
    dpif_netlink_flow_dump_thread_create,
    dpif_netlink_flow_dump_thread_destroy,
    dpif_netlink_flow_dump_next,
    dpif_netlink_operate,
    dpif_netlink_recv_set,
    dpif_netlink_handlers_set,
    NULL,                       /* set_config */
    dpif_netlink_queue_to_priority,
    dpif_netlink_recv,
    dpif_netlink_recv_wait,
    dpif_netlink_recv_purge,
    NULL,                       /* register_dp_purge_cb */
    NULL,                       /* register_upcall_cb */
    NULL,                       /* enable_upcall */
    NULL,                       /* disable_upcall */
    dpif_netlink_get_datapath_version, /* get_datapath_version */
    dpif_netlink_ct_dump_start,
    dpif_netlink_ct_dump_next,
    dpif_netlink_ct_dump_done,
    dpif_netlink_ct_flush,
    NULL,                       /* ct_set_maxconns */
    NULL,                       /* ct_get_maxconns */
    NULL,                       /* ct_get_nconns */
    dpif_netlink_ct_set_limits,
    dpif_netlink_ct_get_limits,
    dpif_netlink_ct_del_limits,
    dpif_netlink_meter_get_features,
    dpif_netlink_meter_set,
    dpif_netlink_meter_get,
    dpif_netlink_meter_del,
};
3376
3377 static int
3378 dpif_netlink_init(void)
3379 {
3380 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
3381 static int error;
3382
3383 if (ovsthread_once_start(&once)) {
3384 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
3385 &ovs_datapath_family);
3386 if (error) {
3387 VLOG_INFO("Generic Netlink family '%s' does not exist. "
3388 "The Open vSwitch kernel module is probably not loaded.",
3389 OVS_DATAPATH_FAMILY);
3390 }
3391 if (!error) {
3392 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
3393 }
3394 if (!error) {
3395 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
3396 }
3397 if (!error) {
3398 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
3399 &ovs_packet_family);
3400 }
3401 if (!error) {
3402 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
3403 &ovs_vport_mcgroup);
3404 }
3405 if (!error) {
3406 if (nl_lookup_genl_family(OVS_METER_FAMILY, &ovs_meter_family)) {
3407 VLOG_INFO("The kernel module does not support meters.");
3408 }
3409 }
3410 if (nl_lookup_genl_family(OVS_CT_LIMIT_FAMILY,
3411 &ovs_ct_limit_family) < 0) {
3412 VLOG_INFO("Generic Netlink family '%s' does not exist. "
3413 "Please update the Open vSwitch kernel module to enable "
3414 "the conntrack limit feature.", OVS_CT_LIMIT_FAMILY);
3415 }
3416
3417 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
3418
3419 ovsthread_once_done(&once);
3420 }
3421
3422 return error;
3423 }
3424
3425 bool
3426 dpif_netlink_is_internal_device(const char *name)
3427 {
3428 struct dpif_netlink_vport reply;
3429 struct ofpbuf *buf;
3430 int error;
3431
3432 error = dpif_netlink_vport_get(name, &reply, &buf);
3433 if (!error) {
3434 ofpbuf_delete(buf);
3435 } else if (error != ENODEV && error != ENOENT) {
3436 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
3437 name, ovs_strerror(error));
3438 }
3439
3440 return reply.type == OVS_VPORT_TYPE_INTERNAL;
3441 }
3442
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
 * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
 * positive errno value (EINVAL on any malformed or unexpected message).
 *
 * 'vport' will contain pointers into 'buf', so the caller should not free
 * 'buf' while 'vport' is still in use. */
static int
dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
                               const struct ofpbuf *buf)
{
    /* Port number, type, and name are mandatory; everything else optional. */
    static const struct nl_policy ovs_vport_policy[] = {
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
                                   .optional = true },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_VPORT_ATTR_NETNSID] = { .type = NL_A_U32, .optional = true },
    };

    dpif_netlink_vport_init(vport);

    /* Peel off the fixed headers, then policy-check the attributes.  Any
     * short message leaves one of these pointers NULL. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_vport_family
        || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
                            ARRAY_SIZE(ovs_vport_policy))) {
        return EINVAL;
    }

    vport->cmd = genl->cmd;
    vport->dp_ifindex = ovs_header->dp_ifindex;
    vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
    vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
    vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
    if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
        /* The attribute payload is an array of PIDs; derive the count from
         * its byte length. */
        vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
                               / (sizeof *vport->upcall_pids);
        vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);

    }
    if (a[OVS_VPORT_ATTR_STATS]) {
        vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
    }
    if (a[OVS_VPORT_ATTR_OPTIONS]) {
        vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
        vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
    }
    if (a[OVS_VPORT_ATTR_NETNSID]) {
        netnsid_set(&vport->netnsid,
                    nl_attr_get_u32(a[OVS_VPORT_ATTR_NETNSID]));
    } else {
        /* No NETNSID attribute means the vport is in the local netns. */
        netnsid_set_local(&vport->netnsid);
    }
    return 0;
}
3505
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
 * followed by Netlink attributes corresponding to 'vport'.  Fields left at
 * their "empty" defaults (see dpif_netlink_vport_init()) are omitted. */
static void
dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
                             struct ofpbuf *buf)
{
    struct ovs_header *ovs_header;

    /* NLM_F_ECHO asks the kernel to send the resulting vport back to us. */
    nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
                          vport->cmd, OVS_VPORT_VERSION);

    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
    ovs_header->dp_ifindex = vport->dp_ifindex;

    if (vport->port_no != ODPP_NONE) {
        nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
    }

    if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
        nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
    }

    if (vport->name) {
        nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
    }

    if (vport->upcall_pids) {
        /* PIDs are serialized as one opaque array attribute. */
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
                          vport->upcall_pids,
                          vport->n_upcall_pids * sizeof *vport->upcall_pids);
    }

    if (vport->stats) {
        nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
                          vport->stats, sizeof *vport->stats);
    }

    if (vport->options) {
        nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
                          vport->options, vport->options_len);
    }
}
3548
3549 /* Clears 'vport' to "empty" values. */
3550 void
3551 dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
3552 {
3553 memset(vport, 0, sizeof *vport);
3554 vport->port_no = ODPP_NONE;
3555 }
3556
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be an ovs_vport also, which is decoded
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 * reply is no longer needed ('reply' will contain pointers into '*bufp').
 *
 * On any failure with 'reply' nonnull, '*reply' is cleared and '*bufp' is
 * set to NULL, so the caller may safely inspect either. */
int
dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
                            struct dpif_netlink_vport *reply,
                            struct ofpbuf **bufp)
{
    struct ofpbuf *request_buf;
    int error;

    /* 'reply' and 'bufp' must both be provided or both omitted. */
    ovs_assert((reply != NULL) == (bufp != NULL));

    /* Make sure the Generic Netlink families are resolved first. */
    error = dpif_netlink_init();
    if (error) {
        if (reply) {
            *bufp = NULL;
            dpif_netlink_vport_init(reply);
        }
        return error;
    }

    request_buf = ofpbuf_new(1024);
    dpif_netlink_vport_to_ofpbuf(request, request_buf);
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
    ofpbuf_delete(request_buf);

    if (reply) {
        if (!error) {
            error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
        }
        if (error) {
            /* Decode failed (or transaction failed): leave the caller with
             * a cleared reply and no buffer to free. */
            dpif_netlink_vport_init(reply);
            ofpbuf_delete(*bufp);
            *bufp = NULL;
        }
    }
    return error;
}
3599
3600 /* Obtains information about the kernel vport named 'name' and stores it into
3601 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
3602 * longer needed ('reply' will contain pointers into '*bufp'). */
3603 int
3604 dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
3605 struct ofpbuf **bufp)
3606 {
3607 struct dpif_netlink_vport request;
3608
3609 dpif_netlink_vport_init(&request);
3610 request.cmd = OVS_VPORT_CMD_GET;
3611 request.name = name;
3612
3613 return dpif_netlink_vport_transact(&request, reply, bufp);
3614 }
3615
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
 * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
 * positive errno value (EINVAL on any malformed or unexpected message).
 *
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
 * while 'dp' is still in use. */
static int
dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
{
    /* The datapath name is mandatory; both stats attributes are optional. */
    static const struct nl_policy ovs_datapath_policy[] = {
        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
                                .optional = true },
        [OVS_DP_ATTR_MEGAFLOW_STATS] = {
                        NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
                        .optional = true },
    };

    dpif_netlink_dp_init(dp);

    /* Peel off the fixed headers; a short message leaves a NULL pointer. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_datapath_family
        || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
                            ARRAY_SIZE(ovs_datapath_policy))) {
        return EINVAL;
    }

    dp->cmd = genl->cmd;
    dp->dp_ifindex = ovs_header->dp_ifindex;
    dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
    if (a[OVS_DP_ATTR_STATS]) {
        dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
    }

    if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
        dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
    }

    return 0;
}
3662
/* Appends to 'buf' the Generic Netlink message described by 'dp'.  Only
 * fields that have been set are serialized. */
static void
dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
{
    struct ovs_header *ovs_header;

    /* NLM_F_ECHO asks the kernel to send the resulting datapath back. */
    nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
                          NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
                          OVS_DATAPATH_VERSION);

    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
    ovs_header->dp_ifindex = dp->dp_ifindex;

    if (dp->name) {
        nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
    }

    if (dp->upcall_pid) {
        nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
    }

    if (dp->user_features) {
        nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
    }

    /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
}
3690
3691 /* Clears 'dp' to "empty" values. */
3692 static void
3693 dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
3694 {
3695 memset(dp, 0, sizeof *dp);
3696 }
3697
3698 static void
3699 dpif_netlink_dp_dump_start(struct nl_dump *dump)
3700 {
3701 struct dpif_netlink_dp request;
3702 struct ofpbuf *buf;
3703
3704 dpif_netlink_dp_init(&request);
3705 request.cmd = OVS_DP_CMD_GET;
3706
3707 buf = ofpbuf_new(1024);
3708 dpif_netlink_dp_to_ofpbuf(&request, buf);
3709 nl_dump_start(dump, NETLINK_GENERIC, buf);
3710 ofpbuf_delete(buf);
3711 }
3712
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be of the same form, which is decoded
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
static int
dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
                         struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
{
    struct ofpbuf *request_buf;
    int error;

    /* 'reply' and 'bufp' must both be provided or both omitted. */
    ovs_assert((reply != NULL) == (bufp != NULL));

    request_buf = ofpbuf_new(1024);
    dpif_netlink_dp_to_ofpbuf(request, request_buf);
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
    ofpbuf_delete(request_buf);

    if (reply) {
        /* Clear the reply up front so it is well-defined on every path. */
        dpif_netlink_dp_init(reply);
        if (!error) {
            error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
        }
        if (error) {
            /* Decode or transaction failed: leave '*bufp' NULL so the
             * caller has nothing to free. */
            ofpbuf_delete(*bufp);
            *bufp = NULL;
        }
    }
    return error;
}
3745
3746 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
3747 * The caller must free '*bufp' when the reply is no longer needed ('reply'
3748 * will contain pointers into '*bufp'). */
3749 static int
3750 dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
3751 struct ofpbuf **bufp)
3752 {
3753 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
3754 struct dpif_netlink_dp request;
3755
3756 dpif_netlink_dp_init(&request);
3757 request.cmd = OVS_DP_CMD_GET;
3758 request.dp_ifindex = dpif->dp_ifindex;
3759
3760 return dpif_netlink_dp_transact(&request, reply, bufp);
3761 }
3762
/* Parses the contents of 'buf', which contains a "struct ovs_header" followed
 * by Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
 * positive errno value (EINVAL on any malformed or unexpected message).
 *
 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
 * while 'flow' is still in use. */
static int
dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
                              const struct ofpbuf *buf)
{
    /* Every attribute is optional here; the KEY-or-UFID requirement is
     * enforced separately below. */
    static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
                                  .optional = true },
        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
        [OVS_FLOW_ATTR_UFID] = { .type = NL_A_U128, .optional = true },
        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
        /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
        /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
    };

    dpif_netlink_flow_init(flow);

    /* Peel off the fixed headers; a short message leaves a NULL pointer. */
    struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
    struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
    struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);

    struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
    if (!nlmsg || !genl || !ovs_header
        || nlmsg->nlmsg_type != ovs_flow_family
        || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
                            ARRAY_SIZE(ovs_flow_policy))) {
        return EINVAL;
    }
    /* A flow must be identified by at least one of key or UFID. */
    if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
        return EINVAL;
    }

    flow->nlmsg_flags = nlmsg->nlmsg_flags;
    flow->dp_ifindex = ovs_header->dp_ifindex;
    if (a[OVS_FLOW_ATTR_KEY]) {
        flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
        flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
    }

    if (a[OVS_FLOW_ATTR_UFID]) {
        flow->ufid = nl_attr_get_u128(a[OVS_FLOW_ATTR_UFID]);
        flow->ufid_present = true;
    }
    if (a[OVS_FLOW_ATTR_MASK]) {
        flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
        flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
    }
    if (a[OVS_FLOW_ATTR_ACTIONS]) {
        flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
        flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
    }
    if (a[OVS_FLOW_ATTR_STATS]) {
        flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
    }
    if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
        flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
    }
    if (a[OVS_FLOW_ATTR_USED]) {
        flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
    }
    return 0;
}
3835
3836
/*
 * If a PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE
 * out.  If the flow is not Ethernet (no OVS_KEY_ATTR_ETHERNET present), the
 * OVS_KEY_ATTR_PACKET_TYPE is converted to OVS_KEY_ATTR_ETHERTYPE.  The
 * result is appended to 'buf' as a nested attribute of type 'type'.
 */
static void
put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
                        const struct nlattr *data, uint16_t data_len)
{
    const struct nlattr *packet_type;

    packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);

    if (packet_type) {
        /* exclude PACKET_TYPE Netlink attribute. */
        /* PACKET_TYPE is a fixed-size u32 attribute, so we can splice it
         * out by copying the bytes before and after it. */
        ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
        size_t packet_type_len = NL_A_U32_SIZE;
        size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
        size_t second_chunk_size = data_len - first_chunk_size
                                   - packet_type_len;
        struct nlattr *next_attr = nl_attr_next(packet_type);
        size_t ofs;

        ofs = nl_msg_start_nested(buf, type);
        /* Copy everything except the PACKET_TYPE attribute itself. */
        nl_msg_put(buf, data, first_chunk_size);
        nl_msg_put(buf, next_attr, second_chunk_size);
        if (!nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERNET)) {
            /* Non-Ethernet flow: fold the packet type's namespace type into
             * an ETHERTYPE attribute, rewriting an existing one in place if
             * the copied data already contains it. */
            ovs_be16 pt = pt_ns_type_be(nl_attr_get_be32(packet_type));
            const struct nlattr *nla;

            nla = nl_attr_find(buf, NLA_HDRLEN, OVS_KEY_ATTR_ETHERTYPE);
            if (nla) {
                ovs_be16 *ethertype;

                ethertype = CONST_CAST(ovs_be16 *, nl_attr_get(nla));
                *ethertype = pt;
            } else {
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, pt);
            }
        }
        nl_msg_end_nested(buf, ofs);
    } else {
        /* No PACKET_TYPE present: copy 'data' through unchanged. */
        nl_msg_put_unspec(buf, type, data, data_len);
    }
}
3882
/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
 * followed by Netlink attributes corresponding to 'flow'. */
static void
dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
                            struct ofpbuf *buf)
{
    struct ovs_header *ovs_header;

    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
                          NLM_F_REQUEST | flow->nlmsg_flags,
                          flow->cmd, OVS_FLOW_VERSION);

    ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
    ovs_header->dp_ifindex = flow->dp_ifindex;

    if (flow->ufid_present) {
        nl_msg_put_u128(buf, OVS_FLOW_ATTR_UFID, flow->ufid);
    }
    if (flow->ufid_terse) {
        /* Ask the kernel to omit key/mask/actions from its reply. */
        nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
                       OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
                       | OVS_UFID_F_OMIT_ACTIONS);
    }
    /* Key, mask, and actions are only needed when the flow cannot be fully
     * identified by a terse UFID. */
    if (!flow->ufid_terse || !flow->ufid_present) {
        if (flow->key_len) {
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
                                    flow->key_len);
        }
        if (flow->mask_len) {
            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
                                    flow->mask_len);
        }
        if (flow->actions || flow->actions_len) {
            nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
                              flow->actions, flow->actions_len);
        }
    }

    /* We never need to send these to the kernel. */
    ovs_assert(!flow->stats);
    ovs_assert(!flow->tcp_flags);
    ovs_assert(!flow->used);

    if (flow->clear) {
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
    }
    if (flow->probe) {
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
    }
}
3933
3934 /* Clears 'flow' to "empty" values. */
3935 static void
3936 dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
3937 {
3938 memset(flow, 0, sizeof *flow);
3939 }
3940
/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be a flow also, which is decoded and
 * stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the reply
 * is no longer needed ('reply' will contain pointers into '*bufp').
 *
 * Note that 'request' is not const: when a reply is wanted, NLM_F_ECHO is
 * set in 'request->nlmsg_flags'. */
static int
dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
                           struct dpif_netlink_flow *reply,
                           struct ofpbuf **bufp)
{
    struct ofpbuf *request_buf;
    int error;

    /* 'reply' and 'bufp' must both be provided or both omitted. */
    ovs_assert((reply != NULL) == (bufp != NULL));

    if (reply) {
        /* Ask the kernel to echo the resulting flow back to us. */
        request->nlmsg_flags |= NLM_F_ECHO;
    }

    request_buf = ofpbuf_new(1024);
    dpif_netlink_flow_to_ofpbuf(request, request_buf);
    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
    ofpbuf_delete(request_buf);

    if (reply) {
        if (!error) {
            error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
        }
        if (error) {
            /* Decode or transaction failed: clear the reply and leave
             * '*bufp' NULL so the caller has nothing to free. */
            dpif_netlink_flow_init(reply);
            ofpbuf_delete(*bufp);
            *bufp = NULL;
        }
    }
    return error;
}
3978
3979 static void
3980 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
3981 struct dpif_flow_stats *stats)
3982 {
3983 if (flow->stats) {
3984 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
3985 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
3986 } else {
3987 stats->n_packets = 0;
3988 stats->n_bytes = 0;
3989 }
3990 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
3991 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
3992 }
3993
3994 /* Logs information about a packet that was recently lost in 'ch' (in
3995 * 'dpif_'). */
3996 static void
3997 report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
3998 uint32_t handler_id)
3999 {
4000 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
4001 struct ds s;
4002
4003 if (VLOG_DROP_WARN(&rl)) {
4004 return;
4005 }
4006
4007 ds_init(&s);
4008 if (ch->last_poll != LLONG_MIN) {
4009 ds_put_format(&s, " (last polled %lld ms ago)",
4010 time_msec() - ch->last_poll);
4011 }
4012
4013 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
4014 dpif_name(&dpif->dpif), ch_idx, handler_id);
4015 ds_destroy(&s);
4016 }