]> git.proxmox.com Git - mirror_ovs.git/blob - lib/dpif-netlink.c
dpif-netlink: Probe for out-of-tree tunnels, decides used interface
[mirror_ovs.git] / lib / dpif-netlink.c
1 /*
2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "dpif-netlink.h"
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <net/if.h>
26 #include <linux/types.h>
27 #include <linux/pkt_sched.h>
28 #include <poll.h>
29 #include <stdlib.h>
30 #include <strings.h>
31 #include <sys/epoll.h>
32 #include <sys/stat.h>
33 #include <unistd.h>
34
35 #include "bitmap.h"
36 #include "dpif-provider.h"
37 #include "dpif-netlink-rtnl.h"
38 #include "openvswitch/dynamic-string.h"
39 #include "flow.h"
40 #include "fat-rwlock.h"
41 #include "netdev.h"
42 #include "netdev-linux.h"
43 #include "netdev-vport.h"
44 #include "netlink-conntrack.h"
45 #include "netlink-notifier.h"
46 #include "netlink-socket.h"
47 #include "netlink.h"
48 #include "odp-util.h"
49 #include "openvswitch/ofpbuf.h"
50 #include "packets.h"
51 #include "poll-loop.h"
52 #include "random.h"
53 #include "openvswitch/shash.h"
54 #include "sset.h"
55 #include "timeval.h"
56 #include "unaligned.h"
57 #include "util.h"
58 #include "openvswitch/vlog.h"
59 #include "openvswitch/flow.h"
60
VLOG_DEFINE_THIS_MODULE(dpif_netlink);

/* 'WINDOWS' is 1 when building for _WIN32 and 0 otherwise, so that ordinary
 * C expressions can test the platform. */
#ifdef _WIN32
#include "wmi.h"
enum { WINDOWS = 1 };
#else
enum { WINDOWS = 0 };
#endif

/* Largest port array size accepted by this module. */
enum { MAX_PORTS = USHRT_MAX };

/* Old kernel headers may lack this ethtool flag (it was introduced in
 * Linux 2.6.24), so it is defined here. */
#define ETH_FLAG_LRO (1 << 15)  /* LRO is enabled */
73
/* Userspace representation of one OVS datapath Generic Netlink message.  The
 * comment next to each attribute member names the OVS_DP_ATTR_* that it
 * corresponds to. */
struct dpif_netlink_dp {
    /* From the Generic Netlink header. */
    uint8_t cmd;

    /* From struct ovs_header. */
    int dp_ifindex;

    /* Netlink attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES. */
    const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
    const struct ovs_dp_megaflow_stats *megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS. */
};
89
/* Helpers that operate on 'struct dpif_netlink_dp'; defined later in this
 * file. */
static void dpif_netlink_dp_init(struct dpif_netlink_dp *dp);
static int dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp,
                                       const struct ofpbuf *buf);
static void dpif_netlink_dp_dump_start(struct nl_dump *dump);
static int dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
                                    struct dpif_netlink_dp *reply,
                                    struct ofpbuf **bufp);
static int dpif_netlink_dp_get(const struct dpif *dpif,
                               struct dpif_netlink_dp *reply,
                               struct ofpbuf **bufp);
100
101 struct dpif_netlink_flow {
102 /* Generic Netlink header. */
103 uint8_t cmd;
104
105 /* struct ovs_header. */
106 unsigned int nlmsg_flags;
107 int dp_ifindex;
108
109 /* Attributes.
110 *
111 * The 'stats' member points to 64-bit data that might only be aligned on
112 * 32-bit boundaries, so get_unaligned_u64() should be used to access its
113 * values.
114 *
115 * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
116 * the Netlink version of the command, even if actions_len is zero. */
117 const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
118 size_t key_len;
119 const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */
120 size_t mask_len;
121 const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
122 size_t actions_len;
123 ovs_u128 ufid; /* OVS_FLOW_ATTR_FLOW_ID. */
124 bool ufid_present; /* Is there a UFID? */
125 bool ufid_terse; /* Skip serializing key/mask/acts? */
126 const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
127 const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
128 const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
129 bool clear; /* OVS_FLOW_ATTR_CLEAR. */
130 bool probe; /* OVS_FLOW_ATTR_PROBE. */
131 };
132
/* Helpers that operate on 'struct dpif_netlink_flow'; defined later in this
 * file. */
static void dpif_netlink_flow_init(struct dpif_netlink_flow *flow);
static int dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
                                         const struct ofpbuf *buf);
static void dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
                                        struct ofpbuf *buf);
static int dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
                                      struct dpif_netlink_flow *reply,
                                      struct ofpbuf **bufp);
static void dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
                                        struct dpif_flow_stats *stats);
static void dpif_netlink_flow_to_dpif_flow(
    struct dpif *dpif, struct dpif_flow *dpif_flow,
    const struct dpif_netlink_flow *datapath_flow);
145
/* A single dpif channel between the kernel and userspace. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket backing this channel. */
    long long int last_poll;    /* Last time this channel was polled. */
};
151
#ifdef _WIN32
#define VPORT_SOCK_POOL_SIZE 1
/* Windows has no native epoll.  Equivalent interfaces exist but are not used
 * currently; for simplicity a pool of netlink sockets is used instead.  Each
 * socket in the pool is represented by a 'struct dpif_windows_vport_sock'.
 * Because it is a pool, several OVS ports may share the same socket.  A
 * reference count and similar fields can be added in the future. */
struct dpif_windows_vport_sock {
    struct nl_sock *nl_sock;    /* Netlink socket. */
};
#endif
164
/* Per-handler upcall state: the handler's channels and its epoll set. */
struct dpif_handler {
    struct dpif_channel *channels;  /* Array of channels for each handler. */
    struct epoll_event *epoll_events;
    int epoll_fd;                   /* epoll fd that includes channel socks. */
    int n_events;                   /* Num events returned by epoll_wait(). */
    int event_offset;               /* Offset into 'epoll_events'. */

#ifdef _WIN32
    /* Pool of sockets. */
    struct dpif_windows_vport_sock *vport_sock_pool;
    size_t last_used_pool_idx;      /* Index to aid in allocating a socket
                                     * in the pool to a port. */
#endif
};
179
180 /* Datapath interface for the openvswitch Linux kernel module. */
181 struct dpif_netlink {
182 struct dpif dpif;
183 int dp_ifindex;
184
185 /* Upcall messages. */
186 struct fat_rwlock upcall_lock;
187 struct dpif_handler *handlers;
188 uint32_t n_handlers; /* Num of upcall handlers. */
189 int uc_array_size; /* Size of 'handler->channels' and */
190 /* 'handler->epoll_events'. */
191
192 /* Change notification. */
193 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
194 bool refresh_channels;
195 };
196
197 static void report_loss(struct dpif_netlink *, struct dpif_channel *,
198 uint32_t ch_idx, uint32_t handler_id);
199
200 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
201
202 /* Generic Netlink family numbers for OVS.
203 *
204 * Initialized by dpif_netlink_init(). */
205 static int ovs_datapath_family;
206 static int ovs_vport_family;
207 static int ovs_flow_family;
208 static int ovs_packet_family;
209
210 /* Generic Netlink multicast groups for OVS.
211 *
212 * Initialized by dpif_netlink_init(). */
213 static unsigned int ovs_vport_mcgroup;
214
215 /* If true, tunnel devices are created using OVS compat/genetlink.
216 * If false, tunnel devices are created with rtnetlink and using light weight
217 * tunnels. If we fail to create the tunnel the rtnetlink+LWT, then we fallback
218 * to using the compat interface. */
219 static bool ovs_tunnels_out_of_tree = true;
220
221 static int dpif_netlink_init(void);
222 static int open_dpif(const struct dpif_netlink_dp *, struct dpif **);
223 static uint32_t dpif_netlink_port_get_pid(const struct dpif *,
224 odp_port_t port_no, uint32_t hash);
225 static void dpif_netlink_handler_uninit(struct dpif_handler *handler);
226 static int dpif_netlink_refresh_channels(struct dpif_netlink *,
227 uint32_t n_handlers);
228 static void dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *,
229 struct ofpbuf *);
230 static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
231 const struct ofpbuf *);
232 static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
233 odp_port_t port_no, const char *port_name,
234 struct dpif_port *dpif_port);
235
236 static struct dpif_netlink *
237 dpif_netlink_cast(const struct dpif *dpif)
238 {
239 dpif_assert_class(dpif, &dpif_netlink_class);
240 return CONTAINER_OF(dpif, struct dpif_netlink, dpif);
241 }
242
243 static int
244 dpif_netlink_enumerate(struct sset *all_dps,
245 const struct dpif_class *dpif_class OVS_UNUSED)
246 {
247 struct nl_dump dump;
248 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
249 struct ofpbuf msg, buf;
250 int error;
251
252 error = dpif_netlink_init();
253 if (error) {
254 return error;
255 }
256
257 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
258 dpif_netlink_dp_dump_start(&dump);
259 while (nl_dump_next(&dump, &msg, &buf)) {
260 struct dpif_netlink_dp dp;
261
262 if (!dpif_netlink_dp_from_ofpbuf(&dp, &msg)) {
263 sset_add(all_dps, dp.name);
264 }
265 }
266 ofpbuf_uninit(&buf);
267 return nl_dump_done(&dump);
268 }
269
270 static int
271 dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
272 bool create, struct dpif **dpifp)
273 {
274 struct dpif_netlink_dp dp_request, dp;
275 struct ofpbuf *buf;
276 uint32_t upcall_pid;
277 int error;
278
279 error = dpif_netlink_init();
280 if (error) {
281 return error;
282 }
283
284 /* Create or look up datapath. */
285 dpif_netlink_dp_init(&dp_request);
286 if (create) {
287 dp_request.cmd = OVS_DP_CMD_NEW;
288 upcall_pid = 0;
289 dp_request.upcall_pid = &upcall_pid;
290 } else {
291 /* Use OVS_DP_CMD_SET to report user features */
292 dp_request.cmd = OVS_DP_CMD_SET;
293 }
294 dp_request.name = name;
295 dp_request.user_features |= OVS_DP_F_UNALIGNED;
296 dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
297 error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
298 if (error) {
299 return error;
300 }
301
302 error = open_dpif(&dp, dpifp);
303 ofpbuf_delete(buf);
304 return error;
305 }
306
307 static int
308 open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
309 {
310 struct dpif_netlink *dpif;
311
312 dpif = xzalloc(sizeof *dpif);
313 dpif->port_notifier = NULL;
314 fat_rwlock_init(&dpif->upcall_lock);
315
316 dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
317 dp->dp_ifindex, dp->dp_ifindex);
318
319 dpif->dp_ifindex = dp->dp_ifindex;
320 *dpifp = &dpif->dpif;
321
322 return 0;
323 }
324
/* Destroys each of the 'n_socks' netlink sockets that 'socksp' points to, then
 * frees the 'socksp' array itself. */
static void
vport_del_socksp__(struct nl_sock **socksp, uint32_t n_socks)
{
    size_t idx = 0;

    while (idx < n_socks) {
        nl_sock_destroy(socksp[idx]);
        idx++;
    }

    free(socksp);
}
338
339 /* Creates an array of netlink sockets. Returns an array of the
340 * corresponding pointers. Records the error in 'error'. */
341 static struct nl_sock **
342 vport_create_socksp__(uint32_t n_socks, int *error)
343 {
344 struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp);
345 size_t i;
346
347 for (i = 0; i < n_socks; i++) {
348 *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]);
349 if (*error) {
350 goto error;
351 }
352 }
353
354 return socksp;
355
356 error:
357 vport_del_socksp__(socksp, n_socks);
358
359 return NULL;
360 }
361
362 #ifdef _WIN32
363 static void
364 vport_delete_sock_pool(struct dpif_handler *handler)
365 OVS_REQ_WRLOCK(dpif->upcall_lock)
366 {
367 if (handler->vport_sock_pool) {
368 uint32_t i;
369 struct dpif_windows_vport_sock *sock_pool =
370 handler->vport_sock_pool;
371
372 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
373 if (sock_pool[i].nl_sock) {
374 nl_sock_unsubscribe_packets(sock_pool[i].nl_sock);
375 nl_sock_destroy(sock_pool[i].nl_sock);
376 sock_pool[i].nl_sock = NULL;
377 }
378 }
379
380 free(handler->vport_sock_pool);
381 handler->vport_sock_pool = NULL;
382 }
383 }
384
385 static int
386 vport_create_sock_pool(struct dpif_handler *handler)
387 OVS_REQ_WRLOCK(dpif->upcall_lock)
388 {
389 struct dpif_windows_vport_sock *sock_pool;
390 size_t i;
391 int error = 0;
392
393 sock_pool = xzalloc(VPORT_SOCK_POOL_SIZE * sizeof *sock_pool);
394 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
395 error = nl_sock_create(NETLINK_GENERIC, &sock_pool[i].nl_sock);
396 if (error) {
397 goto error;
398 }
399
400 /* Enable the netlink socket to receive packets. This is equivalent to
401 * calling nl_sock_join_mcgroup() to receive events. */
402 error = nl_sock_subscribe_packets(sock_pool[i].nl_sock);
403 if (error) {
404 goto error;
405 }
406 }
407
408 handler->vport_sock_pool = sock_pool;
409 handler->last_used_pool_idx = 0;
410 return 0;
411
412 error:
413 vport_delete_sock_pool(handler);
414 return error;
415 }
416
417 /* Returns an array pointers to netlink sockets. The sockets are picked from a
418 * pool. Records the error in 'error'. */
419 static struct nl_sock **
420 vport_create_socksp_windows(struct dpif_netlink *dpif, int *error)
421 OVS_REQ_WRLOCK(dpif->upcall_lock)
422 {
423 uint32_t n_socks = dpif->n_handlers;
424 struct nl_sock **socksp;
425 size_t i;
426
427 ovs_assert(n_socks <= 1);
428 socksp = xzalloc(n_socks * sizeof *socksp);
429
430 /* Pick netlink sockets to use in a round-robin fashion from each
431 * handler's pool of sockets. */
432 for (i = 0; i < n_socks; i++) {
433 struct dpif_handler *handler = &dpif->handlers[i];
434 struct dpif_windows_vport_sock *sock_pool = handler->vport_sock_pool;
435 size_t index = handler->last_used_pool_idx;
436
437 /* A pool of sockets is allocated when the handler is initialized. */
438 if (sock_pool == NULL) {
439 free(socksp);
440 *error = EINVAL;
441 return NULL;
442 }
443
444 ovs_assert(index < VPORT_SOCK_POOL_SIZE);
445 socksp[i] = sock_pool[index].nl_sock;
446 socksp[i] = sock_pool[index].nl_sock;
447 ovs_assert(socksp[i]);
448 index = (index == VPORT_SOCK_POOL_SIZE - 1) ? 0 : index + 1;
449 handler->last_used_pool_idx = index;
450 }
451
452 return socksp;
453 }
454
/* Frees only the 'socksp' array; the sockets it points to are not
 * destroyed. */
static void
vport_del_socksp_windows(struct dpif_netlink *dpif, struct nl_sock **socksp)
{
    free(socksp);
}
460 #endif /* _WIN32 */
461
462 static struct nl_sock **
463 vport_create_socksp(struct dpif_netlink *dpif, int *error)
464 {
465 #ifdef _WIN32
466 return vport_create_socksp_windows(dpif, error);
467 #else
468 return vport_create_socksp__(dpif->n_handlers, error);
469 #endif
470 }
471
472 static void
473 vport_del_socksp(struct dpif_netlink *dpif, struct nl_sock **socksp)
474 {
475 #ifdef _WIN32
476 vport_del_socksp_windows(dpif, socksp);
477 #else
478 vport_del_socksp__(socksp, dpif->n_handlers);
479 #endif
480 }
481
/* Returns a newly allocated array holding the pid of each of the 'n_socks'
 * netlink sockets in 'socksp'.  When 'socksp' is NULL, returns a
 * single-element array whose only value is 0. */
static uint32_t *
vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t *result;
    size_t idx;

    if (socksp == NULL) {
        return xzalloc(sizeof *result);
    }

    result = xzalloc(n_socks * sizeof *result);
    for (idx = 0; idx < n_socks; idx++) {
        result[idx] = nl_sock_pid(socksp[idx]);
    }

    return result;
}
503
504 /* Given the port number 'port_idx', extracts the pids of netlink sockets
505 * associated to the port and assigns it to 'upcall_pids'. */
506 static bool
507 vport_get_pids(struct dpif_netlink *dpif, uint32_t port_idx,
508 uint32_t **upcall_pids)
509 {
510 uint32_t *pids;
511 size_t i;
512
513 /* Since the nl_sock can only be assigned in either all
514 * or none "dpif->handlers" channels, the following check
515 * would suffice. */
516 if (!dpif->handlers[0].channels[port_idx].sock) {
517 return false;
518 }
519 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
520
521 pids = xzalloc(dpif->n_handlers * sizeof *pids);
522
523 for (i = 0; i < dpif->n_handlers; i++) {
524 pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock);
525 }
526
527 *upcall_pids = pids;
528
529 return true;
530 }
531
532 static int
533 vport_add_channels(struct dpif_netlink *dpif, odp_port_t port_no,
534 struct nl_sock **socksp)
535 {
536 struct epoll_event event;
537 uint32_t port_idx = odp_to_u32(port_no);
538 size_t i, j;
539 int error;
540
541 if (dpif->handlers == NULL) {
542 return 0;
543 }
544
545 /* We assume that the datapath densely chooses port numbers, which can
546 * therefore be used as an index into 'channels' and 'epoll_events' of
547 * 'dpif->handler'. */
548 if (port_idx >= dpif->uc_array_size) {
549 uint32_t new_size = port_idx + 1;
550
551 if (new_size > MAX_PORTS) {
552 VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
553 dpif_name(&dpif->dpif), port_no);
554 return EFBIG;
555 }
556
557 for (i = 0; i < dpif->n_handlers; i++) {
558 struct dpif_handler *handler = &dpif->handlers[i];
559
560 handler->channels = xrealloc(handler->channels,
561 new_size * sizeof *handler->channels);
562
563 for (j = dpif->uc_array_size; j < new_size; j++) {
564 handler->channels[j].sock = NULL;
565 }
566
567 handler->epoll_events = xrealloc(handler->epoll_events,
568 new_size * sizeof *handler->epoll_events);
569
570 }
571 dpif->uc_array_size = new_size;
572 }
573
574 memset(&event, 0, sizeof event);
575 event.events = EPOLLIN;
576 event.data.u32 = port_idx;
577
578 for (i = 0; i < dpif->n_handlers; i++) {
579 struct dpif_handler *handler = &dpif->handlers[i];
580
581 #ifndef _WIN32
582 if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
583 &event) < 0) {
584 error = errno;
585 goto error;
586 }
587 #endif
588 dpif->handlers[i].channels[port_idx].sock = socksp[i];
589 dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
590 }
591
592 return 0;
593
594 error:
595 for (j = 0; j < i; j++) {
596 #ifndef _WIN32
597 epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
598 nl_sock_fd(socksp[j]), NULL);
599 #endif
600 dpif->handlers[j].channels[port_idx].sock = NULL;
601 }
602
603 return error;
604 }
605
606 static void
607 vport_del_channels(struct dpif_netlink *dpif, odp_port_t port_no)
608 {
609 uint32_t port_idx = odp_to_u32(port_no);
610 size_t i;
611
612 if (!dpif->handlers || port_idx >= dpif->uc_array_size) {
613 return;
614 }
615
616 /* Since the sock can only be assigned in either all or none
617 * of "dpif->handlers" channels, the following check would
618 * suffice. */
619 if (!dpif->handlers[0].channels[port_idx].sock) {
620 return;
621 }
622
623 for (i = 0; i < dpif->n_handlers; i++) {
624 struct dpif_handler *handler = &dpif->handlers[i];
625 #ifndef _WIN32
626 epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL,
627 nl_sock_fd(handler->channels[port_idx].sock), NULL);
628 nl_sock_destroy(handler->channels[port_idx].sock);
629 #endif
630 handler->channels[port_idx].sock = NULL;
631 handler->event_offset = handler->n_events = 0;
632 }
633 }
634
635 static void
636 destroy_all_channels(struct dpif_netlink *dpif)
637 OVS_REQ_WRLOCK(dpif->upcall_lock)
638 {
639 unsigned int i;
640
641 if (!dpif->handlers) {
642 return;
643 }
644
645 for (i = 0; i < dpif->uc_array_size; i++ ) {
646 struct dpif_netlink_vport vport_request;
647 uint32_t upcall_pids = 0;
648
649 /* Since the sock can only be assigned in either all or none
650 * of "dpif->handlers" channels, the following check would
651 * suffice. */
652 if (!dpif->handlers[0].channels[i].sock) {
653 continue;
654 }
655
656 /* Turn off upcalls. */
657 dpif_netlink_vport_init(&vport_request);
658 vport_request.cmd = OVS_VPORT_CMD_SET;
659 vport_request.dp_ifindex = dpif->dp_ifindex;
660 vport_request.port_no = u32_to_odp(i);
661 vport_request.n_upcall_pids = 1;
662 vport_request.upcall_pids = &upcall_pids;
663 dpif_netlink_vport_transact(&vport_request, NULL, NULL);
664
665 vport_del_channels(dpif, u32_to_odp(i));
666 }
667
668 for (i = 0; i < dpif->n_handlers; i++) {
669 struct dpif_handler *handler = &dpif->handlers[i];
670
671 dpif_netlink_handler_uninit(handler);
672 free(handler->epoll_events);
673 free(handler->channels);
674 }
675
676 free(dpif->handlers);
677 dpif->handlers = NULL;
678 dpif->n_handlers = 0;
679 dpif->uc_array_size = 0;
680 }
681
682 static void
683 dpif_netlink_close(struct dpif *dpif_)
684 {
685 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
686
687 nl_sock_destroy(dpif->port_notifier);
688
689 fat_rwlock_wrlock(&dpif->upcall_lock);
690 destroy_all_channels(dpif);
691 fat_rwlock_unlock(&dpif->upcall_lock);
692
693 fat_rwlock_destroy(&dpif->upcall_lock);
694 free(dpif);
695 }
696
697 static int
698 dpif_netlink_destroy(struct dpif *dpif_)
699 {
700 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
701 struct dpif_netlink_dp dp;
702
703 dpif_netlink_dp_init(&dp);
704 dp.cmd = OVS_DP_CMD_DEL;
705 dp.dp_ifindex = dpif->dp_ifindex;
706 return dpif_netlink_dp_transact(&dp, NULL, NULL);
707 }
708
709 static bool
710 dpif_netlink_run(struct dpif *dpif_)
711 {
712 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
713
714 if (dpif->refresh_channels) {
715 dpif->refresh_channels = false;
716 fat_rwlock_wrlock(&dpif->upcall_lock);
717 dpif_netlink_refresh_channels(dpif, dpif->n_handlers);
718 fat_rwlock_unlock(&dpif->upcall_lock);
719 }
720 return false;
721 }
722
723 static int
724 dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
725 {
726 struct dpif_netlink_dp dp;
727 struct ofpbuf *buf;
728 int error;
729
730 error = dpif_netlink_dp_get(dpif_, &dp, &buf);
731 if (!error) {
732 memset(stats, 0, sizeof *stats);
733
734 if (dp.stats) {
735 stats->n_hit = get_32aligned_u64(&dp.stats->n_hit);
736 stats->n_missed = get_32aligned_u64(&dp.stats->n_missed);
737 stats->n_lost = get_32aligned_u64(&dp.stats->n_lost);
738 stats->n_flows = get_32aligned_u64(&dp.stats->n_flows);
739 }
740
741 if (dp.megaflow_stats) {
742 stats->n_masks = dp.megaflow_stats->n_masks;
743 stats->n_mask_hit = get_32aligned_u64(
744 &dp.megaflow_stats->n_mask_hit);
745 } else {
746 stats->n_masks = UINT32_MAX;
747 stats->n_mask_hit = UINT64_MAX;
748 }
749 ofpbuf_delete(buf);
750 }
751 return error;
752 }
753
754 static const char *
755 get_vport_type(const struct dpif_netlink_vport *vport)
756 {
757 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
758
759 switch (vport->type) {
760 case OVS_VPORT_TYPE_NETDEV: {
761 const char *type = netdev_get_type_from_name(vport->name);
762
763 return type ? type : "system";
764 }
765
766 case OVS_VPORT_TYPE_INTERNAL:
767 return "internal";
768
769 case OVS_VPORT_TYPE_GENEVE:
770 return "geneve";
771
772 case OVS_VPORT_TYPE_GRE:
773 return "gre";
774
775 case OVS_VPORT_TYPE_VXLAN:
776 return "vxlan";
777
778 case OVS_VPORT_TYPE_LISP:
779 return "lisp";
780
781 case OVS_VPORT_TYPE_STT:
782 return "stt";
783
784 case OVS_VPORT_TYPE_UNSPEC:
785 case __OVS_VPORT_TYPE_MAX:
786 break;
787 }
788
789 VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
790 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
791 return "unknown";
792 }
793
794 enum ovs_vport_type
795 netdev_to_ovs_vport_type(const char *type)
796 {
797 if (!strcmp(type, "tap") || !strcmp(type, "system")) {
798 return OVS_VPORT_TYPE_NETDEV;
799 } else if (!strcmp(type, "internal")) {
800 return OVS_VPORT_TYPE_INTERNAL;
801 } else if (strstr(type, "stt")) {
802 return OVS_VPORT_TYPE_STT;
803 } else if (!strcmp(type, "geneve")) {
804 return OVS_VPORT_TYPE_GENEVE;
805 } else if (strstr(type, "gre")) {
806 return OVS_VPORT_TYPE_GRE;
807 } else if (!strcmp(type, "vxlan")) {
808 return OVS_VPORT_TYPE_VXLAN;
809 } else if (!strcmp(type, "lisp")) {
810 return OVS_VPORT_TYPE_LISP;
811 } else {
812 return OVS_VPORT_TYPE_UNSPEC;
813 }
814 }
815
816 static int
817 dpif_netlink_port_add__(struct dpif_netlink *dpif, const char *name,
818 enum ovs_vport_type type,
819 struct ofpbuf *options,
820 odp_port_t *port_nop)
821 OVS_REQ_WRLOCK(dpif->upcall_lock)
822 {
823 struct dpif_netlink_vport request, reply;
824 struct ofpbuf *buf;
825 struct nl_sock **socksp = NULL;
826 uint32_t *upcall_pids;
827 int error = 0;
828
829 if (dpif->handlers) {
830 socksp = vport_create_socksp(dpif, &error);
831 if (!socksp) {
832 return error;
833 }
834 }
835
836 dpif_netlink_vport_init(&request);
837 request.cmd = OVS_VPORT_CMD_NEW;
838 request.dp_ifindex = dpif->dp_ifindex;
839 request.type = type;
840 request.name = name;
841
842 request.port_no = *port_nop;
843 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
844 request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
845 request.upcall_pids = upcall_pids;
846
847 if (options) {
848 request.options = options->data;
849 request.options_len = options->size;
850 }
851
852 error = dpif_netlink_vport_transact(&request, &reply, &buf);
853 if (!error) {
854 *port_nop = reply.port_no;
855 } else {
856 if (error == EBUSY && *port_nop != ODPP_NONE) {
857 VLOG_INFO("%s: requested port %"PRIu32" is in use",
858 dpif_name(&dpif->dpif), *port_nop);
859 }
860
861 vport_del_socksp(dpif, socksp);
862 goto exit;
863 }
864
865 if (socksp) {
866 error = vport_add_channels(dpif, *port_nop, socksp);
867 if (error) {
868 VLOG_INFO("%s: could not add channel for port %s",
869 dpif_name(&dpif->dpif), name);
870
871 /* Delete the port. */
872 dpif_netlink_vport_init(&request);
873 request.cmd = OVS_VPORT_CMD_DEL;
874 request.dp_ifindex = dpif->dp_ifindex;
875 request.port_no = *port_nop;
876 dpif_netlink_vport_transact(&request, NULL, NULL);
877 vport_del_socksp(dpif, socksp);
878 goto exit;
879 }
880 }
881 free(socksp);
882
883 exit:
884 ofpbuf_delete(buf);
885 free(upcall_pids);
886
887 return error;
888 }
889
890 static int
891 dpif_netlink_port_add_compat(struct dpif_netlink *dpif, struct netdev *netdev,
892 odp_port_t *port_nop)
893 OVS_REQ_WRLOCK(dpif->upcall_lock)
894 {
895 const struct netdev_tunnel_config *tnl_cfg;
896 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
897 const char *type = netdev_get_type(netdev);
898 uint64_t options_stub[64 / 8];
899 enum ovs_vport_type ovs_type;
900 struct ofpbuf options;
901 const char *name;
902
903 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
904
905 ovs_type = netdev_to_ovs_vport_type(netdev_get_type(netdev));
906 if (ovs_type == OVS_VPORT_TYPE_UNSPEC) {
907 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
908 "unsupported type `%s'",
909 dpif_name(&dpif->dpif), name, type);
910 return EINVAL;
911 }
912
913 if (ovs_type == OVS_VPORT_TYPE_NETDEV) {
914 #ifdef _WIN32
915 /* XXX : Map appropiate Windows handle */
916 #else
917 netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
918 #endif
919 }
920
921 #ifdef _WIN32
922 if (ovs_type == OVS_VPORT_TYPE_INTERNAL) {
923 if (!create_wmi_port(name)){
924 VLOG_ERR("Could not create wmi internal port with name:%s", name);
925 return EINVAL;
926 };
927 }
928 #endif
929
930 tnl_cfg = netdev_get_tunnel_config(netdev);
931 if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
932 ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
933 if (tnl_cfg->dst_port) {
934 nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
935 ntohs(tnl_cfg->dst_port));
936 }
937 if (tnl_cfg->exts) {
938 size_t ext_ofs;
939 int i;
940
941 ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
942 for (i = 0; i < 32; i++) {
943 if (tnl_cfg->exts & (1 << i)) {
944 nl_msg_put_flag(&options, i);
945 }
946 }
947 nl_msg_end_nested(&options, ext_ofs);
948 }
949 return dpif_netlink_port_add__(dpif, name, ovs_type, &options,
950 port_nop);
951 } else {
952 return dpif_netlink_port_add__(dpif, name, ovs_type, NULL, port_nop);
953 }
954
955 }
956
957 static int
958 dpif_netlink_rtnl_port_create_and_add(struct dpif_netlink *dpif,
959 struct netdev *netdev,
960 odp_port_t *port_nop)
961 OVS_REQ_WRLOCK(dpif->upcall_lock)
962 {
963 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
964 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
965 const char *name;
966 int error;
967
968 error = dpif_netlink_rtnl_port_create(netdev);
969 if (error) {
970 if (error != EOPNOTSUPP) {
971 VLOG_INFO_RL(&rl, "Failed to create %s with rtnetlink: %s",
972 netdev_get_name(netdev), ovs_strerror(error));
973 }
974 return error;
975 }
976
977 name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
978 error = dpif_netlink_port_add__(dpif, name, OVS_VPORT_TYPE_NETDEV, NULL,
979 port_nop);
980 if (error) {
981 dpif_netlink_rtnl_port_destroy(name, netdev_get_type(netdev));
982 }
983 return error;
984 }
985
986 static int
987 dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev,
988 odp_port_t *port_nop)
989 {
990 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
991 int error = EOPNOTSUPP;
992
993 fat_rwlock_wrlock(&dpif->upcall_lock);
994 if (!ovs_tunnels_out_of_tree) {
995 error = dpif_netlink_rtnl_port_create_and_add(dpif, netdev, port_nop);
996 }
997 if (error) {
998 error = dpif_netlink_port_add_compat(dpif, netdev, port_nop);
999 }
1000 fat_rwlock_unlock(&dpif->upcall_lock);
1001
1002 return error;
1003 }
1004
1005 static int
1006 dpif_netlink_port_del__(struct dpif_netlink *dpif, odp_port_t port_no)
1007 OVS_REQ_WRLOCK(dpif->upcall_lock)
1008 {
1009 struct dpif_netlink_vport vport;
1010 struct dpif_port dpif_port;
1011 int error;
1012
1013 error = dpif_netlink_port_query__(dpif, port_no, NULL, &dpif_port);
1014 if (error) {
1015 return error;
1016 }
1017
1018 dpif_netlink_vport_init(&vport);
1019 vport.cmd = OVS_VPORT_CMD_DEL;
1020 vport.dp_ifindex = dpif->dp_ifindex;
1021 vport.port_no = port_no;
1022 #ifdef _WIN32
1023 if (!strcmp(dpif_port.type, "internal")) {
1024 if (!delete_wmi_port(dpif_port.name)) {
1025 VLOG_ERR("Could not delete wmi port with name: %s",
1026 dpif_port.name);
1027 };
1028 }
1029 #endif
1030 error = dpif_netlink_vport_transact(&vport, NULL, NULL);
1031
1032 vport_del_channels(dpif, port_no);
1033
1034 if (!error && !ovs_tunnels_out_of_tree) {
1035 error = dpif_netlink_rtnl_port_destroy(dpif_port.name, dpif_port.type);
1036 if (error == EOPNOTSUPP) {
1037 error = 0;
1038 }
1039 }
1040
1041 dpif_port_destroy(&dpif_port);
1042
1043 return error;
1044 }
1045
1046 static int
1047 dpif_netlink_port_del(struct dpif *dpif_, odp_port_t port_no)
1048 {
1049 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1050 int error;
1051
1052 fat_rwlock_wrlock(&dpif->upcall_lock);
1053 error = dpif_netlink_port_del__(dpif, port_no);
1054 fat_rwlock_unlock(&dpif->upcall_lock);
1055
1056 return error;
1057 }
1058
1059 static int
1060 dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
1061 const char *port_name, struct dpif_port *dpif_port)
1062 {
1063 struct dpif_netlink_vport request;
1064 struct dpif_netlink_vport reply;
1065 struct ofpbuf *buf;
1066 int error;
1067
1068 dpif_netlink_vport_init(&request);
1069 request.cmd = OVS_VPORT_CMD_GET;
1070 request.dp_ifindex = dpif->dp_ifindex;
1071 request.port_no = port_no;
1072 request.name = port_name;
1073
1074 error = dpif_netlink_vport_transact(&request, &reply, &buf);
1075 if (!error) {
1076 if (reply.dp_ifindex != request.dp_ifindex) {
1077 /* A query by name reported that 'port_name' is in some datapath
1078 * other than 'dpif', but the caller wants to know about 'dpif'. */
1079 error = ENODEV;
1080 } else if (dpif_port) {
1081 dpif_port->name = xstrdup(reply.name);
1082 dpif_port->type = xstrdup(get_vport_type(&reply));
1083 dpif_port->port_no = reply.port_no;
1084 }
1085 ofpbuf_delete(buf);
1086 }
1087 return error;
1088 }
1089
1090 static int
1091 dpif_netlink_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no,
1092 struct dpif_port *dpif_port)
1093 {
1094 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1095
1096 return dpif_netlink_port_query__(dpif, port_no, NULL, dpif_port);
1097 }
1098
/* Queries 'dpif_' for the port named 'devname' by calling
 * dpif_netlink_port_query__() with a port number of 0.  Results are stored
 * in 'dpif_port' as described for that function. */
static int
dpif_netlink_port_query_by_name(const struct dpif *dpif_, const char *devname,
                                struct dpif_port *dpif_port)
{
    return dpif_netlink_port_query__(dpif_netlink_cast(dpif_), 0, devname,
                                     dpif_port);
}
1107
1108 static uint32_t
1109 dpif_netlink_port_get_pid__(const struct dpif_netlink *dpif,
1110 odp_port_t port_no, uint32_t hash)
1111 OVS_REQ_RDLOCK(dpif->upcall_lock)
1112 {
1113 uint32_t port_idx = odp_to_u32(port_no);
1114 uint32_t pid = 0;
1115
1116 if (dpif->handlers && dpif->uc_array_size > 0) {
1117 /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
1118 * channel, since it is not heavily loaded. */
1119 uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx;
1120 struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers];
1121
1122 /* Needs to check in case the socket pointer is changed in between
1123 * the holding of upcall_lock. A known case happens when the main
1124 * thread deletes the vport while the handler thread is handling
1125 * the upcall from that port. */
1126 if (h->channels[idx].sock) {
1127 pid = nl_sock_pid(h->channels[idx].sock);
1128 }
1129 }
1130
1131 return pid;
1132 }
1133
1134 static uint32_t
1135 dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no,
1136 uint32_t hash)
1137 {
1138 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1139 uint32_t ret;
1140
1141 fat_rwlock_rdlock(&dpif->upcall_lock);
1142 ret = dpif_netlink_port_get_pid__(dpif, port_no, hash);
1143 fat_rwlock_unlock(&dpif->upcall_lock);
1144
1145 return ret;
1146 }
1147
1148 static int
1149 dpif_netlink_flow_flush(struct dpif *dpif_)
1150 {
1151 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1152 struct dpif_netlink_flow flow;
1153
1154 dpif_netlink_flow_init(&flow);
1155 flow.cmd = OVS_FLOW_CMD_DEL;
1156 flow.dp_ifindex = dpif->dp_ifindex;
1157 return dpif_netlink_flow_transact(&flow, NULL, NULL);
1158 }
1159
1160 struct dpif_netlink_port_state {
1161 struct nl_dump dump;
1162 struct ofpbuf buf;
1163 };
1164
1165 static void
1166 dpif_netlink_port_dump_start__(const struct dpif_netlink *dpif,
1167 struct nl_dump *dump)
1168 {
1169 struct dpif_netlink_vport request;
1170 struct ofpbuf *buf;
1171
1172 dpif_netlink_vport_init(&request);
1173 request.cmd = OVS_VPORT_CMD_GET;
1174 request.dp_ifindex = dpif->dp_ifindex;
1175
1176 buf = ofpbuf_new(1024);
1177 dpif_netlink_vport_to_ofpbuf(&request, buf);
1178 nl_dump_start(dump, NETLINK_GENERIC, buf);
1179 ofpbuf_delete(buf);
1180 }
1181
1182 static int
1183 dpif_netlink_port_dump_start(const struct dpif *dpif_, void **statep)
1184 {
1185 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1186 struct dpif_netlink_port_state *state;
1187
1188 *statep = state = xmalloc(sizeof *state);
1189 dpif_netlink_port_dump_start__(dpif, &state->dump);
1190
1191 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
1192 return 0;
1193 }
1194
1195 static int
1196 dpif_netlink_port_dump_next__(const struct dpif_netlink *dpif,
1197 struct nl_dump *dump,
1198 struct dpif_netlink_vport *vport,
1199 struct ofpbuf *buffer)
1200 {
1201 struct ofpbuf buf;
1202 int error;
1203
1204 if (!nl_dump_next(dump, &buf, buffer)) {
1205 return EOF;
1206 }
1207
1208 error = dpif_netlink_vport_from_ofpbuf(vport, &buf);
1209 if (error) {
1210 VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)",
1211 dpif_name(&dpif->dpif), ovs_strerror(error));
1212 }
1213 return error;
1214 }
1215
1216 static int
1217 dpif_netlink_port_dump_next(const struct dpif *dpif_, void *state_,
1218 struct dpif_port *dpif_port)
1219 {
1220 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1221 struct dpif_netlink_port_state *state = state_;
1222 struct dpif_netlink_vport vport;
1223 int error;
1224
1225 error = dpif_netlink_port_dump_next__(dpif, &state->dump, &vport,
1226 &state->buf);
1227 if (error) {
1228 return error;
1229 }
1230 dpif_port->name = CONST_CAST(char *, vport.name);
1231 dpif_port->type = CONST_CAST(char *, get_vport_type(&vport));
1232 dpif_port->port_no = vport.port_no;
1233 return 0;
1234 }
1235
1236 static int
1237 dpif_netlink_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
1238 {
1239 struct dpif_netlink_port_state *state = state_;
1240 int error = nl_dump_done(&state->dump);
1241
1242 ofpbuf_uninit(&state->buf);
1243 free(state);
1244 return error;
1245 }
1246
1247 static int
1248 dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
1249 {
1250 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1251
1252 /* Lazily create the Netlink socket to listen for notifications. */
1253 if (!dpif->port_notifier) {
1254 struct nl_sock *sock;
1255 int error;
1256
1257 error = nl_sock_create(NETLINK_GENERIC, &sock);
1258 if (error) {
1259 return error;
1260 }
1261
1262 error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup);
1263 if (error) {
1264 nl_sock_destroy(sock);
1265 return error;
1266 }
1267 dpif->port_notifier = sock;
1268
1269 /* We have no idea of the current state so report that everything
1270 * changed. */
1271 return ENOBUFS;
1272 }
1273
1274 for (;;) {
1275 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1276 uint64_t buf_stub[4096 / 8];
1277 struct ofpbuf buf;
1278 int error;
1279
1280 ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
1281 error = nl_sock_recv(dpif->port_notifier, &buf, false);
1282 if (!error) {
1283 struct dpif_netlink_vport vport;
1284
1285 error = dpif_netlink_vport_from_ofpbuf(&vport, &buf);
1286 if (!error) {
1287 if (vport.dp_ifindex == dpif->dp_ifindex
1288 && (vport.cmd == OVS_VPORT_CMD_NEW
1289 || vport.cmd == OVS_VPORT_CMD_DEL
1290 || vport.cmd == OVS_VPORT_CMD_SET)) {
1291 VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
1292 dpif->dpif.full_name, vport.name, vport.cmd);
1293 if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) {
1294 dpif->refresh_channels = true;
1295 }
1296 *devnamep = xstrdup(vport.name);
1297 ofpbuf_uninit(&buf);
1298 return 0;
1299 }
1300 }
1301 } else if (error != EAGAIN) {
1302 VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
1303 ovs_strerror(error));
1304 nl_sock_drain(dpif->port_notifier);
1305 error = ENOBUFS;
1306 }
1307
1308 ofpbuf_uninit(&buf);
1309 if (error) {
1310 return error;
1311 }
1312 }
1313 }
1314
1315 static void
1316 dpif_netlink_port_poll_wait(const struct dpif *dpif_)
1317 {
1318 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1319
1320 if (dpif->port_notifier) {
1321 nl_sock_wait(dpif->port_notifier, POLLIN);
1322 } else {
1323 poll_immediate_wake();
1324 }
1325 }
1326
1327 static void
1328 dpif_netlink_flow_init_ufid(struct dpif_netlink_flow *request,
1329 const ovs_u128 *ufid, bool terse)
1330 {
1331 if (ufid) {
1332 request->ufid = *ufid;
1333 request->ufid_present = true;
1334 } else {
1335 request->ufid_present = false;
1336 }
1337 request->ufid_terse = terse;
1338 }
1339
1340 static void
1341 dpif_netlink_init_flow_get__(const struct dpif_netlink *dpif,
1342 const struct nlattr *key, size_t key_len,
1343 const ovs_u128 *ufid, bool terse,
1344 struct dpif_netlink_flow *request)
1345 {
1346 dpif_netlink_flow_init(request);
1347 request->cmd = OVS_FLOW_CMD_GET;
1348 request->dp_ifindex = dpif->dp_ifindex;
1349 request->key = key;
1350 request->key_len = key_len;
1351 dpif_netlink_flow_init_ufid(request, ufid, terse);
1352 }
1353
1354 static void
1355 dpif_netlink_init_flow_get(const struct dpif_netlink *dpif,
1356 const struct dpif_flow_get *get,
1357 struct dpif_netlink_flow *request)
1358 {
1359 dpif_netlink_init_flow_get__(dpif, get->key, get->key_len, get->ufid,
1360 false, request);
1361 }
1362
1363 static int
1364 dpif_netlink_flow_get__(const struct dpif_netlink *dpif,
1365 const struct nlattr *key, size_t key_len,
1366 const ovs_u128 *ufid, bool terse,
1367 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1368 {
1369 struct dpif_netlink_flow request;
1370
1371 dpif_netlink_init_flow_get__(dpif, key, key_len, ufid, terse, &request);
1372 return dpif_netlink_flow_transact(&request, reply, bufp);
1373 }
1374
1375 static int
1376 dpif_netlink_flow_get(const struct dpif_netlink *dpif,
1377 const struct dpif_netlink_flow *flow,
1378 struct dpif_netlink_flow *reply, struct ofpbuf **bufp)
1379 {
1380 return dpif_netlink_flow_get__(dpif, flow->key, flow->key_len,
1381 flow->ufid_present ? &flow->ufid : NULL,
1382 false, reply, bufp);
1383 }
1384
1385 static void
1386 dpif_netlink_init_flow_put(struct dpif_netlink *dpif,
1387 const struct dpif_flow_put *put,
1388 struct dpif_netlink_flow *request)
1389 {
1390 static const struct nlattr dummy_action;
1391
1392 dpif_netlink_flow_init(request);
1393 request->cmd = (put->flags & DPIF_FP_CREATE
1394 ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET);
1395 request->dp_ifindex = dpif->dp_ifindex;
1396 request->key = put->key;
1397 request->key_len = put->key_len;
1398 request->mask = put->mask;
1399 request->mask_len = put->mask_len;
1400 dpif_netlink_flow_init_ufid(request, put->ufid, false);
1401
1402 /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */
1403 request->actions = (put->actions
1404 ? put->actions
1405 : CONST_CAST(struct nlattr *, &dummy_action));
1406 request->actions_len = put->actions_len;
1407 if (put->flags & DPIF_FP_ZERO_STATS) {
1408 request->clear = true;
1409 }
1410 if (put->flags & DPIF_FP_PROBE) {
1411 request->probe = true;
1412 }
1413 request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
1414 }
1415
1416 static void
1417 dpif_netlink_init_flow_del__(struct dpif_netlink *dpif,
1418 const struct nlattr *key, size_t key_len,
1419 const ovs_u128 *ufid, bool terse,
1420 struct dpif_netlink_flow *request)
1421 {
1422 dpif_netlink_flow_init(request);
1423 request->cmd = OVS_FLOW_CMD_DEL;
1424 request->dp_ifindex = dpif->dp_ifindex;
1425 request->key = key;
1426 request->key_len = key_len;
1427 dpif_netlink_flow_init_ufid(request, ufid, terse);
1428 }
1429
1430 static void
1431 dpif_netlink_init_flow_del(struct dpif_netlink *dpif,
1432 const struct dpif_flow_del *del,
1433 struct dpif_netlink_flow *request)
1434 {
1435 dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
1436 del->ufid, del->terse, request);
1437 }
1438
1439 struct dpif_netlink_flow_dump {
1440 struct dpif_flow_dump up;
1441 struct nl_dump nl_dump;
1442 atomic_int status;
1443 };
1444
1445 static struct dpif_netlink_flow_dump *
1446 dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
1447 {
1448 return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
1449 }
1450
1451 static struct dpif_flow_dump *
1452 dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
1453 {
1454 const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
1455 struct dpif_netlink_flow_dump *dump;
1456 struct dpif_netlink_flow request;
1457 struct ofpbuf *buf;
1458
1459 dump = xmalloc(sizeof *dump);
1460 dpif_flow_dump_init(&dump->up, dpif_);
1461
1462 dpif_netlink_flow_init(&request);
1463 request.cmd = OVS_FLOW_CMD_GET;
1464 request.dp_ifindex = dpif->dp_ifindex;
1465 request.ufid_present = false;
1466 request.ufid_terse = terse;
1467
1468 buf = ofpbuf_new(1024);
1469 dpif_netlink_flow_to_ofpbuf(&request, buf);
1470 nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
1471 ofpbuf_delete(buf);
1472 atomic_init(&dump->status, 0);
1473 dump->up.terse = terse;
1474
1475 return &dump->up;
1476 }
1477
1478 static int
1479 dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
1480 {
1481 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1482 unsigned int nl_status = nl_dump_done(&dump->nl_dump);
1483 int dump_status;
1484
1485 /* No other thread has access to 'dump' at this point. */
1486 atomic_read_relaxed(&dump->status, &dump_status);
1487 free(dump);
1488 return dump_status ? dump_status : nl_status;
1489 }
1490
1491 struct dpif_netlink_flow_dump_thread {
1492 struct dpif_flow_dump_thread up;
1493 struct dpif_netlink_flow_dump *dump;
1494 struct dpif_netlink_flow flow;
1495 struct dpif_flow_stats stats;
1496 struct ofpbuf nl_flows; /* Always used to store flows. */
1497 struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */
1498 };
1499
1500 static struct dpif_netlink_flow_dump_thread *
1501 dpif_netlink_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
1502 {
1503 return CONTAINER_OF(thread, struct dpif_netlink_flow_dump_thread, up);
1504 }
1505
1506 static struct dpif_flow_dump_thread *
1507 dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1508 {
1509 struct dpif_netlink_flow_dump *dump = dpif_netlink_flow_dump_cast(dump_);
1510 struct dpif_netlink_flow_dump_thread *thread;
1511
1512 thread = xmalloc(sizeof *thread);
1513 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1514 thread->dump = dump;
1515 ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
1516 thread->nl_actions = NULL;
1517
1518 return &thread->up;
1519 }
1520
1521 static void
1522 dpif_netlink_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
1523 {
1524 struct dpif_netlink_flow_dump_thread *thread
1525 = dpif_netlink_flow_dump_thread_cast(thread_);
1526
1527 ofpbuf_uninit(&thread->nl_flows);
1528 ofpbuf_delete(thread->nl_actions);
1529 free(thread);
1530 }
1531
1532 static void
1533 dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
1534 const struct dpif_netlink_flow *datapath_flow)
1535 {
1536 dpif_flow->key = datapath_flow->key;
1537 dpif_flow->key_len = datapath_flow->key_len;
1538 dpif_flow->mask = datapath_flow->mask;
1539 dpif_flow->mask_len = datapath_flow->mask_len;
1540 dpif_flow->actions = datapath_flow->actions;
1541 dpif_flow->actions_len = datapath_flow->actions_len;
1542 dpif_flow->ufid_present = datapath_flow->ufid_present;
1543 dpif_flow->pmd_id = PMD_ID_NULL;
1544 if (datapath_flow->ufid_present) {
1545 dpif_flow->ufid = datapath_flow->ufid;
1546 } else {
1547 ovs_assert(datapath_flow->key && datapath_flow->key_len);
1548 dpif_flow_hash(dpif, datapath_flow->key, datapath_flow->key_len,
1549 &dpif_flow->ufid);
1550 }
1551 dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
1552 }
1553
1554 static int
1555 dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1556 struct dpif_flow *flows, int max_flows)
1557 {
1558 struct dpif_netlink_flow_dump_thread *thread
1559 = dpif_netlink_flow_dump_thread_cast(thread_);
1560 struct dpif_netlink_flow_dump *dump = thread->dump;
1561 struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
1562 int n_flows;
1563
1564 ofpbuf_delete(thread->nl_actions);
1565 thread->nl_actions = NULL;
1566
1567 n_flows = 0;
1568 while (!n_flows
1569 || (n_flows < max_flows && thread->nl_flows.size)) {
1570 struct dpif_netlink_flow datapath_flow;
1571 struct ofpbuf nl_flow;
1572 int error;
1573
1574 /* Try to grab another flow. */
1575 if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) {
1576 break;
1577 }
1578
1579 /* Convert the flow to our output format. */
1580 error = dpif_netlink_flow_from_ofpbuf(&datapath_flow, &nl_flow);
1581 if (error) {
1582 atomic_store_relaxed(&dump->status, error);
1583 break;
1584 }
1585
1586 if (dump->up.terse || datapath_flow.actions) {
1587 /* Common case: we don't want actions, or the flow includes
1588 * actions. */
1589 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1590 &datapath_flow);
1591 } else {
1592 /* Rare case: the flow does not include actions. Retrieve this
1593 * individual flow again to get the actions. */
1594 error = dpif_netlink_flow_get(dpif, &datapath_flow,
1595 &datapath_flow, &thread->nl_actions);
1596 if (error == ENOENT) {
1597 VLOG_DBG("dumped flow disappeared on get");
1598 continue;
1599 } else if (error) {
1600 VLOG_WARN("error fetching dumped flow: %s",
1601 ovs_strerror(error));
1602 atomic_store_relaxed(&dump->status, error);
1603 break;
1604 }
1605
1606 /* Save this flow. Then exit, because we only have one buffer to
1607 * handle this case. */
1608 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
1609 &datapath_flow);
1610 break;
1611 }
1612 }
1613 return n_flows;
1614 }
1615
1616 static void
1617 dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
1618 struct ofpbuf *buf)
1619 {
1620 struct ovs_header *k_exec;
1621 size_t key_ofs;
1622
1623 ofpbuf_prealloc_tailroom(buf, (64
1624 + dp_packet_size(d_exec->packet)
1625 + ODP_KEY_METADATA_SIZE
1626 + d_exec->actions_len));
1627
1628 nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST,
1629 OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION);
1630
1631 k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec);
1632 k_exec->dp_ifindex = dp_ifindex;
1633
1634 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
1635 dp_packet_data(d_exec->packet),
1636 dp_packet_size(d_exec->packet));
1637
1638 key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
1639 odp_key_from_pkt_metadata(buf, &d_exec->packet->md);
1640 nl_msg_end_nested(buf, key_ofs);
1641
1642 nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
1643 d_exec->actions, d_exec->actions_len);
1644 if (d_exec->probe) {
1645 nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
1646 }
1647 if (d_exec->mtu) {
1648 nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
1649 }
1650 }
1651
1652 /* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
1653 * Returns the number actually executed (at least 1, if 'n_ops' is
1654 * positive). */
1655 static size_t
1656 dpif_netlink_operate__(struct dpif_netlink *dpif,
1657 struct dpif_op **ops, size_t n_ops)
1658 {
1659 enum { MAX_OPS = 50 };
1660
1661 struct op_auxdata {
1662 struct nl_transaction txn;
1663
1664 struct ofpbuf request;
1665 uint64_t request_stub[1024 / 8];
1666
1667 struct ofpbuf reply;
1668 uint64_t reply_stub[1024 / 8];
1669 } auxes[MAX_OPS];
1670
1671 struct nl_transaction *txnsp[MAX_OPS];
1672 size_t i;
1673
1674 n_ops = MIN(n_ops, MAX_OPS);
1675 for (i = 0; i < n_ops; i++) {
1676 struct op_auxdata *aux = &auxes[i];
1677 struct dpif_op *op = ops[i];
1678 struct dpif_flow_put *put;
1679 struct dpif_flow_del *del;
1680 struct dpif_flow_get *get;
1681 struct dpif_netlink_flow flow;
1682
1683 ofpbuf_use_stub(&aux->request,
1684 aux->request_stub, sizeof aux->request_stub);
1685 aux->txn.request = &aux->request;
1686
1687 ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub);
1688 aux->txn.reply = NULL;
1689
1690 switch (op->type) {
1691 case DPIF_OP_FLOW_PUT:
1692 put = &op->u.flow_put;
1693 dpif_netlink_init_flow_put(dpif, put, &flow);
1694 if (put->stats) {
1695 flow.nlmsg_flags |= NLM_F_ECHO;
1696 aux->txn.reply = &aux->reply;
1697 }
1698 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1699 break;
1700
1701 case DPIF_OP_FLOW_DEL:
1702 del = &op->u.flow_del;
1703 dpif_netlink_init_flow_del(dpif, del, &flow);
1704 if (del->stats) {
1705 flow.nlmsg_flags |= NLM_F_ECHO;
1706 aux->txn.reply = &aux->reply;
1707 }
1708 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1709 break;
1710
1711 case DPIF_OP_EXECUTE:
1712 /* Can't execute a packet that won't fit in a Netlink attribute. */
1713 if (OVS_UNLIKELY(nl_attr_oversized(
1714 dp_packet_size(op->u.execute.packet)))) {
1715 /* Report an error immediately if this is the first operation.
1716 * Otherwise the easiest thing to do is to postpone to the next
1717 * call (when this will be the first operation). */
1718 if (i == 0) {
1719 VLOG_ERR_RL(&error_rl,
1720 "dropping oversized %"PRIu32"-byte packet",
1721 dp_packet_size(op->u.execute.packet));
1722 op->error = ENOBUFS;
1723 return 1;
1724 }
1725 n_ops = i;
1726 } else {
1727 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->u.execute,
1728 &aux->request);
1729 }
1730 break;
1731
1732 case DPIF_OP_FLOW_GET:
1733 get = &op->u.flow_get;
1734 dpif_netlink_init_flow_get(dpif, get, &flow);
1735 aux->txn.reply = get->buffer;
1736 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
1737 break;
1738
1739 default:
1740 OVS_NOT_REACHED();
1741 }
1742 }
1743
1744 for (i = 0; i < n_ops; i++) {
1745 txnsp[i] = &auxes[i].txn;
1746 }
1747 nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
1748
1749 for (i = 0; i < n_ops; i++) {
1750 struct op_auxdata *aux = &auxes[i];
1751 struct nl_transaction *txn = &auxes[i].txn;
1752 struct dpif_op *op = ops[i];
1753 struct dpif_flow_put *put;
1754 struct dpif_flow_del *del;
1755 struct dpif_flow_get *get;
1756
1757 op->error = txn->error;
1758
1759 switch (op->type) {
1760 case DPIF_OP_FLOW_PUT:
1761 put = &op->u.flow_put;
1762 if (put->stats) {
1763 if (!op->error) {
1764 struct dpif_netlink_flow reply;
1765
1766 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1767 txn->reply);
1768 if (!op->error) {
1769 dpif_netlink_flow_get_stats(&reply, put->stats);
1770 }
1771 }
1772 }
1773 break;
1774
1775 case DPIF_OP_FLOW_DEL:
1776 del = &op->u.flow_del;
1777 if (del->stats) {
1778 if (!op->error) {
1779 struct dpif_netlink_flow reply;
1780
1781 op->error = dpif_netlink_flow_from_ofpbuf(&reply,
1782 txn->reply);
1783 if (!op->error) {
1784 dpif_netlink_flow_get_stats(&reply, del->stats);
1785 }
1786 }
1787 }
1788 break;
1789
1790 case DPIF_OP_EXECUTE:
1791 break;
1792
1793 case DPIF_OP_FLOW_GET:
1794 get = &op->u.flow_get;
1795 if (!op->error) {
1796 struct dpif_netlink_flow reply;
1797
1798 op->error = dpif_netlink_flow_from_ofpbuf(&reply, txn->reply);
1799 if (!op->error) {
1800 dpif_netlink_flow_to_dpif_flow(&dpif->dpif, get->flow,
1801 &reply);
1802 }
1803 }
1804 break;
1805
1806 default:
1807 OVS_NOT_REACHED();
1808 }
1809
1810 ofpbuf_uninit(&aux->request);
1811 ofpbuf_uninit(&aux->reply);
1812 }
1813
1814 return n_ops;
1815 }
1816
/* Executes all 'n_ops' operations in 'ops' against 'dpif_' by repeatedly
 * handing the remaining operations to dpif_netlink_operate__() and skipping
 * past however many it reports as executed. */
static void
dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    size_t done;

    for (; n_ops > 0; ops += done, n_ops -= done) {
        done = dpif_netlink_operate__(dpif, ops, n_ops);
    }
}
1828
1829 #if _WIN32
/* Windows variant: releases the resources of 'handler' by deleting its
 * vport socket pool. */
static void
dpif_netlink_handler_uninit(struct dpif_handler *handler)
{
    struct dpif_handler *doomed = handler;

    vport_delete_sock_pool(doomed);
}
1835
/* Windows variant: initializes 'handler' by creating its vport socket pool.
 * Returns whatever vport_create_sock_pool() returns. */
static int
dpif_netlink_handler_init(struct dpif_handler *handler)
{
    int error = vport_create_sock_pool(handler);

    return error;
}
1841 #else
1842
1843 static int
1844 dpif_netlink_handler_init(struct dpif_handler *handler)
1845 {
1846 handler->epoll_fd = epoll_create(10);
1847 return handler->epoll_fd < 0 ? errno : 0;
1848 }
1849
1850 static void
1851 dpif_netlink_handler_uninit(struct dpif_handler *handler)
1852 {
1853 close(handler->epoll_fd);
1854 }
1855 #endif
1856
1857 /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
1858 * currently in 'dpif' in the kernel, by adding a new set of channels for
1859 * any kernel vport that lacks one and deleting any channels that have no
1860 * backing kernel vports. */
1861 static int
1862 dpif_netlink_refresh_channels(struct dpif_netlink *dpif, uint32_t n_handlers)
1863 OVS_REQ_WRLOCK(dpif->upcall_lock)
1864 {
1865 unsigned long int *keep_channels;
1866 struct dpif_netlink_vport vport;
1867 size_t keep_channels_nbits;
1868 struct nl_dump dump;
1869 uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
1870 struct ofpbuf buf;
1871 int retval = 0;
1872 size_t i;
1873
1874 ovs_assert(!WINDOWS || n_handlers <= 1);
1875 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
1876
1877 if (dpif->n_handlers != n_handlers) {
1878 destroy_all_channels(dpif);
1879 dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers);
1880 for (i = 0; i < n_handlers; i++) {
1881 int error;
1882 struct dpif_handler *handler = &dpif->handlers[i];
1883
1884 error = dpif_netlink_handler_init(handler);
1885 if (error) {
1886 size_t j;
1887 struct dpif_handler *tmp = &dpif->handlers[i];
1888
1889
1890 for (j = 0; j < i; j++) {
1891 dpif_netlink_handler_uninit(tmp);
1892 }
1893 free(dpif->handlers);
1894 dpif->handlers = NULL;
1895
1896 return error;
1897 }
1898 }
1899 dpif->n_handlers = n_handlers;
1900 }
1901
1902 for (i = 0; i < n_handlers; i++) {
1903 struct dpif_handler *handler = &dpif->handlers[i];
1904
1905 handler->event_offset = handler->n_events = 0;
1906 }
1907
1908 keep_channels_nbits = dpif->uc_array_size;
1909 keep_channels = bitmap_allocate(keep_channels_nbits);
1910
1911 ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
1912 dpif_netlink_port_dump_start__(dpif, &dump);
1913 while (!dpif_netlink_port_dump_next__(dpif, &dump, &vport, &buf)) {
1914 uint32_t port_no = odp_to_u32(vport.port_no);
1915 uint32_t *upcall_pids = NULL;
1916 int error;
1917
1918 if (port_no >= dpif->uc_array_size
1919 || !vport_get_pids(dpif, port_no, &upcall_pids)) {
1920 struct nl_sock **socksp = vport_create_socksp(dpif, &error);
1921
1922 if (!socksp) {
1923 goto error;
1924 }
1925
1926 error = vport_add_channels(dpif, vport.port_no, socksp);
1927 if (error) {
1928 VLOG_INFO("%s: could not add channels for port %s",
1929 dpif_name(&dpif->dpif), vport.name);
1930 vport_del_socksp(dpif, socksp);
1931 retval = error;
1932 goto error;
1933 }
1934 upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
1935 free(socksp);
1936 }
1937
1938 /* Configure the vport to deliver misses to 'sock'. */
1939 if (vport.upcall_pids[0] == 0
1940 || vport.n_upcall_pids != dpif->n_handlers
1941 || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof
1942 *upcall_pids)) {
1943 struct dpif_netlink_vport vport_request;
1944
1945 dpif_netlink_vport_init(&vport_request);
1946 vport_request.cmd = OVS_VPORT_CMD_SET;
1947 vport_request.dp_ifindex = dpif->dp_ifindex;
1948 vport_request.port_no = vport.port_no;
1949 vport_request.n_upcall_pids = dpif->n_handlers;
1950 vport_request.upcall_pids = upcall_pids;
1951 error = dpif_netlink_vport_transact(&vport_request, NULL, NULL);
1952 if (error) {
1953 VLOG_WARN_RL(&error_rl,
1954 "%s: failed to set upcall pid on port: %s",
1955 dpif_name(&dpif->dpif), ovs_strerror(error));
1956
1957 if (error != ENODEV && error != ENOENT) {
1958 retval = error;
1959 } else {
1960 /* The vport isn't really there, even though the dump says
1961 * it is. Probably we just hit a race after a port
1962 * disappeared. */
1963 }
1964 goto error;
1965 }
1966 }
1967
1968 if (port_no < keep_channels_nbits) {
1969 bitmap_set1(keep_channels, port_no);
1970 }
1971 free(upcall_pids);
1972 continue;
1973
1974 error:
1975 free(upcall_pids);
1976 vport_del_channels(dpif, vport.port_no);
1977 }
1978 nl_dump_done(&dump);
1979 ofpbuf_uninit(&buf);
1980
1981 /* Discard any saved channels that we didn't reuse. */
1982 for (i = 0; i < keep_channels_nbits; i++) {
1983 if (!bitmap_is_set(keep_channels, i)) {
1984 vport_del_channels(dpif, u32_to_odp(i));
1985 }
1986 }
1987 free(keep_channels);
1988
1989 return retval;
1990 }
1991
1992 static int
1993 dpif_netlink_recv_set__(struct dpif_netlink *dpif, bool enable)
1994 OVS_REQ_WRLOCK(dpif->upcall_lock)
1995 {
1996 if ((dpif->handlers != NULL) == enable) {
1997 return 0;
1998 } else if (!enable) {
1999 destroy_all_channels(dpif);
2000 return 0;
2001 } else {
2002 return dpif_netlink_refresh_channels(dpif, 1);
2003 }
2004 }
2005
2006 static int
2007 dpif_netlink_recv_set(struct dpif *dpif_, bool enable)
2008 {
2009 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2010 int error;
2011
2012 fat_rwlock_wrlock(&dpif->upcall_lock);
2013 error = dpif_netlink_recv_set__(dpif, enable);
2014 fat_rwlock_unlock(&dpif->upcall_lock);
2015
2016 return error;
2017 }
2018
2019 static int
2020 dpif_netlink_handlers_set(struct dpif *dpif_, uint32_t n_handlers)
2021 {
2022 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2023 int error = 0;
2024
2025 #ifdef _WIN32
2026 /* Multiple upcall handlers will be supported once kernel datapath supports
2027 * it. */
2028 if (n_handlers > 1) {
2029 return error;
2030 }
2031 #endif
2032
2033 fat_rwlock_wrlock(&dpif->upcall_lock);
2034 if (dpif->handlers) {
2035 error = dpif_netlink_refresh_channels(dpif, n_handlers);
2036 }
2037 fat_rwlock_unlock(&dpif->upcall_lock);
2038
2039 return error;
2040 }
2041
2042 static int
2043 dpif_netlink_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2044 uint32_t queue_id, uint32_t *priority)
2045 {
2046 if (queue_id < 0xf000) {
2047 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
2048 return 0;
2049 } else {
2050 return EINVAL;
2051 }
2052 }
2053
2054 static int
2055 parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
2056 struct dpif_upcall *upcall, int *dp_ifindex)
2057 {
2058 static const struct nl_policy ovs_packet_policy[] = {
2059 /* Always present. */
2060 [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
2061 .min_len = ETH_HEADER_LEN },
2062 [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
2063
2064 /* OVS_PACKET_CMD_ACTION only. */
2065 [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
2066 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
2067 [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
2068 [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
2069 };
2070
2071 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2072 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2073 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2074 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2075
2076 struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)];
2077 if (!nlmsg || !genl || !ovs_header
2078 || nlmsg->nlmsg_type != ovs_packet_family
2079 || !nl_policy_parse(&b, 0, ovs_packet_policy, a,
2080 ARRAY_SIZE(ovs_packet_policy))) {
2081 return EINVAL;
2082 }
2083
2084 int type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS
2085 : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION
2086 : -1);
2087 if (type < 0) {
2088 return EINVAL;
2089 }
2090
2091 /* (Re)set ALL fields of '*upcall' on successful return. */
2092 upcall->type = type;
2093 upcall->key = CONST_CAST(struct nlattr *,
2094 nl_attr_get(a[OVS_PACKET_ATTR_KEY]));
2095 upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]);
2096 dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
2097 upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
2098 upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
2099 upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
2100 upcall->mru = a[OVS_PACKET_ATTR_MRU];
2101
2102 /* Allow overwriting the netlink attribute header without reallocating. */
2103 dp_packet_use_stub(&upcall->packet,
2104 CONST_CAST(struct nlattr *,
2105 nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
2106 nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
2107 sizeof(struct nlattr));
2108 dp_packet_set_data(&upcall->packet,
2109 (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
2110 dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
2111
2112 if (nl_attr_find__(upcall->key, upcall->key_len, OVS_KEY_ATTR_ETHERNET)) {
2113 /* Ethernet frame */
2114 upcall->packet.packet_type = htonl(PT_ETH);
2115 } else {
2116 /* Non-Ethernet packet. Get the Ethertype from the NL attributes */
2117 ovs_be16 ethertype = 0;
2118 const struct nlattr *et_nla = nl_attr_find__(upcall->key,
2119 upcall->key_len,
2120 OVS_KEY_ATTR_ETHERTYPE);
2121 if (et_nla) {
2122 ethertype = nl_attr_get_be16(et_nla);
2123 }
2124 upcall->packet.packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
2125 ntohs(ethertype));
2126 dp_packet_set_l3(&upcall->packet, dp_packet_data(&upcall->packet));
2127 }
2128
2129 *dp_ifindex = ovs_header->dp_ifindex;
2130
2131 return 0;
2132 }
2133
2134 #ifdef _WIN32
2135 #define PACKET_RECV_BATCH_SIZE 50
2136 static int
2137 dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
2138 struct dpif_upcall *upcall, struct ofpbuf *buf)
2139 OVS_REQ_RDLOCK(dpif->upcall_lock)
2140 {
2141 struct dpif_handler *handler;
2142 int read_tries = 0;
2143 struct dpif_windows_vport_sock *sock_pool;
2144 uint32_t i;
2145
2146 if (!dpif->handlers) {
2147 return EAGAIN;
2148 }
2149
2150 /* Only one handler is supported currently. */
2151 if (handler_id >= 1) {
2152 return EAGAIN;
2153 }
2154
2155 if (handler_id >= dpif->n_handlers) {
2156 return EAGAIN;
2157 }
2158
2159 handler = &dpif->handlers[handler_id];
2160 sock_pool = handler->vport_sock_pool;
2161
2162 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2163 for (;;) {
2164 int dp_ifindex;
2165 int error;
2166
2167 if (++read_tries > PACKET_RECV_BATCH_SIZE) {
2168 return EAGAIN;
2169 }
2170
2171 error = nl_sock_recv(sock_pool[i].nl_sock, buf, false);
2172 if (error == ENOBUFS) {
2173 /* ENOBUFS typically means that we've received so many
2174 * packets that the buffer overflowed. Try again
2175 * immediately because there's almost certainly a packet
2176 * waiting for us. */
2177 /* XXX: report_loss(dpif, ch, idx, handler_id); */
2178 continue;
2179 }
2180
2181 /* XXX: ch->last_poll = time_msec(); */
2182 if (error) {
2183 if (error == EAGAIN) {
2184 break;
2185 }
2186 return error;
2187 }
2188
2189 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
2190 if (!error && dp_ifindex == dpif->dp_ifindex) {
2191 return 0;
2192 } else if (error) {
2193 return error;
2194 }
2195 }
2196 }
2197
2198 return EAGAIN;
2199 }
2200 #else
2201 static int
2202 dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
2203 struct dpif_upcall *upcall, struct ofpbuf *buf)
2204 OVS_REQ_RDLOCK(dpif->upcall_lock)
2205 {
2206 struct dpif_handler *handler;
2207 int read_tries = 0;
2208
2209 if (!dpif->handlers || handler_id >= dpif->n_handlers) {
2210 return EAGAIN;
2211 }
2212
2213 handler = &dpif->handlers[handler_id];
2214 if (handler->event_offset >= handler->n_events) {
2215 int retval;
2216
2217 handler->event_offset = handler->n_events = 0;
2218
2219 do {
2220 retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
2221 dpif->uc_array_size, 0);
2222 } while (retval < 0 && errno == EINTR);
2223
2224 if (retval < 0) {
2225 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2226 VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
2227 } else if (retval > 0) {
2228 handler->n_events = retval;
2229 }
2230 }
2231
2232 while (handler->event_offset < handler->n_events) {
2233 int idx = handler->epoll_events[handler->event_offset].data.u32;
2234 struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];
2235
2236 handler->event_offset++;
2237
2238 for (;;) {
2239 int dp_ifindex;
2240 int error;
2241
2242 if (++read_tries > 50) {
2243 return EAGAIN;
2244 }
2245
2246 error = nl_sock_recv(ch->sock, buf, false);
2247 if (error == ENOBUFS) {
2248 /* ENOBUFS typically means that we've received so many
2249 * packets that the buffer overflowed. Try again
2250 * immediately because there's almost certainly a packet
2251 * waiting for us. */
2252 report_loss(dpif, ch, idx, handler_id);
2253 continue;
2254 }
2255
2256 ch->last_poll = time_msec();
2257 if (error) {
2258 if (error == EAGAIN) {
2259 break;
2260 }
2261 return error;
2262 }
2263
2264 error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
2265 if (!error && dp_ifindex == dpif->dp_ifindex) {
2266 return 0;
2267 } else if (error) {
2268 return error;
2269 }
2270 }
2271 }
2272
2273 return EAGAIN;
2274 }
2275 #endif
2276
2277 static int
2278 dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
2279 struct dpif_upcall *upcall, struct ofpbuf *buf)
2280 {
2281 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2282 int error;
2283
2284 fat_rwlock_rdlock(&dpif->upcall_lock);
2285 #ifdef _WIN32
2286 error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
2287 #else
2288 error = dpif_netlink_recv__(dpif, handler_id, upcall, buf);
2289 #endif
2290 fat_rwlock_unlock(&dpif->upcall_lock);
2291
2292 return error;
2293 }
2294
2295 static void
2296 dpif_netlink_recv_wait__(struct dpif_netlink *dpif, uint32_t handler_id)
2297 OVS_REQ_RDLOCK(dpif->upcall_lock)
2298 {
2299 #ifdef _WIN32
2300 uint32_t i;
2301 struct dpif_windows_vport_sock *sock_pool =
2302 dpif->handlers[handler_id].vport_sock_pool;
2303
2304 /* Only one handler is supported currently. */
2305 if (handler_id >= 1) {
2306 return;
2307 }
2308
2309 for (i = 0; i < VPORT_SOCK_POOL_SIZE; i++) {
2310 nl_sock_wait(sock_pool[i].nl_sock, POLLIN);
2311 }
2312 #else
2313 if (dpif->handlers && handler_id < dpif->n_handlers) {
2314 struct dpif_handler *handler = &dpif->handlers[handler_id];
2315
2316 poll_fd_wait(handler->epoll_fd, POLLIN);
2317 }
2318 #endif
2319 }
2320
2321 static void
2322 dpif_netlink_recv_wait(struct dpif *dpif_, uint32_t handler_id)
2323 {
2324 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2325
2326 fat_rwlock_rdlock(&dpif->upcall_lock);
2327 dpif_netlink_recv_wait__(dpif, handler_id);
2328 fat_rwlock_unlock(&dpif->upcall_lock);
2329 }
2330
2331 static void
2332 dpif_netlink_recv_purge__(struct dpif_netlink *dpif)
2333 OVS_REQ_WRLOCK(dpif->upcall_lock)
2334 {
2335 if (dpif->handlers) {
2336 size_t i, j;
2337
2338 for (i = 0; i < dpif->uc_array_size; i++ ) {
2339 if (!dpif->handlers[0].channels[i].sock) {
2340 continue;
2341 }
2342
2343 for (j = 0; j < dpif->n_handlers; j++) {
2344 nl_sock_drain(dpif->handlers[j].channels[i].sock);
2345 }
2346 }
2347 }
2348 }
2349
2350 static void
2351 dpif_netlink_recv_purge(struct dpif *dpif_)
2352 {
2353 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2354
2355 fat_rwlock_wrlock(&dpif->upcall_lock);
2356 dpif_netlink_recv_purge__(dpif);
2357 fat_rwlock_unlock(&dpif->upcall_lock);
2358 }
2359
/* Returns the datapath version as a newly allocated string, or NULL if it
 * cannot be determined.
 *
 * On Linux the version is the first line of
 * /sys/module/openvswitch/version, without its trailing newline.  On other
 * platforms the result is always NULL. */
static char *
dpif_netlink_get_datapath_version(void)
{
    char *version_str = NULL;

#ifdef __linux__

#define MAX_VERSION_STR_SIZE 80
#define LINUX_DATAPATH_VERSION_FILE  "/sys/module/openvswitch/version"
    char line[MAX_VERSION_STR_SIZE];
    FILE *stream = fopen(LINUX_DATAPATH_VERSION_FILE, "r");

    if (!stream) {
        return NULL;
    }

    if (fgets(line, MAX_VERSION_STR_SIZE, stream)) {
        char *eol = strchr(line, '\n');

        if (eol) {
            *eol = '\0';
        }
        version_str = xstrdup(line);
    }
    fclose(stream);
#endif

    return version_str;
}
2389
/* State of one conntrack dump in progress.  The dump functions hand out a
 * pointer to the embedded 'up' member and later recover the enclosing
 * structure from it. */
struct dpif_netlink_ct_dump_state {
    struct ct_dpif_dump_state up;        /* Generic part seen by callers. */
    struct nl_ct_dump_state *nl_ct_dump; /* Set by nl_ct_dump_start(). */
};
2394
2395 static int
2396 dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
2397 struct ct_dpif_dump_state **dump_,
2398 const uint16_t *zone)
2399 {
2400 struct dpif_netlink_ct_dump_state *dump;
2401 int err;
2402
2403 dump = xzalloc(sizeof *dump);
2404 err = nl_ct_dump_start(&dump->nl_ct_dump, zone);
2405 if (err) {
2406 free(dump);
2407 return err;
2408 }
2409
2410 *dump_ = &dump->up;
2411
2412 return 0;
2413 }
2414
2415 static int
2416 dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
2417 struct ct_dpif_dump_state *dump_,
2418 struct ct_dpif_entry *entry)
2419 {
2420 struct dpif_netlink_ct_dump_state *dump;
2421
2422 INIT_CONTAINER(dump, dump_, up);
2423
2424 return nl_ct_dump_next(dump->nl_ct_dump, entry);
2425 }
2426
2427 static int
2428 dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
2429 struct ct_dpif_dump_state *dump_)
2430 {
2431 struct dpif_netlink_ct_dump_state *dump;
2432 int err;
2433
2434 INIT_CONTAINER(dump, dump_, up);
2435
2436 err = nl_ct_dump_done(dump->nl_ct_dump);
2437 free(dump);
2438 return err;
2439 }
2440
2441 static int
2442 dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
2443 {
2444 if (zone) {
2445 return nl_ct_flush_zone(*zone);
2446 } else {
2447 return nl_ct_flush();
2448 }
2449 }
2450
2451 \f
2452 /* Meters */
2453 static void
2454 dpif_netlink_meter_get_features(const struct dpif * dpif OVS_UNUSED,
2455 struct ofputil_meter_features *features)
2456 {
2457 features->max_meters = 0;
2458 features->band_types = 0;
2459 features->capabilities = 0;
2460 features->max_bands = 0;
2461 features->max_color = 0;
2462 }
2463
/* Stub for adding or modifying a meter: ignores all of its arguments and
 * always fails with EFBIG. */
static int
dpif_netlink_meter_set(struct dpif *dpif OVS_UNUSED,
                       ofproto_meter_id *meter_id OVS_UNUSED,
                       struct ofputil_meter_config *config OVS_UNUSED)
{
    return EFBIG; /* meter_id out of range */
}
2471
/* Stub for querying a meter: ignores all of its arguments, leaves 'stats'
 * untouched, and always fails with EFBIG. */
static int
dpif_netlink_meter_get(const struct dpif *dpif OVS_UNUSED,
                       ofproto_meter_id meter_id OVS_UNUSED,
                       struct ofputil_meter_stats *stats OVS_UNUSED,
                       uint16_t n_bands OVS_UNUSED)
{
    return EFBIG; /* meter_id out of range */
}
2480
/* Stub for deleting a meter: ignores all of its arguments, leaves 'stats'
 * untouched, and always fails with EFBIG. */
static int
dpif_netlink_meter_del(struct dpif *dpif OVS_UNUSED,
                       ofproto_meter_id meter_id OVS_UNUSED,
                       struct ofputil_meter_stats *stats OVS_UNUSED,
                       uint16_t n_bands OVS_UNUSED)
{
    return EFBIG; /* meter_id out of range */
}
2489
2490 \f
/* dpif provider table for the Netlink-based datapath, named "system".
 *
 * The initializer is positional, so the order of the entries must follow the
 * member order of struct dpif_class.  A NULL entry means this provider does
 * not supply that hook. */
const struct dpif_class dpif_netlink_class = {
    "system",
    NULL,                       /* init */
    dpif_netlink_enumerate,
    NULL,                       /* NOTE(review): slot name is not shown here;
                                 * presumably port_open_type -- confirm
                                 * against struct dpif_class. */
    dpif_netlink_open,
    dpif_netlink_close,
    dpif_netlink_destroy,
    dpif_netlink_run,
    NULL,                       /* wait */
    dpif_netlink_get_stats,
    dpif_netlink_port_add,
    dpif_netlink_port_del,
    NULL,                       /* port_set_config */
    dpif_netlink_port_query_by_number,
    dpif_netlink_port_query_by_name,
    dpif_netlink_port_get_pid,
    dpif_netlink_port_dump_start,
    dpif_netlink_port_dump_next,
    dpif_netlink_port_dump_done,
    dpif_netlink_port_poll,
    dpif_netlink_port_poll_wait,
    dpif_netlink_flow_flush,
    dpif_netlink_flow_dump_create,
    dpif_netlink_flow_dump_destroy,
    dpif_netlink_flow_dump_thread_create,
    dpif_netlink_flow_dump_thread_destroy,
    dpif_netlink_flow_dump_next,
    dpif_netlink_operate,
    dpif_netlink_recv_set,
    dpif_netlink_handlers_set,
    NULL,                       /* set_config */
    dpif_netlink_queue_to_priority,
    dpif_netlink_recv,
    dpif_netlink_recv_wait,
    dpif_netlink_recv_purge,
    NULL,                       /* register_dp_purge_cb */
    NULL,                       /* register_upcall_cb */
    NULL,                       /* enable_upcall */
    NULL,                       /* disable_upcall */
    dpif_netlink_get_datapath_version, /* get_datapath_version */
    dpif_netlink_ct_dump_start,
    dpif_netlink_ct_dump_next,
    dpif_netlink_ct_dump_done,
    dpif_netlink_ct_flush,
    dpif_netlink_meter_get_features,
    dpif_netlink_meter_set,
    dpif_netlink_meter_get,
    dpif_netlink_meter_del,
};
2541
2542 static int
2543 dpif_netlink_init(void)
2544 {
2545 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
2546 static int error;
2547
2548 if (ovsthread_once_start(&once)) {
2549 error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY,
2550 &ovs_datapath_family);
2551 if (error) {
2552 VLOG_WARN("Generic Netlink family '%s' does not exist. "
2553 "The Open vSwitch kernel module is probably not loaded.",
2554 OVS_DATAPATH_FAMILY);
2555 }
2556 if (!error) {
2557 error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family);
2558 }
2559 if (!error) {
2560 error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family);
2561 }
2562 if (!error) {
2563 error = nl_lookup_genl_family(OVS_PACKET_FAMILY,
2564 &ovs_packet_family);
2565 }
2566 if (!error) {
2567 error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP,
2568 &ovs_vport_mcgroup);
2569 }
2570
2571 ovs_tunnels_out_of_tree = dpif_netlink_rtnl_probe_oot_tunnels();
2572
2573 ovsthread_once_done(&once);
2574 }
2575
2576 return error;
2577 }
2578
2579 bool
2580 dpif_netlink_is_internal_device(const char *name)
2581 {
2582 struct dpif_netlink_vport reply;
2583 struct ofpbuf *buf;
2584 int error;
2585
2586 error = dpif_netlink_vport_get(name, &reply, &buf);
2587 if (!error) {
2588 ofpbuf_delete(buf);
2589 } else if (error != ENODEV && error != ENOENT) {
2590 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
2591 name, ovs_strerror(error));
2592 }
2593
2594 return reply.type == OVS_VPORT_TYPE_INTERNAL;
2595 }
2596
2597 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2598 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
2599 * positive errno value.
2600 *
2601 * 'vport' will contain pointers into 'buf', so the caller should not free
2602 * 'buf' while 'vport' is still in use. */
2603 static int
2604 dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *vport,
2605 const struct ofpbuf *buf)
2606 {
2607 static const struct nl_policy ovs_vport_policy[] = {
2608 [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
2609 [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
2610 [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
2611 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC },
2612 [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
2613 .optional = true },
2614 [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
2615 };
2616
2617 dpif_netlink_vport_init(vport);
2618
2619 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2620 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2621 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2622 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2623
2624 struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)];
2625 if (!nlmsg || !genl || !ovs_header
2626 || nlmsg->nlmsg_type != ovs_vport_family
2627 || !nl_policy_parse(&b, 0, ovs_vport_policy, a,
2628 ARRAY_SIZE(ovs_vport_policy))) {
2629 return EINVAL;
2630 }
2631
2632 vport->cmd = genl->cmd;
2633 vport->dp_ifindex = ovs_header->dp_ifindex;
2634 vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]);
2635 vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2636 vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]);
2637 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2638 vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID])
2639 / (sizeof *vport->upcall_pids);
2640 vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]);
2641
2642 }
2643 if (a[OVS_VPORT_ATTR_STATS]) {
2644 vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]);
2645 }
2646 if (a[OVS_VPORT_ATTR_OPTIONS]) {
2647 vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]);
2648 vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]);
2649 }
2650 return 0;
2651 }
2652
2653 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2654 * followed by Netlink attributes corresponding to 'vport'. */
2655 static void
2656 dpif_netlink_vport_to_ofpbuf(const struct dpif_netlink_vport *vport,
2657 struct ofpbuf *buf)
2658 {
2659 struct ovs_header *ovs_header;
2660
2661 nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
2662 vport->cmd, OVS_VPORT_VERSION);
2663
2664 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2665 ovs_header->dp_ifindex = vport->dp_ifindex;
2666
2667 if (vport->port_no != ODPP_NONE) {
2668 nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
2669 }
2670
2671 if (vport->type != OVS_VPORT_TYPE_UNSPEC) {
2672 nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type);
2673 }
2674
2675 if (vport->name) {
2676 nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name);
2677 }
2678
2679 if (vport->upcall_pids) {
2680 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID,
2681 vport->upcall_pids,
2682 vport->n_upcall_pids * sizeof *vport->upcall_pids);
2683 }
2684
2685 if (vport->stats) {
2686 nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS,
2687 vport->stats, sizeof *vport->stats);
2688 }
2689
2690 if (vport->options) {
2691 nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS,
2692 vport->options, vport->options_len);
2693 }
2694 }
2695
/* Clears 'vport' to "empty" values: every byte is zeroed, and then the port
 * number is set to ODPP_NONE. */
void
dpif_netlink_vport_init(struct dpif_netlink_vport *vport)
{
    memset(vport, 0, sizeof *vport);
    vport->port_no = ODPP_NONE;
}
2703
2704 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2705 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2706 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2707 * result of the command is expected to be an ovs_vport also, which is decoded
2708 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2709 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2710 int
2711 dpif_netlink_vport_transact(const struct dpif_netlink_vport *request,
2712 struct dpif_netlink_vport *reply,
2713 struct ofpbuf **bufp)
2714 {
2715 struct ofpbuf *request_buf;
2716 int error;
2717
2718 ovs_assert((reply != NULL) == (bufp != NULL));
2719
2720 error = dpif_netlink_init();
2721 if (error) {
2722 if (reply) {
2723 *bufp = NULL;
2724 dpif_netlink_vport_init(reply);
2725 }
2726 return error;
2727 }
2728
2729 request_buf = ofpbuf_new(1024);
2730 dpif_netlink_vport_to_ofpbuf(request, request_buf);
2731 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
2732 ofpbuf_delete(request_buf);
2733
2734 if (reply) {
2735 if (!error) {
2736 error = dpif_netlink_vport_from_ofpbuf(reply, *bufp);
2737 }
2738 if (error) {
2739 dpif_netlink_vport_init(reply);
2740 ofpbuf_delete(*bufp);
2741 *bufp = NULL;
2742 }
2743 }
2744 return error;
2745 }
2746
2747 /* Obtains information about the kernel vport named 'name' and stores it into
2748 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
2749 * longer needed ('reply' will contain pointers into '*bufp'). */
2750 int
2751 dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply,
2752 struct ofpbuf **bufp)
2753 {
2754 struct dpif_netlink_vport request;
2755
2756 dpif_netlink_vport_init(&request);
2757 request.cmd = OVS_VPORT_CMD_GET;
2758 request.name = name;
2759
2760 return dpif_netlink_vport_transact(&request, reply, bufp);
2761 }
2762
2763 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2764 * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a
2765 * positive errno value.
2766 *
2767 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
2768 * while 'dp' is still in use. */
2769 static int
2770 dpif_netlink_dp_from_ofpbuf(struct dpif_netlink_dp *dp, const struct ofpbuf *buf)
2771 {
2772 static const struct nl_policy ovs_datapath_policy[] = {
2773 [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
2774 [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats),
2775 .optional = true },
2776 [OVS_DP_ATTR_MEGAFLOW_STATS] = {
2777 NL_POLICY_FOR(struct ovs_dp_megaflow_stats),
2778 .optional = true },
2779 };
2780
2781 dpif_netlink_dp_init(dp);
2782
2783 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2784 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2785 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2786 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2787
2788 struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)];
2789 if (!nlmsg || !genl || !ovs_header
2790 || nlmsg->nlmsg_type != ovs_datapath_family
2791 || !nl_policy_parse(&b, 0, ovs_datapath_policy, a,
2792 ARRAY_SIZE(ovs_datapath_policy))) {
2793 return EINVAL;
2794 }
2795
2796 dp->cmd = genl->cmd;
2797 dp->dp_ifindex = ovs_header->dp_ifindex;
2798 dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]);
2799 if (a[OVS_DP_ATTR_STATS]) {
2800 dp->stats = nl_attr_get(a[OVS_DP_ATTR_STATS]);
2801 }
2802
2803 if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) {
2804 dp->megaflow_stats = nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]);
2805 }
2806
2807 return 0;
2808 }
2809
2810 /* Appends to 'buf' the Generic Netlink message described by 'dp'. */
2811 static void
2812 dpif_netlink_dp_to_ofpbuf(const struct dpif_netlink_dp *dp, struct ofpbuf *buf)
2813 {
2814 struct ovs_header *ovs_header;
2815
2816 nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family,
2817 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd,
2818 OVS_DATAPATH_VERSION);
2819
2820 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
2821 ovs_header->dp_ifindex = dp->dp_ifindex;
2822
2823 if (dp->name) {
2824 nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name);
2825 }
2826
2827 if (dp->upcall_pid) {
2828 nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
2829 }
2830
2831 if (dp->user_features) {
2832 nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
2833 }
2834
2835 /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
2836 }
2837
/* Clears 'dp' to "empty" values by zeroing every byte of it. */
static void
dpif_netlink_dp_init(struct dpif_netlink_dp *dp)
{
    memset(dp, 0, sizeof *dp);
}
2844
2845 static void
2846 dpif_netlink_dp_dump_start(struct nl_dump *dump)
2847 {
2848 struct dpif_netlink_dp request;
2849 struct ofpbuf *buf;
2850
2851 dpif_netlink_dp_init(&request);
2852 request.cmd = OVS_DP_CMD_GET;
2853
2854 buf = ofpbuf_new(1024);
2855 dpif_netlink_dp_to_ofpbuf(&request, buf);
2856 nl_dump_start(dump, NETLINK_GENERIC, buf);
2857 ofpbuf_delete(buf);
2858 }
2859
2860 /* Executes 'request' in the kernel datapath. If the command fails, returns a
2861 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
2862 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
2863 * result of the command is expected to be of the same form, which is decoded
2864 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
2865 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
2866 static int
2867 dpif_netlink_dp_transact(const struct dpif_netlink_dp *request,
2868 struct dpif_netlink_dp *reply, struct ofpbuf **bufp)
2869 {
2870 struct ofpbuf *request_buf;
2871 int error;
2872
2873 ovs_assert((reply != NULL) == (bufp != NULL));
2874
2875 request_buf = ofpbuf_new(1024);
2876 dpif_netlink_dp_to_ofpbuf(request, request_buf);
2877 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
2878 ofpbuf_delete(request_buf);
2879
2880 if (reply) {
2881 dpif_netlink_dp_init(reply);
2882 if (!error) {
2883 error = dpif_netlink_dp_from_ofpbuf(reply, *bufp);
2884 }
2885 if (error) {
2886 ofpbuf_delete(*bufp);
2887 *bufp = NULL;
2888 }
2889 }
2890 return error;
2891 }
2892
2893 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
2894 * The caller must free '*bufp' when the reply is no longer needed ('reply'
2895 * will contain pointers into '*bufp'). */
2896 static int
2897 dpif_netlink_dp_get(const struct dpif *dpif_, struct dpif_netlink_dp *reply,
2898 struct ofpbuf **bufp)
2899 {
2900 struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
2901 struct dpif_netlink_dp request;
2902
2903 dpif_netlink_dp_init(&request);
2904 request.cmd = OVS_DP_CMD_GET;
2905 request.dp_ifindex = dpif->dp_ifindex;
2906
2907 return dpif_netlink_dp_transact(&request, reply, bufp);
2908 }
2909
2910 /* Parses the contents of 'buf', which contains a "struct ovs_header" followed
2911 * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
2912 * positive errno value.
2913 *
2914 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
2915 * while 'flow' is still in use. */
2916 static int
2917 dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
2918 const struct ofpbuf *buf)
2919 {
2920 static const struct nl_policy ovs_flow_policy[__OVS_FLOW_ATTR_MAX] = {
2921 [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED, .optional = true },
2922 [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true },
2923 [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
2924 [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
2925 .optional = true },
2926 [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
2927 [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
2928 [OVS_FLOW_ATTR_UFID] = { .type = NL_A_UNSPEC, .optional = true,
2929 .min_len = sizeof(ovs_u128) },
2930 /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
2931 /* The kernel never uses OVS_FLOW_ATTR_PROBE. */
2932 /* The kernel never uses OVS_FLOW_ATTR_UFID_FLAGS. */
2933 };
2934
2935 dpif_netlink_flow_init(flow);
2936
2937 struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size);
2938 struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
2939 struct genlmsghdr *genl = ofpbuf_try_pull(&b, sizeof *genl);
2940 struct ovs_header *ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
2941
2942 struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)];
2943 if (!nlmsg || !genl || !ovs_header
2944 || nlmsg->nlmsg_type != ovs_flow_family
2945 || !nl_policy_parse(&b, 0, ovs_flow_policy, a,
2946 ARRAY_SIZE(ovs_flow_policy))) {
2947 return EINVAL;
2948 }
2949 if (!a[OVS_FLOW_ATTR_KEY] && !a[OVS_FLOW_ATTR_UFID]) {
2950 return EINVAL;
2951 }
2952
2953 flow->nlmsg_flags = nlmsg->nlmsg_flags;
2954 flow->dp_ifindex = ovs_header->dp_ifindex;
2955 if (a[OVS_FLOW_ATTR_KEY]) {
2956 flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]);
2957 flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]);
2958 }
2959
2960 if (a[OVS_FLOW_ATTR_UFID]) {
2961 const ovs_u128 *ufid;
2962
2963 ufid = nl_attr_get_unspec(a[OVS_FLOW_ATTR_UFID],
2964 nl_attr_get_size(a[OVS_FLOW_ATTR_UFID]));
2965 flow->ufid = *ufid;
2966 flow->ufid_present = true;
2967 }
2968 if (a[OVS_FLOW_ATTR_MASK]) {
2969 flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]);
2970 flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]);
2971 }
2972 if (a[OVS_FLOW_ATTR_ACTIONS]) {
2973 flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]);
2974 flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]);
2975 }
2976 if (a[OVS_FLOW_ATTR_STATS]) {
2977 flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]);
2978 }
2979 if (a[OVS_FLOW_ATTR_TCP_FLAGS]) {
2980 flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]);
2981 }
2982 if (a[OVS_FLOW_ATTR_USED]) {
2983 flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]);
2984 }
2985 return 0;
2986 }
2987
2988 /* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
2989 * followed by Netlink attributes corresponding to 'flow'. */
2990 static void
2991 dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
2992 struct ofpbuf *buf)
2993 {
2994 struct ovs_header *ovs_header;
2995
2996 nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
2997 NLM_F_REQUEST | flow->nlmsg_flags,
2998 flow->cmd, OVS_FLOW_VERSION);
2999
3000 ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header);
3001 ovs_header->dp_ifindex = flow->dp_ifindex;
3002
3003 if (flow->ufid_present) {
3004 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_UFID, &flow->ufid,
3005 sizeof flow->ufid);
3006 }
3007 if (flow->ufid_terse) {
3008 nl_msg_put_u32(buf, OVS_FLOW_ATTR_UFID_FLAGS,
3009 OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
3010 | OVS_UFID_F_OMIT_ACTIONS);
3011 }
3012 if (!flow->ufid_terse || !flow->ufid_present) {
3013 if (flow->key_len) {
3014 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY,
3015 flow->key, flow->key_len);
3016 }
3017
3018 if (flow->mask_len) {
3019 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK,
3020 flow->mask, flow->mask_len);
3021 }
3022 if (flow->actions || flow->actions_len) {
3023 nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
3024 flow->actions, flow->actions_len);
3025 }
3026 }
3027
3028 /* We never need to send these to the kernel. */
3029 ovs_assert(!flow->stats);
3030 ovs_assert(!flow->tcp_flags);
3031 ovs_assert(!flow->used);
3032
3033 if (flow->clear) {
3034 nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
3035 }
3036 if (flow->probe) {
3037 nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
3038 }
3039 }
3040
/* Clears 'flow' to "empty" values by zeroing every byte of it. */
static void
dpif_netlink_flow_init(struct dpif_netlink_flow *flow)
{
    memset(flow, 0, sizeof *flow);
}
3047
3048 /* Executes 'request' in the kernel datapath. If the command fails, returns a
3049 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
3050 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
3051 * result of the command is expected to be a flow also, which is decoded and
3052 * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
3053 * is no longer needed ('reply' will contain pointers into '*bufp'). */
3054 static int
3055 dpif_netlink_flow_transact(struct dpif_netlink_flow *request,
3056 struct dpif_netlink_flow *reply,
3057 struct ofpbuf **bufp)
3058 {
3059 struct ofpbuf *request_buf;
3060 int error;
3061
3062 ovs_assert((reply != NULL) == (bufp != NULL));
3063
3064 if (reply) {
3065 request->nlmsg_flags |= NLM_F_ECHO;
3066 }
3067
3068 request_buf = ofpbuf_new(1024);
3069 dpif_netlink_flow_to_ofpbuf(request, request_buf);
3070 error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
3071 ofpbuf_delete(request_buf);
3072
3073 if (reply) {
3074 if (!error) {
3075 error = dpif_netlink_flow_from_ofpbuf(reply, *bufp);
3076 }
3077 if (error) {
3078 dpif_netlink_flow_init(reply);
3079 ofpbuf_delete(*bufp);
3080 *bufp = NULL;
3081 }
3082 }
3083 return error;
3084 }
3085
3086 static void
3087 dpif_netlink_flow_get_stats(const struct dpif_netlink_flow *flow,
3088 struct dpif_flow_stats *stats)
3089 {
3090 if (flow->stats) {
3091 stats->n_packets = get_32aligned_u64(&flow->stats->n_packets);
3092 stats->n_bytes = get_32aligned_u64(&flow->stats->n_bytes);
3093 } else {
3094 stats->n_packets = 0;
3095 stats->n_bytes = 0;
3096 }
3097 stats->used = flow->used ? get_32aligned_u64(flow->used) : 0;
3098 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;
3099 }
3100
3101 /* Logs information about a packet that was recently lost in 'ch' (in
3102 * 'dpif_'). */
3103 static void
3104 report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx,
3105 uint32_t handler_id)
3106 {
3107 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
3108 struct ds s;
3109
3110 if (VLOG_DROP_WARN(&rl)) {
3111 return;
3112 }
3113
3114 ds_init(&s);
3115 if (ch->last_poll != LLONG_MIN) {
3116 ds_put_format(&s, " (last polled %lld ms ago)",
3117 time_msec() - ch->last_poll);
3118 }
3119
3120 VLOG_WARN("%s: lost packet on port channel %u of handler %u",
3121 dpif_name(&dpif->dpif), ch_idx, handler_id);
3122 ds_destroy(&s);
3123 }