]>
Commit | Line | Data |
---|---|---|
96fba48f | 1 | /* |
8917f72c | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
96fba48f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
9fe3b9a2 BP |
18 | |
19 | #include "dpif-linux.h" | |
96fba48f | 20 | |
96fba48f BP |
21 | #include <ctype.h> |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
b90fa799 | 26 | #include <linux/types.h> |
aae51f53 | 27 | #include <linux/pkt_sched.h> |
8522ba09 | 28 | #include <poll.h> |
96fba48f | 29 | #include <stdlib.h> |
8522ba09 | 30 | #include <strings.h> |
50f80534 | 31 | #include <sys/epoll.h> |
10dcf8de | 32 | #include <sys/stat.h> |
96fba48f BP |
33 | #include <unistd.h> |
34 | ||
773cd538 | 35 | #include "bitmap.h" |
96fba48f | 36 | #include "dpif-provider.h" |
80e5eed9 | 37 | #include "dynamic-string.h" |
eb8b28e7 | 38 | #include "flow.h" |
1579cf67 | 39 | #include "fat-rwlock.h" |
3abc4a1a | 40 | #include "netdev.h" |
032aa6a3 | 41 | #include "netdev-linux.h" |
c3827f61 | 42 | #include "netdev-vport.h" |
45c8d3a1 | 43 | #include "netlink-notifier.h" |
982b8810 | 44 | #include "netlink-socket.h" |
856081f6 | 45 | #include "netlink.h" |
feebdea2 | 46 | #include "odp-util.h" |
96fba48f | 47 | #include "ofpbuf.h" |
856081f6 | 48 | #include "packets.h" |
96fba48f | 49 | #include "poll-loop.h" |
17411ecf | 50 | #include "random.h" |
54825e09 | 51 | #include "shash.h" |
b3c01ed3 | 52 | #include "sset.h" |
14b4d2f9 | 53 | #include "timeval.h" |
d6569377 | 54 | #include "unaligned.h" |
96fba48f | 55 | #include "util.h" |
96fba48f | 56 | #include "vlog.h" |
5136ce49 | 57 | |
d98e6007 | 58 | VLOG_DEFINE_THIS_MODULE(dpif_linux); |
95b1d73a | 59 | enum { MAX_PORTS = USHRT_MAX }; |
773cd538 | 60 | |
24b019f8 JP |
61 | /* This ethtool flag was introduced in Linux 2.6.24, so it might be |
62 | * missing if we have old headers. */ | |
63 | #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */ | |
64 | ||
/* Fields of a datapath Generic Netlink message, grouped by the part of the
 * message they belong to (see the section comments below).  Used both to
 * build requests and to hold parsed replies; the pointer members are 'const'
 * and are not owned by this structure. */
struct dpif_linux_dp {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    int dp_ifindex;

    /* Attributes. */
    const char *name;                  /* OVS_DP_ATTR_NAME. */
    const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
    struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
    struct ovs_dp_megaflow_stats megaflow_stats;
                                       /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
};
80 | ||
81 | static void dpif_linux_dp_init(struct dpif_linux_dp *); | |
aaff4b55 BP |
82 | static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *, |
83 | const struct ofpbuf *); | |
84 | static void dpif_linux_dp_dump_start(struct nl_dump *); | |
d6569377 BP |
85 | static int dpif_linux_dp_transact(const struct dpif_linux_dp *request, |
86 | struct dpif_linux_dp *reply, | |
87 | struct ofpbuf **bufp); | |
88 | static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply, | |
89 | struct ofpbuf **bufp); | |
90 | ||
/* Fields of a flow Generic Netlink message, grouped by the part of the
 * message they belong to.  The pointer members are 'const' views; each
 * pointer/length pair ('key'/'key_len', 'mask'/'mask_len',
 * 'actions'/'actions_len') describes one Netlink attribute payload. */
struct dpif_linux_flow {
    /* Generic Netlink header. */
    uint8_t cmd;

    /* struct ovs_header. */
    unsigned int nlmsg_flags;
    int dp_ifindex;

    /* Attributes.
     *
     * The 'stats' member points to 64-bit data that might only be aligned on
     * 32-bit boundaries, so get_unaligned_u64() should be used to access its
     * values.
     *
     * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
     * the Netlink version of the command, even if actions_len is zero. */
    const struct nlattr *key;           /* OVS_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *mask;          /* OVS_FLOW_ATTR_MASK. */
    size_t mask_len;
    const struct nlattr *actions;       /* OVS_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* OVS_FLOW_ATTR_TCP_FLAGS. */
    const ovs_32aligned_u64 *used;      /* OVS_FLOW_ATTR_USED. */
    bool clear;                         /* OVS_FLOW_ATTR_CLEAR. */
};
118 | ||
119 | static void dpif_linux_flow_init(struct dpif_linux_flow *); | |
37a1300c BP |
120 | static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *, |
121 | const struct ofpbuf *); | |
122 | static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *, | |
123 | struct ofpbuf *); | |
30b44744 | 124 | static int dpif_linux_flow_transact(struct dpif_linux_flow *request, |
d6569377 BP |
125 | struct dpif_linux_flow *reply, |
126 | struct ofpbuf **bufp); | |
127 | static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *, | |
128 | struct dpif_flow_stats *); | |
129 | ||
/* One of the dpif channels between the kernel and userspace.
 *
 * A channel whose 'sock' is NULL is unused; the channel arrays are indexed by
 * datapath port number (see vport_add_channels()). */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket. */
    long long int last_poll;    /* Last time this channel was polled. */
};
135 | ||
1579cf67 AW |
/* Per-handler upcall state: the handler's channel array together with the
 * epoll descriptor and event buffer used to poll the channel sockets.  Both
 * arrays hold 'uc_array_size' elements (see struct dpif_linux). */
struct dpif_handler {
    struct dpif_channel *channels;/* Array of channels for each handler. */
    struct epoll_event *epoll_events;
    int epoll_fd;                 /* epoll fd that includes channel socks. */
    int n_events;                 /* Num events returned by epoll_wait(). */
    int event_offset;             /* Offset into 'epoll_events'. */
};
14b4d2f9 | 143 | |
96fba48f BP |
/* Datapath interface for the openvswitch Linux kernel module.
 *
 * 'handlers' is NULL (and 'n_handlers' and 'uc_array_size' are 0) while no
 * upcall channels exist; see destroy_all_channels().  The upcall members are
 * protected by 'upcall_lock'. */
struct dpif_linux {
    struct dpif dpif;
    int dp_ifindex;

    /* Upcall messages. */
    struct fat_rwlock upcall_lock;
    struct dpif_handler *handlers;
    uint32_t n_handlers;           /* Num of upcall handlers. */
    int uc_array_size;             /* Size of 'handler->channels' and */
                                   /* 'handler->epoll_events'. */

    /* Change notification. */
    struct nl_sock *port_notifier; /* vport multicast group subscriber. */
    bool refresh_channels;
};
160 | ||
9b00386b AW |
161 | static void report_loss(struct dpif_linux *, struct dpif_channel *, |
162 | uint32_t ch_idx, uint32_t handler_id); | |
1579cf67 | 163 | |
96fba48f BP |
164 | static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); |
165 | ||
e4516b20 BP |
166 | /* Generic Netlink family numbers for OVS. |
167 | * | |
168 | * Initialized by dpif_linux_init(). */ | |
df2c07f4 JP |
169 | static int ovs_datapath_family; |
170 | static int ovs_vport_family; | |
171 | static int ovs_flow_family; | |
172 | static int ovs_packet_family; | |
982b8810 | 173 | |
e4516b20 BP |
174 | /* Generic Netlink multicast groups for OVS. |
175 | * | |
176 | * Initialized by dpif_linux_init(). */ | |
177 | static unsigned int ovs_vport_mcgroup; | |
982b8810 BP |
178 | |
179 | static int dpif_linux_init(void); | |
e4516b20 | 180 | static int open_dpif(const struct dpif_linux_dp *, struct dpif **); |
4e022ec0 | 181 | static uint32_t dpif_linux_port_get_pid(const struct dpif *, |
1954e6bb | 182 | odp_port_t port_no, uint32_t hash); |
b90de034 AW |
183 | static int dpif_linux_refresh_channels(struct dpif_linux *, |
184 | uint32_t n_handlers); | |
f0fef760 BP |
185 | static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, |
186 | struct ofpbuf *); | |
187 | static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, | |
188 | const struct ofpbuf *); | |
189 | ||
96fba48f BP |
190 | static struct dpif_linux * |
191 | dpif_linux_cast(const struct dpif *dpif) | |
192 | { | |
193 | dpif_assert_class(dpif, &dpif_linux_class); | |
194 | return CONTAINER_OF(dpif, struct dpif_linux, dpif); | |
195 | } | |
196 | ||
d3d22744 | 197 | static int |
2240af25 DDP |
198 | dpif_linux_enumerate(struct sset *all_dps, |
199 | const struct dpif_class *dpif_class OVS_UNUSED) | |
d3d22744 | 200 | { |
aaff4b55 | 201 | struct nl_dump dump; |
d57695d7 JS |
202 | uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; |
203 | struct ofpbuf msg, buf; | |
aaff4b55 | 204 | int error; |
982b8810 | 205 | |
aaff4b55 BP |
206 | error = dpif_linux_init(); |
207 | if (error) { | |
208 | return error; | |
982b8810 | 209 | } |
d3d22744 | 210 | |
d57695d7 | 211 | ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); |
aaff4b55 | 212 | dpif_linux_dp_dump_start(&dump); |
d57695d7 | 213 | while (nl_dump_next(&dump, &msg, &buf)) { |
aaff4b55 | 214 | struct dpif_linux_dp dp; |
d6569377 | 215 | |
aaff4b55 | 216 | if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) { |
d0c23a1a | 217 | sset_add(all_dps, dp.name); |
d3d22744 BP |
218 | } |
219 | } | |
d57695d7 | 220 | ofpbuf_uninit(&buf); |
aaff4b55 | 221 | return nl_dump_done(&dump); |
d3d22744 BP |
222 | } |
223 | ||
96fba48f | 224 | static int |
4a387741 BP |
225 | dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name, |
226 | bool create, struct dpif **dpifp) | |
96fba48f | 227 | { |
982b8810 | 228 | struct dpif_linux_dp dp_request, dp; |
c19e6535 | 229 | struct ofpbuf *buf; |
ea36840f | 230 | uint32_t upcall_pid; |
c19e6535 | 231 | int error; |
96fba48f | 232 | |
982b8810 BP |
233 | error = dpif_linux_init(); |
234 | if (error) { | |
235 | return error; | |
236 | } | |
237 | ||
982b8810 BP |
238 | /* Create or look up datapath. */ |
239 | dpif_linux_dp_init(&dp_request); | |
ea36840f BP |
240 | if (create) { |
241 | dp_request.cmd = OVS_DP_CMD_NEW; | |
242 | upcall_pid = 0; | |
243 | dp_request.upcall_pid = &upcall_pid; | |
244 | } else { | |
b7fd5e38 TG |
245 | /* Use OVS_DP_CMD_SET to report user features */ |
246 | dp_request.cmd = OVS_DP_CMD_SET; | |
ea36840f | 247 | } |
254f2dc8 | 248 | dp_request.name = name; |
b7fd5e38 | 249 | dp_request.user_features |= OVS_DP_F_UNALIGNED; |
1579cf67 | 250 | dp_request.user_features |= OVS_DP_F_VPORT_PIDS; |
982b8810 BP |
251 | error = dpif_linux_dp_transact(&dp_request, &dp, &buf); |
252 | if (error) { | |
253 | return error; | |
c19e6535 | 254 | } |
254f2dc8 | 255 | |
e4516b20 | 256 | error = open_dpif(&dp, dpifp); |
8f4a4df5 | 257 | ofpbuf_delete(buf); |
e4516b20 | 258 | return error; |
c19e6535 BP |
259 | } |
260 | ||
e4516b20 | 261 | static int |
254f2dc8 | 262 | open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp) |
c19e6535 | 263 | { |
c19e6535 | 264 | struct dpif_linux *dpif; |
c19e6535 | 265 | |
17411ecf | 266 | dpif = xzalloc(sizeof *dpif); |
e4516b20 | 267 | dpif->port_notifier = NULL; |
1579cf67 | 268 | fat_rwlock_init(&dpif->upcall_lock); |
c19e6535 | 269 | |
254f2dc8 BP |
270 | dpif_init(&dpif->dpif, &dpif_linux_class, dp->name, |
271 | dp->dp_ifindex, dp->dp_ifindex); | |
c19e6535 | 272 | |
254f2dc8 | 273 | dpif->dp_ifindex = dp->dp_ifindex; |
c19e6535 | 274 | *dpifp = &dpif->dpif; |
e4516b20 BP |
275 | |
276 | return 0; | |
96fba48f BP |
277 | } |
278 | ||
1579cf67 AW |
/* Destroys each of the 'n_socks' netlink sockets referenced by 'socksp',
 * then frees the 'socksp' array itself. */
static void
vport_del_socksp(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t idx = 0;

    while (idx < n_socks) {
        nl_sock_destroy(socksp[idx]);
        idx++;
    }

    free(socksp);
}
989fd548 | 292 | |
1579cf67 AW |
293 | /* Creates an array of netlink sockets. Returns an array of the |
294 | * corresponding pointers. Records the error in 'error'. */ | |
295 | static struct nl_sock ** | |
296 | vport_create_socksp(uint32_t n_socks, int *error) | |
297 | { | |
298 | struct nl_sock **socksp = xzalloc(n_socks * sizeof *socksp); | |
299 | size_t i; | |
300 | ||
301 | for (i = 0; i < n_socks; i++) { | |
302 | *error = nl_sock_create(NETLINK_GENERIC, &socksp[i]); | |
303 | if (*error) { | |
304 | goto error; | |
989fd548 | 305 | } |
1579cf67 | 306 | } |
989fd548 | 307 | |
1579cf67 | 308 | return socksp; |
9fafa796 | 309 | |
1579cf67 AW |
310 | error: |
311 | vport_del_socksp(socksp, n_socks); | |
989fd548 | 312 | |
1579cf67 AW |
313 | return NULL; |
314 | } | |
315 | ||
/* Returns a newly allocated array holding the Netlink pid of each of the
 * 'n_socks' sockets in 'socksp'.  When 'socksp' is NULL the result is instead
 * a one-element array containing 0.  The caller frees the array. */
static uint32_t *
vport_socksp_to_pids(struct nl_sock **socksp, uint32_t n_socks)
{
    uint32_t *pids;
    uint32_t idx;

    if (socksp == NULL) {
        return xzalloc(sizeof *pids);
    }

    pids = xzalloc(n_socks * sizeof *pids);
    for (idx = 0; idx < n_socks; idx++) {
        pids[idx] = nl_sock_pid(socksp[idx]);
    }

    return pids;
}
337 | ||
338 | /* Given the port number 'port_idx', extracts the pids of netlink sockets | |
339 | * associated to the port and assigns it to 'upcall_pids'. */ | |
340 | static bool | |
341 | vport_get_pids(struct dpif_linux *dpif, uint32_t port_idx, | |
342 | uint32_t **upcall_pids) | |
343 | { | |
344 | uint32_t *pids; | |
345 | size_t i; | |
989fd548 | 346 | |
1579cf67 AW |
347 | /* Since the nl_sock can only be assigned in either all |
348 | * or none "dpif->handlers" channels, the following check | |
349 | * would suffice. */ | |
350 | if (!dpif->handlers[0].channels[port_idx].sock) { | |
351 | return false; | |
352 | } | |
353 | ||
354 | pids = xzalloc(dpif->n_handlers * sizeof *pids); | |
355 | ||
356 | for (i = 0; i < dpif->n_handlers; i++) { | |
357 | pids[i] = nl_sock_pid(dpif->handlers[i].channels[port_idx].sock); | |
358 | } | |
359 | ||
360 | *upcall_pids = pids; | |
989fd548 | 361 | |
1579cf67 | 362 | return true; |
989fd548 JP |
363 | } |
364 | ||
/* Installs the sockets in 'socksp' as the upcall channels of datapath port
 * 'port_no'.
 *
 * 'socksp' is indexed by handler: socksp[i] is added to handler i's epoll set
 * and recorded in that handler's channel slot for the port.  If the port
 * number does not fit in the current arrays, every handler's 'channels' and
 * 'epoll_events' arrays are first grown to 'port number + 1' elements.
 *
 * Returns 0 on success, and also when 'dpif->handlers' is NULL, in which case
 * nothing is done.  Returns EFBIG if the required array size would exceed
 * MAX_PORTS, or the errno value of a failed epoll_ctl().  On epoll failure the
 * sockets already registered are removed from their epoll sets and their
 * channel slots are reset to NULL; the sockets themselves are not destroyed
 * here. */
static int
vport_add_channels(struct dpif_linux *dpif, odp_port_t port_no,
                   struct nl_sock **socksp)
{
    struct epoll_event event;
    uint32_t port_idx = odp_to_u32(port_no);
    size_t i, j;
    int error;

    if (dpif->handlers == NULL) {
        return 0;
    }

    /* We assume that the datapath densely chooses port numbers, which can
     * therefore be used as an index into 'channels' and 'epoll_events' of
     * 'dpif->handler'. */
    if (port_idx >= dpif->uc_array_size) {
        uint32_t new_size = port_idx + 1;

        if (new_size > MAX_PORTS) {
            VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big",
                         dpif_name(&dpif->dpif), port_no);
            return EFBIG;
        }

        for (i = 0; i < dpif->n_handlers; i++) {
            struct dpif_handler *handler = &dpif->handlers[i];

            handler->channels = xrealloc(handler->channels,
                                         new_size * sizeof *handler->channels);

            /* Mark the newly added slots as unused. */
            for (j = dpif->uc_array_size; j < new_size; j++) {
                handler->channels[j].sock = NULL;
            }

            handler->epoll_events = xrealloc(handler->epoll_events,
                new_size * sizeof *handler->epoll_events);

        }
        dpif->uc_array_size = new_size;
    }

    /* The epoll user data carries the port index of the channel. */
    memset(&event, 0, sizeof event);
    event.events = EPOLLIN;
    event.data.u32 = port_idx;

    for (i = 0; i < dpif->n_handlers; i++) {
        struct dpif_handler *handler = &dpif->handlers[i];

        if (epoll_ctl(handler->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(socksp[i]),
                      &event) < 0) {
            error = errno;
            goto error;
        }
        dpif->handlers[i].channels[port_idx].sock = socksp[i];
        dpif->handlers[i].channels[port_idx].last_poll = LLONG_MIN;
    }

    return 0;

error:
    /* Undo the registrations made for handlers 0..i-1. */
    for (j = 0; j < i; j++) {
        epoll_ctl(dpif->handlers[j].epoll_fd, EPOLL_CTL_DEL,
                  nl_sock_fd(socksp[j]), NULL);
        dpif->handlers[j].channels[port_idx].sock = NULL;
    }

    return error;
}
434 | ||
435 | static void | |
1579cf67 | 436 | vport_del_channels(struct dpif_linux *dpif, odp_port_t port_no) |
989fd548 | 437 | { |
4e022ec0 | 438 | uint32_t port_idx = odp_to_u32(port_no); |
1579cf67 | 439 | size_t i; |
989fd548 | 440 | |
1579cf67 | 441 | if (!dpif->handlers || port_idx >= dpif->uc_array_size) { |
989fd548 JP |
442 | return; |
443 | } | |
444 | ||
1579cf67 AW |
445 | /* Since the sock can only be assigned in either all or none |
446 | * of "dpif->handlers" channels, the following check would | |
447 | * suffice. */ | |
448 | if (!dpif->handlers[0].channels[port_idx].sock) { | |
989fd548 JP |
449 | return; |
450 | } | |
451 | ||
1579cf67 AW |
452 | for (i = 0; i < dpif->n_handlers; i++) { |
453 | struct dpif_handler *handler = &dpif->handlers[i]; | |
454 | ||
455 | epoll_ctl(handler->epoll_fd, EPOLL_CTL_DEL, | |
456 | nl_sock_fd(handler->channels[port_idx].sock), NULL); | |
457 | nl_sock_destroy(handler->channels[port_idx].sock); | |
458 | handler->channels[port_idx].sock = NULL; | |
459 | handler->event_offset = handler->n_events = 0; | |
460 | } | |
461 | } | |
462 | ||
463 | static void | |
b90de034 | 464 | destroy_all_channels(struct dpif_linux *dpif) OVS_REQ_WRLOCK(dpif->upcall_lock) |
1579cf67 AW |
465 | { |
466 | unsigned int i; | |
467 | ||
468 | if (!dpif->handlers) { | |
469 | return; | |
470 | } | |
471 | ||
472 | for (i = 0; i < dpif->uc_array_size; i++ ) { | |
473 | struct dpif_linux_vport vport_request; | |
474 | uint32_t upcall_pids = 0; | |
475 | ||
476 | /* Since the sock can only be assigned in either all or none | |
477 | * of "dpif->handlers" channels, the following check would | |
478 | * suffice. */ | |
479 | if (!dpif->handlers[0].channels[i].sock) { | |
480 | continue; | |
481 | } | |
482 | ||
483 | /* Turn off upcalls. */ | |
484 | dpif_linux_vport_init(&vport_request); | |
485 | vport_request.cmd = OVS_VPORT_CMD_SET; | |
486 | vport_request.dp_ifindex = dpif->dp_ifindex; | |
487 | vport_request.port_no = u32_to_odp(i); | |
488 | vport_request.upcall_pids = &upcall_pids; | |
489 | dpif_linux_vport_transact(&vport_request, NULL, NULL); | |
490 | ||
491 | vport_del_channels(dpif, u32_to_odp(i)); | |
492 | } | |
493 | ||
494 | for (i = 0; i < dpif->n_handlers; i++) { | |
495 | struct dpif_handler *handler = &dpif->handlers[i]; | |
496 | ||
497 | close(handler->epoll_fd); | |
498 | free(handler->epoll_events); | |
499 | free(handler->channels); | |
500 | } | |
989fd548 | 501 | |
1579cf67 AW |
502 | free(dpif->handlers); |
503 | dpif->handlers = NULL; | |
504 | dpif->n_handlers = 0; | |
505 | dpif->uc_array_size = 0; | |
17411ecf JG |
506 | } |
507 | ||
96fba48f BP |
508 | static void |
509 | dpif_linux_close(struct dpif *dpif_) | |
510 | { | |
511 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
c7178a0b | 512 | |
e4516b20 | 513 | nl_sock_destroy(dpif->port_notifier); |
1579cf67 AW |
514 | |
515 | fat_rwlock_wrlock(&dpif->upcall_lock); | |
516 | destroy_all_channels(dpif); | |
517 | fat_rwlock_unlock(&dpif->upcall_lock); | |
518 | ||
519 | fat_rwlock_destroy(&dpif->upcall_lock); | |
96fba48f BP |
520 | free(dpif); |
521 | } | |
522 | ||
523 | static int | |
7dab847a | 524 | dpif_linux_destroy(struct dpif *dpif_) |
96fba48f | 525 | { |
d6569377 BP |
526 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
527 | struct dpif_linux_dp dp; | |
528 | ||
529 | dpif_linux_dp_init(&dp); | |
df2c07f4 | 530 | dp.cmd = OVS_DP_CMD_DEL; |
254f2dc8 | 531 | dp.dp_ifindex = dpif->dp_ifindex; |
d6569377 | 532 | return dpif_linux_dp_transact(&dp, NULL, NULL); |
96fba48f BP |
533 | } |
534 | ||
61eae437 BP |
535 | static void |
536 | dpif_linux_run(struct dpif *dpif_) | |
537 | { | |
538 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1579cf67 | 539 | |
61eae437 BP |
540 | if (dpif->refresh_channels) { |
541 | dpif->refresh_channels = false; | |
1579cf67 | 542 | fat_rwlock_wrlock(&dpif->upcall_lock); |
9b00386b | 543 | dpif_linux_refresh_channels(dpif, dpif->n_handlers); |
1579cf67 | 544 | fat_rwlock_unlock(&dpif->upcall_lock); |
61eae437 BP |
545 | } |
546 | } | |
547 | ||
96fba48f | 548 | static int |
a8d9304d | 549 | dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) |
96fba48f | 550 | { |
d6569377 BP |
551 | struct dpif_linux_dp dp; |
552 | struct ofpbuf *buf; | |
553 | int error; | |
554 | ||
555 | error = dpif_linux_dp_get(dpif_, &dp, &buf); | |
556 | if (!error) { | |
a8d9304d BP |
557 | stats->n_hit = dp.stats.n_hit; |
558 | stats->n_missed = dp.stats.n_missed; | |
559 | stats->n_lost = dp.stats.n_lost; | |
560 | stats->n_flows = dp.stats.n_flows; | |
847108dc AZ |
561 | stats->n_masks = dp.megaflow_stats.n_masks; |
562 | stats->n_mask_hit = dp.megaflow_stats.n_mask_hit; | |
d6569377 BP |
563 | ofpbuf_delete(buf); |
564 | } | |
565 | return error; | |
96fba48f BP |
566 | } |
567 | ||
b9ad7294 EJ |
568 | static const char * |
569 | get_vport_type(const struct dpif_linux_vport *vport) | |
570 | { | |
571 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); | |
572 | ||
573 | switch (vport->type) { | |
5ed51209 JS |
574 | case OVS_VPORT_TYPE_NETDEV: { |
575 | const char *type = netdev_get_type_from_name(vport->name); | |
576 | ||
577 | return type ? type : "system"; | |
578 | } | |
b9ad7294 EJ |
579 | |
580 | case OVS_VPORT_TYPE_INTERNAL: | |
581 | return "internal"; | |
582 | ||
583 | case OVS_VPORT_TYPE_GRE: | |
584 | return "gre"; | |
585 | ||
586 | case OVS_VPORT_TYPE_GRE64: | |
587 | return "gre64"; | |
588 | ||
b9ad7294 EJ |
589 | case OVS_VPORT_TYPE_VXLAN: |
590 | return "vxlan"; | |
591 | ||
a6ae068b LJ |
592 | case OVS_VPORT_TYPE_LISP: |
593 | return "lisp"; | |
594 | ||
b9ad7294 EJ |
595 | case OVS_VPORT_TYPE_UNSPEC: |
596 | case __OVS_VPORT_TYPE_MAX: | |
597 | break; | |
598 | } | |
599 | ||
600 | VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u", | |
601 | vport->dp_ifindex, vport->name, (unsigned int) vport->type); | |
602 | return "unknown"; | |
603 | } | |
604 | ||
c060c4cf EJ |
605 | static enum ovs_vport_type |
606 | netdev_to_ovs_vport_type(const struct netdev *netdev) | |
607 | { | |
608 | const char *type = netdev_get_type(netdev); | |
609 | ||
610 | if (!strcmp(type, "tap") || !strcmp(type, "system")) { | |
611 | return OVS_VPORT_TYPE_NETDEV; | |
612 | } else if (!strcmp(type, "internal")) { | |
613 | return OVS_VPORT_TYPE_INTERNAL; | |
614 | } else if (strstr(type, "gre64")) { | |
615 | return OVS_VPORT_TYPE_GRE64; | |
616 | } else if (strstr(type, "gre")) { | |
617 | return OVS_VPORT_TYPE_GRE; | |
c060c4cf EJ |
618 | } else if (!strcmp(type, "vxlan")) { |
619 | return OVS_VPORT_TYPE_VXLAN; | |
a6ae068b LJ |
620 | } else if (!strcmp(type, "lisp")) { |
621 | return OVS_VPORT_TYPE_LISP; | |
c060c4cf EJ |
622 | } else { |
623 | return OVS_VPORT_TYPE_UNSPEC; | |
624 | } | |
625 | } | |
626 | ||
/* Adds 'netdev' as a new port of 'dpif'.
 *
 * On entry '*port_nop' is the requested port number; on success it is
 * replaced by the port number in the datapath's reply.
 *
 * When upcall handlers exist, one netlink socket per handler is created, the
 * sockets' pids are passed to the datapath with the new port, and the sockets
 * are then installed as the port's channels.  Without handlers a single
 * upcall pid of 0 is passed instead.
 *
 * Returns 0 on success, EINVAL if the netdev type has no datapath port type,
 * or the error from socket creation, the Netlink transaction, or channel
 * installation.  If channel installation fails, the port just created is
 * deleted again.  On every failure path the sockets created here are
 * destroyed. */
static int
dpif_linux_port_add__(struct dpif_linux *dpif, struct netdev *netdev,
                      odp_port_t *port_nop)
    OVS_REQ_WRLOCK(dpif->upcall_lock)
{
    const struct netdev_tunnel_config *tnl_cfg;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *name = netdev_vport_get_dpif_port(netdev,
                                                  namebuf, sizeof namebuf);
    const char *type = netdev_get_type(netdev);
    struct dpif_linux_vport request, reply;
    struct ofpbuf *buf;
    uint64_t options_stub[64 / 8];
    struct ofpbuf options;
    struct nl_sock **socksp = NULL;
    uint32_t *upcall_pids;
    int error = 0;

    if (dpif->handlers) {
        socksp = vport_create_socksp(dpif->n_handlers, &error);
        if (!socksp) {
            return error;
        }
    }

    dpif_linux_vport_init(&request);
    request.cmd = OVS_VPORT_CMD_NEW;
    request.dp_ifindex = dpif->dp_ifindex;
    request.type = netdev_to_ovs_vport_type(netdev);
    if (request.type == OVS_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(&dpif->dpif), name, type);
        vport_del_socksp(socksp, dpif->n_handlers);
        return EINVAL;
    }
    request.name = name;

    /* Ask for LRO to be switched off on plain network devices. */
    if (request.type == OVS_VPORT_TYPE_NETDEV) {
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
    }

    /* A tunnel with an explicit destination port passes it as an option;
     * 'dst_port' is converted with ntohs() before being put in the message. */
    tnl_cfg = netdev_get_tunnel_config(netdev);
    if (tnl_cfg && tnl_cfg->dst_port != 0) {
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
        nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
                       ntohs(tnl_cfg->dst_port));
        request.options = ofpbuf_data(&options);
        request.options_len = ofpbuf_size(&options);
    }

    request.port_no = *port_nop;
    /* With no sockets, vport_socksp_to_pids() yields one pid of value 0, so
     * the count passed to the datapath is 1 in that case. */
    upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
    request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
    request.upcall_pids = upcall_pids;

    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (!error) {
        *port_nop = reply.port_no;
    } else {
        if (error == EBUSY && *port_nop != ODPP_NONE) {
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
                      dpif_name(&dpif->dpif), *port_nop);
        }

        vport_del_socksp(socksp, dpif->n_handlers);
        /* NOTE(review): the exit path deletes 'buf'; this presumably relies
         * on the transaction setting it to NULL on failure -- confirm. */
        goto exit;
    }

    if (socksp) {
        error = vport_add_channels(dpif, *port_nop, socksp);
        if (error) {
            VLOG_INFO("%s: could not add channel for port %s",
                      dpif_name(&dpif->dpif), name);

            /* Delete the port. */
            dpif_linux_vport_init(&request);
            request.cmd = OVS_VPORT_CMD_DEL;
            request.dp_ifindex = dpif->dp_ifindex;
            request.port_no = *port_nop;
            dpif_linux_vport_transact(&request, NULL, NULL);
            vport_del_socksp(socksp, dpif->n_handlers);
            goto exit;
        }
    }
    /* The sockets now belong to the channels; only the array is released. */
    free(socksp);

exit:
    ofpbuf_delete(buf);
    free(upcall_pids);

    return error;
}
720 | ||
721 | static int | |
9fafa796 BP |
722 | dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev, |
723 | odp_port_t *port_nop) | |
724 | { | |
725 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
726 | int error; | |
727 | ||
1579cf67 | 728 | fat_rwlock_wrlock(&dpif->upcall_lock); |
9b00386b | 729 | error = dpif_linux_port_add__(dpif, netdev, port_nop); |
1579cf67 | 730 | fat_rwlock_unlock(&dpif->upcall_lock); |
9fafa796 BP |
731 | |
732 | return error; | |
733 | } | |
734 | ||
735 | static int | |
9b00386b | 736 | dpif_linux_port_del__(struct dpif_linux *dpif, odp_port_t port_no) |
b90de034 | 737 | OVS_REQ_WRLOCK(dpif->upcall_lock) |
96fba48f | 738 | { |
c19e6535 | 739 | struct dpif_linux_vport vport; |
773cd538 | 740 | int error; |
c19e6535 BP |
741 | |
742 | dpif_linux_vport_init(&vport); | |
df2c07f4 | 743 | vport.cmd = OVS_VPORT_CMD_DEL; |
254f2dc8 | 744 | vport.dp_ifindex = dpif->dp_ifindex; |
c19e6535 | 745 | vport.port_no = port_no; |
773cd538 EJ |
746 | error = dpif_linux_vport_transact(&vport, NULL, NULL); |
747 | ||
1579cf67 | 748 | vport_del_channels(dpif, port_no); |
989fd548 | 749 | |
773cd538 | 750 | return error; |
c3827f61 | 751 | } |
3abc4a1a | 752 | |
9fafa796 BP |
753 | static int |
754 | dpif_linux_port_del(struct dpif *dpif_, odp_port_t port_no) | |
755 | { | |
756 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
757 | int error; | |
758 | ||
1579cf67 | 759 | fat_rwlock_wrlock(&dpif->upcall_lock); |
9b00386b | 760 | error = dpif_linux_port_del__(dpif, port_no); |
1579cf67 | 761 | fat_rwlock_unlock(&dpif->upcall_lock); |
9fafa796 BP |
762 | |
763 | return error; | |
764 | } | |
765 | ||
c3827f61 | 766 | static int |
9b00386b | 767 | dpif_linux_port_query__(const struct dpif_linux *dpif, odp_port_t port_no, |
4c738a8d | 768 | const char *port_name, struct dpif_port *dpif_port) |
c3827f61 | 769 | { |
c19e6535 BP |
770 | struct dpif_linux_vport request; |
771 | struct dpif_linux_vport reply; | |
772 | struct ofpbuf *buf; | |
4c738a8d BP |
773 | int error; |
774 | ||
c19e6535 | 775 | dpif_linux_vport_init(&request); |
df2c07f4 | 776 | request.cmd = OVS_VPORT_CMD_GET; |
9b00386b | 777 | request.dp_ifindex = dpif->dp_ifindex; |
c19e6535 BP |
778 | request.port_no = port_no; |
779 | request.name = port_name; | |
4c738a8d | 780 | |
c19e6535 BP |
781 | error = dpif_linux_vport_transact(&request, &reply, &buf); |
782 | if (!error) { | |
33db1592 BP |
783 | if (reply.dp_ifindex != request.dp_ifindex) { |
784 | /* A query by name reported that 'port_name' is in some datapath | |
785 | * other than 'dpif', but the caller wants to know about 'dpif'. */ | |
786 | error = ENODEV; | |
4afba28d | 787 | } else if (dpif_port) { |
33db1592 | 788 | dpif_port->name = xstrdup(reply.name); |
b9ad7294 | 789 | dpif_port->type = xstrdup(get_vport_type(&reply)); |
33db1592 BP |
790 | dpif_port->port_no = reply.port_no; |
791 | } | |
c19e6535 | 792 | ofpbuf_delete(buf); |
3abc4a1a | 793 | } |
c19e6535 | 794 | return error; |
96fba48f BP |
795 | } |
796 | ||
797 | static int | |
9b00386b | 798 | dpif_linux_port_query_by_number(const struct dpif *dpif_, odp_port_t port_no, |
4c738a8d | 799 | struct dpif_port *dpif_port) |
96fba48f | 800 | { |
9b00386b AW |
801 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
802 | ||
c19e6535 | 803 | return dpif_linux_port_query__(dpif, port_no, NULL, dpif_port); |
96fba48f BP |
804 | } |
805 | ||
/* Queries the port of 'dpif_' named 'devname', passing 0 as the port number;
 * see dpif_linux_port_query__() for 'dpif_port' and the return value. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif_, const char *devname,
                              struct dpif_port *dpif_port)
{
    return dpif_linux_port_query__(dpif_linux_cast(dpif_), 0, devname,
                                   dpif_port);
}
814 | ||
98403001 | 815 | static uint32_t |
b90de034 AW |
816 | dpif_linux_port_get_pid__(const struct dpif_linux *dpif, odp_port_t port_no, |
817 | uint32_t hash) | |
818 | OVS_REQ_RDLOCK(dpif->upcall_lock) | |
98403001 | 819 | { |
4e022ec0 | 820 | uint32_t port_idx = odp_to_u32(port_no); |
9fafa796 | 821 | uint32_t pid = 0; |
98403001 | 822 | |
1579cf67 | 823 | if (dpif->handlers) { |
4e022ec0 | 824 | /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s |
989fd548 | 825 | * channel, since it is not heavily loaded. */ |
4e022ec0 | 826 | uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx; |
1579cf67 AW |
827 | struct dpif_handler *h = &dpif->handlers[hash % dpif->n_handlers]; |
828 | ||
829 | pid = nl_sock_pid(h->channels[idx].sock); | |
98403001 | 830 | } |
9fafa796 BP |
831 | |
832 | return pid; | |
98403001 BP |
833 | } |
834 | ||
b90de034 AW |
835 | static uint32_t |
836 | dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, | |
837 | uint32_t hash) | |
838 | { | |
839 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
840 | uint32_t ret; | |
841 | ||
842 | fat_rwlock_rdlock(&dpif->upcall_lock); | |
843 | ret = dpif_linux_port_get_pid__(dpif, port_no, hash); | |
844 | fat_rwlock_unlock(&dpif->upcall_lock); | |
845 | ||
846 | return ret; | |
847 | } | |
848 | ||
96fba48f BP |
849 | static int |
850 | dpif_linux_flow_flush(struct dpif *dpif_) | |
851 | { | |
550f0db4 | 852 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
37a1300c BP |
853 | struct dpif_linux_flow flow; |
854 | ||
855 | dpif_linux_flow_init(&flow); | |
df2c07f4 | 856 | flow.cmd = OVS_FLOW_CMD_DEL; |
254f2dc8 | 857 | flow.dp_ifindex = dpif->dp_ifindex; |
37a1300c | 858 | return dpif_linux_flow_transact(&flow, NULL, NULL); |
96fba48f BP |
859 | } |
860 | ||
c19e6535 | 861 | struct dpif_linux_port_state { |
f0fef760 | 862 | struct nl_dump dump; |
d57695d7 | 863 | struct ofpbuf buf; |
c19e6535 BP |
864 | }; |
865 | ||
222837c4 | 866 | static void |
9b00386b AW |
867 | dpif_linux_port_dump_start__(const struct dpif_linux *dpif, |
868 | struct nl_dump *dump) | |
96fba48f | 869 | { |
f0fef760 BP |
870 | struct dpif_linux_vport request; |
871 | struct ofpbuf *buf; | |
872 | ||
f0fef760 | 873 | dpif_linux_vport_init(&request); |
067f1e23 | 874 | request.cmd = OVS_VPORT_CMD_GET; |
254f2dc8 | 875 | request.dp_ifindex = dpif->dp_ifindex; |
f0fef760 BP |
876 | |
877 | buf = ofpbuf_new(1024); | |
878 | dpif_linux_vport_to_ofpbuf(&request, buf); | |
222837c4 | 879 | nl_dump_start(dump, NETLINK_GENERIC, buf); |
f0fef760 | 880 | ofpbuf_delete(buf); |
222837c4 BP |
881 | } |
882 | ||
883 | static int | |
9b00386b | 884 | dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep) |
222837c4 | 885 | { |
9b00386b | 886 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
222837c4 BP |
887 | struct dpif_linux_port_state *state; |
888 | ||
889 | *statep = state = xmalloc(sizeof *state); | |
890 | dpif_linux_port_dump_start__(dpif, &state->dump); | |
f0fef760 | 891 | |
d57695d7 | 892 | ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); |
b0ec0f27 BP |
893 | return 0; |
894 | } | |
895 | ||
7c1ef244 | 896 | static int |
9b00386b | 897 | dpif_linux_port_dump_next__(const struct dpif_linux *dpif, struct nl_dump *dump, |
d57695d7 JS |
898 | struct dpif_linux_vport *vport, |
899 | struct ofpbuf *buffer) | |
222837c4 | 900 | { |
222837c4 BP |
901 | struct ofpbuf buf; |
902 | int error; | |
903 | ||
d57695d7 | 904 | if (!nl_dump_next(dump, &buf, buffer)) { |
222837c4 BP |
905 | return EOF; |
906 | } | |
907 | ||
908 | error = dpif_linux_vport_from_ofpbuf(vport, &buf); | |
909 | if (error) { | |
910 | VLOG_WARN_RL(&error_rl, "%s: failed to parse vport record (%s)", | |
911 | dpif_name(&dpif->dpif), ovs_strerror(error)); | |
912 | } | |
913 | return error; | |
914 | } | |
915 | ||
b0ec0f27 | 916 | static int |
9b00386b | 917 | dpif_linux_port_dump_next(const struct dpif *dpif_, void *state_, |
4c738a8d | 918 | struct dpif_port *dpif_port) |
b0ec0f27 | 919 | { |
9b00386b | 920 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
c19e6535 | 921 | struct dpif_linux_port_state *state = state_; |
f0fef760 | 922 | struct dpif_linux_vport vport; |
96fba48f BP |
923 | int error; |
924 | ||
d57695d7 JS |
925 | error = dpif_linux_port_dump_next__(dpif, &state->dump, &vport, |
926 | &state->buf); | |
c3827f61 | 927 | if (error) { |
f0fef760 | 928 | return error; |
c3827f61 | 929 | } |
ebc56baa | 930 | dpif_port->name = CONST_CAST(char *, vport.name); |
b9ad7294 | 931 | dpif_port->type = CONST_CAST(char *, get_vport_type(&vport)); |
f0fef760 BP |
932 | dpif_port->port_no = vport.port_no; |
933 | return 0; | |
b0ec0f27 BP |
934 | } |
935 | ||
936 | static int | |
95b1d73a | 937 | dpif_linux_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_) |
b0ec0f27 | 938 | { |
c19e6535 | 939 | struct dpif_linux_port_state *state = state_; |
f0fef760 | 940 | int error = nl_dump_done(&state->dump); |
8522b383 | 941 | |
d57695d7 | 942 | ofpbuf_uninit(&state->buf); |
b0ec0f27 | 943 | free(state); |
f0fef760 | 944 | return error; |
96fba48f BP |
945 | } |
946 | ||
e9e28be3 BP |
947 | static int |
948 | dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) | |
949 | { | |
950 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
e9e28be3 | 951 | |
e4516b20 BP |
952 | /* Lazily create the Netlink socket to listen for notifications. */ |
953 | if (!dpif->port_notifier) { | |
954 | struct nl_sock *sock; | |
955 | int error; | |
956 | ||
957 | error = nl_sock_create(NETLINK_GENERIC, &sock); | |
958 | if (error) { | |
959 | return error; | |
960 | } | |
961 | ||
962 | error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup); | |
963 | if (error) { | |
964 | nl_sock_destroy(sock); | |
965 | return error; | |
966 | } | |
967 | dpif->port_notifier = sock; | |
968 | ||
969 | /* We have no idea of the current state so report that everything | |
970 | * changed. */ | |
971 | return ENOBUFS; | |
972 | } | |
973 | ||
974 | for (;;) { | |
975 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
976 | uint64_t buf_stub[4096 / 8]; | |
977 | struct ofpbuf buf; | |
978 | int error; | |
979 | ||
980 | ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); | |
981 | error = nl_sock_recv(dpif->port_notifier, &buf, false); | |
982 | if (!error) { | |
983 | struct dpif_linux_vport vport; | |
984 | ||
985 | error = dpif_linux_vport_from_ofpbuf(&vport, &buf); | |
986 | if (!error) { | |
987 | if (vport.dp_ifindex == dpif->dp_ifindex | |
988 | && (vport.cmd == OVS_VPORT_CMD_NEW | |
989 | || vport.cmd == OVS_VPORT_CMD_DEL | |
990 | || vport.cmd == OVS_VPORT_CMD_SET)) { | |
991 | VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8, | |
992 | dpif->dpif.full_name, vport.name, vport.cmd); | |
1579cf67 | 993 | if (vport.cmd == OVS_VPORT_CMD_DEL && dpif->handlers) { |
61eae437 BP |
994 | dpif->refresh_channels = true; |
995 | } | |
e4516b20 | 996 | *devnamep = xstrdup(vport.name); |
59e0c910 | 997 | ofpbuf_uninit(&buf); |
e4516b20 | 998 | return 0; |
e4516b20 BP |
999 | } |
1000 | } | |
59e0c910 BP |
1001 | } else if (error != EAGAIN) { |
1002 | VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)", | |
1003 | ovs_strerror(error)); | |
1004 | nl_sock_drain(dpif->port_notifier); | |
1005 | error = ENOBUFS; | |
e4516b20 BP |
1006 | } |
1007 | ||
59e0c910 BP |
1008 | ofpbuf_uninit(&buf); |
1009 | if (error) { | |
1010 | return error; | |
1011 | } | |
e9e28be3 | 1012 | } |
e9e28be3 BP |
1013 | } |
1014 | ||
1015 | static void | |
1016 | dpif_linux_port_poll_wait(const struct dpif *dpif_) | |
1017 | { | |
550f0db4 | 1018 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
e4516b20 BP |
1019 | |
1020 | if (dpif->port_notifier) { | |
1021 | nl_sock_wait(dpif->port_notifier, POLLIN); | |
1022 | } else { | |
e9e28be3 | 1023 | poll_immediate_wake(); |
e9e28be3 BP |
1024 | } |
1025 | } | |
1026 | ||
96fba48f | 1027 | static int |
9b00386b | 1028 | dpif_linux_flow_get__(const struct dpif_linux *dpif, |
30053024 BP |
1029 | const struct nlattr *key, size_t key_len, |
1030 | struct dpif_linux_flow *reply, struct ofpbuf **bufp) | |
96fba48f | 1031 | { |
30053024 | 1032 | struct dpif_linux_flow request; |
feebdea2 | 1033 | |
d6569377 | 1034 | dpif_linux_flow_init(&request); |
df2c07f4 | 1035 | request.cmd = OVS_FLOW_CMD_GET; |
254f2dc8 | 1036 | request.dp_ifindex = dpif->dp_ifindex; |
d6569377 BP |
1037 | request.key = key; |
1038 | request.key_len = key_len; | |
30053024 BP |
1039 | return dpif_linux_flow_transact(&request, reply, bufp); |
1040 | } | |
1041 | ||
1042 | static int | |
1043 | dpif_linux_flow_get(const struct dpif *dpif_, | |
1044 | const struct nlattr *key, size_t key_len, | |
1045 | struct ofpbuf **actionsp, struct dpif_flow_stats *stats) | |
1046 | { | |
9b00386b | 1047 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
30053024 BP |
1048 | struct dpif_linux_flow reply; |
1049 | struct ofpbuf *buf; | |
1050 | int error; | |
1051 | ||
9b00386b | 1052 | error = dpif_linux_flow_get__(dpif, key, key_len, &reply, &buf); |
feebdea2 BP |
1053 | if (!error) { |
1054 | if (stats) { | |
d6569377 | 1055 | dpif_linux_flow_get_stats(&reply, stats); |
feebdea2 | 1056 | } |
d6569377 | 1057 | if (actionsp) { |
1f317cb5 PS |
1058 | ofpbuf_set_data(buf, CONST_CAST(struct nlattr *, reply.actions)); |
1059 | ofpbuf_set_size(buf, reply.actions_len); | |
d6569377 BP |
1060 | *actionsp = buf; |
1061 | } else { | |
1062 | ofpbuf_delete(buf); | |
feebdea2 BP |
1063 | } |
1064 | } | |
1065 | return error; | |
96fba48f BP |
1066 | } |
1067 | ||
6bc60024 | 1068 | static void |
9b00386b | 1069 | dpif_linux_init_flow_put(struct dpif_linux *dpif, const struct dpif_flow_put *put, |
6bc60024 BP |
1070 | struct dpif_linux_flow *request) |
1071 | { | |
d64e176c | 1072 | static const struct nlattr dummy_action; |
6bc60024 | 1073 | |
6bc60024 | 1074 | dpif_linux_flow_init(request); |
89625d1e | 1075 | request->cmd = (put->flags & DPIF_FP_CREATE |
6bc60024 BP |
1076 | ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET); |
1077 | request->dp_ifindex = dpif->dp_ifindex; | |
89625d1e BP |
1078 | request->key = put->key; |
1079 | request->key_len = put->key_len; | |
e6cc0bab AZ |
1080 | request->mask = put->mask; |
1081 | request->mask_len = put->mask_len; | |
6bc60024 | 1082 | /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */ |
d64e176c BP |
1083 | request->actions = (put->actions |
1084 | ? put->actions | |
1085 | : CONST_CAST(struct nlattr *, &dummy_action)); | |
89625d1e BP |
1086 | request->actions_len = put->actions_len; |
1087 | if (put->flags & DPIF_FP_ZERO_STATS) { | |
6bc60024 BP |
1088 | request->clear = true; |
1089 | } | |
89625d1e | 1090 | request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE; |
6bc60024 BP |
1091 | } |
1092 | ||
96fba48f | 1093 | static int |
89625d1e | 1094 | dpif_linux_flow_put(struct dpif *dpif_, const struct dpif_flow_put *put) |
96fba48f | 1095 | { |
9b00386b | 1096 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
d6569377 BP |
1097 | struct dpif_linux_flow request, reply; |
1098 | struct ofpbuf *buf; | |
feebdea2 BP |
1099 | int error; |
1100 | ||
9b00386b | 1101 | dpif_linux_init_flow_put(dpif, put, &request); |
d6569377 | 1102 | error = dpif_linux_flow_transact(&request, |
89625d1e BP |
1103 | put->stats ? &reply : NULL, |
1104 | put->stats ? &buf : NULL); | |
1105 | if (!error && put->stats) { | |
1106 | dpif_linux_flow_get_stats(&reply, put->stats); | |
d6569377 | 1107 | ofpbuf_delete(buf); |
feebdea2 BP |
1108 | } |
1109 | return error; | |
96fba48f BP |
1110 | } |
1111 | ||
b99d3cee | 1112 | static void |
9b00386b | 1113 | dpif_linux_init_flow_del(struct dpif_linux *dpif, const struct dpif_flow_del *del, |
b99d3cee | 1114 | struct dpif_linux_flow *request) |
96fba48f | 1115 | { |
b99d3cee BP |
1116 | dpif_linux_flow_init(request); |
1117 | request->cmd = OVS_FLOW_CMD_DEL; | |
1118 | request->dp_ifindex = dpif->dp_ifindex; | |
1119 | request->key = del->key; | |
1120 | request->key_len = del->key_len; | |
1121 | } | |
1122 | ||
1123 | static int | |
1124 | dpif_linux_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del) | |
1125 | { | |
9b00386b | 1126 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
d6569377 BP |
1127 | struct dpif_linux_flow request, reply; |
1128 | struct ofpbuf *buf; | |
feebdea2 BP |
1129 | int error; |
1130 | ||
9b00386b | 1131 | dpif_linux_init_flow_del(dpif, del, &request); |
d6569377 | 1132 | error = dpif_linux_flow_transact(&request, |
b99d3cee BP |
1133 | del->stats ? &reply : NULL, |
1134 | del->stats ? &buf : NULL); | |
1135 | if (!error && del->stats) { | |
1136 | dpif_linux_flow_get_stats(&reply, del->stats); | |
d6569377 | 1137 | ofpbuf_delete(buf); |
feebdea2 BP |
1138 | } |
1139 | return error; | |
96fba48f BP |
1140 | } |
1141 | ||
ac64794a BP |
1142 | struct dpif_linux_flow_dump { |
1143 | struct dpif_flow_dump up; | |
1144 | struct nl_dump nl_dump; | |
d2ad7ef1 | 1145 | atomic_int status; |
e723fd32 JS |
1146 | }; |
1147 | ||
ac64794a BP |
1148 | static struct dpif_linux_flow_dump * |
1149 | dpif_linux_flow_dump_cast(struct dpif_flow_dump *dump) | |
e723fd32 | 1150 | { |
ac64794a | 1151 | return CONTAINER_OF(dump, struct dpif_linux_flow_dump, up); |
e723fd32 JS |
1152 | } |
1153 | ||
ac64794a BP |
1154 | static struct dpif_flow_dump * |
1155 | dpif_linux_flow_dump_create(const struct dpif *dpif_) | |
96fba48f | 1156 | { |
550f0db4 | 1157 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
ac64794a | 1158 | struct dpif_linux_flow_dump *dump; |
37a1300c BP |
1159 | struct dpif_linux_flow request; |
1160 | struct ofpbuf *buf; | |
1161 | ||
ac64794a BP |
1162 | dump = xmalloc(sizeof *dump); |
1163 | dpif_flow_dump_init(&dump->up, dpif_); | |
37a1300c BP |
1164 | |
1165 | dpif_linux_flow_init(&request); | |
067f1e23 | 1166 | request.cmd = OVS_FLOW_CMD_GET; |
254f2dc8 | 1167 | request.dp_ifindex = dpif->dp_ifindex; |
37a1300c BP |
1168 | |
1169 | buf = ofpbuf_new(1024); | |
1170 | dpif_linux_flow_to_ofpbuf(&request, buf); | |
ac64794a | 1171 | nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf); |
37a1300c | 1172 | ofpbuf_delete(buf); |
ac64794a | 1173 | atomic_init(&dump->status, 0); |
30053024 | 1174 | |
ac64794a | 1175 | return &dump->up; |
704a1e09 BP |
1176 | } |
1177 | ||
1178 | static int | |
ac64794a | 1179 | dpif_linux_flow_dump_destroy(struct dpif_flow_dump *dump_) |
704a1e09 | 1180 | { |
ac64794a BP |
1181 | struct dpif_linux_flow_dump *dump = dpif_linux_flow_dump_cast(dump_); |
1182 | unsigned int nl_status = nl_dump_done(&dump->nl_dump); | |
1183 | int dump_status; | |
96fba48f | 1184 | |
ac64794a BP |
1185 | atomic_read(&dump->status, &dump_status); |
1186 | free(dump); | |
1187 | return dump_status ? dump_status : nl_status; | |
1188 | } | |
feebdea2 | 1189 | |
ac64794a BP |
1190 | struct dpif_linux_flow_dump_thread { |
1191 | struct dpif_flow_dump_thread up; | |
1192 | struct dpif_linux_flow_dump *dump; | |
1193 | struct dpif_linux_flow flow; | |
1194 | struct dpif_flow_stats stats; | |
1195 | struct ofpbuf nl_flows; /* Always used to store flows. */ | |
1196 | struct ofpbuf *nl_actions; /* Used if kernel does not supply actions. */ | |
1197 | }; | |
1198 | ||
1199 | static struct dpif_linux_flow_dump_thread * | |
1200 | dpif_linux_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread) | |
1201 | { | |
1202 | return CONTAINER_OF(thread, struct dpif_linux_flow_dump_thread, up); | |
1203 | } | |
1204 | ||
1205 | static struct dpif_flow_dump_thread * | |
1206 | dpif_linux_flow_dump_thread_create(struct dpif_flow_dump *dump_) | |
1207 | { | |
1208 | struct dpif_linux_flow_dump *dump = dpif_linux_flow_dump_cast(dump_); | |
1209 | struct dpif_linux_flow_dump_thread *thread; | |
1210 | ||
1211 | thread = xmalloc(sizeof *thread); | |
1212 | dpif_flow_dump_thread_init(&thread->up, &dump->up); | |
1213 | thread->dump = dump; | |
1214 | ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE); | |
1215 | thread->nl_actions = NULL; | |
1216 | ||
1217 | return &thread->up; | |
1218 | } | |
1219 | ||
1220 | static void | |
1221 | dpif_linux_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_) | |
1222 | { | |
1223 | struct dpif_linux_flow_dump_thread *thread | |
1224 | = dpif_linux_flow_dump_thread_cast(thread_); | |
1225 | ||
1226 | ofpbuf_uninit(&thread->nl_flows); | |
1227 | ofpbuf_delete(thread->nl_actions); | |
1228 | free(thread); | |
1229 | } | |
1230 | ||
1231 | static void | |
1232 | dpif_linux_flow_to_dpif_flow(struct dpif_flow *dpif_flow, | |
1233 | struct dpif_linux_flow *linux_flow) | |
1234 | { | |
1235 | dpif_flow->key = linux_flow->key; | |
1236 | dpif_flow->key_len = linux_flow->key_len; | |
1237 | dpif_flow->mask = linux_flow->mask; | |
1238 | dpif_flow->mask_len = linux_flow->mask_len; | |
1239 | dpif_flow->actions = linux_flow->actions; | |
1240 | dpif_flow->actions_len = linux_flow->actions_len; | |
1241 | dpif_linux_flow_get_stats(linux_flow, &dpif_flow->stats); | |
1242 | } | |
1243 | ||
1244 | static int | |
1245 | dpif_linux_flow_dump_next(struct dpif_flow_dump_thread *thread_, | |
1246 | struct dpif_flow *flows, int max_flows) | |
1247 | { | |
1248 | struct dpif_linux_flow_dump_thread *thread | |
1249 | = dpif_linux_flow_dump_thread_cast(thread_); | |
1250 | struct dpif_linux_flow_dump *dump = thread->dump; | |
1251 | struct dpif_linux *dpif = dpif_linux_cast(thread->up.dpif); | |
1252 | int n_flows; | |
1253 | ||
1254 | ofpbuf_delete(thread->nl_actions); | |
1255 | thread->nl_actions = NULL; | |
1256 | ||
1257 | n_flows = 0; | |
1258 | while (!n_flows | |
1259 | || (n_flows < max_flows && ofpbuf_size(&thread->nl_flows))) { | |
1260 | struct dpif_linux_flow linux_flow; | |
1261 | struct ofpbuf nl_flow; | |
1262 | int error; | |
1263 | ||
1264 | /* Try to grab another flow. */ | |
1265 | if (!nl_dump_next(&dump->nl_dump, &nl_flow, &thread->nl_flows)) { | |
1266 | break; | |
feebdea2 | 1267 | } |
30053024 | 1268 | |
ac64794a BP |
1269 | /* Convert the flow to our output format. */ |
1270 | error = dpif_linux_flow_from_ofpbuf(&linux_flow, &nl_flow); | |
30053024 | 1271 | if (error) { |
ac64794a BP |
1272 | atomic_store(&dump->status, error); |
1273 | break; | |
feebdea2 | 1274 | } |
30053024 | 1275 | |
ac64794a BP |
1276 | if (linux_flow.actions) { |
1277 | /* Common case: the flow includes actions. */ | |
1278 | dpif_linux_flow_to_dpif_flow(&flows[n_flows++], &linux_flow); | |
1279 | } else { | |
1280 | /* Rare case: the flow does not include actions. Retrieve this | |
1281 | * individual flow again to get the actions. */ | |
1282 | error = dpif_linux_flow_get__(dpif, linux_flow.key, | |
1283 | linux_flow.key_len, &linux_flow, | |
1284 | &thread->nl_actions); | |
30053024 BP |
1285 | if (error == ENOENT) { |
1286 | VLOG_DBG("dumped flow disappeared on get"); | |
ac64794a | 1287 | continue; |
30053024 | 1288 | } else if (error) { |
10a89ef0 BP |
1289 | VLOG_WARN("error fetching dumped flow: %s", |
1290 | ovs_strerror(error)); | |
ac64794a BP |
1291 | atomic_store(&dump->status, error); |
1292 | break; | |
30053024 | 1293 | } |
30053024 | 1294 | |
ac64794a BP |
1295 | /* Save this flow. Then exit, because we only have one buffer to |
1296 | * handle this case. */ | |
1297 | dpif_linux_flow_to_dpif_flow(&flows[n_flows++], &linux_flow); | |
1298 | break; | |
1299 | } | |
feebdea2 | 1300 | } |
ac64794a | 1301 | return n_flows; |
96fba48f BP |
1302 | } |
1303 | ||
eabe7c68 BP |
1304 | static void |
1305 | dpif_linux_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec, | |
1306 | struct ofpbuf *buf) | |
96fba48f | 1307 | { |
89625d1e | 1308 | struct ovs_header *k_exec; |
758c456d | 1309 | size_t key_ofs; |
f7cd0081 | 1310 | |
eabe7c68 | 1311 | ofpbuf_prealloc_tailroom(buf, (64 |
1f317cb5 | 1312 | + ofpbuf_size(d_exec->packet) |
758c456d | 1313 | + ODP_KEY_METADATA_SIZE |
eabe7c68 | 1314 | + d_exec->actions_len)); |
f7cd0081 | 1315 | |
df2c07f4 | 1316 | nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST, |
69685a88 | 1317 | OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION); |
f7cd0081 | 1318 | |
89625d1e BP |
1319 | k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec); |
1320 | k_exec->dp_ifindex = dp_ifindex; | |
f7cd0081 | 1321 | |
89625d1e | 1322 | nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET, |
1f317cb5 PS |
1323 | ofpbuf_data(d_exec->packet), |
1324 | ofpbuf_size(d_exec->packet)); | |
758c456d JR |
1325 | |
1326 | key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY); | |
1327 | odp_key_from_pkt_metadata(buf, &d_exec->md); | |
1328 | nl_msg_end_nested(buf, key_ofs); | |
1329 | ||
89625d1e BP |
1330 | nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS, |
1331 | d_exec->actions, d_exec->actions_len); | |
6bc60024 BP |
1332 | } |
1333 | ||
1334 | static int | |
89625d1e | 1335 | dpif_linux_execute__(int dp_ifindex, const struct dpif_execute *execute) |
6bc60024 | 1336 | { |
eabe7c68 BP |
1337 | uint64_t request_stub[1024 / 8]; |
1338 | struct ofpbuf request; | |
6bc60024 BP |
1339 | int error; |
1340 | ||
eabe7c68 BP |
1341 | ofpbuf_use_stub(&request, request_stub, sizeof request_stub); |
1342 | dpif_linux_encode_execute(dp_ifindex, execute, &request); | |
a88b4e04 | 1343 | error = nl_transact(NETLINK_GENERIC, &request, NULL); |
eabe7c68 | 1344 | ofpbuf_uninit(&request); |
6bc60024 | 1345 | |
f7cd0081 | 1346 | return error; |
96fba48f BP |
1347 | } |
1348 | ||
eb8b28e7 | 1349 | static int |
758c456d | 1350 | dpif_linux_execute(struct dpif *dpif_, struct dpif_execute *execute) |
eb8b28e7 | 1351 | { |
550f0db4 | 1352 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
eb8b28e7 | 1353 | |
89625d1e | 1354 | return dpif_linux_execute__(dpif->dp_ifindex, execute); |
eb8b28e7 EJ |
1355 | } |
1356 | ||
/* Largest number of operations dpif_linux_operate__() handles in one batch;
 * it sizes that function's on-stack per-operation arrays. */
#define MAX_OPS 50
1358 | ||
6bc60024 | 1359 | static void |
9b00386b | 1360 | dpif_linux_operate__(struct dpif_linux *dpif, struct dpif_op **ops, size_t n_ops) |
6bc60024 | 1361 | { |
eabe7c68 BP |
1362 | |
1363 | struct op_auxdata { | |
1364 | struct nl_transaction txn; | |
72d32ac0 | 1365 | |
eabe7c68 BP |
1366 | struct ofpbuf request; |
1367 | uint64_t request_stub[1024 / 8]; | |
72d32ac0 BP |
1368 | |
1369 | struct ofpbuf reply; | |
1370 | uint64_t reply_stub[1024 / 8]; | |
eabe7c68 BP |
1371 | } auxes[MAX_OPS]; |
1372 | ||
1373 | struct nl_transaction *txnsp[MAX_OPS]; | |
6bc60024 BP |
1374 | size_t i; |
1375 | ||
cb22974d | 1376 | ovs_assert(n_ops <= MAX_OPS); |
6bc60024 | 1377 | for (i = 0; i < n_ops; i++) { |
eabe7c68 | 1378 | struct op_auxdata *aux = &auxes[i]; |
c2b565b5 | 1379 | struct dpif_op *op = ops[i]; |
b99d3cee BP |
1380 | struct dpif_flow_put *put; |
1381 | struct dpif_flow_del *del; | |
1382 | struct dpif_execute *execute; | |
eabe7c68 BP |
1383 | struct dpif_linux_flow flow; |
1384 | ||
1385 | ofpbuf_use_stub(&aux->request, | |
1386 | aux->request_stub, sizeof aux->request_stub); | |
1387 | aux->txn.request = &aux->request; | |
b99d3cee | 1388 | |
72d32ac0 BP |
1389 | ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub); |
1390 | aux->txn.reply = NULL; | |
1391 | ||
b99d3cee BP |
1392 | switch (op->type) { |
1393 | case DPIF_OP_FLOW_PUT: | |
1394 | put = &op->u.flow_put; | |
9b00386b | 1395 | dpif_linux_init_flow_put(dpif, put, &flow); |
6bc60024 | 1396 | if (put->stats) { |
eabe7c68 | 1397 | flow.nlmsg_flags |= NLM_F_ECHO; |
72d32ac0 | 1398 | aux->txn.reply = &aux->reply; |
6bc60024 | 1399 | } |
eabe7c68 | 1400 | dpif_linux_flow_to_ofpbuf(&flow, &aux->request); |
b99d3cee BP |
1401 | break; |
1402 | ||
1403 | case DPIF_OP_FLOW_DEL: | |
1404 | del = &op->u.flow_del; | |
9b00386b | 1405 | dpif_linux_init_flow_del(dpif, del, &flow); |
b99d3cee | 1406 | if (del->stats) { |
eabe7c68 | 1407 | flow.nlmsg_flags |= NLM_F_ECHO; |
72d32ac0 | 1408 | aux->txn.reply = &aux->reply; |
b99d3cee | 1409 | } |
eabe7c68 | 1410 | dpif_linux_flow_to_ofpbuf(&flow, &aux->request); |
b99d3cee | 1411 | break; |
6bc60024 | 1412 | |
b99d3cee BP |
1413 | case DPIF_OP_EXECUTE: |
1414 | execute = &op->u.execute; | |
eabe7c68 BP |
1415 | dpif_linux_encode_execute(dpif->dp_ifindex, execute, |
1416 | &aux->request); | |
b99d3cee BP |
1417 | break; |
1418 | ||
1419 | default: | |
428b2edd | 1420 | OVS_NOT_REACHED(); |
6bc60024 BP |
1421 | } |
1422 | } | |
1423 | ||
6bc60024 | 1424 | for (i = 0; i < n_ops; i++) { |
eabe7c68 | 1425 | txnsp[i] = &auxes[i].txn; |
6bc60024 | 1426 | } |
a88b4e04 | 1427 | nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops); |
6bc60024 | 1428 | |
6bc60024 | 1429 | for (i = 0; i < n_ops; i++) { |
72d32ac0 | 1430 | struct op_auxdata *aux = &auxes[i]; |
eabe7c68 | 1431 | struct nl_transaction *txn = &auxes[i].txn; |
c2b565b5 | 1432 | struct dpif_op *op = ops[i]; |
b99d3cee BP |
1433 | struct dpif_flow_put *put; |
1434 | struct dpif_flow_del *del; | |
6bc60024 | 1435 | |
b99d3cee | 1436 | op->error = txn->error; |
6bc60024 | 1437 | |
b99d3cee BP |
1438 | switch (op->type) { |
1439 | case DPIF_OP_FLOW_PUT: | |
1440 | put = &op->u.flow_put; | |
cfceb2b5 | 1441 | if (put->stats) { |
b99d3cee | 1442 | if (!op->error) { |
cfceb2b5 BP |
1443 | struct dpif_linux_flow reply; |
1444 | ||
1445 | op->error = dpif_linux_flow_from_ofpbuf(&reply, | |
1446 | txn->reply); | |
1447 | if (!op->error) { | |
1448 | dpif_linux_flow_get_stats(&reply, put->stats); | |
1449 | } | |
1450 | } | |
1451 | ||
1452 | if (op->error) { | |
1453 | memset(put->stats, 0, sizeof *put->stats); | |
6bc60024 BP |
1454 | } |
1455 | } | |
b99d3cee BP |
1456 | break; |
1457 | ||
1458 | case DPIF_OP_FLOW_DEL: | |
1459 | del = &op->u.flow_del; | |
cfceb2b5 | 1460 | if (del->stats) { |
b99d3cee | 1461 | if (!op->error) { |
cfceb2b5 BP |
1462 | struct dpif_linux_flow reply; |
1463 | ||
1464 | op->error = dpif_linux_flow_from_ofpbuf(&reply, | |
1465 | txn->reply); | |
1466 | if (!op->error) { | |
1467 | dpif_linux_flow_get_stats(&reply, del->stats); | |
1468 | } | |
1469 | } | |
1470 | ||
1471 | if (op->error) { | |
1472 | memset(del->stats, 0, sizeof *del->stats); | |
b99d3cee BP |
1473 | } |
1474 | } | |
1475 | break; | |
1476 | ||
1477 | case DPIF_OP_EXECUTE: | |
1478 | break; | |
1479 | ||
1480 | default: | |
428b2edd | 1481 | OVS_NOT_REACHED(); |
6bc60024 BP |
1482 | } |
1483 | ||
72d32ac0 BP |
1484 | ofpbuf_uninit(&aux->request); |
1485 | ofpbuf_uninit(&aux->reply); | |
6bc60024 | 1486 | } |
eabe7c68 BP |
1487 | } |
1488 | ||
1489 | static void | |
9b00386b | 1490 | dpif_linux_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) |
eabe7c68 | 1491 | { |
9b00386b AW |
1492 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
1493 | ||
eabe7c68 BP |
1494 | while (n_ops > 0) { |
1495 | size_t chunk = MIN(n_ops, MAX_OPS); | |
1496 | dpif_linux_operate__(dpif, ops, chunk); | |
1497 | ops += chunk; | |
1498 | n_ops -= chunk; | |
1499 | } | |
6bc60024 BP |
1500 | } |
1501 | ||
1579cf67 AW |
1502 | /* Synchronizes 'channels' in 'dpif->handlers' with the set of vports |
1503 | * currently in 'dpif' in the kernel, by adding a new set of channels for | |
1504 | * any kernel vport that lacks one and deleting any channels that have no | |
1505 | * backing kernel vports. */ | |
96fba48f | 1506 | static int |
9b00386b | 1507 | dpif_linux_refresh_channels(struct dpif_linux *dpif, uint32_t n_handlers) |
b90de034 | 1508 | OVS_REQ_WRLOCK(dpif->upcall_lock) |
96fba48f | 1509 | { |
8381a3d3 BP |
1510 | unsigned long int *keep_channels; |
1511 | struct dpif_linux_vport vport; | |
1512 | size_t keep_channels_nbits; | |
1513 | struct nl_dump dump; | |
d57695d7 JS |
1514 | uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; |
1515 | struct ofpbuf buf; | |
8381a3d3 BP |
1516 | int retval = 0; |
1517 | size_t i; | |
982b8810 | 1518 | |
1579cf67 AW |
1519 | if (dpif->n_handlers != n_handlers) { |
1520 | destroy_all_channels(dpif); | |
1521 | dpif->handlers = xzalloc(n_handlers * sizeof *dpif->handlers); | |
1522 | for (i = 0; i < n_handlers; i++) { | |
1523 | struct dpif_handler *handler = &dpif->handlers[i]; | |
1524 | ||
1525 | handler->epoll_fd = epoll_create(10); | |
1526 | if (handler->epoll_fd < 0) { | |
1527 | size_t j; | |
1528 | ||
1529 | for (j = 0; j < i; j++) { | |
1530 | close(dpif->handlers[j].epoll_fd); | |
1531 | } | |
1532 | free(dpif->handlers); | |
1533 | dpif->handlers = NULL; | |
1534 | ||
1535 | return errno; | |
1536 | } | |
8381a3d3 | 1537 | } |
1579cf67 AW |
1538 | dpif->n_handlers = n_handlers; |
1539 | } | |
1540 | ||
1541 | for (i = 0; i < n_handlers; i++) { | |
1542 | struct dpif_handler *handler = &dpif->handlers[i]; | |
1543 | ||
1544 | handler->event_offset = handler->n_events = 0; | |
17411ecf | 1545 | } |
b063d9f0 | 1546 | |
8381a3d3 BP |
1547 | keep_channels_nbits = dpif->uc_array_size; |
1548 | keep_channels = bitmap_allocate(keep_channels_nbits); | |
982b8810 | 1549 | |
d57695d7 | 1550 | ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); |
9b00386b AW |
1551 | dpif_linux_port_dump_start__(dpif, &dump); |
1552 | while (!dpif_linux_port_dump_next__(dpif, &dump, &vport, &buf)) { | |
8381a3d3 | 1553 | uint32_t port_no = odp_to_u32(vport.port_no); |
1579cf67 | 1554 | uint32_t *upcall_pids = NULL; |
8381a3d3 | 1555 | int error; |
50f80534 | 1556 | |
1579cf67 AW |
1557 | if (port_no >= dpif->uc_array_size |
1558 | || !vport_get_pids(dpif, port_no, &upcall_pids)) { | |
1559 | struct nl_sock **socksp = vport_create_socksp(dpif->n_handlers, | |
1560 | &error); | |
1561 | ||
1562 | if (!socksp) { | |
1563 | goto error; | |
1564 | } | |
1565 | ||
1566 | error = vport_add_channels(dpif, vport.port_no, socksp); | |
b063d9f0 | 1567 | if (error) { |
1579cf67 | 1568 | VLOG_INFO("%s: could not add channels for port %s", |
9b00386b | 1569 | dpif_name(&dpif->dpif), vport.name); |
1579cf67 | 1570 | vport_del_socksp(socksp, dpif->n_handlers); |
8381a3d3 BP |
1571 | retval = error; |
1572 | goto error; | |
982b8810 | 1573 | } |
1579cf67 AW |
1574 | upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers); |
1575 | free(socksp); | |
8381a3d3 | 1576 | } |
50f80534 | 1577 | |
8381a3d3 | 1578 | /* Configure the vport to deliver misses to 'sock'. */ |
1579cf67 AW |
1579 | if (vport.upcall_pids[0] == 0 |
1580 | || vport.n_upcall_pids != dpif->n_handlers | |
1581 | || memcmp(upcall_pids, vport.upcall_pids, n_handlers * sizeof | |
1582 | *upcall_pids)) { | |
8381a3d3 | 1583 | struct dpif_linux_vport vport_request; |
989fd548 JP |
1584 | |
1585 | dpif_linux_vport_init(&vport_request); | |
1586 | vport_request.cmd = OVS_VPORT_CMD_SET; | |
1587 | vport_request.dp_ifindex = dpif->dp_ifindex; | |
8381a3d3 | 1588 | vport_request.port_no = vport.port_no; |
1579cf67 AW |
1589 | vport_request.n_upcall_pids = dpif->n_handlers; |
1590 | vport_request.upcall_pids = upcall_pids; | |
989fd548 | 1591 | error = dpif_linux_vport_transact(&vport_request, NULL, NULL); |
1579cf67 | 1592 | if (error) { |
989fd548 JP |
1593 | VLOG_WARN_RL(&error_rl, |
1594 | "%s: failed to set upcall pid on port: %s", | |
10a89ef0 | 1595 | dpif_name(&dpif->dpif), ovs_strerror(error)); |
989fd548 | 1596 | |
8381a3d3 BP |
1597 | if (error != ENODEV && error != ENOENT) { |
1598 | retval = error; | |
989fd548 | 1599 | } else { |
8381a3d3 BP |
1600 | /* The vport isn't really there, even though the dump says |
1601 | * it is. Probably we just hit a race after a port | |
1602 | * disappeared. */ | |
989fd548 | 1603 | } |
8381a3d3 | 1604 | goto error; |
50f80534 | 1605 | } |
8381a3d3 | 1606 | } |
14b4d2f9 | 1607 | |
8381a3d3 BP |
1608 | if (port_no < keep_channels_nbits) { |
1609 | bitmap_set1(keep_channels, port_no); | |
1610 | } | |
1579cf67 | 1611 | free(upcall_pids); |
8381a3d3 BP |
1612 | continue; |
1613 | ||
1614 | error: | |
1579cf67 AW |
1615 | free(upcall_pids); |
1616 | vport_del_channels(dpif, vport.port_no); | |
982b8810 | 1617 | } |
8381a3d3 | 1618 | nl_dump_done(&dump); |
d57695d7 | 1619 | ofpbuf_uninit(&buf); |
b063d9f0 | 1620 | |
8381a3d3 BP |
1621 | /* Discard any saved channels that we didn't reuse. */ |
1622 | for (i = 0; i < keep_channels_nbits; i++) { | |
1623 | if (!bitmap_is_set(keep_channels, i)) { | |
1579cf67 | 1624 | vport_del_channels(dpif, u32_to_odp(i)); |
8381a3d3 BP |
1625 | } |
1626 | } | |
1627 | free(keep_channels); | |
1628 | ||
1629 | return retval; | |
1630 | } | |
1631 | ||
1632 | static int | |
9b00386b | 1633 | dpif_linux_recv_set__(struct dpif_linux *dpif, bool enable) |
b90de034 | 1634 | OVS_REQ_WRLOCK(dpif->upcall_lock) |
8381a3d3 | 1635 | { |
1579cf67 | 1636 | if ((dpif->handlers != NULL) == enable) { |
8381a3d3 BP |
1637 | return 0; |
1638 | } else if (!enable) { | |
1579cf67 | 1639 | destroy_all_channels(dpif); |
8381a3d3 BP |
1640 | return 0; |
1641 | } else { | |
9b00386b | 1642 | return dpif_linux_refresh_channels(dpif, 1); |
8381a3d3 | 1643 | } |
96fba48f BP |
1644 | } |
1645 | ||
9fafa796 BP |
1646 | static int |
1647 | dpif_linux_recv_set(struct dpif *dpif_, bool enable) | |
1648 | { | |
1649 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1650 | int error; | |
1651 | ||
1579cf67 | 1652 | fat_rwlock_wrlock(&dpif->upcall_lock); |
9b00386b | 1653 | error = dpif_linux_recv_set__(dpif, enable); |
1579cf67 | 1654 | fat_rwlock_unlock(&dpif->upcall_lock); |
9fafa796 BP |
1655 | |
1656 | return error; | |
1657 | } | |
1658 | ||
1954e6bb | 1659 | static int |
1579cf67 | 1660 | dpif_linux_handlers_set(struct dpif *dpif_, uint32_t n_handlers) |
1954e6bb | 1661 | { |
1579cf67 AW |
1662 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
1663 | int error = 0; | |
1664 | ||
1665 | fat_rwlock_wrlock(&dpif->upcall_lock); | |
1666 | if (dpif->handlers) { | |
9b00386b | 1667 | error = dpif_linux_refresh_channels(dpif, n_handlers); |
1579cf67 AW |
1668 | } |
1669 | fat_rwlock_unlock(&dpif->upcall_lock); | |
1670 | ||
1671 | return error; | |
1954e6bb AW |
1672 | } |
1673 | ||
aae51f53 BP |
1674 | static int |
1675 | dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
1676 | uint32_t queue_id, uint32_t *priority) | |
1677 | { | |
1678 | if (queue_id < 0xf000) { | |
17ee3c1f | 1679 | *priority = TC_H_MAKE(1 << 16, queue_id + 1); |
aae51f53 BP |
1680 | return 0; |
1681 | } else { | |
1682 | return EINVAL; | |
1683 | } | |
1684 | } | |
1685 | ||
96fba48f | 1686 | static int |
982b8810 | 1687 | parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, |
254f2dc8 | 1688 | int *dp_ifindex) |
856081f6 | 1689 | { |
df2c07f4 | 1690 | static const struct nl_policy ovs_packet_policy[] = { |
856081f6 | 1691 | /* Always present. */ |
df2c07f4 | 1692 | [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC, |
856081f6 | 1693 | .min_len = ETH_HEADER_LEN }, |
df2c07f4 | 1694 | [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED }, |
856081f6 | 1695 | |
df2c07f4 | 1696 | /* OVS_PACKET_CMD_ACTION only. */ |
e995e3df | 1697 | [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true }, |
856081f6 BP |
1698 | }; |
1699 | ||
df2c07f4 JP |
1700 | struct ovs_header *ovs_header; |
1701 | struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)]; | |
982b8810 BP |
1702 | struct nlmsghdr *nlmsg; |
1703 | struct genlmsghdr *genl; | |
1704 | struct ofpbuf b; | |
aaff4b55 | 1705 | int type; |
982b8810 | 1706 | |
1f317cb5 | 1707 | ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf)); |
982b8810 BP |
1708 | |
1709 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1710 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
1711 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
1712 | if (!nlmsg || !genl || !ovs_header | |
1713 | || nlmsg->nlmsg_type != ovs_packet_family | |
1714 | || !nl_policy_parse(&b, 0, ovs_packet_policy, a, | |
1715 | ARRAY_SIZE(ovs_packet_policy))) { | |
856081f6 BP |
1716 | return EINVAL; |
1717 | } | |
1718 | ||
df2c07f4 JP |
1719 | type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS |
1720 | : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION | |
aaff4b55 BP |
1721 | : -1); |
1722 | if (type < 0) { | |
1723 | return EINVAL; | |
1724 | } | |
82272ede | 1725 | |
877c9270 | 1726 | /* (Re)set ALL fields of '*upcall' on successful return. */ |
aaff4b55 | 1727 | upcall->type = type; |
ebc56baa BP |
1728 | upcall->key = CONST_CAST(struct nlattr *, |
1729 | nl_attr_get(a[OVS_PACKET_ATTR_KEY])); | |
df2c07f4 | 1730 | upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]); |
e995e3df | 1731 | upcall->userdata = a[OVS_PACKET_ATTR_USERDATA]; |
da546e07 JR |
1732 | |
1733 | /* Allow overwriting the netlink attribute header without reallocating. */ | |
1734 | ofpbuf_use_stub(&upcall->packet, | |
1735 | CONST_CAST(struct nlattr *, | |
1736 | nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1, | |
1737 | nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) + | |
1738 | sizeof(struct nlattr)); | |
1f317cb5 PS |
1739 | ofpbuf_set_data(&upcall->packet, |
1740 | (char *)ofpbuf_data(&upcall->packet) + sizeof(struct nlattr)); | |
1741 | ofpbuf_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET])); | |
da546e07 | 1742 | |
df2c07f4 | 1743 | *dp_ifindex = ovs_header->dp_ifindex; |
982b8810 | 1744 | |
856081f6 BP |
1745 | return 0; |
1746 | } | |
1747 | ||
1748 | static int | |
9b00386b | 1749 | dpif_linux_recv__(struct dpif_linux *dpif, uint32_t handler_id, |
1579cf67 | 1750 | struct dpif_upcall *upcall, struct ofpbuf *buf) |
b90de034 | 1751 | OVS_REQ_RDLOCK(dpif->upcall_lock) |
96fba48f | 1752 | { |
1579cf67 | 1753 | struct dpif_handler *handler; |
17411ecf | 1754 | int read_tries = 0; |
96fba48f | 1755 | |
1579cf67 AW |
1756 | if (!dpif->handlers || handler_id >= dpif->n_handlers) { |
1757 | return EAGAIN; | |
982b8810 BP |
1758 | } |
1759 | ||
1579cf67 AW |
1760 | handler = &dpif->handlers[handler_id]; |
1761 | if (handler->event_offset >= handler->n_events) { | |
8522ba09 | 1762 | int retval; |
989fd548 | 1763 | |
1579cf67 | 1764 | handler->event_offset = handler->n_events = 0; |
f6d1465c | 1765 | |
8522ba09 | 1766 | do { |
1579cf67 | 1767 | retval = epoll_wait(handler->epoll_fd, handler->epoll_events, |
989fd548 | 1768 | dpif->uc_array_size, 0); |
8522ba09 BP |
1769 | } while (retval < 0 && errno == EINTR); |
1770 | if (retval < 0) { | |
1771 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
10a89ef0 | 1772 | VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno)); |
989fd548 | 1773 | } else if (retval > 0) { |
1579cf67 | 1774 | handler->n_events = retval; |
8522ba09 | 1775 | } |
8522ba09 BP |
1776 | } |
1777 | ||
1579cf67 AW |
1778 | while (handler->event_offset < handler->n_events) { |
1779 | int idx = handler->epoll_events[handler->event_offset].data.u32; | |
1780 | struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx]; | |
8522ba09 | 1781 | |
1579cf67 | 1782 | handler->event_offset++; |
17411ecf | 1783 | |
f6d1465c | 1784 | for (;;) { |
8522ba09 | 1785 | int dp_ifindex; |
f6d1465c | 1786 | int error; |
17411ecf | 1787 | |
f6d1465c BP |
1788 | if (++read_tries > 50) { |
1789 | return EAGAIN; | |
1790 | } | |
17411ecf | 1791 | |
fe3d61b3 | 1792 | error = nl_sock_recv(ch->sock, buf, false); |
14b4d2f9 BP |
1793 | if (error == ENOBUFS) { |
1794 | /* ENOBUFS typically means that we've received so many | |
1795 | * packets that the buffer overflowed. Try again | |
1796 | * immediately because there's almost certainly a packet | |
1797 | * waiting for us. */ | |
9b00386b | 1798 | report_loss(dpif, ch, idx, handler_id); |
14b4d2f9 BP |
1799 | continue; |
1800 | } | |
1801 | ||
1802 | ch->last_poll = time_msec(); | |
72d32ac0 | 1803 | if (error) { |
72d32ac0 BP |
1804 | if (error == EAGAIN) { |
1805 | break; | |
1806 | } | |
f6d1465c BP |
1807 | return error; |
1808 | } | |
17411ecf | 1809 | |
f6d1465c | 1810 | error = parse_odp_packet(buf, upcall, &dp_ifindex); |
a12b3ead | 1811 | if (!error && dp_ifindex == dpif->dp_ifindex) { |
f6d1465c | 1812 | return 0; |
989fd548 | 1813 | } else if (error) { |
f6d1465c | 1814 | return error; |
17411ecf | 1815 | } |
982b8810 | 1816 | } |
50f80534 | 1817 | } |
982b8810 BP |
1818 | |
1819 | return EAGAIN; | |
96fba48f BP |
1820 | } |
1821 | ||
9fafa796 | 1822 | static int |
1579cf67 | 1823 | dpif_linux_recv(struct dpif *dpif_, uint32_t handler_id, |
1954e6bb | 1824 | struct dpif_upcall *upcall, struct ofpbuf *buf) |
9fafa796 BP |
1825 | { |
1826 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1827 | int error; | |
1828 | ||
1579cf67 | 1829 | fat_rwlock_rdlock(&dpif->upcall_lock); |
9b00386b | 1830 | error = dpif_linux_recv__(dpif, handler_id, upcall, buf); |
1579cf67 | 1831 | fat_rwlock_unlock(&dpif->upcall_lock); |
9fafa796 BP |
1832 | |
1833 | return error; | |
1834 | } | |
1835 | ||
96fba48f | 1836 | static void |
b90de034 AW |
1837 | dpif_linux_recv_wait__(struct dpif_linux *dpif, uint32_t handler_id) |
1838 | OVS_REQ_RDLOCK(dpif->upcall_lock) | |
96fba48f | 1839 | { |
1579cf67 AW |
1840 | if (dpif->handlers && handler_id < dpif->n_handlers) { |
1841 | struct dpif_handler *handler = &dpif->handlers[handler_id]; | |
1842 | ||
1843 | poll_fd_wait(handler->epoll_fd, POLLIN); | |
17411ecf | 1844 | } |
96fba48f BP |
1845 | } |
1846 | ||
1ba530f4 | 1847 | static void |
b90de034 | 1848 | dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id) |
1ba530f4 BP |
1849 | { |
1850 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
17411ecf | 1851 | |
b90de034 AW |
1852 | fat_rwlock_rdlock(&dpif->upcall_lock); |
1853 | dpif_linux_recv_wait__(dpif, handler_id); | |
1854 | fat_rwlock_unlock(&dpif->upcall_lock); | |
1855 | } | |
1856 | ||
1857 | static void | |
1858 | dpif_linux_recv_purge__(struct dpif_linux *dpif) | |
1859 | OVS_REQ_WRLOCK(dpif->upcall_lock) | |
1860 | { | |
1579cf67 AW |
1861 | if (dpif->handlers) { |
1862 | size_t i, j; | |
1863 | ||
1864 | for (i = 0; i < dpif->uc_array_size; i++ ) { | |
1865 | if (!dpif->handlers[0].channels[i].sock) { | |
1866 | continue; | |
1867 | } | |
1ba530f4 | 1868 | |
1579cf67 AW |
1869 | for (j = 0; j < dpif->n_handlers; j++) { |
1870 | nl_sock_drain(dpif->handlers[j].channels[i].sock); | |
9fafa796 | 1871 | } |
989fd548 | 1872 | } |
1ba530f4 | 1873 | } |
b90de034 AW |
1874 | } |
1875 | ||
1876 | static void | |
1877 | dpif_linux_recv_purge(struct dpif *dpif_) | |
1878 | { | |
1879 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1880 | ||
1881 | fat_rwlock_wrlock(&dpif->upcall_lock); | |
1882 | dpif_linux_recv_purge__(dpif); | |
1579cf67 | 1883 | fat_rwlock_unlock(&dpif->upcall_lock); |
1ba530f4 BP |
1884 | } |
1885 | ||
/* The 'struct dpif_class' instance for this provider.  Its first member is
 * the string "system"; every other member is one of the dpif_linux_*()
 * functions or NULL.
 *
 * The initializer is positional, so the entries must stay in the same order
 * as the members of 'struct dpif_class', which is declared outside this file
 * (presumably in dpif-provider.h -- verify before reordering or inserting).
 *
 * A NULL entry means no function is supplied for that member.  The one marked
 * "wait" is labelled in place; which member the NULL that follows
 * dpif_linux_enumerate corresponds to cannot be told from here. */
96fba48f | 1886 | const struct dpif_class dpif_linux_class = { |
1a6f1e2a | 1887 | "system", |
d3d22744 | 1888 | dpif_linux_enumerate, |
0aeaabc8 | 1889 | NULL, |
96fba48f BP |
1890 | dpif_linux_open, |
1891 | dpif_linux_close, | |
7dab847a | 1892 | dpif_linux_destroy, |
61eae437 | 1893 | dpif_linux_run, |
e4516b20 | 1894 | NULL, /* wait */ |
96fba48f | 1895 | dpif_linux_get_stats, |
96fba48f BP |
1896 | dpif_linux_port_add, |
1897 | dpif_linux_port_del, | |
1898 | dpif_linux_port_query_by_number, | |
1899 | dpif_linux_port_query_by_name, | |
98403001 | 1900 | dpif_linux_port_get_pid, |
b0ec0f27 BP |
1901 | dpif_linux_port_dump_start, |
1902 | dpif_linux_port_dump_next, | |
1903 | dpif_linux_port_dump_done, | |
e9e28be3 BP |
1904 | dpif_linux_port_poll, |
1905 | dpif_linux_port_poll_wait, | |
96fba48f BP |
1906 | dpif_linux_flow_get, |
1907 | dpif_linux_flow_put, | |
1908 | dpif_linux_flow_del, | |
1909 | dpif_linux_flow_flush, | |
ac64794a BP |
1910 | dpif_linux_flow_dump_create, |
1911 | dpif_linux_flow_dump_destroy, | |
1912 | dpif_linux_flow_dump_thread_create, | |
1913 | dpif_linux_flow_dump_thread_destroy, | |
704a1e09 | 1914 | dpif_linux_flow_dump_next, |
96fba48f | 1915 | dpif_linux_execute, |
6bc60024 | 1916 | dpif_linux_operate, |
a12b3ead | 1917 | dpif_linux_recv_set, |
1954e6bb | 1918 | dpif_linux_handlers_set, |
aae51f53 | 1919 | dpif_linux_queue_to_priority, |
96fba48f BP |
1920 | dpif_linux_recv, |
1921 | dpif_linux_recv_wait, | |
1ba530f4 | 1922 | dpif_linux_recv_purge, |
96fba48f BP |
1923 | }; |
1924 | \f | |
96fba48f | 1925 | static int |
982b8810 | 1926 | dpif_linux_init(void) |
96fba48f | 1927 | { |
eb8ed438 BP |
1928 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
1929 | static int error; | |
982b8810 | 1930 | |
eb8ed438 | 1931 | if (ovsthread_once_start(&once)) { |
df2c07f4 JP |
1932 | error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY, |
1933 | &ovs_datapath_family); | |
37a1300c BP |
1934 | if (error) { |
1935 | VLOG_ERR("Generic Netlink family '%s' does not exist. " | |
1936 | "The Open vSwitch kernel module is probably not loaded.", | |
df2c07f4 | 1937 | OVS_DATAPATH_FAMILY); |
37a1300c | 1938 | } |
f0fef760 | 1939 | if (!error) { |
df2c07f4 | 1940 | error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family); |
f0fef760 | 1941 | } |
37a1300c | 1942 | if (!error) { |
df2c07f4 | 1943 | error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family); |
37a1300c | 1944 | } |
aaff4b55 | 1945 | if (!error) { |
df2c07f4 JP |
1946 | error = nl_lookup_genl_family(OVS_PACKET_FAMILY, |
1947 | &ovs_packet_family); | |
aaff4b55 | 1948 | } |
c7178a0b EJ |
1949 | if (!error) { |
1950 | error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP, | |
b3dcb73c | 1951 | &ovs_vport_mcgroup); |
c7178a0b | 1952 | } |
eb8ed438 BP |
1953 | |
1954 | ovsthread_once_done(&once); | |
982b8810 BP |
1955 | } |
1956 | ||
1957 | return error; | |
96fba48f BP |
1958 | } |
1959 | ||
c19e6535 BP |
1960 | bool |
1961 | dpif_linux_is_internal_device(const char *name) | |
9fe3b9a2 | 1962 | { |
c19e6535 BP |
1963 | struct dpif_linux_vport reply; |
1964 | struct ofpbuf *buf; | |
9fe3b9a2 | 1965 | int error; |
96fba48f | 1966 | |
c19e6535 BP |
1967 | error = dpif_linux_vport_get(name, &reply, &buf); |
1968 | if (!error) { | |
1969 | ofpbuf_delete(buf); | |
141d9ce4 | 1970 | } else if (error != ENODEV && error != ENOENT) { |
c19e6535 | 1971 | VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)", |
10a89ef0 | 1972 | name, ovs_strerror(error)); |
96fba48f BP |
1973 | } |
1974 | ||
df2c07f4 | 1975 | return reply.type == OVS_VPORT_TYPE_INTERNAL; |
96fba48f | 1976 | } |
c19e6535 | 1977 | \f |
df2c07f4 | 1978 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
c19e6535 BP |
1979 | * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a |
1980 | * positive errno value. | |
1981 | * | |
1982 | * 'vport' will contain pointers into 'buf', so the caller should not free | |
1983 | * 'buf' while 'vport' is still in use. */ | |
1984 | static int | |
1985 | dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, | |
1986 | const struct ofpbuf *buf) | |
1987 | { | |
df2c07f4 JP |
1988 | static const struct nl_policy ovs_vport_policy[] = { |
1989 | [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 }, | |
1990 | [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 }, | |
1991 | [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, | |
1579cf67 | 1992 | [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_UNSPEC }, |
f7df9823 | 1993 | [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats), |
c19e6535 | 1994 | .optional = true }, |
df2c07f4 | 1995 | [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true }, |
c19e6535 BP |
1996 | }; |
1997 | ||
df2c07f4 JP |
1998 | struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)]; |
1999 | struct ovs_header *ovs_header; | |
f0fef760 BP |
2000 | struct nlmsghdr *nlmsg; |
2001 | struct genlmsghdr *genl; | |
2002 | struct ofpbuf b; | |
c19e6535 BP |
2003 | |
2004 | dpif_linux_vport_init(vport); | |
2005 | ||
1f317cb5 | 2006 | ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf)); |
f0fef760 BP |
2007 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); |
2008 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
2009 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
2010 | if (!nlmsg || !genl || !ovs_header | |
2011 | || nlmsg->nlmsg_type != ovs_vport_family | |
2012 | || !nl_policy_parse(&b, 0, ovs_vport_policy, a, | |
2013 | ARRAY_SIZE(ovs_vport_policy))) { | |
c19e6535 BP |
2014 | return EINVAL; |
2015 | } | |
c19e6535 | 2016 | |
f0fef760 | 2017 | vport->cmd = genl->cmd; |
df2c07f4 | 2018 | vport->dp_ifindex = ovs_header->dp_ifindex; |
4e022ec0 | 2019 | vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]); |
df2c07f4 JP |
2020 | vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]); |
2021 | vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]); | |
b063d9f0 | 2022 | if (a[OVS_VPORT_ATTR_UPCALL_PID]) { |
1579cf67 AW |
2023 | vport->n_upcall_pids = nl_attr_get_size(a[OVS_VPORT_ATTR_UPCALL_PID]) |
2024 | / (sizeof *vport->upcall_pids); | |
2025 | vport->upcall_pids = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]); | |
2026 | ||
b063d9f0 | 2027 | } |
df2c07f4 JP |
2028 | if (a[OVS_VPORT_ATTR_STATS]) { |
2029 | vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]); | |
2030 | } | |
df2c07f4 JP |
2031 | if (a[OVS_VPORT_ATTR_OPTIONS]) { |
2032 | vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]); | |
2033 | vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]); | |
c19e6535 | 2034 | } |
c19e6535 BP |
2035 | return 0; |
2036 | } | |
2037 | ||
df2c07f4 | 2038 | /* Appends to 'buf' (which must initially be empty) a "struct ovs_header" |
c19e6535 BP |
2039 | * followed by Netlink attributes corresponding to 'vport'. */ |
2040 | static void | |
2041 | dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport, | |
2042 | struct ofpbuf *buf) | |
2043 | { | |
df2c07f4 | 2044 | struct ovs_header *ovs_header; |
f0fef760 | 2045 | |
df2c07f4 | 2046 | nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO, |
69685a88 | 2047 | vport->cmd, OVS_VPORT_VERSION); |
c19e6535 | 2048 | |
df2c07f4 JP |
2049 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
2050 | ovs_header->dp_ifindex = vport->dp_ifindex; | |
c19e6535 | 2051 | |
4e022ec0 AW |
2052 | if (vport->port_no != ODPP_NONE) { |
2053 | nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no); | |
c19e6535 BP |
2054 | } |
2055 | ||
df2c07f4 JP |
2056 | if (vport->type != OVS_VPORT_TYPE_UNSPEC) { |
2057 | nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type); | |
c19e6535 BP |
2058 | } |
2059 | ||
2060 | if (vport->name) { | |
df2c07f4 | 2061 | nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name); |
c19e6535 BP |
2062 | } |
2063 | ||
1579cf67 AW |
2064 | if (vport->upcall_pids) { |
2065 | nl_msg_put_unspec(buf, OVS_VPORT_ATTR_UPCALL_PID, | |
2066 | vport->upcall_pids, | |
2067 | vport->n_upcall_pids * sizeof *vport->upcall_pids); | |
a24a6574 | 2068 | } |
b063d9f0 | 2069 | |
c19e6535 | 2070 | if (vport->stats) { |
df2c07f4 | 2071 | nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS, |
c19e6535 BP |
2072 | vport->stats, sizeof *vport->stats); |
2073 | } | |
2074 | ||
c19e6535 | 2075 | if (vport->options) { |
df2c07f4 | 2076 | nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS, |
c19e6535 BP |
2077 | vport->options, vport->options_len); |
2078 | } | |
c19e6535 BP |
2079 | } |
2080 | ||
2081 | /* Clears 'vport' to "empty" values. */ | |
2082 | void | |
2083 | dpif_linux_vport_init(struct dpif_linux_vport *vport) | |
2084 | { | |
2085 | memset(vport, 0, sizeof *vport); | |
4e022ec0 | 2086 | vport->port_no = ODPP_NONE; |
c19e6535 BP |
2087 | } |
2088 | ||
2089 | /* Executes 'request' in the kernel datapath. If the command fails, returns a | |
2090 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
2091 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
df2c07f4 | 2092 | * result of the command is expected to be an ovs_vport also, which is decoded |
c19e6535 BP |
2093 | * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the |
2094 | * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
2095 | int | |
2096 | dpif_linux_vport_transact(const struct dpif_linux_vport *request, | |
2097 | struct dpif_linux_vport *reply, | |
2098 | struct ofpbuf **bufp) | |
2099 | { | |
f0fef760 | 2100 | struct ofpbuf *request_buf; |
c19e6535 BP |
2101 | int error; |
2102 | ||
cb22974d | 2103 | ovs_assert((reply != NULL) == (bufp != NULL)); |
c19e6535 | 2104 | |
42bb6c72 BP |
2105 | error = dpif_linux_init(); |
2106 | if (error) { | |
2107 | if (reply) { | |
2108 | *bufp = NULL; | |
2109 | dpif_linux_vport_init(reply); | |
2110 | } | |
2111 | return error; | |
2112 | } | |
2113 | ||
f0fef760 BP |
2114 | request_buf = ofpbuf_new(1024); |
2115 | dpif_linux_vport_to_ofpbuf(request, request_buf); | |
a88b4e04 | 2116 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
f0fef760 | 2117 | ofpbuf_delete(request_buf); |
c19e6535 | 2118 | |
f0fef760 BP |
2119 | if (reply) { |
2120 | if (!error) { | |
2121 | error = dpif_linux_vport_from_ofpbuf(reply, *bufp); | |
2122 | } | |
c19e6535 | 2123 | if (error) { |
f0fef760 BP |
2124 | dpif_linux_vport_init(reply); |
2125 | ofpbuf_delete(*bufp); | |
2126 | *bufp = NULL; | |
c19e6535 | 2127 | } |
c19e6535 BP |
2128 | } |
2129 | return error; | |
2130 | } | |
2131 | ||
2132 | /* Obtains information about the kernel vport named 'name' and stores it into | |
2133 | * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no | |
2134 | * longer needed ('reply' will contain pointers into '*bufp'). */ | |
2135 | int | |
2136 | dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply, | |
2137 | struct ofpbuf **bufp) | |
2138 | { | |
2139 | struct dpif_linux_vport request; | |
2140 | ||
2141 | dpif_linux_vport_init(&request); | |
df2c07f4 | 2142 | request.cmd = OVS_VPORT_CMD_GET; |
c19e6535 BP |
2143 | request.name = name; |
2144 | ||
2145 | return dpif_linux_vport_transact(&request, reply, bufp); | |
2146 | } | |
d6569377 | 2147 | \f |
df2c07f4 | 2148 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
aaff4b55 BP |
2149 | * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a |
2150 | * positive errno value. | |
d6569377 BP |
2151 | * |
2152 | * 'dp' will contain pointers into 'buf', so the caller should not free 'buf' | |
2153 | * while 'dp' is still in use. */ | |
2154 | static int | |
2155 | dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) | |
2156 | { | |
df2c07f4 JP |
2157 | static const struct nl_policy ovs_datapath_policy[] = { |
2158 | [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, | |
f7df9823 | 2159 | [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats), |
d6569377 | 2160 | .optional = true }, |
847108dc AZ |
2161 | [OVS_DP_ATTR_MEGAFLOW_STATS] = { |
2162 | NL_POLICY_FOR(struct ovs_dp_megaflow_stats), | |
2163 | .optional = true }, | |
d6569377 BP |
2164 | }; |
2165 | ||
df2c07f4 JP |
2166 | struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)]; |
2167 | struct ovs_header *ovs_header; | |
aaff4b55 BP |
2168 | struct nlmsghdr *nlmsg; |
2169 | struct genlmsghdr *genl; | |
2170 | struct ofpbuf b; | |
d6569377 BP |
2171 | |
2172 | dpif_linux_dp_init(dp); | |
2173 | ||
1f317cb5 | 2174 | ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf)); |
aaff4b55 BP |
2175 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); |
2176 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
2177 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
2178 | if (!nlmsg || !genl || !ovs_header | |
2179 | || nlmsg->nlmsg_type != ovs_datapath_family | |
2180 | || !nl_policy_parse(&b, 0, ovs_datapath_policy, a, | |
2181 | ARRAY_SIZE(ovs_datapath_policy))) { | |
d6569377 BP |
2182 | return EINVAL; |
2183 | } | |
d6569377 | 2184 | |
aaff4b55 | 2185 | dp->cmd = genl->cmd; |
df2c07f4 JP |
2186 | dp->dp_ifindex = ovs_header->dp_ifindex; |
2187 | dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]); | |
2188 | if (a[OVS_DP_ATTR_STATS]) { | |
d6569377 BP |
2189 | /* Can't use structure assignment because Netlink doesn't ensure |
2190 | * sufficient alignment for 64-bit members. */ | |
df2c07f4 | 2191 | memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]), |
d6569377 BP |
2192 | sizeof dp->stats); |
2193 | } | |
982b8810 | 2194 | |
847108dc AZ |
2195 | if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) { |
2196 | /* Can't use structure assignment because Netlink doesn't ensure | |
2197 | * sufficient alignment for 64-bit members. */ | |
2198 | memcpy(&dp->megaflow_stats, nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]), | |
2199 | sizeof dp->megaflow_stats); | |
2200 | } | |
2201 | ||
d6569377 BP |
2202 | return 0; |
2203 | } | |
2204 | ||
aaff4b55 | 2205 | /* Appends to 'buf' the Generic Netlink message described by 'dp'. */ |
d6569377 BP |
2206 | static void |
2207 | dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) | |
2208 | { | |
df2c07f4 | 2209 | struct ovs_header *ovs_header; |
d6569377 | 2210 | |
df2c07f4 | 2211 | nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family, |
69685a88 JG |
2212 | NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, |
2213 | OVS_DATAPATH_VERSION); | |
aaff4b55 | 2214 | |
df2c07f4 JP |
2215 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
2216 | ovs_header->dp_ifindex = dp->dp_ifindex; | |
d6569377 BP |
2217 | |
2218 | if (dp->name) { | |
df2c07f4 | 2219 | nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name); |
d6569377 BP |
2220 | } |
2221 | ||
a24a6574 BP |
2222 | if (dp->upcall_pid) { |
2223 | nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid); | |
2224 | } | |
b063d9f0 | 2225 | |
b7fd5e38 TG |
2226 | if (dp->user_features) { |
2227 | nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features); | |
2228 | } | |
2229 | ||
df2c07f4 | 2230 | /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */ |
d6569377 BP |
2231 | } |
2232 | ||
2233 | /* Clears 'dp' to "empty" values. */ | |
d3d8f1f7 | 2234 | static void |
d6569377 BP |
2235 | dpif_linux_dp_init(struct dpif_linux_dp *dp) |
2236 | { | |
2237 | memset(dp, 0, sizeof *dp); | |
847108dc AZ |
2238 | dp->megaflow_stats.n_masks = UINT32_MAX; |
2239 | dp->megaflow_stats.n_mask_hit = UINT64_MAX; | |
d6569377 BP |
2240 | } |
2241 | ||
aaff4b55 BP |
2242 | static void |
2243 | dpif_linux_dp_dump_start(struct nl_dump *dump) | |
2244 | { | |
2245 | struct dpif_linux_dp request; | |
2246 | struct ofpbuf *buf; | |
2247 | ||
2248 | dpif_linux_dp_init(&request); | |
df2c07f4 | 2249 | request.cmd = OVS_DP_CMD_GET; |
aaff4b55 BP |
2250 | |
2251 | buf = ofpbuf_new(1024); | |
2252 | dpif_linux_dp_to_ofpbuf(&request, buf); | |
a88b4e04 | 2253 | nl_dump_start(dump, NETLINK_GENERIC, buf); |
aaff4b55 BP |
2254 | ofpbuf_delete(buf); |
2255 | } | |
2256 | ||
d6569377 BP |
2257 | /* Executes 'request' in the kernel datapath. If the command fails, returns a |
2258 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
2259 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
aaff4b55 BP |
2260 | * result of the command is expected to be of the same form, which is decoded |
2261 | * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the | |
2262 | * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 2263 | static int |
d6569377 BP |
2264 | dpif_linux_dp_transact(const struct dpif_linux_dp *request, |
2265 | struct dpif_linux_dp *reply, struct ofpbuf **bufp) | |
2266 | { | |
aaff4b55 | 2267 | struct ofpbuf *request_buf; |
d6569377 | 2268 | int error; |
d6569377 | 2269 | |
cb22974d | 2270 | ovs_assert((reply != NULL) == (bufp != NULL)); |
d6569377 | 2271 | |
aaff4b55 BP |
2272 | request_buf = ofpbuf_new(1024); |
2273 | dpif_linux_dp_to_ofpbuf(request, request_buf); | |
a88b4e04 | 2274 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
aaff4b55 | 2275 | ofpbuf_delete(request_buf); |
d6569377 | 2276 | |
aaff4b55 | 2277 | if (reply) { |
847108dc | 2278 | dpif_linux_dp_init(reply); |
aaff4b55 BP |
2279 | if (!error) { |
2280 | error = dpif_linux_dp_from_ofpbuf(reply, *bufp); | |
2281 | } | |
d6569377 | 2282 | if (error) { |
aaff4b55 BP |
2283 | ofpbuf_delete(*bufp); |
2284 | *bufp = NULL; | |
d6569377 | 2285 | } |
d6569377 BP |
2286 | } |
2287 | return error; | |
2288 | } | |
2289 | ||
2290 | /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'. | |
2291 | * The caller must free '*bufp' when the reply is no longer needed ('reply' | |
2292 | * will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 2293 | static int |
d6569377 BP |
2294 | dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, |
2295 | struct ofpbuf **bufp) | |
2296 | { | |
2297 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
2298 | struct dpif_linux_dp request; | |
2299 | ||
2300 | dpif_linux_dp_init(&request); | |
df2c07f4 | 2301 | request.cmd = OVS_DP_CMD_GET; |
254f2dc8 | 2302 | request.dp_ifindex = dpif->dp_ifindex; |
d6569377 BP |
2303 | |
2304 | return dpif_linux_dp_transact(&request, reply, bufp); | |
2305 | } | |
2306 | \f | |
df2c07f4 | 2307 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
37a1300c | 2308 | * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a |
d6569377 BP |
2309 | * positive errno value. |
2310 | * | |
2311 | * 'flow' will contain pointers into 'buf', so the caller should not free 'buf' | |
2312 | * while 'flow' is still in use. */ | |
2313 | static int | |
2314 | dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, | |
2315 | const struct ofpbuf *buf) | |
2316 | { | |
df2c07f4 JP |
2317 | static const struct nl_policy ovs_flow_policy[] = { |
2318 | [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED }, | |
e6cc0bab | 2319 | [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true }, |
df2c07f4 | 2320 | [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true }, |
f7df9823 | 2321 | [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats), |
d6569377 | 2322 | .optional = true }, |
df2c07f4 JP |
2323 | [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true }, |
2324 | [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true }, | |
2325 | /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */ | |
d6569377 BP |
2326 | }; |
2327 | ||
df2c07f4 JP |
2328 | struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)]; |
2329 | struct ovs_header *ovs_header; | |
37a1300c BP |
2330 | struct nlmsghdr *nlmsg; |
2331 | struct genlmsghdr *genl; | |
2332 | struct ofpbuf b; | |
d6569377 BP |
2333 | |
2334 | dpif_linux_flow_init(flow); | |
2335 | ||
1f317cb5 | 2336 | ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf)); |
37a1300c BP |
2337 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); |
2338 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
2339 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
2340 | if (!nlmsg || !genl || !ovs_header | |
2341 | || nlmsg->nlmsg_type != ovs_flow_family | |
2342 | || !nl_policy_parse(&b, 0, ovs_flow_policy, a, | |
2343 | ARRAY_SIZE(ovs_flow_policy))) { | |
d6569377 BP |
2344 | return EINVAL; |
2345 | } | |
d6569377 | 2346 | |
37a1300c | 2347 | flow->nlmsg_flags = nlmsg->nlmsg_flags; |
df2c07f4 JP |
2348 | flow->dp_ifindex = ovs_header->dp_ifindex; |
2349 | flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]); | |
2350 | flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]); | |
e6cc0bab AZ |
2351 | |
2352 | if (a[OVS_FLOW_ATTR_MASK]) { | |
2353 | flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]); | |
2354 | flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]); | |
2355 | } | |
df2c07f4 JP |
2356 | if (a[OVS_FLOW_ATTR_ACTIONS]) { |
2357 | flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]); | |
2358 | flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]); | |
d6569377 | 2359 | } |
df2c07f4 JP |
2360 | if (a[OVS_FLOW_ATTR_STATS]) { |
2361 | flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]); | |
d6569377 | 2362 | } |
df2c07f4 JP |
2363 | if (a[OVS_FLOW_ATTR_TCP_FLAGS]) { |
2364 | flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]); | |
d6569377 | 2365 | } |
df2c07f4 JP |
2366 | if (a[OVS_FLOW_ATTR_USED]) { |
2367 | flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]); | |
9e980142 | 2368 | } |
d6569377 BP |
2369 | return 0; |
2370 | } | |
2371 | ||
df2c07f4 | 2372 | /* Appends to 'buf' (which must initially be empty) a "struct ovs_header" |
d6569377 BP |
2373 | * followed by Netlink attributes corresponding to 'flow'. */ |
2374 | static void | |
2375 | dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, | |
2376 | struct ofpbuf *buf) | |
2377 | { | |
df2c07f4 | 2378 | struct ovs_header *ovs_header; |
d6569377 | 2379 | |
df2c07f4 | 2380 | nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family, |
30b44744 | 2381 | NLM_F_REQUEST | flow->nlmsg_flags, |
69685a88 | 2382 | flow->cmd, OVS_FLOW_VERSION); |
37a1300c | 2383 | |
df2c07f4 JP |
2384 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
2385 | ovs_header->dp_ifindex = flow->dp_ifindex; | |
d6569377 BP |
2386 | |
2387 | if (flow->key_len) { | |
df2c07f4 | 2388 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY, flow->key, flow->key_len); |
d6569377 BP |
2389 | } |
2390 | ||
e6cc0bab AZ |
2391 | if (flow->mask_len) { |
2392 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK, flow->mask, flow->mask_len); | |
2393 | } | |
2394 | ||
d2a23af2 | 2395 | if (flow->actions || flow->actions_len) { |
df2c07f4 | 2396 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS, |
d6569377 BP |
2397 | flow->actions, flow->actions_len); |
2398 | } | |
2399 | ||
2400 | /* We never need to send these to the kernel. */ | |
cb22974d BP |
2401 | ovs_assert(!flow->stats); |
2402 | ovs_assert(!flow->tcp_flags); | |
2403 | ovs_assert(!flow->used); | |
d6569377 BP |
2404 | |
2405 | if (flow->clear) { | |
df2c07f4 | 2406 | nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR); |
d6569377 | 2407 | } |
d6569377 BP |
2408 | } |
2409 | ||
2410 | /* Clears 'flow' to "empty" values. */ | |
d3d8f1f7 | 2411 | static void |
d6569377 BP |
2412 | dpif_linux_flow_init(struct dpif_linux_flow *flow) |
2413 | { | |
2414 | memset(flow, 0, sizeof *flow); | |
2415 | } | |
2416 | ||
2417 | /* Executes 'request' in the kernel datapath. If the command fails, returns a | |
2418 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
2419 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
37a1300c BP |
2420 | * result of the command is expected to be a flow also, which is decoded and |
2421 | * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply | |
2422 | * is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 2423 | static int |
30b44744 | 2424 | dpif_linux_flow_transact(struct dpif_linux_flow *request, |
d6569377 BP |
2425 | struct dpif_linux_flow *reply, struct ofpbuf **bufp) |
2426 | { | |
37a1300c | 2427 | struct ofpbuf *request_buf; |
d6569377 | 2428 | int error; |
d6569377 | 2429 | |
cb22974d | 2430 | ovs_assert((reply != NULL) == (bufp != NULL)); |
d6569377 | 2431 | |
30b44744 BP |
2432 | if (reply) { |
2433 | request->nlmsg_flags |= NLM_F_ECHO; | |
2434 | } | |
2435 | ||
37a1300c BP |
2436 | request_buf = ofpbuf_new(1024); |
2437 | dpif_linux_flow_to_ofpbuf(request, request_buf); | |
a88b4e04 | 2438 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
37a1300c | 2439 | ofpbuf_delete(request_buf); |
d6569377 | 2440 | |
37a1300c BP |
2441 | if (reply) { |
2442 | if (!error) { | |
2443 | error = dpif_linux_flow_from_ofpbuf(reply, *bufp); | |
2444 | } | |
d6569377 | 2445 | if (error) { |
37a1300c BP |
2446 | dpif_linux_flow_init(reply); |
2447 | ofpbuf_delete(*bufp); | |
2448 | *bufp = NULL; | |
d6569377 | 2449 | } |
d6569377 BP |
2450 | } |
2451 | return error; | |
2452 | } | |
2453 | ||
2454 | static void | |
2455 | dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow, | |
2456 | struct dpif_flow_stats *stats) | |
2457 | { | |
2458 | if (flow->stats) { | |
2459 | stats->n_packets = get_unaligned_u64(&flow->stats->n_packets); | |
2460 | stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes); | |
2461 | } else { | |
2462 | stats->n_packets = 0; | |
2463 | stats->n_bytes = 0; | |
2464 | } | |
0e70cdcb | 2465 | stats->used = flow->used ? get_32aligned_u64(flow->used) : 0; |
d6569377 BP |
2466 | stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0; |
2467 | } | |
14b4d2f9 | 2468 | \f |
14b4d2f9 BP |
2469 | /* Logs information about a packet that was recently lost in 'ch' (in |
2470 | * 'dpif_'). */ | |
2471 | static void | |
9b00386b | 2472 | report_loss(struct dpif_linux *dpif, struct dpif_channel *ch, uint32_t ch_idx, |
1579cf67 | 2473 | uint32_t handler_id) |
14b4d2f9 | 2474 | { |
14b4d2f9 | 2475 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); |
14b4d2f9 BP |
2476 | struct ds s; |
2477 | ||
8d675c5a | 2478 | if (VLOG_DROP_WARN(&rl)) { |
14b4d2f9 BP |
2479 | return; |
2480 | } | |
2481 | ||
2482 | ds_init(&s); | |
2483 | if (ch->last_poll != LLONG_MIN) { | |
2484 | ds_put_format(&s, " (last polled %lld ms ago)", | |
2485 | time_msec() - ch->last_poll); | |
2486 | } | |
14b4d2f9 | 2487 | |
1579cf67 | 2488 | VLOG_WARN("%s: lost packet on port channel %u of handler %u", |
9b00386b | 2489 | dpif_name(&dpif->dpif), ch_idx, handler_id); |
14b4d2f9 BP |
2490 | ds_destroy(&s); |
2491 | } |