]>
Commit | Line | Data |
---|---|---|
96fba48f | 1 | /* |
de281153 | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. |
96fba48f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
9fe3b9a2 BP |
18 | |
19 | #include "dpif-linux.h" | |
96fba48f | 20 | |
96fba48f BP |
21 | #include <ctype.h> |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
b90fa799 | 26 | #include <linux/types.h> |
aae51f53 | 27 | #include <linux/pkt_sched.h> |
e9e28be3 | 28 | #include <linux/rtnetlink.h> |
96fba48f | 29 | #include <linux/sockios.h> |
8522ba09 | 30 | #include <poll.h> |
96fba48f | 31 | #include <stdlib.h> |
8522ba09 | 32 | #include <strings.h> |
50f80534 | 33 | #include <sys/epoll.h> |
10dcf8de | 34 | #include <sys/stat.h> |
96fba48f BP |
35 | #include <unistd.h> |
36 | ||
773cd538 | 37 | #include "bitmap.h" |
96fba48f | 38 | #include "dpif-provider.h" |
80e5eed9 | 39 | #include "dynamic-string.h" |
eb8b28e7 | 40 | #include "flow.h" |
3abc4a1a | 41 | #include "netdev.h" |
032aa6a3 | 42 | #include "netdev-linux.h" |
c3827f61 | 43 | #include "netdev-vport.h" |
45c8d3a1 | 44 | #include "netlink-notifier.h" |
982b8810 | 45 | #include "netlink-socket.h" |
856081f6 | 46 | #include "netlink.h" |
feebdea2 | 47 | #include "odp-util.h" |
96fba48f | 48 | #include "ofpbuf.h" |
856081f6 | 49 | #include "packets.h" |
96fba48f | 50 | #include "poll-loop.h" |
17411ecf | 51 | #include "random.h" |
54825e09 | 52 | #include "shash.h" |
b3c01ed3 | 53 | #include "sset.h" |
14b4d2f9 | 54 | #include "timeval.h" |
d6569377 | 55 | #include "unaligned.h" |
96fba48f | 56 | #include "util.h" |
96fba48f | 57 | #include "vlog.h" |
5136ce49 | 58 | |
/* Declares the vlog module that the VLOG_*() calls in this file log to. */
VLOG_DEFINE_THIS_MODULE(dpif_linux);

/* Upper bound on datapath port numbers: add_channel() rejects anything
 * larger and dpif_linux_get_max_ports() reports this value. */
enum { MAX_PORTS = USHRT_MAX };

/* Old headers may not provide this ethtool flag, which first appeared in
 * Linux 2.6.24, so it is defined here. */
#define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
65 | ||
d6569377 | 66 | struct dpif_linux_dp { |
aaff4b55 BP |
67 | /* Generic Netlink header. */ |
68 | uint8_t cmd; | |
d6569377 | 69 | |
df2c07f4 | 70 | /* struct ovs_header. */ |
254f2dc8 | 71 | int dp_ifindex; |
d6569377 BP |
72 | |
73 | /* Attributes. */ | |
df2c07f4 | 74 | const char *name; /* OVS_DP_ATTR_NAME. */ |
fcd5d230 | 75 | const uint32_t *upcall_pid; /* OVS_DP_ATTR_UPCALL_PID. */ |
df2c07f4 | 76 | struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */ |
847108dc AZ |
77 | struct ovs_dp_megaflow_stats megaflow_stats; |
78 | /* OVS_DP_ATTR_MEGAFLOW_STATS.*/ | |
d6569377 BP |
79 | }; |
80 | ||
/* Helpers for struct dpif_linux_dp, defined later in this file. */
static void dpif_linux_dp_init(struct dpif_linux_dp *dp);
static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp,
                                     const struct ofpbuf *buf);
static void dpif_linux_dp_dump_start(struct nl_dump *dump);
static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
                                  struct dpif_linux_dp *reply,
                                  struct ofpbuf **bufp);
static int dpif_linux_dp_get(const struct dpif *dpif,
                             struct dpif_linux_dp *reply,
                             struct ofpbuf **bufp);
90 | ||
91 | struct dpif_linux_flow { | |
37a1300c BP |
92 | /* Generic Netlink header. */ |
93 | uint8_t cmd; | |
d6569377 | 94 | |
df2c07f4 | 95 | /* struct ovs_header. */ |
d6569377 | 96 | unsigned int nlmsg_flags; |
254f2dc8 | 97 | int dp_ifindex; |
d6569377 BP |
98 | |
99 | /* Attributes. | |
100 | * | |
0e70cdcb BP |
101 | * The 'stats' member points to 64-bit data that might only be aligned on |
102 | * 32-bit boundaries, so get_unaligned_u64() should be used to access its | |
103 | * values. | |
d2a23af2 | 104 | * |
df2c07f4 | 105 | * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in |
d2a23af2 | 106 | * the Netlink version of the command, even if actions_len is zero. */ |
df2c07f4 | 107 | const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */ |
d6569377 | 108 | size_t key_len; |
e6cc0bab AZ |
109 | const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */ |
110 | size_t mask_len; | |
df2c07f4 | 111 | const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */ |
d6569377 | 112 | size_t actions_len; |
df2c07f4 JP |
113 | const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */ |
114 | const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */ | |
0e70cdcb | 115 | const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */ |
df2c07f4 | 116 | bool clear; /* OVS_FLOW_ATTR_CLEAR. */ |
d6569377 BP |
117 | }; |
118 | ||
/* Helpers for struct dpif_linux_flow, defined later in this file. */
static void dpif_linux_flow_init(struct dpif_linux_flow *flow);
static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
                                       const struct ofpbuf *buf);
static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
                                      struct ofpbuf *buf);
static int dpif_linux_flow_transact(struct dpif_linux_flow *request,
                                    struct dpif_linux_flow *reply,
                                    struct ofpbuf **bufp);
static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
                                      struct dpif_flow_stats *stats);
129 | ||
/* A single dpif channel between the kernel and userspace.  Channels live in
 * the 'channels' array of struct dpif_linux, indexed by port number. */
struct dpif_channel {
    struct nl_sock *sock;       /* Netlink socket, or NULL if unused. */
    long long int last_poll;    /* Last time this channel was polled. */
};

static void report_loss(struct dpif *dpif, struct dpif_channel *ch);
137 | ||
96fba48f BP |
138 | /* Datapath interface for the openvswitch Linux kernel module. */ |
139 | struct dpif_linux { | |
140 | struct dpif dpif; | |
254f2dc8 | 141 | int dp_ifindex; |
e9e28be3 | 142 | |
b063d9f0 | 143 | /* Upcall messages. */ |
97be1538 | 144 | struct ovs_mutex upcall_lock; |
989fd548 JP |
145 | int uc_array_size; /* Size of 'channels' and 'epoll_events'. */ |
146 | struct dpif_channel *channels; | |
147 | struct epoll_event *epoll_events; | |
fe3d61b3 | 148 | int epoll_fd; /* epoll fd that includes channel socks. */ |
989fd548 JP |
149 | int n_events; /* Num events returned by epoll_wait(). */ |
150 | int event_offset; /* Offset into 'epoll_events'. */ | |
982b8810 | 151 | |
e9e28be3 | 152 | /* Change notification. */ |
e4516b20 | 153 | struct nl_sock *port_notifier; /* vport multicast group subscriber. */ |
96fba48f BP |
154 | }; |
155 | ||
156 | static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); | |
157 | ||
e4516b20 BP |
158 | /* Generic Netlink family numbers for OVS. |
159 | * | |
160 | * Initialized by dpif_linux_init(). */ | |
df2c07f4 JP |
161 | static int ovs_datapath_family; |
162 | static int ovs_vport_family; | |
163 | static int ovs_flow_family; | |
164 | static int ovs_packet_family; | |
982b8810 | 165 | |
e4516b20 BP |
166 | /* Generic Netlink multicast groups for OVS. |
167 | * | |
168 | * Initialized by dpif_linux_init(). */ | |
169 | static unsigned int ovs_vport_mcgroup; | |
982b8810 BP |
170 | |
171 | static int dpif_linux_init(void); | |
e4516b20 | 172 | static int open_dpif(const struct dpif_linux_dp *, struct dpif **); |
4e022ec0 AW |
173 | static uint32_t dpif_linux_port_get_pid(const struct dpif *, |
174 | odp_port_t port_no); | |
96fba48f | 175 | |
f0fef760 BP |
176 | static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, |
177 | struct ofpbuf *); | |
178 | static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, | |
179 | const struct ofpbuf *); | |
180 | ||
96fba48f BP |
181 | static struct dpif_linux * |
182 | dpif_linux_cast(const struct dpif *dpif) | |
183 | { | |
184 | dpif_assert_class(dpif, &dpif_linux_class); | |
185 | return CONTAINER_OF(dpif, struct dpif_linux, dpif); | |
186 | } | |
187 | ||
d3d22744 | 188 | static int |
d0c23a1a | 189 | dpif_linux_enumerate(struct sset *all_dps) |
d3d22744 | 190 | { |
aaff4b55 BP |
191 | struct nl_dump dump; |
192 | struct ofpbuf msg; | |
aaff4b55 | 193 | int error; |
982b8810 | 194 | |
aaff4b55 BP |
195 | error = dpif_linux_init(); |
196 | if (error) { | |
197 | return error; | |
982b8810 | 198 | } |
d3d22744 | 199 | |
aaff4b55 BP |
200 | dpif_linux_dp_dump_start(&dump); |
201 | while (nl_dump_next(&dump, &msg)) { | |
202 | struct dpif_linux_dp dp; | |
d6569377 | 203 | |
aaff4b55 | 204 | if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) { |
d0c23a1a | 205 | sset_add(all_dps, dp.name); |
d3d22744 BP |
206 | } |
207 | } | |
aaff4b55 | 208 | return nl_dump_done(&dump); |
d3d22744 BP |
209 | } |
210 | ||
96fba48f | 211 | static int |
4a387741 BP |
212 | dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name, |
213 | bool create, struct dpif **dpifp) | |
96fba48f | 214 | { |
982b8810 | 215 | struct dpif_linux_dp dp_request, dp; |
c19e6535 | 216 | struct ofpbuf *buf; |
ea36840f | 217 | uint32_t upcall_pid; |
c19e6535 | 218 | int error; |
96fba48f | 219 | |
982b8810 BP |
220 | error = dpif_linux_init(); |
221 | if (error) { | |
222 | return error; | |
223 | } | |
224 | ||
982b8810 BP |
225 | /* Create or look up datapath. */ |
226 | dpif_linux_dp_init(&dp_request); | |
ea36840f BP |
227 | if (create) { |
228 | dp_request.cmd = OVS_DP_CMD_NEW; | |
229 | upcall_pid = 0; | |
230 | dp_request.upcall_pid = &upcall_pid; | |
231 | } else { | |
232 | dp_request.cmd = OVS_DP_CMD_GET; | |
233 | } | |
254f2dc8 | 234 | dp_request.name = name; |
982b8810 BP |
235 | error = dpif_linux_dp_transact(&dp_request, &dp, &buf); |
236 | if (error) { | |
237 | return error; | |
c19e6535 | 238 | } |
254f2dc8 | 239 | |
e4516b20 | 240 | error = open_dpif(&dp, dpifp); |
8f4a4df5 | 241 | ofpbuf_delete(buf); |
e4516b20 | 242 | return error; |
c19e6535 BP |
243 | } |
244 | ||
e4516b20 | 245 | static int |
254f2dc8 | 246 | open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp) |
c19e6535 | 247 | { |
c19e6535 | 248 | struct dpif_linux *dpif; |
c19e6535 | 249 | |
17411ecf | 250 | dpif = xzalloc(sizeof *dpif); |
e4516b20 | 251 | dpif->port_notifier = NULL; |
834d6caf | 252 | ovs_mutex_init(&dpif->upcall_lock); |
50f80534 | 253 | dpif->epoll_fd = -1; |
c19e6535 | 254 | |
254f2dc8 BP |
255 | dpif_init(&dpif->dpif, &dpif_linux_class, dp->name, |
256 | dp->dp_ifindex, dp->dp_ifindex); | |
c19e6535 | 257 | |
254f2dc8 | 258 | dpif->dp_ifindex = dp->dp_ifindex; |
c19e6535 | 259 | *dpifp = &dpif->dpif; |
e4516b20 BP |
260 | |
261 | return 0; | |
96fba48f BP |
262 | } |
263 | ||
17411ecf | 264 | static void |
fe3d61b3 | 265 | destroy_channels(struct dpif_linux *dpif) |
17411ecf | 266 | { |
4e022ec0 | 267 | unsigned int i; |
17411ecf | 268 | |
989fd548 JP |
269 | if (dpif->epoll_fd < 0) { |
270 | return; | |
50f80534 | 271 | } |
989fd548 JP |
272 | |
273 | for (i = 0; i < dpif->uc_array_size; i++ ) { | |
274 | struct dpif_linux_vport vport_request; | |
275 | struct dpif_channel *ch = &dpif->channels[i]; | |
276 | uint32_t upcall_pid = 0; | |
277 | ||
278 | if (!ch->sock) { | |
279 | continue; | |
280 | } | |
281 | ||
9fafa796 BP |
282 | epoll_ctl(dpif->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(ch->sock), NULL); |
283 | ||
989fd548 JP |
284 | /* Turn off upcalls. */ |
285 | dpif_linux_vport_init(&vport_request); | |
286 | vport_request.cmd = OVS_VPORT_CMD_SET; | |
287 | vport_request.dp_ifindex = dpif->dp_ifindex; | |
4e022ec0 | 288 | vport_request.port_no = u32_to_odp(i); |
989fd548 JP |
289 | vport_request.upcall_pid = &upcall_pid; |
290 | dpif_linux_vport_transact(&vport_request, NULL, NULL); | |
291 | ||
fe3d61b3 | 292 | nl_sock_destroy(ch->sock); |
17411ecf | 293 | } |
989fd548 JP |
294 | |
295 | free(dpif->channels); | |
296 | dpif->channels = NULL; | |
297 | dpif->uc_array_size = 0; | |
298 | ||
299 | free(dpif->epoll_events); | |
300 | dpif->epoll_events = NULL; | |
301 | dpif->n_events = dpif->event_offset = 0; | |
302 | ||
9fafa796 BP |
303 | /* Don't close dpif->epoll_fd since that would cause other threads that |
304 | * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd. */ | |
989fd548 JP |
305 | } |
306 | ||
307 | static int | |
4e022ec0 | 308 | add_channel(struct dpif_linux *dpif, odp_port_t port_no, struct nl_sock *sock) |
989fd548 JP |
309 | { |
310 | struct epoll_event event; | |
4e022ec0 | 311 | uint32_t port_idx = odp_to_u32(port_no); |
989fd548 JP |
312 | |
313 | if (dpif->epoll_fd < 0) { | |
314 | return 0; | |
315 | } | |
316 | ||
317 | /* We assume that the datapath densely chooses port numbers, which | |
318 | * can therefore be used as an index into an array of channels. */ | |
4e022ec0 AW |
319 | if (port_idx >= dpif->uc_array_size) { |
320 | uint32_t new_size = port_idx + 1; | |
321 | uint32_t i; | |
989fd548 | 322 | |
12d76859 | 323 | if (new_size > MAX_PORTS) { |
989fd548 JP |
324 | VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big", |
325 | dpif_name(&dpif->dpif), port_no); | |
326 | return EFBIG; | |
327 | } | |
328 | ||
329 | dpif->channels = xrealloc(dpif->channels, | |
330 | new_size * sizeof *dpif->channels); | |
331 | for (i = dpif->uc_array_size; i < new_size; i++) { | |
332 | dpif->channels[i].sock = NULL; | |
333 | } | |
334 | ||
335 | dpif->epoll_events = xrealloc(dpif->epoll_events, | |
336 | new_size * sizeof *dpif->epoll_events); | |
337 | dpif->uc_array_size = new_size; | |
338 | } | |
339 | ||
340 | memset(&event, 0, sizeof event); | |
341 | event.events = EPOLLIN; | |
4e022ec0 | 342 | event.data.u32 = port_idx; |
989fd548 JP |
343 | if (epoll_ctl(dpif->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock), |
344 | &event) < 0) { | |
345 | return errno; | |
346 | } | |
347 | ||
4e022ec0 AW |
348 | nl_sock_destroy(dpif->channels[port_idx].sock); |
349 | dpif->channels[port_idx].sock = sock; | |
350 | dpif->channels[port_idx].last_poll = LLONG_MIN; | |
989fd548 JP |
351 | |
352 | return 0; | |
353 | } | |
354 | ||
355 | static void | |
4e022ec0 | 356 | del_channel(struct dpif_linux *dpif, odp_port_t port_no) |
989fd548 JP |
357 | { |
358 | struct dpif_channel *ch; | |
4e022ec0 | 359 | uint32_t port_idx = odp_to_u32(port_no); |
989fd548 | 360 | |
4e022ec0 | 361 | if (dpif->epoll_fd < 0 || port_idx >= dpif->uc_array_size) { |
989fd548 JP |
362 | return; |
363 | } | |
364 | ||
4e022ec0 | 365 | ch = &dpif->channels[port_idx]; |
989fd548 JP |
366 | if (!ch->sock) { |
367 | return; | |
368 | } | |
369 | ||
370 | epoll_ctl(dpif->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(ch->sock), NULL); | |
fa717215 | 371 | dpif->event_offset = dpif->n_events = 0; |
989fd548 JP |
372 | |
373 | nl_sock_destroy(ch->sock); | |
374 | ch->sock = NULL; | |
17411ecf JG |
375 | } |
376 | ||
96fba48f BP |
377 | static void |
378 | dpif_linux_close(struct dpif *dpif_) | |
379 | { | |
380 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
c7178a0b | 381 | |
e4516b20 | 382 | nl_sock_destroy(dpif->port_notifier); |
fe3d61b3 | 383 | destroy_channels(dpif); |
9fafa796 BP |
384 | if (dpif->epoll_fd >= 0) { |
385 | close(dpif->epoll_fd); | |
386 | } | |
97be1538 | 387 | ovs_mutex_destroy(&dpif->upcall_lock); |
96fba48f BP |
388 | free(dpif); |
389 | } | |
390 | ||
391 | static int | |
7dab847a | 392 | dpif_linux_destroy(struct dpif *dpif_) |
96fba48f | 393 | { |
d6569377 BP |
394 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
395 | struct dpif_linux_dp dp; | |
396 | ||
397 | dpif_linux_dp_init(&dp); | |
df2c07f4 | 398 | dp.cmd = OVS_DP_CMD_DEL; |
254f2dc8 | 399 | dp.dp_ifindex = dpif->dp_ifindex; |
d6569377 | 400 | return dpif_linux_dp_transact(&dp, NULL, NULL); |
96fba48f BP |
401 | } |
402 | ||
403 | static int | |
a8d9304d | 404 | dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) |
96fba48f | 405 | { |
d6569377 BP |
406 | struct dpif_linux_dp dp; |
407 | struct ofpbuf *buf; | |
408 | int error; | |
409 | ||
410 | error = dpif_linux_dp_get(dpif_, &dp, &buf); | |
411 | if (!error) { | |
a8d9304d BP |
412 | stats->n_hit = dp.stats.n_hit; |
413 | stats->n_missed = dp.stats.n_missed; | |
414 | stats->n_lost = dp.stats.n_lost; | |
415 | stats->n_flows = dp.stats.n_flows; | |
847108dc AZ |
416 | stats->n_masks = dp.megaflow_stats.n_masks; |
417 | stats->n_mask_hit = dp.megaflow_stats.n_mask_hit; | |
d6569377 BP |
418 | ofpbuf_delete(buf); |
419 | } | |
420 | return error; | |
96fba48f BP |
421 | } |
422 | ||
b9ad7294 EJ |
423 | static const char * |
424 | get_vport_type(const struct dpif_linux_vport *vport) | |
425 | { | |
426 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); | |
427 | ||
428 | switch (vport->type) { | |
429 | case OVS_VPORT_TYPE_NETDEV: | |
430 | return "system"; | |
431 | ||
432 | case OVS_VPORT_TYPE_INTERNAL: | |
433 | return "internal"; | |
434 | ||
435 | case OVS_VPORT_TYPE_GRE: | |
436 | return "gre"; | |
437 | ||
438 | case OVS_VPORT_TYPE_GRE64: | |
439 | return "gre64"; | |
440 | ||
b9ad7294 EJ |
441 | case OVS_VPORT_TYPE_VXLAN: |
442 | return "vxlan"; | |
443 | ||
a6ae068b LJ |
444 | case OVS_VPORT_TYPE_LISP: |
445 | return "lisp"; | |
446 | ||
b9ad7294 EJ |
447 | case OVS_VPORT_TYPE_UNSPEC: |
448 | case __OVS_VPORT_TYPE_MAX: | |
449 | break; | |
450 | } | |
451 | ||
452 | VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u", | |
453 | vport->dp_ifindex, vport->name, (unsigned int) vport->type); | |
454 | return "unknown"; | |
455 | } | |
456 | ||
c060c4cf EJ |
457 | static enum ovs_vport_type |
458 | netdev_to_ovs_vport_type(const struct netdev *netdev) | |
459 | { | |
460 | const char *type = netdev_get_type(netdev); | |
461 | ||
462 | if (!strcmp(type, "tap") || !strcmp(type, "system")) { | |
463 | return OVS_VPORT_TYPE_NETDEV; | |
464 | } else if (!strcmp(type, "internal")) { | |
465 | return OVS_VPORT_TYPE_INTERNAL; | |
466 | } else if (strstr(type, "gre64")) { | |
467 | return OVS_VPORT_TYPE_GRE64; | |
468 | } else if (strstr(type, "gre")) { | |
469 | return OVS_VPORT_TYPE_GRE; | |
c060c4cf EJ |
470 | } else if (!strcmp(type, "vxlan")) { |
471 | return OVS_VPORT_TYPE_VXLAN; | |
a6ae068b LJ |
472 | } else if (!strcmp(type, "lisp")) { |
473 | return OVS_VPORT_TYPE_LISP; | |
c060c4cf EJ |
474 | } else { |
475 | return OVS_VPORT_TYPE_UNSPEC; | |
476 | } | |
477 | } | |
478 | ||
96fba48f | 479 | static int |
9fafa796 BP |
480 | dpif_linux_port_add__(struct dpif *dpif_, struct netdev *netdev, |
481 | odp_port_t *port_nop) | |
96fba48f | 482 | { |
c19e6535 | 483 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
26508d9a | 484 | const struct netdev_tunnel_config *tnl_cfg; |
3aa30359 BP |
485 | char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; |
486 | const char *name = netdev_vport_get_dpif_port(netdev, | |
487 | namebuf, sizeof namebuf); | |
c3827f61 | 488 | const char *type = netdev_get_type(netdev); |
c19e6535 | 489 | struct dpif_linux_vport request, reply; |
989fd548 | 490 | struct nl_sock *sock = NULL; |
78a2d59c | 491 | uint32_t upcall_pid; |
c19e6535 | 492 | struct ofpbuf *buf; |
26508d9a KM |
493 | uint64_t options_stub[64 / 8]; |
494 | struct ofpbuf options; | |
78a2d59c | 495 | int error; |
96fba48f | 496 | |
989fd548 JP |
497 | if (dpif->epoll_fd >= 0) { |
498 | error = nl_sock_create(NETLINK_GENERIC, &sock); | |
499 | if (error) { | |
500 | return error; | |
501 | } | |
502 | } | |
503 | ||
c19e6535 | 504 | dpif_linux_vport_init(&request); |
df2c07f4 | 505 | request.cmd = OVS_VPORT_CMD_NEW; |
254f2dc8 | 506 | request.dp_ifindex = dpif->dp_ifindex; |
c060c4cf | 507 | request.type = netdev_to_ovs_vport_type(netdev); |
df2c07f4 | 508 | if (request.type == OVS_VPORT_TYPE_UNSPEC) { |
c283069c BP |
509 | VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has " |
510 | "unsupported type `%s'", | |
c19e6535 | 511 | dpif_name(dpif_), name, type); |
989fd548 | 512 | nl_sock_destroy(sock); |
c283069c BP |
513 | return EINVAL; |
514 | } | |
c19e6535 | 515 | request.name = name; |
c3827f61 | 516 | |
24b019f8 JP |
517 | if (request.type == OVS_VPORT_TYPE_NETDEV) { |
518 | netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false); | |
519 | } | |
520 | ||
26508d9a KM |
521 | tnl_cfg = netdev_get_tunnel_config(netdev); |
522 | if (tnl_cfg && tnl_cfg->dst_port != 0) { | |
523 | ofpbuf_use_stack(&options, options_stub, sizeof options_stub); | |
524 | nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT, | |
7e2d8aea | 525 | ntohs(tnl_cfg->dst_port)); |
26508d9a KM |
526 | request.options = options.data; |
527 | request.options_len = options.size; | |
528 | } | |
529 | ||
78a2d59c | 530 | request.port_no = *port_nop; |
989fd548 | 531 | upcall_pid = sock ? nl_sock_pid(sock) : 0; |
78a2d59c | 532 | request.upcall_pid = &upcall_pid; |
95b1d73a | 533 | |
78a2d59c | 534 | error = dpif_linux_vport_transact(&request, &reply, &buf); |
78a2d59c JP |
535 | if (!error) { |
536 | *port_nop = reply.port_no; | |
537 | VLOG_DBG("%s: assigning port %"PRIu32" to netlink pid %"PRIu32, | |
f205882a | 538 | dpif_name(dpif_), reply.port_no, upcall_pid); |
2510ba7c | 539 | } else { |
4e022ec0 | 540 | if (error == EBUSY && *port_nop != ODPP_NONE) { |
2510ba7c JP |
541 | VLOG_INFO("%s: requested port %"PRIu32" is in use", |
542 | dpif_name(dpif_), *port_nop); | |
543 | } | |
989fd548 JP |
544 | nl_sock_destroy(sock); |
545 | ofpbuf_delete(buf); | |
546 | return error; | |
78a2d59c | 547 | } |
78a2d59c | 548 | ofpbuf_delete(buf); |
c3827f61 | 549 | |
989fd548 JP |
550 | if (sock) { |
551 | error = add_channel(dpif, *port_nop, sock); | |
552 | if (error) { | |
553 | VLOG_INFO("%s: could not add channel for port %s", | |
554 | dpif_name(dpif_), name); | |
555 | ||
556 | /* Delete the port. */ | |
557 | dpif_linux_vport_init(&request); | |
558 | request.cmd = OVS_VPORT_CMD_DEL; | |
559 | request.dp_ifindex = dpif->dp_ifindex; | |
560 | request.port_no = *port_nop; | |
561 | dpif_linux_vport_transact(&request, NULL, NULL); | |
562 | ||
563 | nl_sock_destroy(sock); | |
564 | return error; | |
565 | } | |
566 | } | |
567 | ||
568 | return 0; | |
96fba48f BP |
569 | } |
570 | ||
571 | static int | |
9fafa796 BP |
572 | dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev, |
573 | odp_port_t *port_nop) | |
574 | { | |
575 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
576 | int error; | |
577 | ||
97be1538 | 578 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 | 579 | error = dpif_linux_port_add__(dpif_, netdev, port_nop); |
97be1538 | 580 | ovs_mutex_unlock(&dpif->upcall_lock); |
9fafa796 BP |
581 | |
582 | return error; | |
583 | } | |
584 | ||
585 | static int | |
586 | dpif_linux_port_del__(struct dpif *dpif_, odp_port_t port_no) | |
96fba48f | 587 | { |
c19e6535 BP |
588 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
589 | struct dpif_linux_vport vport; | |
773cd538 | 590 | int error; |
c19e6535 BP |
591 | |
592 | dpif_linux_vport_init(&vport); | |
df2c07f4 | 593 | vport.cmd = OVS_VPORT_CMD_DEL; |
254f2dc8 | 594 | vport.dp_ifindex = dpif->dp_ifindex; |
c19e6535 | 595 | vport.port_no = port_no; |
773cd538 EJ |
596 | error = dpif_linux_vport_transact(&vport, NULL, NULL); |
597 | ||
989fd548 JP |
598 | del_channel(dpif, port_no); |
599 | ||
773cd538 | 600 | return error; |
c3827f61 | 601 | } |
3abc4a1a | 602 | |
9fafa796 BP |
603 | static int |
604 | dpif_linux_port_del(struct dpif *dpif_, odp_port_t port_no) | |
605 | { | |
606 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
607 | int error; | |
608 | ||
97be1538 | 609 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 | 610 | error = dpif_linux_port_del__(dpif_, port_no); |
97be1538 | 611 | ovs_mutex_unlock(&dpif->upcall_lock); |
9fafa796 BP |
612 | |
613 | return error; | |
614 | } | |
615 | ||
c3827f61 | 616 | static int |
4e022ec0 | 617 | dpif_linux_port_query__(const struct dpif *dpif, odp_port_t port_no, |
4c738a8d | 618 | const char *port_name, struct dpif_port *dpif_port) |
c3827f61 | 619 | { |
c19e6535 BP |
620 | struct dpif_linux_vport request; |
621 | struct dpif_linux_vport reply; | |
622 | struct ofpbuf *buf; | |
4c738a8d BP |
623 | int error; |
624 | ||
c19e6535 | 625 | dpif_linux_vport_init(&request); |
df2c07f4 | 626 | request.cmd = OVS_VPORT_CMD_GET; |
254f2dc8 | 627 | request.dp_ifindex = dpif_linux_cast(dpif)->dp_ifindex; |
c19e6535 BP |
628 | request.port_no = port_no; |
629 | request.name = port_name; | |
4c738a8d | 630 | |
c19e6535 BP |
631 | error = dpif_linux_vport_transact(&request, &reply, &buf); |
632 | if (!error) { | |
33db1592 BP |
633 | if (reply.dp_ifindex != request.dp_ifindex) { |
634 | /* A query by name reported that 'port_name' is in some datapath | |
635 | * other than 'dpif', but the caller wants to know about 'dpif'. */ | |
636 | error = ENODEV; | |
4afba28d | 637 | } else if (dpif_port) { |
33db1592 | 638 | dpif_port->name = xstrdup(reply.name); |
b9ad7294 | 639 | dpif_port->type = xstrdup(get_vport_type(&reply)); |
33db1592 BP |
640 | dpif_port->port_no = reply.port_no; |
641 | } | |
c19e6535 | 642 | ofpbuf_delete(buf); |
3abc4a1a | 643 | } |
c19e6535 | 644 | return error; |
96fba48f BP |
645 | } |
646 | ||
647 | static int | |
4e022ec0 | 648 | dpif_linux_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, |
4c738a8d | 649 | struct dpif_port *dpif_port) |
96fba48f | 650 | { |
c19e6535 | 651 | return dpif_linux_port_query__(dpif, port_no, NULL, dpif_port); |
96fba48f BP |
652 | } |
653 | ||
/* Looks up a port of 'dpif' by name: calls dpif_linux_port_query__() with
 * 'devname' and port number 0, and returns its result. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    int retval = dpif_linux_port_query__(dpif, 0, devname, dpif_port);

    return retval;
}
660 | ||
1dd16b9a | 661 | static uint32_t |
996c1b3d BP |
662 | dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED) |
663 | { | |
1dd16b9a | 664 | return MAX_PORTS; |
996c1b3d BP |
665 | } |
666 | ||
98403001 | 667 | static uint32_t |
4e022ec0 | 668 | dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no) |
98403001 | 669 | { |
9fafa796 | 670 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
4e022ec0 | 671 | uint32_t port_idx = odp_to_u32(port_no); |
9fafa796 | 672 | uint32_t pid = 0; |
98403001 | 673 | |
97be1538 | 674 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 | 675 | if (dpif->epoll_fd >= 0) { |
4e022ec0 | 676 | /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s |
989fd548 | 677 | * channel, since it is not heavily loaded. */ |
4e022ec0 | 678 | uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx; |
9fafa796 | 679 | pid = nl_sock_pid(dpif->channels[idx].sock); |
98403001 | 680 | } |
97be1538 | 681 | ovs_mutex_unlock(&dpif->upcall_lock); |
9fafa796 BP |
682 | |
683 | return pid; | |
98403001 BP |
684 | } |
685 | ||
96fba48f BP |
686 | static int |
687 | dpif_linux_flow_flush(struct dpif *dpif_) | |
688 | { | |
550f0db4 | 689 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
37a1300c BP |
690 | struct dpif_linux_flow flow; |
691 | ||
692 | dpif_linux_flow_init(&flow); | |
df2c07f4 | 693 | flow.cmd = OVS_FLOW_CMD_DEL; |
254f2dc8 | 694 | flow.dp_ifindex = dpif->dp_ifindex; |
37a1300c | 695 | return dpif_linux_flow_transact(&flow, NULL, NULL); |
96fba48f BP |
696 | } |
697 | ||
c19e6535 | 698 | struct dpif_linux_port_state { |
f0fef760 | 699 | struct nl_dump dump; |
c19e6535 BP |
700 | }; |
701 | ||
96fba48f | 702 | static int |
f0fef760 | 703 | dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep) |
96fba48f | 704 | { |
550f0db4 | 705 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
f0fef760 BP |
706 | struct dpif_linux_port_state *state; |
707 | struct dpif_linux_vport request; | |
708 | struct ofpbuf *buf; | |
709 | ||
710 | *statep = state = xmalloc(sizeof *state); | |
711 | ||
712 | dpif_linux_vport_init(&request); | |
067f1e23 | 713 | request.cmd = OVS_VPORT_CMD_GET; |
254f2dc8 | 714 | request.dp_ifindex = dpif->dp_ifindex; |
f0fef760 BP |
715 | |
716 | buf = ofpbuf_new(1024); | |
717 | dpif_linux_vport_to_ofpbuf(&request, buf); | |
a88b4e04 | 718 | nl_dump_start(&state->dump, NETLINK_GENERIC, buf); |
f0fef760 BP |
719 | ofpbuf_delete(buf); |
720 | ||
b0ec0f27 BP |
721 | return 0; |
722 | } | |
723 | ||
724 | static int | |
f0fef760 | 725 | dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_, |
4c738a8d | 726 | struct dpif_port *dpif_port) |
b0ec0f27 | 727 | { |
c19e6535 | 728 | struct dpif_linux_port_state *state = state_; |
f0fef760 BP |
729 | struct dpif_linux_vport vport; |
730 | struct ofpbuf buf; | |
96fba48f BP |
731 | int error; |
732 | ||
f0fef760 BP |
733 | if (!nl_dump_next(&state->dump, &buf)) { |
734 | return EOF; | |
735 | } | |
c19e6535 | 736 | |
f0fef760 | 737 | error = dpif_linux_vport_from_ofpbuf(&vport, &buf); |
c3827f61 | 738 | if (error) { |
f0fef760 | 739 | return error; |
c3827f61 | 740 | } |
f0fef760 | 741 | |
ebc56baa | 742 | dpif_port->name = CONST_CAST(char *, vport.name); |
b9ad7294 | 743 | dpif_port->type = CONST_CAST(char *, get_vport_type(&vport)); |
f0fef760 BP |
744 | dpif_port->port_no = vport.port_no; |
745 | return 0; | |
b0ec0f27 BP |
746 | } |
747 | ||
748 | static int | |
95b1d73a | 749 | dpif_linux_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_) |
b0ec0f27 | 750 | { |
c19e6535 | 751 | struct dpif_linux_port_state *state = state_; |
f0fef760 | 752 | int error = nl_dump_done(&state->dump); |
8522b383 | 753 | |
b0ec0f27 | 754 | free(state); |
f0fef760 | 755 | return error; |
96fba48f BP |
756 | } |
757 | ||
e9e28be3 BP |
758 | static int |
759 | dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) | |
760 | { | |
761 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
e9e28be3 | 762 | |
e4516b20 BP |
763 | /* Lazily create the Netlink socket to listen for notifications. */ |
764 | if (!dpif->port_notifier) { | |
765 | struct nl_sock *sock; | |
766 | int error; | |
767 | ||
768 | error = nl_sock_create(NETLINK_GENERIC, &sock); | |
769 | if (error) { | |
770 | return error; | |
771 | } | |
772 | ||
773 | error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup); | |
774 | if (error) { | |
775 | nl_sock_destroy(sock); | |
776 | return error; | |
777 | } | |
778 | dpif->port_notifier = sock; | |
779 | ||
780 | /* We have no idea of the current state so report that everything | |
781 | * changed. */ | |
782 | return ENOBUFS; | |
783 | } | |
784 | ||
785 | for (;;) { | |
786 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
787 | uint64_t buf_stub[4096 / 8]; | |
788 | struct ofpbuf buf; | |
789 | int error; | |
790 | ||
791 | ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); | |
792 | error = nl_sock_recv(dpif->port_notifier, &buf, false); | |
793 | if (!error) { | |
794 | struct dpif_linux_vport vport; | |
795 | ||
796 | error = dpif_linux_vport_from_ofpbuf(&vport, &buf); | |
797 | if (!error) { | |
798 | if (vport.dp_ifindex == dpif->dp_ifindex | |
799 | && (vport.cmd == OVS_VPORT_CMD_NEW | |
800 | || vport.cmd == OVS_VPORT_CMD_DEL | |
801 | || vport.cmd == OVS_VPORT_CMD_SET)) { | |
802 | VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8, | |
803 | dpif->dpif.full_name, vport.name, vport.cmd); | |
804 | *devnamep = xstrdup(vport.name); | |
59e0c910 | 805 | ofpbuf_uninit(&buf); |
e4516b20 | 806 | return 0; |
e4516b20 BP |
807 | } |
808 | } | |
59e0c910 BP |
809 | } else if (error != EAGAIN) { |
810 | VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)", | |
811 | ovs_strerror(error)); | |
812 | nl_sock_drain(dpif->port_notifier); | |
813 | error = ENOBUFS; | |
e4516b20 BP |
814 | } |
815 | ||
59e0c910 BP |
816 | ofpbuf_uninit(&buf); |
817 | if (error) { | |
818 | return error; | |
819 | } | |
e9e28be3 | 820 | } |
e9e28be3 BP |
821 | } |
822 | ||
823 | static void | |
824 | dpif_linux_port_poll_wait(const struct dpif *dpif_) | |
825 | { | |
550f0db4 | 826 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
e4516b20 BP |
827 | |
828 | if (dpif->port_notifier) { | |
829 | nl_sock_wait(dpif->port_notifier, POLLIN); | |
830 | } else { | |
e9e28be3 | 831 | poll_immediate_wake(); |
e9e28be3 BP |
832 | } |
833 | } | |
834 | ||
96fba48f | 835 | static int |
30053024 BP |
836 | dpif_linux_flow_get__(const struct dpif *dpif_, |
837 | const struct nlattr *key, size_t key_len, | |
838 | struct dpif_linux_flow *reply, struct ofpbuf **bufp) | |
96fba48f | 839 | { |
550f0db4 | 840 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
30053024 | 841 | struct dpif_linux_flow request; |
feebdea2 | 842 | |
d6569377 | 843 | dpif_linux_flow_init(&request); |
df2c07f4 | 844 | request.cmd = OVS_FLOW_CMD_GET; |
254f2dc8 | 845 | request.dp_ifindex = dpif->dp_ifindex; |
d6569377 BP |
846 | request.key = key; |
847 | request.key_len = key_len; | |
30053024 BP |
848 | return dpif_linux_flow_transact(&request, reply, bufp); |
849 | } | |
850 | ||
851 | static int | |
852 | dpif_linux_flow_get(const struct dpif *dpif_, | |
853 | const struct nlattr *key, size_t key_len, | |
854 | struct ofpbuf **actionsp, struct dpif_flow_stats *stats) | |
855 | { | |
856 | struct dpif_linux_flow reply; | |
857 | struct ofpbuf *buf; | |
858 | int error; | |
859 | ||
860 | error = dpif_linux_flow_get__(dpif_, key, key_len, &reply, &buf); | |
feebdea2 BP |
861 | if (!error) { |
862 | if (stats) { | |
d6569377 | 863 | dpif_linux_flow_get_stats(&reply, stats); |
feebdea2 | 864 | } |
d6569377 | 865 | if (actionsp) { |
ebc56baa | 866 | buf->data = CONST_CAST(struct nlattr *, reply.actions); |
d6569377 BP |
867 | buf->size = reply.actions_len; |
868 | *actionsp = buf; | |
869 | } else { | |
870 | ofpbuf_delete(buf); | |
feebdea2 BP |
871 | } |
872 | } | |
873 | return error; | |
96fba48f BP |
874 | } |
875 | ||
6bc60024 | 876 | static void |
89625d1e | 877 | dpif_linux_init_flow_put(struct dpif *dpif_, const struct dpif_flow_put *put, |
6bc60024 BP |
878 | struct dpif_linux_flow *request) |
879 | { | |
d64e176c | 880 | static const struct nlattr dummy_action; |
6bc60024 | 881 | |
550f0db4 | 882 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
6bc60024 BP |
883 | |
884 | dpif_linux_flow_init(request); | |
89625d1e | 885 | request->cmd = (put->flags & DPIF_FP_CREATE |
6bc60024 BP |
886 | ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET); |
887 | request->dp_ifindex = dpif->dp_ifindex; | |
89625d1e BP |
888 | request->key = put->key; |
889 | request->key_len = put->key_len; | |
e6cc0bab AZ |
890 | request->mask = put->mask; |
891 | request->mask_len = put->mask_len; | |
6bc60024 | 892 | /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */ |
d64e176c BP |
893 | request->actions = (put->actions |
894 | ? put->actions | |
895 | : CONST_CAST(struct nlattr *, &dummy_action)); | |
89625d1e BP |
896 | request->actions_len = put->actions_len; |
897 | if (put->flags & DPIF_FP_ZERO_STATS) { | |
6bc60024 BP |
898 | request->clear = true; |
899 | } | |
89625d1e | 900 | request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE; |
6bc60024 BP |
901 | } |
902 | ||
96fba48f | 903 | static int |
89625d1e | 904 | dpif_linux_flow_put(struct dpif *dpif_, const struct dpif_flow_put *put) |
96fba48f | 905 | { |
d6569377 BP |
906 | struct dpif_linux_flow request, reply; |
907 | struct ofpbuf *buf; | |
feebdea2 BP |
908 | int error; |
909 | ||
89625d1e | 910 | dpif_linux_init_flow_put(dpif_, put, &request); |
d6569377 | 911 | error = dpif_linux_flow_transact(&request, |
89625d1e BP |
912 | put->stats ? &reply : NULL, |
913 | put->stats ? &buf : NULL); | |
914 | if (!error && put->stats) { | |
915 | dpif_linux_flow_get_stats(&reply, put->stats); | |
d6569377 | 916 | ofpbuf_delete(buf); |
feebdea2 BP |
917 | } |
918 | return error; | |
96fba48f BP |
919 | } |
920 | ||
b99d3cee BP |
921 | static void |
922 | dpif_linux_init_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del, | |
923 | struct dpif_linux_flow *request) | |
96fba48f | 924 | { |
550f0db4 | 925 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
b99d3cee BP |
926 | |
927 | dpif_linux_flow_init(request); | |
928 | request->cmd = OVS_FLOW_CMD_DEL; | |
929 | request->dp_ifindex = dpif->dp_ifindex; | |
930 | request->key = del->key; | |
931 | request->key_len = del->key_len; | |
932 | } | |
933 | ||
934 | static int | |
935 | dpif_linux_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del) | |
936 | { | |
d6569377 BP |
937 | struct dpif_linux_flow request, reply; |
938 | struct ofpbuf *buf; | |
feebdea2 BP |
939 | int error; |
940 | ||
b99d3cee | 941 | dpif_linux_init_flow_del(dpif_, del, &request); |
d6569377 | 942 | error = dpif_linux_flow_transact(&request, |
b99d3cee BP |
943 | del->stats ? &reply : NULL, |
944 | del->stats ? &buf : NULL); | |
945 | if (!error && del->stats) { | |
946 | dpif_linux_flow_get_stats(&reply, del->stats); | |
d6569377 | 947 | ofpbuf_delete(buf); |
feebdea2 BP |
948 | } |
949 | return error; | |
96fba48f BP |
950 | } |
951 | ||
/* State carried across the dpif_linux_flow_dump_start(), _next() and _done()
 * calls of one flow dump. */
struct dpif_linux_flow_state {
    struct nl_dump dump;            /* Netlink dump in progress. */
    struct dpif_linux_flow flow;    /* Most recently parsed flow. */
    struct dpif_flow_stats stats;   /* Stats of 'flow', handed to the caller
                                     * by pointer. */
    struct ofpbuf *buf;             /* Reply from a separate flow "get" issued
                                     * when the dumped flow lacked actions, or
                                     * NULL.  Deleted on the next iteration
                                     * and when the dump is finished. */
};
958 | ||
96fba48f | 959 | static int |
37a1300c | 960 | dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) |
96fba48f | 961 | { |
550f0db4 | 962 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
37a1300c BP |
963 | struct dpif_linux_flow_state *state; |
964 | struct dpif_linux_flow request; | |
965 | struct ofpbuf *buf; | |
966 | ||
967 | *statep = state = xmalloc(sizeof *state); | |
968 | ||
969 | dpif_linux_flow_init(&request); | |
067f1e23 | 970 | request.cmd = OVS_FLOW_CMD_GET; |
254f2dc8 | 971 | request.dp_ifindex = dpif->dp_ifindex; |
37a1300c BP |
972 | |
973 | buf = ofpbuf_new(1024); | |
974 | dpif_linux_flow_to_ofpbuf(&request, buf); | |
a88b4e04 | 975 | nl_dump_start(&state->dump, NETLINK_GENERIC, buf); |
37a1300c BP |
976 | ofpbuf_delete(buf); |
977 | ||
30053024 BP |
978 | state->buf = NULL; |
979 | ||
704a1e09 BP |
980 | return 0; |
981 | } | |
982 | ||
983 | static int | |
37a1300c | 984 | dpif_linux_flow_dump_next(const struct dpif *dpif_ OVS_UNUSED, void *state_, |
feebdea2 | 985 | const struct nlattr **key, size_t *key_len, |
e6cc0bab | 986 | const struct nlattr **mask, size_t *mask_len, |
feebdea2 | 987 | const struct nlattr **actions, size_t *actions_len, |
c97fb132 | 988 | const struct dpif_flow_stats **stats) |
704a1e09 | 989 | { |
feebdea2 | 990 | struct dpif_linux_flow_state *state = state_; |
37a1300c | 991 | struct ofpbuf buf; |
96fba48f BP |
992 | int error; |
993 | ||
30053024 BP |
994 | do { |
995 | ofpbuf_delete(state->buf); | |
996 | state->buf = NULL; | |
feebdea2 | 997 | |
30053024 BP |
998 | if (!nl_dump_next(&state->dump, &buf)) { |
999 | return EOF; | |
feebdea2 | 1000 | } |
30053024 BP |
1001 | |
1002 | error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); | |
1003 | if (error) { | |
1004 | return error; | |
feebdea2 | 1005 | } |
30053024 BP |
1006 | |
1007 | if (actions && !state->flow.actions) { | |
1008 | error = dpif_linux_flow_get__(dpif_, state->flow.key, | |
1009 | state->flow.key_len, | |
1010 | &state->flow, &state->buf); | |
1011 | if (error == ENOENT) { | |
1012 | VLOG_DBG("dumped flow disappeared on get"); | |
1013 | } else if (error) { | |
10a89ef0 BP |
1014 | VLOG_WARN("error fetching dumped flow: %s", |
1015 | ovs_strerror(error)); | |
30053024 | 1016 | } |
feebdea2 | 1017 | } |
30053024 BP |
1018 | } while (error); |
1019 | ||
1020 | if (actions) { | |
1021 | *actions = state->flow.actions; | |
1022 | *actions_len = state->flow.actions_len; | |
1023 | } | |
1024 | if (key) { | |
1025 | *key = state->flow.key; | |
1026 | *key_len = state->flow.key_len; | |
1027 | } | |
e6cc0bab AZ |
1028 | if (mask) { |
1029 | *mask = state->flow.mask; | |
1030 | *mask_len = state->flow.mask ? state->flow.mask_len : 0; | |
1031 | } | |
30053024 BP |
1032 | if (stats) { |
1033 | dpif_linux_flow_get_stats(&state->flow, &state->stats); | |
1034 | *stats = &state->stats; | |
feebdea2 | 1035 | } |
37a1300c | 1036 | return error; |
704a1e09 BP |
1037 | } |
1038 | ||
1039 | static int | |
d6569377 | 1040 | dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) |
704a1e09 | 1041 | { |
d6569377 | 1042 | struct dpif_linux_flow_state *state = state_; |
37a1300c | 1043 | int error = nl_dump_done(&state->dump); |
30053024 | 1044 | ofpbuf_delete(state->buf); |
704a1e09 | 1045 | free(state); |
37a1300c | 1046 | return error; |
96fba48f BP |
1047 | } |
1048 | ||
eabe7c68 BP |
1049 | static void |
1050 | dpif_linux_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec, | |
1051 | struct ofpbuf *buf) | |
96fba48f | 1052 | { |
89625d1e | 1053 | struct ovs_header *k_exec; |
f7cd0081 | 1054 | |
eabe7c68 BP |
1055 | ofpbuf_prealloc_tailroom(buf, (64 |
1056 | + d_exec->packet->size | |
1057 | + d_exec->key_len | |
1058 | + d_exec->actions_len)); | |
f7cd0081 | 1059 | |
df2c07f4 | 1060 | nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST, |
69685a88 | 1061 | OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION); |
f7cd0081 | 1062 | |
89625d1e BP |
1063 | k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec); |
1064 | k_exec->dp_ifindex = dp_ifindex; | |
f7cd0081 | 1065 | |
89625d1e BP |
1066 | nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET, |
1067 | d_exec->packet->data, d_exec->packet->size); | |
1068 | nl_msg_put_unspec(buf, OVS_PACKET_ATTR_KEY, d_exec->key, d_exec->key_len); | |
1069 | nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS, | |
1070 | d_exec->actions, d_exec->actions_len); | |
6bc60024 BP |
1071 | } |
1072 | ||
1073 | static int | |
89625d1e | 1074 | dpif_linux_execute__(int dp_ifindex, const struct dpif_execute *execute) |
6bc60024 | 1075 | { |
eabe7c68 BP |
1076 | uint64_t request_stub[1024 / 8]; |
1077 | struct ofpbuf request; | |
6bc60024 BP |
1078 | int error; |
1079 | ||
eabe7c68 BP |
1080 | ofpbuf_use_stub(&request, request_stub, sizeof request_stub); |
1081 | dpif_linux_encode_execute(dp_ifindex, execute, &request); | |
a88b4e04 | 1082 | error = nl_transact(NETLINK_GENERIC, &request, NULL); |
eabe7c68 | 1083 | ofpbuf_uninit(&request); |
6bc60024 | 1084 | |
f7cd0081 | 1085 | return error; |
96fba48f BP |
1086 | } |
1087 | ||
eb8b28e7 | 1088 | static int |
89625d1e | 1089 | dpif_linux_execute(struct dpif *dpif_, const struct dpif_execute *execute) |
eb8b28e7 | 1090 | { |
550f0db4 | 1091 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
eb8b28e7 | 1092 | |
89625d1e | 1093 | return dpif_linux_execute__(dpif->dp_ifindex, execute); |
eb8b28e7 EJ |
1094 | } |
1095 | ||
eabe7c68 BP |
1096 | #define MAX_OPS 50 |
1097 | ||
6bc60024 | 1098 | static void |
eabe7c68 | 1099 | dpif_linux_operate__(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) |
6bc60024 | 1100 | { |
550f0db4 | 1101 | const struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
eabe7c68 BP |
1102 | |
1103 | struct op_auxdata { | |
1104 | struct nl_transaction txn; | |
72d32ac0 | 1105 | |
eabe7c68 BP |
1106 | struct ofpbuf request; |
1107 | uint64_t request_stub[1024 / 8]; | |
72d32ac0 BP |
1108 | |
1109 | struct ofpbuf reply; | |
1110 | uint64_t reply_stub[1024 / 8]; | |
eabe7c68 BP |
1111 | } auxes[MAX_OPS]; |
1112 | ||
1113 | struct nl_transaction *txnsp[MAX_OPS]; | |
6bc60024 BP |
1114 | size_t i; |
1115 | ||
cb22974d | 1116 | ovs_assert(n_ops <= MAX_OPS); |
6bc60024 | 1117 | for (i = 0; i < n_ops; i++) { |
eabe7c68 | 1118 | struct op_auxdata *aux = &auxes[i]; |
c2b565b5 | 1119 | struct dpif_op *op = ops[i]; |
b99d3cee BP |
1120 | struct dpif_flow_put *put; |
1121 | struct dpif_flow_del *del; | |
1122 | struct dpif_execute *execute; | |
eabe7c68 BP |
1123 | struct dpif_linux_flow flow; |
1124 | ||
1125 | ofpbuf_use_stub(&aux->request, | |
1126 | aux->request_stub, sizeof aux->request_stub); | |
1127 | aux->txn.request = &aux->request; | |
b99d3cee | 1128 | |
72d32ac0 BP |
1129 | ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub); |
1130 | aux->txn.reply = NULL; | |
1131 | ||
b99d3cee BP |
1132 | switch (op->type) { |
1133 | case DPIF_OP_FLOW_PUT: | |
1134 | put = &op->u.flow_put; | |
eabe7c68 | 1135 | dpif_linux_init_flow_put(dpif_, put, &flow); |
6bc60024 | 1136 | if (put->stats) { |
eabe7c68 | 1137 | flow.nlmsg_flags |= NLM_F_ECHO; |
72d32ac0 | 1138 | aux->txn.reply = &aux->reply; |
6bc60024 | 1139 | } |
eabe7c68 | 1140 | dpif_linux_flow_to_ofpbuf(&flow, &aux->request); |
b99d3cee BP |
1141 | break; |
1142 | ||
1143 | case DPIF_OP_FLOW_DEL: | |
1144 | del = &op->u.flow_del; | |
eabe7c68 | 1145 | dpif_linux_init_flow_del(dpif_, del, &flow); |
b99d3cee | 1146 | if (del->stats) { |
eabe7c68 | 1147 | flow.nlmsg_flags |= NLM_F_ECHO; |
72d32ac0 | 1148 | aux->txn.reply = &aux->reply; |
b99d3cee | 1149 | } |
eabe7c68 | 1150 | dpif_linux_flow_to_ofpbuf(&flow, &aux->request); |
b99d3cee | 1151 | break; |
6bc60024 | 1152 | |
b99d3cee BP |
1153 | case DPIF_OP_EXECUTE: |
1154 | execute = &op->u.execute; | |
eabe7c68 BP |
1155 | dpif_linux_encode_execute(dpif->dp_ifindex, execute, |
1156 | &aux->request); | |
b99d3cee BP |
1157 | break; |
1158 | ||
1159 | default: | |
6bc60024 BP |
1160 | NOT_REACHED(); |
1161 | } | |
1162 | } | |
1163 | ||
6bc60024 | 1164 | for (i = 0; i < n_ops; i++) { |
eabe7c68 | 1165 | txnsp[i] = &auxes[i].txn; |
6bc60024 | 1166 | } |
a88b4e04 | 1167 | nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops); |
6bc60024 | 1168 | |
6bc60024 | 1169 | for (i = 0; i < n_ops; i++) { |
72d32ac0 | 1170 | struct op_auxdata *aux = &auxes[i]; |
eabe7c68 | 1171 | struct nl_transaction *txn = &auxes[i].txn; |
c2b565b5 | 1172 | struct dpif_op *op = ops[i]; |
b99d3cee BP |
1173 | struct dpif_flow_put *put; |
1174 | struct dpif_flow_del *del; | |
6bc60024 | 1175 | |
b99d3cee | 1176 | op->error = txn->error; |
6bc60024 | 1177 | |
b99d3cee BP |
1178 | switch (op->type) { |
1179 | case DPIF_OP_FLOW_PUT: | |
1180 | put = &op->u.flow_put; | |
cfceb2b5 | 1181 | if (put->stats) { |
b99d3cee | 1182 | if (!op->error) { |
cfceb2b5 BP |
1183 | struct dpif_linux_flow reply; |
1184 | ||
1185 | op->error = dpif_linux_flow_from_ofpbuf(&reply, | |
1186 | txn->reply); | |
1187 | if (!op->error) { | |
1188 | dpif_linux_flow_get_stats(&reply, put->stats); | |
1189 | } | |
1190 | } | |
1191 | ||
1192 | if (op->error) { | |
1193 | memset(put->stats, 0, sizeof *put->stats); | |
6bc60024 BP |
1194 | } |
1195 | } | |
b99d3cee BP |
1196 | break; |
1197 | ||
1198 | case DPIF_OP_FLOW_DEL: | |
1199 | del = &op->u.flow_del; | |
cfceb2b5 | 1200 | if (del->stats) { |
b99d3cee | 1201 | if (!op->error) { |
cfceb2b5 BP |
1202 | struct dpif_linux_flow reply; |
1203 | ||
1204 | op->error = dpif_linux_flow_from_ofpbuf(&reply, | |
1205 | txn->reply); | |
1206 | if (!op->error) { | |
1207 | dpif_linux_flow_get_stats(&reply, del->stats); | |
1208 | } | |
1209 | } | |
1210 | ||
1211 | if (op->error) { | |
1212 | memset(del->stats, 0, sizeof *del->stats); | |
b99d3cee BP |
1213 | } |
1214 | } | |
1215 | break; | |
1216 | ||
1217 | case DPIF_OP_EXECUTE: | |
1218 | break; | |
1219 | ||
1220 | default: | |
6bc60024 BP |
1221 | NOT_REACHED(); |
1222 | } | |
1223 | ||
72d32ac0 BP |
1224 | ofpbuf_uninit(&aux->request); |
1225 | ofpbuf_uninit(&aux->reply); | |
6bc60024 | 1226 | } |
eabe7c68 BP |
1227 | } |
1228 | ||
1229 | static void | |
1230 | dpif_linux_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) | |
1231 | { | |
1232 | while (n_ops > 0) { | |
1233 | size_t chunk = MIN(n_ops, MAX_OPS); | |
1234 | dpif_linux_operate__(dpif, ops, chunk); | |
1235 | ops += chunk; | |
1236 | n_ops -= chunk; | |
1237 | } | |
6bc60024 BP |
1238 | } |
1239 | ||
96fba48f | 1240 | static int |
9fafa796 | 1241 | dpif_linux_recv_set__(struct dpif *dpif_, bool enable) |
96fba48f | 1242 | { |
982b8810 | 1243 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
982b8810 | 1244 | |
a12b3ead | 1245 | if ((dpif->epoll_fd >= 0) == enable) { |
982b8810 | 1246 | return 0; |
17411ecf | 1247 | } |
b063d9f0 | 1248 | |
a12b3ead | 1249 | if (!enable) { |
fe3d61b3 | 1250 | destroy_channels(dpif); |
a12b3ead | 1251 | } else { |
989fd548 JP |
1252 | struct dpif_port_dump port_dump; |
1253 | struct dpif_port port; | |
982b8810 | 1254 | |
50f80534 | 1255 | if (dpif->epoll_fd < 0) { |
9fafa796 BP |
1256 | dpif->epoll_fd = epoll_create(10); |
1257 | if (dpif->epoll_fd < 0) { | |
1258 | return errno; | |
1259 | } | |
50f80534 BP |
1260 | } |
1261 | ||
989fd548 JP |
1262 | DPIF_PORT_FOR_EACH (&port, &port_dump, &dpif->dpif) { |
1263 | struct dpif_linux_vport vport_request; | |
1264 | struct nl_sock *sock; | |
1265 | uint32_t upcall_pid; | |
1266 | int error; | |
50f80534 | 1267 | |
989fd548 | 1268 | error = nl_sock_create(NETLINK_GENERIC, &sock); |
b063d9f0 | 1269 | if (error) { |
17411ecf | 1270 | return error; |
982b8810 | 1271 | } |
50f80534 | 1272 | |
989fd548 JP |
1273 | upcall_pid = nl_sock_pid(sock); |
1274 | ||
1275 | dpif_linux_vport_init(&vport_request); | |
1276 | vport_request.cmd = OVS_VPORT_CMD_SET; | |
1277 | vport_request.dp_ifindex = dpif->dp_ifindex; | |
1278 | vport_request.port_no = port.port_no; | |
1279 | vport_request.upcall_pid = &upcall_pid; | |
1280 | error = dpif_linux_vport_transact(&vport_request, NULL, NULL); | |
1281 | if (!error) { | |
1282 | VLOG_DBG("%s: assigning port %"PRIu32" to netlink pid %"PRIu32, | |
1283 | dpif_name(&dpif->dpif), vport_request.port_no, | |
1284 | upcall_pid); | |
1285 | } else { | |
1286 | VLOG_WARN_RL(&error_rl, | |
1287 | "%s: failed to set upcall pid on port: %s", | |
10a89ef0 | 1288 | dpif_name(&dpif->dpif), ovs_strerror(error)); |
989fd548 JP |
1289 | nl_sock_destroy(sock); |
1290 | ||
1291 | if (error == ENODEV || error == ENOENT) { | |
1292 | /* This device isn't there, but keep trying the others. */ | |
1293 | continue; | |
1294 | } else { | |
1295 | return error; | |
1296 | } | |
50f80534 | 1297 | } |
14b4d2f9 | 1298 | |
989fd548 JP |
1299 | error = add_channel(dpif, port.port_no, sock); |
1300 | if (error) { | |
1301 | VLOG_INFO("%s: could not add channel for port %s", | |
1302 | dpif_name(dpif_), port.name); | |
1303 | nl_sock_destroy(sock); | |
1304 | return error; | |
1305 | } | |
982b8810 BP |
1306 | } |
1307 | } | |
b063d9f0 | 1308 | |
b063d9f0 | 1309 | return 0; |
96fba48f BP |
1310 | } |
1311 | ||
9fafa796 BP |
1312 | static int |
1313 | dpif_linux_recv_set(struct dpif *dpif_, bool enable) | |
1314 | { | |
1315 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1316 | int error; | |
1317 | ||
97be1538 | 1318 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 | 1319 | error = dpif_linux_recv_set__(dpif_, enable); |
97be1538 | 1320 | ovs_mutex_unlock(&dpif->upcall_lock); |
9fafa796 BP |
1321 | |
1322 | return error; | |
1323 | } | |
1324 | ||
aae51f53 BP |
1325 | static int |
1326 | dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
1327 | uint32_t queue_id, uint32_t *priority) | |
1328 | { | |
1329 | if (queue_id < 0xf000) { | |
17ee3c1f | 1330 | *priority = TC_H_MAKE(1 << 16, queue_id + 1); |
aae51f53 BP |
1331 | return 0; |
1332 | } else { | |
1333 | return EINVAL; | |
1334 | } | |
1335 | } | |
1336 | ||
96fba48f | 1337 | static int |
982b8810 | 1338 | parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, |
254f2dc8 | 1339 | int *dp_ifindex) |
856081f6 | 1340 | { |
df2c07f4 | 1341 | static const struct nl_policy ovs_packet_policy[] = { |
856081f6 | 1342 | /* Always present. */ |
df2c07f4 | 1343 | [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC, |
856081f6 | 1344 | .min_len = ETH_HEADER_LEN }, |
df2c07f4 | 1345 | [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED }, |
856081f6 | 1346 | |
df2c07f4 | 1347 | /* OVS_PACKET_CMD_ACTION only. */ |
e995e3df | 1348 | [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true }, |
856081f6 BP |
1349 | }; |
1350 | ||
df2c07f4 JP |
1351 | struct ovs_header *ovs_header; |
1352 | struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)]; | |
982b8810 BP |
1353 | struct nlmsghdr *nlmsg; |
1354 | struct genlmsghdr *genl; | |
1355 | struct ofpbuf b; | |
aaff4b55 | 1356 | int type; |
982b8810 BP |
1357 | |
1358 | ofpbuf_use_const(&b, buf->data, buf->size); | |
1359 | ||
1360 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1361 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
1362 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
1363 | if (!nlmsg || !genl || !ovs_header | |
1364 | || nlmsg->nlmsg_type != ovs_packet_family | |
1365 | || !nl_policy_parse(&b, 0, ovs_packet_policy, a, | |
1366 | ARRAY_SIZE(ovs_packet_policy))) { | |
856081f6 BP |
1367 | return EINVAL; |
1368 | } | |
1369 | ||
df2c07f4 JP |
1370 | type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS |
1371 | : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION | |
aaff4b55 BP |
1372 | : -1); |
1373 | if (type < 0) { | |
1374 | return EINVAL; | |
1375 | } | |
82272ede | 1376 | |
aaff4b55 BP |
1377 | memset(upcall, 0, sizeof *upcall); |
1378 | upcall->type = type; | |
856081f6 | 1379 | upcall->packet = buf; |
ebc56baa BP |
1380 | upcall->packet->data = CONST_CAST(struct nlattr *, |
1381 | nl_attr_get(a[OVS_PACKET_ATTR_PACKET])); | |
df2c07f4 | 1382 | upcall->packet->size = nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]); |
ebc56baa BP |
1383 | upcall->key = CONST_CAST(struct nlattr *, |
1384 | nl_attr_get(a[OVS_PACKET_ATTR_KEY])); | |
df2c07f4 | 1385 | upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]); |
e995e3df | 1386 | upcall->userdata = a[OVS_PACKET_ATTR_USERDATA]; |
df2c07f4 | 1387 | *dp_ifindex = ovs_header->dp_ifindex; |
982b8810 | 1388 | |
856081f6 BP |
1389 | return 0; |
1390 | } | |
1391 | ||
1392 | static int | |
9fafa796 BP |
1393 | dpif_linux_recv__(struct dpif *dpif_, struct dpif_upcall *upcall, |
1394 | struct ofpbuf *buf) | |
96fba48f BP |
1395 | { |
1396 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
17411ecf | 1397 | int read_tries = 0; |
96fba48f | 1398 | |
a12b3ead | 1399 | if (dpif->epoll_fd < 0) { |
17411ecf | 1400 | return EAGAIN; |
982b8810 BP |
1401 | } |
1402 | ||
989fd548 | 1403 | if (dpif->event_offset >= dpif->n_events) { |
8522ba09 | 1404 | int retval; |
989fd548 JP |
1405 | |
1406 | dpif->event_offset = dpif->n_events = 0; | |
f6d1465c | 1407 | |
8522ba09 | 1408 | do { |
989fd548 JP |
1409 | retval = epoll_wait(dpif->epoll_fd, dpif->epoll_events, |
1410 | dpif->uc_array_size, 0); | |
8522ba09 BP |
1411 | } while (retval < 0 && errno == EINTR); |
1412 | if (retval < 0) { | |
1413 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
10a89ef0 | 1414 | VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno)); |
989fd548 JP |
1415 | } else if (retval > 0) { |
1416 | dpif->n_events = retval; | |
8522ba09 | 1417 | } |
8522ba09 BP |
1418 | } |
1419 | ||
989fd548 JP |
1420 | while (dpif->event_offset < dpif->n_events) { |
1421 | int idx = dpif->epoll_events[dpif->event_offset].data.u32; | |
1422 | struct dpif_channel *ch = &dpif->channels[idx]; | |
8522ba09 | 1423 | |
989fd548 | 1424 | dpif->event_offset++; |
17411ecf | 1425 | |
f6d1465c | 1426 | for (;;) { |
8522ba09 | 1427 | int dp_ifindex; |
f6d1465c | 1428 | int error; |
17411ecf | 1429 | |
f6d1465c BP |
1430 | if (++read_tries > 50) { |
1431 | return EAGAIN; | |
1432 | } | |
17411ecf | 1433 | |
fe3d61b3 | 1434 | error = nl_sock_recv(ch->sock, buf, false); |
14b4d2f9 BP |
1435 | if (error == ENOBUFS) { |
1436 | /* ENOBUFS typically means that we've received so many | |
1437 | * packets that the buffer overflowed. Try again | |
1438 | * immediately because there's almost certainly a packet | |
1439 | * waiting for us. */ | |
1440 | report_loss(dpif_, ch); | |
1441 | continue; | |
1442 | } | |
1443 | ||
1444 | ch->last_poll = time_msec(); | |
72d32ac0 | 1445 | if (error) { |
72d32ac0 BP |
1446 | if (error == EAGAIN) { |
1447 | break; | |
1448 | } | |
f6d1465c BP |
1449 | return error; |
1450 | } | |
17411ecf | 1451 | |
f6d1465c | 1452 | error = parse_odp_packet(buf, upcall, &dp_ifindex); |
a12b3ead | 1453 | if (!error && dp_ifindex == dpif->dp_ifindex) { |
f6d1465c | 1454 | return 0; |
989fd548 | 1455 | } else if (error) { |
f6d1465c | 1456 | return error; |
17411ecf | 1457 | } |
982b8810 | 1458 | } |
50f80534 | 1459 | } |
982b8810 BP |
1460 | |
1461 | return EAGAIN; | |
96fba48f BP |
1462 | } |
1463 | ||
9fafa796 BP |
1464 | static int |
1465 | dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall, | |
1466 | struct ofpbuf *buf) | |
1467 | { | |
1468 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1469 | int error; | |
1470 | ||
97be1538 | 1471 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 | 1472 | error = dpif_linux_recv__(dpif_, upcall, buf); |
97be1538 | 1473 | ovs_mutex_unlock(&dpif->upcall_lock); |
9fafa796 BP |
1474 | |
1475 | return error; | |
1476 | } | |
1477 | ||
96fba48f BP |
1478 | static void |
1479 | dpif_linux_recv_wait(struct dpif *dpif_) | |
1480 | { | |
9fafa796 | 1481 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); |
17411ecf | 1482 | |
97be1538 | 1483 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 BP |
1484 | if (dpif->epoll_fd >= 0) { |
1485 | poll_fd_wait(dpif->epoll_fd, POLLIN); | |
17411ecf | 1486 | } |
97be1538 | 1487 | ovs_mutex_unlock(&dpif->upcall_lock); |
96fba48f BP |
1488 | } |
1489 | ||
1ba530f4 BP |
1490 | static void |
1491 | dpif_linux_recv_purge(struct dpif *dpif_) | |
1492 | { | |
1493 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
17411ecf | 1494 | |
97be1538 | 1495 | ovs_mutex_lock(&dpif->upcall_lock); |
9fafa796 BP |
1496 | if (dpif->epoll_fd >= 0) { |
1497 | struct dpif_channel *ch; | |
1ba530f4 | 1498 | |
9fafa796 BP |
1499 | for (ch = dpif->channels; ch < &dpif->channels[dpif->uc_array_size]; |
1500 | ch++) { | |
1501 | if (ch->sock) { | |
1502 | nl_sock_drain(ch->sock); | |
1503 | } | |
989fd548 | 1504 | } |
1ba530f4 | 1505 | } |
97be1538 | 1506 | ovs_mutex_unlock(&dpif->upcall_lock); |
1ba530f4 BP |
1507 | } |
1508 | ||
/* The dpif provider class for the Linux kernel datapath, registered under the
 * type name "system".
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of 'struct dpif_class' (declared outside this file;
 * NOTE(review): confirm against dpif-provider.h whenever members are added or
 * reordered).  NULL entries are callbacks this provider does not supply. */
const struct dpif_class dpif_linux_class = {
    "system",
    dpif_linux_enumerate,
    NULL,                       /* Not implemented by this provider. */
    dpif_linux_open,
    dpif_linux_close,
    dpif_linux_destroy,
    NULL,                       /* run */
    NULL,                       /* wait */
    dpif_linux_get_stats,
    dpif_linux_port_add,
    dpif_linux_port_del,
    dpif_linux_port_query_by_number,
    dpif_linux_port_query_by_name,
    dpif_linux_get_max_ports,
    dpif_linux_port_get_pid,
    dpif_linux_port_dump_start,
    dpif_linux_port_dump_next,
    dpif_linux_port_dump_done,
    dpif_linux_port_poll,
    dpif_linux_port_poll_wait,
    dpif_linux_flow_get,
    dpif_linux_flow_put,
    dpif_linux_flow_del,
    dpif_linux_flow_flush,
    dpif_linux_flow_dump_start,
    dpif_linux_flow_dump_next,
    dpif_linux_flow_dump_done,
    dpif_linux_execute,
    dpif_linux_operate,
    dpif_linux_recv_set,
    dpif_linux_queue_to_priority,
    dpif_linux_recv,
    dpif_linux_recv_wait,
    dpif_linux_recv_purge,
};
1545 | \f | |
96fba48f | 1546 | static int |
982b8810 | 1547 | dpif_linux_init(void) |
96fba48f | 1548 | { |
eb8ed438 BP |
1549 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
1550 | static int error; | |
982b8810 | 1551 | |
eb8ed438 | 1552 | if (ovsthread_once_start(&once)) { |
df2c07f4 JP |
1553 | error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY, |
1554 | &ovs_datapath_family); | |
37a1300c BP |
1555 | if (error) { |
1556 | VLOG_ERR("Generic Netlink family '%s' does not exist. " | |
1557 | "The Open vSwitch kernel module is probably not loaded.", | |
df2c07f4 | 1558 | OVS_DATAPATH_FAMILY); |
37a1300c | 1559 | } |
f0fef760 | 1560 | if (!error) { |
df2c07f4 | 1561 | error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family); |
f0fef760 | 1562 | } |
37a1300c | 1563 | if (!error) { |
df2c07f4 | 1564 | error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family); |
37a1300c | 1565 | } |
aaff4b55 | 1566 | if (!error) { |
df2c07f4 JP |
1567 | error = nl_lookup_genl_family(OVS_PACKET_FAMILY, |
1568 | &ovs_packet_family); | |
aaff4b55 | 1569 | } |
c7178a0b EJ |
1570 | if (!error) { |
1571 | error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP, | |
b3dcb73c | 1572 | &ovs_vport_mcgroup); |
c7178a0b | 1573 | } |
eb8ed438 BP |
1574 | |
1575 | ovsthread_once_done(&once); | |
982b8810 BP |
1576 | } |
1577 | ||
1578 | return error; | |
96fba48f BP |
1579 | } |
1580 | ||
c19e6535 BP |
1581 | bool |
1582 | dpif_linux_is_internal_device(const char *name) | |
9fe3b9a2 | 1583 | { |
c19e6535 BP |
1584 | struct dpif_linux_vport reply; |
1585 | struct ofpbuf *buf; | |
9fe3b9a2 | 1586 | int error; |
96fba48f | 1587 | |
c19e6535 BP |
1588 | error = dpif_linux_vport_get(name, &reply, &buf); |
1589 | if (!error) { | |
1590 | ofpbuf_delete(buf); | |
141d9ce4 | 1591 | } else if (error != ENODEV && error != ENOENT) { |
c19e6535 | 1592 | VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)", |
10a89ef0 | 1593 | name, ovs_strerror(error)); |
96fba48f BP |
1594 | } |
1595 | ||
df2c07f4 | 1596 | return reply.type == OVS_VPORT_TYPE_INTERNAL; |
96fba48f | 1597 | } |
c19e6535 | 1598 | \f |
df2c07f4 | 1599 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
c19e6535 BP |
1600 | * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a |
1601 | * positive errno value. | |
1602 | * | |
1603 | * 'vport' will contain pointers into 'buf', so the caller should not free | |
1604 | * 'buf' while 'vport' is still in use. */ | |
1605 | static int | |
1606 | dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, | |
1607 | const struct ofpbuf *buf) | |
1608 | { | |
df2c07f4 JP |
1609 | static const struct nl_policy ovs_vport_policy[] = { |
1610 | [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 }, | |
1611 | [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 }, | |
1612 | [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, | |
b063d9f0 | 1613 | [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_U32 }, |
f7df9823 | 1614 | [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats), |
c19e6535 | 1615 | .optional = true }, |
df2c07f4 | 1616 | [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true }, |
c19e6535 BP |
1617 | }; |
1618 | ||
df2c07f4 JP |
1619 | struct nlattr *a[ARRAY_SIZE(ovs_vport_policy)]; |
1620 | struct ovs_header *ovs_header; | |
f0fef760 BP |
1621 | struct nlmsghdr *nlmsg; |
1622 | struct genlmsghdr *genl; | |
1623 | struct ofpbuf b; | |
c19e6535 BP |
1624 | |
1625 | dpif_linux_vport_init(vport); | |
1626 | ||
f0fef760 BP |
1627 | ofpbuf_use_const(&b, buf->data, buf->size); |
1628 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1629 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
1630 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
1631 | if (!nlmsg || !genl || !ovs_header | |
1632 | || nlmsg->nlmsg_type != ovs_vport_family | |
1633 | || !nl_policy_parse(&b, 0, ovs_vport_policy, a, | |
1634 | ARRAY_SIZE(ovs_vport_policy))) { | |
c19e6535 BP |
1635 | return EINVAL; |
1636 | } | |
c19e6535 | 1637 | |
f0fef760 | 1638 | vport->cmd = genl->cmd; |
df2c07f4 | 1639 | vport->dp_ifindex = ovs_header->dp_ifindex; |
4e022ec0 | 1640 | vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]); |
df2c07f4 JP |
1641 | vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]); |
1642 | vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]); | |
b063d9f0 | 1643 | if (a[OVS_VPORT_ATTR_UPCALL_PID]) { |
a24a6574 | 1644 | vport->upcall_pid = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]); |
b063d9f0 | 1645 | } |
df2c07f4 JP |
1646 | if (a[OVS_VPORT_ATTR_STATS]) { |
1647 | vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]); | |
1648 | } | |
df2c07f4 JP |
1649 | if (a[OVS_VPORT_ATTR_OPTIONS]) { |
1650 | vport->options = nl_attr_get(a[OVS_VPORT_ATTR_OPTIONS]); | |
1651 | vport->options_len = nl_attr_get_size(a[OVS_VPORT_ATTR_OPTIONS]); | |
c19e6535 | 1652 | } |
c19e6535 BP |
1653 | return 0; |
1654 | } | |
1655 | ||
df2c07f4 | 1656 | /* Appends to 'buf' (which must initially be empty) a "struct ovs_header" |
c19e6535 BP |
1657 | * followed by Netlink attributes corresponding to 'vport'. */ |
1658 | static void | |
1659 | dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport, | |
1660 | struct ofpbuf *buf) | |
1661 | { | |
df2c07f4 | 1662 | struct ovs_header *ovs_header; |
f0fef760 | 1663 | |
df2c07f4 | 1664 | nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO, |
69685a88 | 1665 | vport->cmd, OVS_VPORT_VERSION); |
c19e6535 | 1666 | |
df2c07f4 JP |
1667 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
1668 | ovs_header->dp_ifindex = vport->dp_ifindex; | |
c19e6535 | 1669 | |
4e022ec0 AW |
1670 | if (vport->port_no != ODPP_NONE) { |
1671 | nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no); | |
c19e6535 BP |
1672 | } |
1673 | ||
df2c07f4 JP |
1674 | if (vport->type != OVS_VPORT_TYPE_UNSPEC) { |
1675 | nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type); | |
c19e6535 BP |
1676 | } |
1677 | ||
1678 | if (vport->name) { | |
df2c07f4 | 1679 | nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name); |
c19e6535 BP |
1680 | } |
1681 | ||
a24a6574 BP |
1682 | if (vport->upcall_pid) { |
1683 | nl_msg_put_u32(buf, OVS_VPORT_ATTR_UPCALL_PID, *vport->upcall_pid); | |
1684 | } | |
b063d9f0 | 1685 | |
c19e6535 | 1686 | if (vport->stats) { |
df2c07f4 | 1687 | nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS, |
c19e6535 BP |
1688 | vport->stats, sizeof *vport->stats); |
1689 | } | |
1690 | ||
c19e6535 | 1691 | if (vport->options) { |
df2c07f4 | 1692 | nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS, |
c19e6535 BP |
1693 | vport->options, vport->options_len); |
1694 | } | |
c19e6535 BP |
1695 | } |
1696 | ||
1697 | /* Clears 'vport' to "empty" values. */ | |
1698 | void | |
1699 | dpif_linux_vport_init(struct dpif_linux_vport *vport) | |
1700 | { | |
1701 | memset(vport, 0, sizeof *vport); | |
4e022ec0 | 1702 | vport->port_no = ODPP_NONE; |
c19e6535 BP |
1703 | } |
1704 | ||
1705 | /* Executes 'request' in the kernel datapath. If the command fails, returns a | |
1706 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
1707 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
df2c07f4 | 1708 | * result of the command is expected to be an ovs_vport also, which is decoded |
c19e6535 BP |
1709 | * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the |
1710 | * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
1711 | int | |
1712 | dpif_linux_vport_transact(const struct dpif_linux_vport *request, | |
1713 | struct dpif_linux_vport *reply, | |
1714 | struct ofpbuf **bufp) | |
1715 | { | |
f0fef760 | 1716 | struct ofpbuf *request_buf; |
c19e6535 BP |
1717 | int error; |
1718 | ||
cb22974d | 1719 | ovs_assert((reply != NULL) == (bufp != NULL)); |
c19e6535 | 1720 | |
42bb6c72 BP |
1721 | error = dpif_linux_init(); |
1722 | if (error) { | |
1723 | if (reply) { | |
1724 | *bufp = NULL; | |
1725 | dpif_linux_vport_init(reply); | |
1726 | } | |
1727 | return error; | |
1728 | } | |
1729 | ||
f0fef760 BP |
1730 | request_buf = ofpbuf_new(1024); |
1731 | dpif_linux_vport_to_ofpbuf(request, request_buf); | |
a88b4e04 | 1732 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
f0fef760 | 1733 | ofpbuf_delete(request_buf); |
c19e6535 | 1734 | |
f0fef760 BP |
1735 | if (reply) { |
1736 | if (!error) { | |
1737 | error = dpif_linux_vport_from_ofpbuf(reply, *bufp); | |
1738 | } | |
c19e6535 | 1739 | if (error) { |
f0fef760 BP |
1740 | dpif_linux_vport_init(reply); |
1741 | ofpbuf_delete(*bufp); | |
1742 | *bufp = NULL; | |
c19e6535 | 1743 | } |
c19e6535 BP |
1744 | } |
1745 | return error; | |
1746 | } | |
1747 | ||
1748 | /* Obtains information about the kernel vport named 'name' and stores it into | |
1749 | * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no | |
1750 | * longer needed ('reply' will contain pointers into '*bufp'). */ | |
1751 | int | |
1752 | dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply, | |
1753 | struct ofpbuf **bufp) | |
1754 | { | |
1755 | struct dpif_linux_vport request; | |
1756 | ||
1757 | dpif_linux_vport_init(&request); | |
df2c07f4 | 1758 | request.cmd = OVS_VPORT_CMD_GET; |
c19e6535 BP |
1759 | request.name = name; |
1760 | ||
1761 | return dpif_linux_vport_transact(&request, reply, bufp); | |
1762 | } | |
d6569377 | 1763 | \f |
df2c07f4 | 1764 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
aaff4b55 BP |
1765 | * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a |
1766 | * positive errno value. | |
d6569377 BP |
1767 | * |
1768 | * 'dp' will contain pointers into 'buf', so the caller should not free 'buf' | |
1769 | * while 'dp' is still in use. */ | |
1770 | static int | |
1771 | dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) | |
1772 | { | |
df2c07f4 JP |
1773 | static const struct nl_policy ovs_datapath_policy[] = { |
1774 | [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, | |
f7df9823 | 1775 | [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats), |
d6569377 | 1776 | .optional = true }, |
847108dc AZ |
1777 | [OVS_DP_ATTR_MEGAFLOW_STATS] = { |
1778 | NL_POLICY_FOR(struct ovs_dp_megaflow_stats), | |
1779 | .optional = true }, | |
d6569377 BP |
1780 | }; |
1781 | ||
df2c07f4 JP |
1782 | struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)]; |
1783 | struct ovs_header *ovs_header; | |
aaff4b55 BP |
1784 | struct nlmsghdr *nlmsg; |
1785 | struct genlmsghdr *genl; | |
1786 | struct ofpbuf b; | |
d6569377 BP |
1787 | |
1788 | dpif_linux_dp_init(dp); | |
1789 | ||
aaff4b55 BP |
1790 | ofpbuf_use_const(&b, buf->data, buf->size); |
1791 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1792 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
1793 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
1794 | if (!nlmsg || !genl || !ovs_header | |
1795 | || nlmsg->nlmsg_type != ovs_datapath_family | |
1796 | || !nl_policy_parse(&b, 0, ovs_datapath_policy, a, | |
1797 | ARRAY_SIZE(ovs_datapath_policy))) { | |
d6569377 BP |
1798 | return EINVAL; |
1799 | } | |
d6569377 | 1800 | |
aaff4b55 | 1801 | dp->cmd = genl->cmd; |
df2c07f4 JP |
1802 | dp->dp_ifindex = ovs_header->dp_ifindex; |
1803 | dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]); | |
1804 | if (a[OVS_DP_ATTR_STATS]) { | |
d6569377 BP |
1805 | /* Can't use structure assignment because Netlink doesn't ensure |
1806 | * sufficient alignment for 64-bit members. */ | |
df2c07f4 | 1807 | memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]), |
d6569377 BP |
1808 | sizeof dp->stats); |
1809 | } | |
982b8810 | 1810 | |
847108dc AZ |
1811 | if (a[OVS_DP_ATTR_MEGAFLOW_STATS]) { |
1812 | /* Can't use structure assignment because Netlink doesn't ensure | |
1813 | * sufficient alignment for 64-bit members. */ | |
1814 | memcpy(&dp->megaflow_stats, nl_attr_get(a[OVS_DP_ATTR_MEGAFLOW_STATS]), | |
1815 | sizeof dp->megaflow_stats); | |
1816 | } | |
1817 | ||
d6569377 BP |
1818 | return 0; |
1819 | } | |
1820 | ||
aaff4b55 | 1821 | /* Appends to 'buf' the Generic Netlink message described by 'dp'. */ |
d6569377 BP |
1822 | static void |
1823 | dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) | |
1824 | { | |
df2c07f4 | 1825 | struct ovs_header *ovs_header; |
d6569377 | 1826 | |
df2c07f4 | 1827 | nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family, |
69685a88 JG |
1828 | NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, |
1829 | OVS_DATAPATH_VERSION); | |
aaff4b55 | 1830 | |
df2c07f4 JP |
1831 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
1832 | ovs_header->dp_ifindex = dp->dp_ifindex; | |
d6569377 BP |
1833 | |
1834 | if (dp->name) { | |
df2c07f4 | 1835 | nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name); |
d6569377 BP |
1836 | } |
1837 | ||
a24a6574 BP |
1838 | if (dp->upcall_pid) { |
1839 | nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid); | |
1840 | } | |
b063d9f0 | 1841 | |
df2c07f4 | 1842 | /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */ |
d6569377 BP |
1843 | } |
1844 | ||
1845 | /* Clears 'dp' to "empty" values. */ | |
d3d8f1f7 | 1846 | static void |
d6569377 BP |
1847 | dpif_linux_dp_init(struct dpif_linux_dp *dp) |
1848 | { | |
1849 | memset(dp, 0, sizeof *dp); | |
847108dc AZ |
1850 | dp->megaflow_stats.n_masks = UINT32_MAX; |
1851 | dp->megaflow_stats.n_mask_hit = UINT64_MAX; | |
d6569377 BP |
1852 | } |
1853 | ||
aaff4b55 BP |
1854 | static void |
1855 | dpif_linux_dp_dump_start(struct nl_dump *dump) | |
1856 | { | |
1857 | struct dpif_linux_dp request; | |
1858 | struct ofpbuf *buf; | |
1859 | ||
1860 | dpif_linux_dp_init(&request); | |
df2c07f4 | 1861 | request.cmd = OVS_DP_CMD_GET; |
aaff4b55 BP |
1862 | |
1863 | buf = ofpbuf_new(1024); | |
1864 | dpif_linux_dp_to_ofpbuf(&request, buf); | |
a88b4e04 | 1865 | nl_dump_start(dump, NETLINK_GENERIC, buf); |
aaff4b55 BP |
1866 | ofpbuf_delete(buf); |
1867 | } | |
1868 | ||
d6569377 BP |
1869 | /* Executes 'request' in the kernel datapath. If the command fails, returns a |
1870 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
1871 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
aaff4b55 BP |
1872 | * result of the command is expected to be of the same form, which is decoded |
1873 | * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the | |
1874 | * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 1875 | static int |
d6569377 BP |
1876 | dpif_linux_dp_transact(const struct dpif_linux_dp *request, |
1877 | struct dpif_linux_dp *reply, struct ofpbuf **bufp) | |
1878 | { | |
aaff4b55 | 1879 | struct ofpbuf *request_buf; |
d6569377 | 1880 | int error; |
d6569377 | 1881 | |
cb22974d | 1882 | ovs_assert((reply != NULL) == (bufp != NULL)); |
d6569377 | 1883 | |
aaff4b55 BP |
1884 | request_buf = ofpbuf_new(1024); |
1885 | dpif_linux_dp_to_ofpbuf(request, request_buf); | |
a88b4e04 | 1886 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
aaff4b55 | 1887 | ofpbuf_delete(request_buf); |
d6569377 | 1888 | |
aaff4b55 | 1889 | if (reply) { |
847108dc | 1890 | dpif_linux_dp_init(reply); |
aaff4b55 BP |
1891 | if (!error) { |
1892 | error = dpif_linux_dp_from_ofpbuf(reply, *bufp); | |
1893 | } | |
d6569377 | 1894 | if (error) { |
aaff4b55 BP |
1895 | ofpbuf_delete(*bufp); |
1896 | *bufp = NULL; | |
d6569377 | 1897 | } |
d6569377 BP |
1898 | } |
1899 | return error; | |
1900 | } | |
1901 | ||
1902 | /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'. | |
1903 | * The caller must free '*bufp' when the reply is no longer needed ('reply' | |
1904 | * will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 1905 | static int |
d6569377 BP |
1906 | dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, |
1907 | struct ofpbuf **bufp) | |
1908 | { | |
1909 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
1910 | struct dpif_linux_dp request; | |
1911 | ||
1912 | dpif_linux_dp_init(&request); | |
df2c07f4 | 1913 | request.cmd = OVS_DP_CMD_GET; |
254f2dc8 | 1914 | request.dp_ifindex = dpif->dp_ifindex; |
d6569377 BP |
1915 | |
1916 | return dpif_linux_dp_transact(&request, reply, bufp); | |
1917 | } | |
1918 | \f | |
df2c07f4 | 1919 | /* Parses the contents of 'buf', which contains a "struct ovs_header" followed |
37a1300c | 1920 | * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a |
d6569377 BP |
1921 | * positive errno value. |
1922 | * | |
1923 | * 'flow' will contain pointers into 'buf', so the caller should not free 'buf' | |
1924 | * while 'flow' is still in use. */ | |
1925 | static int | |
1926 | dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, | |
1927 | const struct ofpbuf *buf) | |
1928 | { | |
df2c07f4 JP |
1929 | static const struct nl_policy ovs_flow_policy[] = { |
1930 | [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED }, | |
e6cc0bab | 1931 | [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true }, |
df2c07f4 | 1932 | [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true }, |
f7df9823 | 1933 | [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats), |
d6569377 | 1934 | .optional = true }, |
df2c07f4 JP |
1935 | [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true }, |
1936 | [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true }, | |
1937 | /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */ | |
d6569377 BP |
1938 | }; |
1939 | ||
df2c07f4 JP |
1940 | struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)]; |
1941 | struct ovs_header *ovs_header; | |
37a1300c BP |
1942 | struct nlmsghdr *nlmsg; |
1943 | struct genlmsghdr *genl; | |
1944 | struct ofpbuf b; | |
d6569377 BP |
1945 | |
1946 | dpif_linux_flow_init(flow); | |
1947 | ||
37a1300c BP |
1948 | ofpbuf_use_const(&b, buf->data, buf->size); |
1949 | nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); | |
1950 | genl = ofpbuf_try_pull(&b, sizeof *genl); | |
df2c07f4 JP |
1951 | ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); |
1952 | if (!nlmsg || !genl || !ovs_header | |
1953 | || nlmsg->nlmsg_type != ovs_flow_family | |
1954 | || !nl_policy_parse(&b, 0, ovs_flow_policy, a, | |
1955 | ARRAY_SIZE(ovs_flow_policy))) { | |
d6569377 BP |
1956 | return EINVAL; |
1957 | } | |
d6569377 | 1958 | |
37a1300c | 1959 | flow->nlmsg_flags = nlmsg->nlmsg_flags; |
df2c07f4 JP |
1960 | flow->dp_ifindex = ovs_header->dp_ifindex; |
1961 | flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]); | |
1962 | flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]); | |
e6cc0bab AZ |
1963 | |
1964 | if (a[OVS_FLOW_ATTR_MASK]) { | |
1965 | flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]); | |
1966 | flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]); | |
1967 | } | |
df2c07f4 JP |
1968 | if (a[OVS_FLOW_ATTR_ACTIONS]) { |
1969 | flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]); | |
1970 | flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]); | |
d6569377 | 1971 | } |
df2c07f4 JP |
1972 | if (a[OVS_FLOW_ATTR_STATS]) { |
1973 | flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]); | |
d6569377 | 1974 | } |
df2c07f4 JP |
1975 | if (a[OVS_FLOW_ATTR_TCP_FLAGS]) { |
1976 | flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]); | |
d6569377 | 1977 | } |
df2c07f4 JP |
1978 | if (a[OVS_FLOW_ATTR_USED]) { |
1979 | flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]); | |
9e980142 | 1980 | } |
d6569377 BP |
1981 | return 0; |
1982 | } | |
1983 | ||
df2c07f4 | 1984 | /* Appends to 'buf' (which must initially be empty) a "struct ovs_header" |
d6569377 BP |
1985 | * followed by Netlink attributes corresponding to 'flow'. */ |
1986 | static void | |
1987 | dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, | |
1988 | struct ofpbuf *buf) | |
1989 | { | |
df2c07f4 | 1990 | struct ovs_header *ovs_header; |
d6569377 | 1991 | |
df2c07f4 | 1992 | nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family, |
30b44744 | 1993 | NLM_F_REQUEST | flow->nlmsg_flags, |
69685a88 | 1994 | flow->cmd, OVS_FLOW_VERSION); |
37a1300c | 1995 | |
df2c07f4 JP |
1996 | ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); |
1997 | ovs_header->dp_ifindex = flow->dp_ifindex; | |
d6569377 BP |
1998 | |
1999 | if (flow->key_len) { | |
df2c07f4 | 2000 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY, flow->key, flow->key_len); |
d6569377 BP |
2001 | } |
2002 | ||
e6cc0bab AZ |
2003 | if (flow->mask_len) { |
2004 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK, flow->mask, flow->mask_len); | |
2005 | } | |
2006 | ||
d2a23af2 | 2007 | if (flow->actions || flow->actions_len) { |
df2c07f4 | 2008 | nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS, |
d6569377 BP |
2009 | flow->actions, flow->actions_len); |
2010 | } | |
2011 | ||
2012 | /* We never need to send these to the kernel. */ | |
cb22974d BP |
2013 | ovs_assert(!flow->stats); |
2014 | ovs_assert(!flow->tcp_flags); | |
2015 | ovs_assert(!flow->used); | |
d6569377 BP |
2016 | |
2017 | if (flow->clear) { | |
df2c07f4 | 2018 | nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR); |
d6569377 | 2019 | } |
d6569377 BP |
2020 | } |
2021 | ||
2022 | /* Clears 'flow' to "empty" values. */ | |
d3d8f1f7 | 2023 | static void |
d6569377 BP |
2024 | dpif_linux_flow_init(struct dpif_linux_flow *flow) |
2025 | { | |
2026 | memset(flow, 0, sizeof *flow); | |
2027 | } | |
2028 | ||
2029 | /* Executes 'request' in the kernel datapath. If the command fails, returns a | |
2030 | * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 | |
2031 | * without doing anything else. If 'reply' and 'bufp' are nonnull, then the | |
37a1300c BP |
2032 | * result of the command is expected to be a flow also, which is decoded and |
2033 | * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply | |
2034 | * is no longer needed ('reply' will contain pointers into '*bufp'). */ | |
d3d8f1f7 | 2035 | static int |
30b44744 | 2036 | dpif_linux_flow_transact(struct dpif_linux_flow *request, |
d6569377 BP |
2037 | struct dpif_linux_flow *reply, struct ofpbuf **bufp) |
2038 | { | |
37a1300c | 2039 | struct ofpbuf *request_buf; |
d6569377 | 2040 | int error; |
d6569377 | 2041 | |
cb22974d | 2042 | ovs_assert((reply != NULL) == (bufp != NULL)); |
d6569377 | 2043 | |
30b44744 BP |
2044 | if (reply) { |
2045 | request->nlmsg_flags |= NLM_F_ECHO; | |
2046 | } | |
2047 | ||
37a1300c BP |
2048 | request_buf = ofpbuf_new(1024); |
2049 | dpif_linux_flow_to_ofpbuf(request, request_buf); | |
a88b4e04 | 2050 | error = nl_transact(NETLINK_GENERIC, request_buf, bufp); |
37a1300c | 2051 | ofpbuf_delete(request_buf); |
d6569377 | 2052 | |
37a1300c BP |
2053 | if (reply) { |
2054 | if (!error) { | |
2055 | error = dpif_linux_flow_from_ofpbuf(reply, *bufp); | |
2056 | } | |
d6569377 | 2057 | if (error) { |
37a1300c BP |
2058 | dpif_linux_flow_init(reply); |
2059 | ofpbuf_delete(*bufp); | |
2060 | *bufp = NULL; | |
d6569377 | 2061 | } |
d6569377 BP |
2062 | } |
2063 | return error; | |
2064 | } | |
2065 | ||
2066 | static void | |
2067 | dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow, | |
2068 | struct dpif_flow_stats *stats) | |
2069 | { | |
2070 | if (flow->stats) { | |
2071 | stats->n_packets = get_unaligned_u64(&flow->stats->n_packets); | |
2072 | stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes); | |
2073 | } else { | |
2074 | stats->n_packets = 0; | |
2075 | stats->n_bytes = 0; | |
2076 | } | |
0e70cdcb | 2077 | stats->used = flow->used ? get_32aligned_u64(flow->used) : 0; |
d6569377 BP |
2078 | stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0; |
2079 | } | |
14b4d2f9 | 2080 | \f |
14b4d2f9 BP |
2081 | /* Logs information about a packet that was recently lost in 'ch' (in |
2082 | * 'dpif_'). */ | |
2083 | static void | |
2084 | report_loss(struct dpif *dpif_, struct dpif_channel *ch) | |
2085 | { | |
2086 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
2087 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); | |
14b4d2f9 BP |
2088 | struct ds s; |
2089 | ||
8d675c5a | 2090 | if (VLOG_DROP_WARN(&rl)) { |
14b4d2f9 BP |
2091 | return; |
2092 | } | |
2093 | ||
2094 | ds_init(&s); | |
2095 | if (ch->last_poll != LLONG_MIN) { | |
2096 | ds_put_format(&s, " (last polled %lld ms ago)", | |
2097 | time_msec() - ch->last_poll); | |
2098 | } | |
14b4d2f9 | 2099 | |
34582733 | 2100 | VLOG_WARN("%s: lost packet on channel %"PRIdPTR"%s", |
8d0abb5e | 2101 | dpif_name(dpif_), ch - dpif->channels, ds_cstr(&s)); |
14b4d2f9 BP |
2102 | ds_destroy(&s); |
2103 | } |