]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Copyright (c) 2008, 2009 Nicira Networks. | |
3 | * | |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "dpif.h" | |
19 | ||
20 | #include <assert.h> | |
21 | #include <ctype.h> | |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
26 | #include <linux/rtnetlink.h> | |
27 | #include <linux/ethtool.h> | |
28 | #include <linux/sockios.h> | |
29 | #include <netinet/in.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/stat.h> | |
34 | #include <sys/sysmacros.h> | |
35 | #include <unistd.h> | |
36 | ||
37 | #include "coverage.h" | |
38 | #include "dynamic-string.h" | |
39 | #include "flow.h" | |
40 | #include "netlink.h" | |
41 | #include "odp-util.h" | |
42 | #include "ofp-print.h" | |
43 | #include "ofpbuf.h" | |
44 | #include "packets.h" | |
45 | #include "poll-loop.h" | |
46 | #include "util.h" | |
47 | #include "valgrind.h" | |
48 | ||
49 | #include "vlog.h" | |
50 | #define THIS_MODULE VLM_dpif | |
51 | ||
52 | /* Rate limit for individual messages going to or from the datapath, output at | |
53 | * DBG level. This is very high because, if these are enabled, it is because | |
54 | * we really need to see them. */ | |
55 | static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); | |
56 | ||
57 | /* Not really much point in logging many dpif errors. */ | |
58 | static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); | |
59 | ||
60 | static int get_minor_from_name(const char *name, unsigned int *minor); | |
61 | static int name_to_minor(const char *name, unsigned int *minor); | |
62 | static int lookup_minor(const char *name, unsigned int *minor); | |
63 | static int open_by_minor(unsigned int minor, struct dpif *); | |
64 | static int make_openvswitch_device(unsigned int minor, char **fnp); | |
65 | static void check_rw_odp_flow(struct odp_flow *); | |
66 | ||
67 | int | |
68 | dpif_open(const char *name, struct dpif *dpif) | |
69 | { | |
70 | int listen_mask; | |
71 | int error; | |
72 | ||
73 | dpif->fd = -1; | |
74 | ||
75 | error = name_to_minor(name, &dpif->minor); | |
76 | if (error) { | |
77 | return error; | |
78 | } | |
79 | ||
80 | error = open_by_minor(dpif->minor, dpif); | |
81 | if (error) { | |
82 | return error; | |
83 | } | |
84 | ||
85 | /* We can open the device, but that doesn't mean that it's been created. | |
86 | * If it hasn't been, then any command other than ODP_DP_CREATE will | |
87 | * return ENODEV. Try something innocuous. */ | |
88 | listen_mask = 0; /* Make Valgrind happy. */ | |
89 | if (ioctl(dpif->fd, ODP_GET_LISTEN_MASK, &listen_mask)) { | |
90 | error = errno; | |
91 | if (error != ENODEV) { | |
b29ba128 BP |
92 | VLOG_WARN("%s: probe returned unexpected error: %s", |
93 | dpif_name(dpif), strerror(error)); | |
064af421 BP |
94 | } |
95 | dpif_close(dpif); | |
96 | return error; | |
97 | } | |
98 | return 0; | |
99 | } | |
100 | ||
101 | void | |
102 | dpif_close(struct dpif *dpif) | |
103 | { | |
104 | if (dpif) { | |
b29ba128 BP |
105 | free(dpif->name); |
106 | dpif->name = NULL; | |
064af421 BP |
107 | close(dpif->fd); |
108 | dpif->fd = -1; | |
109 | } | |
110 | } | |
111 | ||
112 | static int | |
113 | do_ioctl(const struct dpif *dpif, int cmd, const char *cmd_name, | |
114 | const void *arg) | |
115 | { | |
116 | int error = ioctl(dpif->fd, cmd, arg) ? errno : 0; | |
117 | if (cmd_name) { | |
118 | if (error) { | |
b29ba128 BP |
119 | VLOG_WARN_RL(&error_rl, "%s: ioctl(%s) failed (%s)", |
120 | dpif_name(dpif), cmd_name, strerror(error)); | |
064af421 | 121 | } else { |
b29ba128 BP |
122 | VLOG_DBG_RL(&dpmsg_rl, "%s: ioctl(%s): success", |
123 | dpif_name(dpif), cmd_name); | |
064af421 BP |
124 | } |
125 | } | |
126 | return error; | |
127 | } | |
128 | ||
129 | int | |
130 | dpif_create(const char *name, struct dpif *dpif) | |
131 | { | |
132 | unsigned int minor; | |
133 | int error; | |
134 | ||
135 | if (!get_minor_from_name(name, &minor)) { | |
136 | /* Minor was specified in 'name', go ahead and create it. */ | |
137 | error = open_by_minor(minor, dpif); | |
138 | if (error) { | |
139 | return error; | |
140 | } | |
141 | ||
142 | if (!strncmp(name, "nl:", 3)) { | |
143 | char devname[128]; | |
144 | sprintf(devname, "of%u", minor); | |
145 | error = ioctl(dpif->fd, ODP_DP_CREATE, devname) < 0 ? errno : 0; | |
146 | } else { | |
147 | error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0; | |
148 | } | |
149 | if (error) { | |
150 | dpif_close(dpif); | |
151 | } | |
152 | return error; | |
153 | } else { | |
154 | for (minor = 0; minor < ODP_MAX; minor++) { | |
155 | error = open_by_minor(minor, dpif); | |
156 | if (error) { | |
157 | return error; | |
158 | } | |
159 | ||
160 | error = ioctl(dpif->fd, ODP_DP_CREATE, name) < 0 ? errno : 0; | |
161 | if (!error) { | |
162 | return 0; | |
163 | } | |
164 | dpif_close(dpif); | |
165 | if (error != EBUSY) { | |
166 | return error; | |
167 | } | |
168 | } | |
169 | return ENOBUFS; | |
170 | } | |
171 | } | |
172 | ||
b29ba128 BP |
173 | const char * |
174 | dpif_name(const struct dpif *dpif) | |
175 | { | |
176 | return dpif->name; | |
177 | } | |
178 | ||
064af421 BP |
179 | int |
180 | dpif_delete(struct dpif *dpif) | |
181 | { | |
182 | COVERAGE_INC(dpif_destroy); | |
183 | return do_ioctl(dpif, ODP_DP_DESTROY, "ODP_DP_DESTROY", NULL); | |
184 | } | |
185 | ||
186 | int | |
187 | dpif_get_dp_stats(const struct dpif *dpif, struct odp_stats *stats) | |
188 | { | |
189 | memset(stats, 0, sizeof *stats); | |
190 | return do_ioctl(dpif, ODP_DP_STATS, "ODP_DP_STATS", stats); | |
191 | } | |
192 | ||
193 | int | |
194 | dpif_get_drop_frags(const struct dpif *dpif, bool *drop_frags) | |
195 | { | |
196 | int tmp; | |
197 | int error = do_ioctl(dpif, ODP_GET_DROP_FRAGS, "ODP_GET_DROP_FRAGS", &tmp); | |
198 | *drop_frags = error ? tmp & 1 : false; | |
199 | return error; | |
200 | } | |
201 | ||
202 | int | |
203 | dpif_set_drop_frags(struct dpif *dpif, bool drop_frags) | |
204 | { | |
205 | int tmp = drop_frags; | |
206 | return do_ioctl(dpif, ODP_SET_DROP_FRAGS, "ODP_SET_DROP_FRAGS", &tmp); | |
207 | } | |
208 | ||
209 | int | |
210 | dpif_get_listen_mask(const struct dpif *dpif, int *listen_mask) | |
211 | { | |
212 | int error = do_ioctl(dpif, ODP_GET_LISTEN_MASK, "ODP_GET_LISTEN_MASK", | |
213 | listen_mask); | |
214 | if (error) { | |
215 | *listen_mask = 0; | |
216 | } | |
217 | return error; | |
218 | } | |
219 | ||
220 | int | |
221 | dpif_set_listen_mask(struct dpif *dpif, int listen_mask) | |
222 | { | |
223 | return do_ioctl(dpif, ODP_SET_LISTEN_MASK, "ODP_SET_LISTEN_MASK", | |
224 | &listen_mask); | |
225 | } | |
226 | ||
227 | int | |
228 | dpif_purge(struct dpif *dpif) | |
229 | { | |
230 | struct odp_stats stats; | |
231 | unsigned int i; | |
232 | int error; | |
233 | ||
234 | COVERAGE_INC(dpif_purge); | |
235 | ||
236 | error = dpif_get_dp_stats(dpif, &stats); | |
237 | if (error) { | |
238 | return error; | |
239 | } | |
240 | ||
241 | for (i = 0; i < stats.max_miss_queue + stats.max_action_queue; i++) { | |
242 | struct ofpbuf *buf; | |
243 | error = dpif_recv(dpif, &buf); | |
244 | if (error) { | |
245 | return error == EAGAIN ? 0 : error; | |
246 | } | |
247 | ofpbuf_delete(buf); | |
248 | } | |
249 | return 0; | |
250 | } | |
251 | ||
252 | int | |
253 | dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no, | |
254 | uint16_t flags) | |
255 | { | |
256 | struct odp_port port; | |
257 | ||
258 | COVERAGE_INC(dpif_port_add); | |
259 | memset(&port, 0, sizeof port); | |
260 | strncpy(port.devname, devname, sizeof port.devname); | |
261 | port.port = port_no; | |
262 | port.flags = flags; | |
263 | if (!ioctl(dpif->fd, ODP_PORT_ADD, &port)) { | |
b29ba128 BP |
264 | VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu16, |
265 | dpif_name(dpif), devname, port_no); | |
064af421 BP |
266 | return 0; |
267 | } else { | |
b29ba128 BP |
268 | VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port %"PRIu16": %s", |
269 | dpif_name(dpif), devname, port_no, strerror(errno)); | |
064af421 BP |
270 | return errno; |
271 | } | |
272 | } | |
273 | ||
274 | int | |
275 | dpif_port_del(struct dpif *dpif, uint16_t port_no) | |
276 | { | |
277 | int tmp = port_no; | |
278 | COVERAGE_INC(dpif_port_del); | |
279 | return do_ioctl(dpif, ODP_PORT_DEL, "ODP_PORT_DEL", &tmp); | |
280 | } | |
281 | ||
282 | int | |
283 | dpif_port_query_by_number(const struct dpif *dpif, uint16_t port_no, | |
284 | struct odp_port *port) | |
285 | { | |
286 | memset(port, 0, sizeof *port); | |
287 | port->port = port_no; | |
288 | if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) { | |
b29ba128 BP |
289 | VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu16" is device %s", |
290 | dpif_name(dpif), port_no, port->devname); | |
064af421 BP |
291 | return 0; |
292 | } else { | |
b29ba128 BP |
293 | VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu16": %s", |
294 | dpif_name(dpif), port_no, strerror(errno)); | |
064af421 BP |
295 | return errno; |
296 | } | |
297 | } | |
298 | ||
299 | int | |
300 | dpif_port_query_by_name(const struct dpif *dpif, const char *devname, | |
301 | struct odp_port *port) | |
302 | { | |
303 | memset(port, 0, sizeof *port); | |
304 | strncpy(port->devname, devname, sizeof port->devname); | |
305 | if (!ioctl(dpif->fd, ODP_PORT_QUERY, port)) { | |
b29ba128 BP |
306 | VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu16, |
307 | dpif_name(dpif), devname, port->port); | |
064af421 BP |
308 | return 0; |
309 | } else { | |
5c6d2a3f BP |
310 | /* Log level is DBG here because all the current callers are interested |
311 | * in whether 'dpif' actually has a port 'devname', so that it's not an | |
312 | * issue worth logging if it doesn't. */ | |
313 | VLOG_DBG_RL(&error_rl, "%s: failed to query port %s: %s", | |
314 | dpif_name(dpif), devname, strerror(errno)); | |
064af421 BP |
315 | return errno; |
316 | } | |
317 | } | |
318 | ||
335562c0 BP |
319 | int |
320 | dpif_port_get_name(struct dpif *dpif, uint16_t port_no, | |
321 | char *name, size_t name_size) | |
322 | { | |
323 | struct odp_port port; | |
324 | int error; | |
325 | ||
326 | assert(name_size > 0); | |
327 | ||
328 | error = dpif_port_query_by_number(dpif, port_no, &port); | |
329 | if (!error) { | |
330 | ovs_strlcpy(name, port.devname, name_size); | |
331 | } else { | |
332 | *name = '\0'; | |
333 | } | |
334 | return error; | |
335 | } | |
336 | ||
064af421 BP |
337 | int |
338 | dpif_port_list(const struct dpif *dpif, | |
339 | struct odp_port **ports, size_t *n_ports) | |
340 | { | |
341 | struct odp_portvec pv; | |
342 | struct odp_stats stats; | |
343 | int error; | |
344 | ||
345 | do { | |
346 | error = dpif_get_dp_stats(dpif, &stats); | |
347 | if (error) { | |
348 | goto error; | |
349 | } | |
350 | ||
351 | *ports = xcalloc(1, stats.n_ports * sizeof **ports); | |
352 | pv.ports = *ports; | |
353 | pv.n_ports = stats.n_ports; | |
354 | error = do_ioctl(dpif, ODP_PORT_LIST, "ODP_PORT_LIST", &pv); | |
355 | if (error) { | |
356 | free(*ports); | |
357 | goto error; | |
358 | } | |
359 | } while (pv.n_ports != stats.n_ports); | |
360 | *n_ports = pv.n_ports; | |
361 | return 0; | |
362 | ||
363 | error: | |
364 | *ports = NULL; | |
365 | *n_ports = 0; | |
366 | return error; | |
367 | } | |
368 | ||
369 | int | |
370 | dpif_port_group_set(struct dpif *dpif, uint16_t group, | |
371 | const uint16_t ports[], size_t n_ports) | |
372 | { | |
373 | struct odp_port_group pg; | |
374 | ||
375 | COVERAGE_INC(dpif_port_group_set); | |
376 | assert(n_ports <= UINT16_MAX); | |
377 | pg.group = group; | |
378 | pg.ports = (uint16_t *) ports; | |
379 | pg.n_ports = n_ports; | |
380 | return do_ioctl(dpif, ODP_PORT_GROUP_SET, "ODP_PORT_GROUP_SET", &pg); | |
381 | } | |
382 | ||
383 | /* Careful: '*n_out' can be greater than 'n_ports' on return, if 'n_ports' is | |
384 | * less than the number of ports in 'group'. */ | |
385 | int | |
386 | dpif_port_group_get(const struct dpif *dpif, uint16_t group, | |
387 | uint16_t ports[], size_t n_ports, size_t *n_out) | |
388 | { | |
389 | struct odp_port_group pg; | |
390 | int error; | |
391 | ||
392 | assert(n_ports <= UINT16_MAX); | |
393 | pg.group = group; | |
394 | pg.ports = ports; | |
395 | pg.n_ports = n_ports; | |
396 | error = do_ioctl(dpif, ODP_PORT_GROUP_GET, "ODP_PORT_GROUP_GET", &pg); | |
397 | *n_out = error ? 0 : pg.n_ports; | |
398 | return error; | |
399 | } | |
400 | ||
401 | int | |
402 | dpif_flow_flush(struct dpif *dpif) | |
403 | { | |
404 | COVERAGE_INC(dpif_flow_flush); | |
405 | return do_ioctl(dpif, ODP_FLOW_FLUSH, "ODP_FLOW_FLUSH", NULL); | |
406 | } | |
407 | ||
408 | static enum vlog_level | |
409 | flow_message_log_level(int error) | |
410 | { | |
411 | return error ? VLL_WARN : VLL_DBG; | |
412 | } | |
413 | ||
414 | static bool | |
415 | should_log_flow_message(int error) | |
416 | { | |
417 | return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error), | |
418 | error ? &error_rl : &dpmsg_rl); | |
419 | } | |
420 | ||
421 | static void | |
422 | log_flow_message(const struct dpif *dpif, int error, | |
423 | const char *operation, | |
424 | const flow_t *flow, const struct odp_flow_stats *stats, | |
425 | const union odp_action *actions, size_t n_actions) | |
426 | { | |
427 | struct ds ds = DS_EMPTY_INITIALIZER; | |
b29ba128 | 428 | ds_put_format(&ds, "%s: ", dpif_name(dpif)); |
064af421 BP |
429 | if (error) { |
430 | ds_put_cstr(&ds, "failed to "); | |
431 | } | |
432 | ds_put_format(&ds, "%s ", operation); | |
433 | if (error) { | |
434 | ds_put_format(&ds, "(%s) ", strerror(error)); | |
435 | } | |
436 | flow_format(&ds, flow); | |
437 | if (stats) { | |
438 | ds_put_cstr(&ds, ", "); | |
439 | format_odp_flow_stats(&ds, stats); | |
440 | } | |
441 | if (actions || n_actions) { | |
442 | ds_put_cstr(&ds, ", actions:"); | |
443 | format_odp_actions(&ds, actions, n_actions); | |
444 | } | |
445 | vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds)); | |
446 | ds_destroy(&ds); | |
447 | } | |
448 | ||
449 | static int | |
450 | do_flow_ioctl(const struct dpif *dpif, int cmd, struct odp_flow *flow, | |
451 | const char *operation, bool show_stats) | |
452 | { | |
453 | int error = do_ioctl(dpif, cmd, NULL, flow); | |
454 | if (error && show_stats) { | |
455 | flow->n_actions = 0; | |
456 | } | |
457 | if (should_log_flow_message(error)) { | |
458 | log_flow_message(dpif, error, operation, &flow->key, | |
459 | show_stats && !error ? &flow->stats : NULL, | |
460 | flow->actions, flow->n_actions); | |
461 | } | |
462 | return error; | |
463 | } | |
464 | ||
465 | int | |
466 | dpif_flow_put(struct dpif *dpif, struct odp_flow_put *put) | |
467 | { | |
468 | int error = do_ioctl(dpif, ODP_FLOW_PUT, NULL, put); | |
469 | COVERAGE_INC(dpif_flow_put); | |
470 | if (should_log_flow_message(error)) { | |
471 | struct ds operation = DS_EMPTY_INITIALIZER; | |
472 | ds_put_cstr(&operation, "put"); | |
473 | if (put->flags & ODPPF_CREATE) { | |
474 | ds_put_cstr(&operation, "[create]"); | |
475 | } | |
476 | if (put->flags & ODPPF_MODIFY) { | |
477 | ds_put_cstr(&operation, "[modify]"); | |
478 | } | |
479 | if (put->flags & ODPPF_ZERO_STATS) { | |
480 | ds_put_cstr(&operation, "[zero]"); | |
481 | } | |
482 | #define ODPPF_ALL (ODPPF_CREATE | ODPPF_MODIFY | ODPPF_ZERO_STATS) | |
483 | if (put->flags & ~ODPPF_ALL) { | |
484 | ds_put_format(&operation, "[%x]", put->flags & ~ODPPF_ALL); | |
485 | } | |
486 | log_flow_message(dpif, error, ds_cstr(&operation), &put->flow.key, | |
487 | !error ? &put->flow.stats : NULL, | |
488 | put->flow.actions, put->flow.n_actions); | |
489 | ds_destroy(&operation); | |
490 | } | |
491 | return error; | |
492 | } | |
493 | ||
494 | int | |
495 | dpif_flow_del(struct dpif *dpif, struct odp_flow *flow) | |
496 | { | |
497 | COVERAGE_INC(dpif_flow_del); | |
498 | check_rw_odp_flow(flow); | |
499 | memset(&flow->stats, 0, sizeof flow->stats); | |
500 | return do_flow_ioctl(dpif, ODP_FLOW_DEL, flow, "delete flow", true); | |
501 | } | |
502 | ||
503 | int | |
504 | dpif_flow_get(const struct dpif *dpif, struct odp_flow *flow) | |
505 | { | |
506 | COVERAGE_INC(dpif_flow_query); | |
507 | check_rw_odp_flow(flow); | |
508 | memset(&flow->stats, 0, sizeof flow->stats); | |
509 | return do_flow_ioctl(dpif, ODP_FLOW_GET, flow, "get flow", true); | |
510 | } | |
511 | ||
512 | int | |
513 | dpif_flow_get_multiple(const struct dpif *dpif, | |
514 | struct odp_flow flows[], size_t n) | |
515 | { | |
516 | struct odp_flowvec fv; | |
517 | size_t i; | |
518 | ||
519 | COVERAGE_ADD(dpif_flow_query_multiple, n); | |
520 | fv.flows = flows; | |
521 | fv.n_flows = n; | |
522 | for (i = 0; i < n; i++) { | |
523 | check_rw_odp_flow(&flows[i]); | |
524 | } | |
525 | return do_ioctl(dpif, ODP_FLOW_GET_MULTIPLE, "ODP_FLOW_GET_MULTIPLE", | |
526 | &fv); | |
527 | } | |
528 | ||
529 | int | |
530 | dpif_flow_list(const struct dpif *dpif, struct odp_flow flows[], size_t n, | |
531 | size_t *n_out) | |
532 | { | |
533 | struct odp_flowvec fv; | |
534 | uint32_t i; | |
535 | int error; | |
536 | ||
537 | COVERAGE_INC(dpif_flow_query_list); | |
538 | fv.flows = flows; | |
539 | fv.n_flows = n; | |
540 | if (RUNNING_ON_VALGRIND) { | |
541 | memset(flows, 0, n * sizeof *flows); | |
542 | } else { | |
543 | for (i = 0; i < n; i++) { | |
544 | flows[i].actions = NULL; | |
545 | flows[i].n_actions = 0; | |
546 | } | |
547 | } | |
548 | error = do_ioctl(dpif, ODP_FLOW_LIST, NULL, &fv); | |
549 | if (error) { | |
550 | *n_out = 0; | |
b29ba128 BP |
551 | VLOG_WARN_RL(&error_rl, "%s: flow list failed (%s)", |
552 | dpif_name(dpif), strerror(error)); | |
064af421 BP |
553 | } else { |
554 | COVERAGE_ADD(dpif_flow_query_list_n, fv.n_flows); | |
555 | *n_out = fv.n_flows; | |
b29ba128 BP |
556 | VLOG_DBG_RL(&dpmsg_rl, "%s: listed %zu flows", |
557 | dpif_name(dpif), *n_out); | |
064af421 BP |
558 | } |
559 | return error; | |
560 | } | |
561 | ||
562 | int | |
563 | dpif_flow_list_all(const struct dpif *dpif, | |
564 | struct odp_flow **flowsp, size_t *np) | |
565 | { | |
566 | struct odp_stats stats; | |
567 | struct odp_flow *flows; | |
568 | size_t n_flows; | |
569 | int error; | |
570 | ||
571 | *flowsp = NULL; | |
572 | *np = 0; | |
573 | ||
574 | error = dpif_get_dp_stats(dpif, &stats); | |
575 | if (error) { | |
576 | return error; | |
577 | } | |
578 | ||
579 | flows = xmalloc(sizeof *flows * stats.n_flows); | |
580 | error = dpif_flow_list(dpif, flows, stats.n_flows, &n_flows); | |
581 | if (error) { | |
582 | free(flows); | |
583 | return error; | |
584 | } | |
585 | ||
586 | if (stats.n_flows != n_flows) { | |
b29ba128 | 587 | VLOG_WARN_RL(&error_rl, "%s: datapath stats reported %"PRIu32" " |
064af421 | 588 | "flows but flow listing reported %zu", |
b29ba128 | 589 | dpif_name(dpif), stats.n_flows, n_flows); |
064af421 BP |
590 | } |
591 | *flowsp = flows; | |
592 | *np = n_flows; | |
593 | return 0; | |
594 | } | |
595 | ||
596 | int | |
597 | dpif_execute(struct dpif *dpif, uint16_t in_port, | |
598 | const union odp_action actions[], size_t n_actions, | |
599 | const struct ofpbuf *buf) | |
600 | { | |
601 | int error; | |
602 | ||
603 | COVERAGE_INC(dpif_execute); | |
604 | if (n_actions > 0) { | |
605 | struct odp_execute execute; | |
606 | memset(&execute, 0, sizeof execute); | |
607 | execute.in_port = in_port; | |
608 | execute.actions = (union odp_action *) actions; | |
609 | execute.n_actions = n_actions; | |
610 | execute.data = buf->data; | |
611 | execute.length = buf->size; | |
612 | error = do_ioctl(dpif, ODP_EXECUTE, NULL, &execute); | |
613 | } else { | |
614 | error = 0; | |
615 | } | |
616 | ||
617 | if (!(error ? VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))) { | |
618 | struct ds ds = DS_EMPTY_INITIALIZER; | |
619 | char *packet = ofp_packet_to_string(buf->data, buf->size, buf->size); | |
b29ba128 | 620 | ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); |
064af421 BP |
621 | format_odp_actions(&ds, actions, n_actions); |
622 | if (error) { | |
623 | ds_put_format(&ds, " failed (%s)", strerror(error)); | |
624 | } | |
625 | ds_put_format(&ds, " on packet %s", packet); | |
626 | vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds)); | |
627 | ds_destroy(&ds); | |
628 | free(packet); | |
629 | } | |
630 | return error; | |
631 | } | |
632 | ||
633 | int | |
634 | dpif_recv(struct dpif *dpif, struct ofpbuf **bufp) | |
635 | { | |
636 | struct ofpbuf *buf; | |
637 | int retval; | |
638 | int error; | |
639 | ||
640 | buf = ofpbuf_new(65536); | |
641 | retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); | |
642 | if (retval < 0) { | |
643 | error = errno; | |
644 | if (error != EAGAIN) { | |
b29ba128 BP |
645 | VLOG_WARN_RL(&error_rl, "%s: read failed: %s", |
646 | dpif_name(dpif), strerror(error)); | |
064af421 BP |
647 | } |
648 | } else if (retval >= sizeof(struct odp_msg)) { | |
649 | struct odp_msg *msg = buf->data; | |
650 | if (msg->length <= retval) { | |
651 | buf->size += retval; | |
652 | if (VLOG_IS_DBG_ENABLED()) { | |
653 | void *payload = msg + 1; | |
654 | size_t length = buf->size - sizeof *msg; | |
655 | char *s = ofp_packet_to_string(payload, length, length); | |
b29ba128 BP |
656 | VLOG_DBG_RL(&dpmsg_rl, "%s: received %s message of length " |
657 | "%zu on port %"PRIu16": %s", dpif_name(dpif), | |
064af421 BP |
658 | (msg->type == _ODPL_MISS_NR ? "miss" |
659 | : msg->type == _ODPL_ACTION_NR ? "action" | |
660 | : "<unknown>"), | |
661 | msg->length - sizeof(struct odp_msg), | |
662 | msg->port, s); | |
663 | free(s); | |
664 | } | |
665 | *bufp = buf; | |
666 | COVERAGE_INC(dpif_recv); | |
667 | return 0; | |
668 | } else { | |
b29ba128 | 669 | VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " |
064af421 | 670 | "from %zu bytes to %d", |
b29ba128 | 671 | dpif_name(dpif), msg->length, retval); |
064af421 BP |
672 | error = ERANGE; |
673 | } | |
674 | } else if (!retval) { | |
b29ba128 | 675 | VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif)); |
064af421 BP |
676 | error = EPROTO; |
677 | } else { | |
678 | VLOG_WARN_RL(&error_rl, | |
b29ba128 BP |
679 | "%s: discarding too-short message (%d bytes)", |
680 | dpif_name(dpif), retval); | |
064af421 BP |
681 | error = ERANGE; |
682 | } | |
683 | ||
684 | *bufp = NULL; | |
685 | ofpbuf_delete(buf); | |
686 | return error; | |
687 | } | |
688 | ||
689 | void | |
690 | dpif_recv_wait(struct dpif *dpif) | |
691 | { | |
692 | poll_fd_wait(dpif->fd, POLLIN); | |
693 | } | |
53a4218d BP |
694 | |
695 | void | |
696 | dpif_get_netflow_ids(const struct dpif *dpif, | |
697 | uint8_t *engine_type, uint8_t *engine_id) | |
698 | { | |
699 | *engine_type = *engine_id = dpif->minor; | |
700 | } | |
064af421 BP |
701 | \f |
702 | struct dpifmon { | |
703 | struct dpif dpif; | |
704 | struct nl_sock *sock; | |
705 | int local_ifindex; | |
706 | }; | |
707 | ||
708 | int | |
709 | dpifmon_create(const char *datapath_name, struct dpifmon **monp) | |
710 | { | |
711 | struct dpifmon *mon; | |
712 | char local_name[IFNAMSIZ]; | |
713 | int error; | |
714 | ||
715 | mon = *monp = xmalloc(sizeof *mon); | |
716 | ||
717 | error = dpif_open(datapath_name, &mon->dpif); | |
718 | if (error) { | |
719 | goto error; | |
720 | } | |
335562c0 BP |
721 | error = dpif_port_get_name(&mon->dpif, ODPP_LOCAL, |
722 | local_name, sizeof local_name); | |
064af421 BP |
723 | if (error) { |
724 | goto error_close_dpif; | |
725 | } | |
726 | ||
727 | mon->local_ifindex = if_nametoindex(local_name); | |
728 | if (!mon->local_ifindex) { | |
729 | error = errno; | |
730 | VLOG_WARN("could not get ifindex of %s device: %s", | |
731 | local_name, strerror(errno)); | |
732 | goto error_close_dpif; | |
733 | } | |
734 | ||
735 | error = nl_sock_create(NETLINK_ROUTE, RTNLGRP_LINK, 0, 0, &mon->sock); | |
736 | if (error) { | |
737 | VLOG_WARN("could not create rtnetlink socket: %s", strerror(error)); | |
738 | goto error_close_dpif; | |
739 | } | |
740 | ||
741 | return 0; | |
742 | ||
743 | error_close_dpif: | |
744 | dpif_close(&mon->dpif); | |
745 | error: | |
746 | free(mon); | |
747 | *monp = NULL; | |
748 | return error; | |
749 | } | |
750 | ||
751 | void | |
752 | dpifmon_destroy(struct dpifmon *mon) | |
753 | { | |
754 | if (mon) { | |
755 | dpif_close(&mon->dpif); | |
756 | nl_sock_destroy(mon->sock); | |
757 | } | |
758 | } | |
759 | ||
760 | int | |
761 | dpifmon_poll(struct dpifmon *mon, char **devnamep) | |
762 | { | |
763 | static struct vlog_rate_limit slow_rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
764 | static const struct nl_policy rtnlgrp_link_policy[] = { | |
765 | [IFLA_IFNAME] = { .type = NL_A_STRING }, | |
766 | [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, | |
767 | }; | |
768 | struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; | |
769 | struct ofpbuf *buf; | |
770 | int error; | |
771 | ||
772 | *devnamep = NULL; | |
773 | again: | |
774 | error = nl_sock_recv(mon->sock, &buf, false); | |
775 | switch (error) { | |
776 | case 0: | |
777 | if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), | |
778 | rtnlgrp_link_policy, | |
779 | attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { | |
780 | VLOG_WARN_RL(&slow_rl, "received bad rtnl message"); | |
781 | error = ENOBUFS; | |
782 | } else { | |
783 | const char *devname = nl_attr_get_string(attrs[IFLA_IFNAME]); | |
784 | bool for_us; | |
785 | ||
786 | if (attrs[IFLA_MASTER]) { | |
787 | uint32_t master_ifindex = nl_attr_get_u32(attrs[IFLA_MASTER]); | |
788 | for_us = master_ifindex == mon->local_ifindex; | |
789 | } else { | |
5c6d2a3f | 790 | /* It's for us if that device is one of our ports. */ |
064af421 | 791 | struct odp_port port; |
5c6d2a3f | 792 | for_us = !dpif_port_query_by_name(mon->dpif, devname, &port); |
064af421 BP |
793 | } |
794 | ||
795 | if (!for_us) { | |
796 | /* Not for us, try again. */ | |
797 | ofpbuf_delete(buf); | |
798 | COVERAGE_INC(dpifmon_poll_false_wakeup); | |
799 | goto again; | |
800 | } | |
801 | COVERAGE_INC(dpifmon_poll_changed); | |
802 | *devnamep = xstrdup(devname); | |
803 | } | |
804 | ofpbuf_delete(buf); | |
805 | break; | |
806 | ||
807 | case EAGAIN: | |
808 | /* Nothing to do. */ | |
809 | break; | |
810 | ||
811 | case ENOBUFS: | |
812 | VLOG_WARN_RL(&slow_rl, "dpifmon socket overflowed"); | |
813 | break; | |
814 | ||
815 | default: | |
816 | VLOG_WARN_RL(&slow_rl, "error on dpifmon socket: %s", strerror(error)); | |
817 | break; | |
818 | } | |
819 | return error; | |
820 | } | |
821 | ||
822 | void | |
823 | dpifmon_run(struct dpifmon *mon UNUSED) | |
824 | { | |
825 | /* Nothing to do in this implementation. */ | |
826 | } | |
827 | ||
828 | void | |
829 | dpifmon_wait(struct dpifmon *mon) | |
830 | { | |
831 | nl_sock_wait(mon->sock, POLLIN); | |
832 | } | |
833 | \f | |
/* Device-number helpers, defined below. */
static int get_openvswitch_major(void);
static int get_major(const char *target, int default_major);
836 | ||
837 | static int | |
838 | lookup_minor(const char *name, unsigned int *minor) | |
839 | { | |
840 | struct ethtool_drvinfo drvinfo; | |
841 | struct ifreq ifr; | |
842 | int error; | |
843 | int sock; | |
844 | ||
845 | *minor = -1; | |
846 | sock = socket(AF_INET, SOCK_DGRAM, 0); | |
847 | if (sock < 0) { | |
848 | VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno)); | |
849 | error = errno; | |
850 | goto error; | |
851 | } | |
852 | ||
853 | memset(&ifr, 0, sizeof ifr); | |
854 | strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); | |
855 | ifr.ifr_data = (caddr_t) &drvinfo; | |
856 | ||
857 | memset(&drvinfo, 0, sizeof drvinfo); | |
858 | drvinfo.cmd = ETHTOOL_GDRVINFO; | |
859 | if (ioctl(sock, SIOCETHTOOL, &ifr)) { | |
860 | VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno)); | |
861 | error = errno; | |
862 | goto error_close_sock; | |
863 | } | |
864 | ||
865 | if (strcmp(drvinfo.driver, "openvswitch")) { | |
866 | VLOG_WARN("%s is not an openvswitch device", name); | |
867 | error = EOPNOTSUPP; | |
868 | goto error_close_sock; | |
869 | } | |
870 | ||
871 | if (!isdigit(drvinfo.bus_info[0])) { | |
872 | VLOG_WARN("%s ethtool info does not contain an openvswitch minor", | |
873 | name); | |
874 | error = EPROTOTYPE; | |
875 | goto error_close_sock; | |
876 | } | |
877 | ||
878 | *minor = atoi(drvinfo.bus_info); | |
879 | close(sock); | |
880 | return 0; | |
881 | ||
882 | error_close_sock: | |
883 | close(sock); | |
884 | error: | |
885 | return error; | |
886 | } | |
887 | ||
888 | static int | |
889 | make_openvswitch_device(unsigned int minor, char **fnp) | |
890 | { | |
891 | dev_t dev = makedev(get_openvswitch_major(), minor); | |
892 | const char dirname[] = "/dev/net"; | |
893 | struct stat s; | |
894 | char fn[128]; | |
895 | ||
896 | *fnp = NULL; | |
897 | sprintf(fn, "%s/dp%d", dirname, minor); | |
898 | if (!stat(fn, &s)) { | |
899 | if (!S_ISCHR(s.st_mode)) { | |
900 | VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", | |
901 | fn); | |
902 | } else if (s.st_rdev != dev) { | |
903 | VLOG_WARN_RL(&error_rl, | |
904 | "%s is device %u:%u instead of %u:%u, fixing", | |
905 | fn, major(s.st_rdev), minor(s.st_rdev), | |
906 | major(dev), minor(dev)); | |
907 | } else { | |
908 | goto success; | |
909 | } | |
910 | if (unlink(fn)) { | |
911 | VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", | |
912 | fn, strerror(errno)); | |
913 | return errno; | |
914 | } | |
915 | } else if (errno == ENOENT) { | |
916 | if (stat(dirname, &s)) { | |
917 | if (errno == ENOENT) { | |
918 | if (mkdir(dirname, 0755)) { | |
919 | VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", | |
920 | dirname, strerror(errno)); | |
921 | return errno; | |
922 | } | |
923 | } else { | |
924 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", | |
925 | dirname, strerror(errno)); | |
926 | return errno; | |
927 | } | |
928 | } | |
929 | } else { | |
930 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); | |
931 | return errno; | |
932 | } | |
933 | ||
934 | /* The device needs to be created. */ | |
935 | if (mknod(fn, S_IFCHR | 0700, dev)) { | |
936 | VLOG_WARN_RL(&error_rl, | |
937 | "%s: creating character device %u:%u failed (%s)", | |
938 | fn, major(dev), minor(dev), strerror(errno)); | |
939 | return errno; | |
940 | } | |
941 | ||
942 | success: | |
943 | *fnp = xstrdup(fn); | |
944 | return 0; | |
945 | } | |
946 | ||
947 | ||
/* Returns the character-device major number for "openvswitch".  The value is
 * obtained from get_major() and cached in a static variable; the lookup is
 * repeated on each call for as long as the cached value is 0. */
static int
get_openvswitch_major(void)
{
    enum { DEFAULT_MAJOR = 248 };
    static unsigned int cached_major;

    if (cached_major) {
        return cached_major;
    }

    cached_major = get_major("openvswitch", DEFAULT_MAJOR);
    return cached_major;
}
958 | ||
/* Looks up the character-device major number registered under the name
 * 'target' in /proc/devices.
 *
 * Only the character-device section is examined: scanning stops at the
 * first line that begins with "Block".  Lines in that section are expected
 * to have the form "<major> <name>"; the first malformed line triggers a
 * one-time warning.
 *
 * Returns the major number if 'target' is found.  Otherwise (including when
 * /proc/devices cannot be opened) logs the problem and returns
 * 'default_major'. */
static int
get_major(const char *target, int default_major)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
        goto error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;

        if (!strncmp(line, "Character", 9)
            || line[strspn(line, " \t\r\n")] == '\0') {
            /* Section header or blank line: nothing to do. */
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
            /* Both fields were converted, so 'name' is safe to read. */
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else {
            static bool warned;
            if (!warned) {
                VLOG_WARN("%s:%d: syntax error", fn, ln);
            }
            warned = true;
        }
    }
    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?), using "
             "default major %d", fn, target, default_major);
error:
    VLOG_INFO("using default major %d for %s", default_major, target);
    return default_major;
}
1002 | ||
/* Translates datapath 'name' into a minor number stored in '*minor'.
 *
 * A minor encoded directly in 'name' (see get_minor_from_name()) is used if
 * present; otherwise 'name' is treated as a network device and resolved with
 * lookup_minor().
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
name_to_minor(const char *name, unsigned int *minor)
{
    return get_minor_from_name(name, minor) ? lookup_minor(name, minor) : 0;
}
1011 | ||
/* Extracts a datapath minor number that is encoded directly in 'name'.
 *
 * Two forms are recognized: "dp<N>" and, for compatibility only, "nl:<N>",
 * where <N> starts with a decimal digit.  Characters after the leading run
 * of digits are ignored.
 *
 * Returns 0 and stores the number in '*minor' if 'name' has one of these
 * forms and the number fits in an unsigned int.  Otherwise returns EINVAL
 * and leaves '*minor' untouched.  May modify errno. */
static int
get_minor_from_name(const char *name, unsigned int *minor)
{
    const char *digits;
    unsigned long value;

    if (!strncmp(name, "dp", 2)) {
        digits = name + 2;
    } else if (!strncmp(name, "nl:", 3)) {
        /* This is for compatibility only and will be dropped. */
        digits = name + 3;
    } else {
        return EINVAL;
    }

    /* <ctype.h> functions require a value representable as unsigned char. */
    if (!isdigit((unsigned char) *digits)) {
        return EINVAL;
    }

    /* strtoul() reports overflow through errno, unlike atoi(), whose
     * behavior on out-of-range input is undefined. */
    errno = 0;
    value = strtoul(digits, NULL, 10);
    if (errno == ERANGE || value != (unsigned int) value) {
        return EINVAL;
    }

    *minor = value;
    return 0;
}
1026 | ||
1027 | static int | |
1028 | open_by_minor(unsigned int minor, struct dpif *dpif) | |
1029 | { | |
1030 | int error; | |
1031 | char *fn; | |
1032 | int fd; | |
1033 | ||
1034 | dpif->minor = -1; | |
1035 | dpif->fd = -1; | |
1036 | error = make_openvswitch_device(minor, &fn); | |
1037 | if (error) { | |
1038 | return error; | |
1039 | } | |
1040 | ||
1041 | fd = open(fn, O_RDONLY | O_NONBLOCK); | |
1042 | if (fd < 0) { | |
1043 | error = errno; | |
1044 | VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); | |
1045 | free(fn); | |
1046 | return error; | |
1047 | } | |
1048 | ||
1049 | free(fn); | |
b29ba128 | 1050 | dpif->name = xasprintf("dp%u", dpif->minor); |
064af421 BP |
1051 | dpif->minor = minor; |
1052 | dpif->fd = fd; | |
1053 | return 0; | |
1054 | } | |
1055 | \f | |
1056 | /* There is a tendency to construct odp_flow objects on the stack and to | |
1057 | * forget to properly initialize their "actions" and "n_actions" members. | |
1058 | * When this happens, we get memory corruption because the kernel | |
1059 | * writes through the random pointer that is in the "actions" member. | |
1060 | * | |
1061 | * This function attempts to combat the problem by: | |
1062 | * | |
1063 | * - Forcing a segfault if "actions" points to an invalid region (instead | |
1064 | * of just getting back EFAULT, which can be easily missed in the log). | |
1065 | * | |
1066 | * - Storing a distinctive value that is likely to cause an | |
1067 | * easy-to-identify error later if it is dereferenced, etc. | |
1068 | * | |
1069 | * - Triggering a warning on uninitialized memory from Valgrind if | |
1070 | * "actions" or "n_actions" was not initialized. | |
1071 | */ | |
1072 | static void | |
1073 | check_rw_odp_flow(struct odp_flow *flow) | |
1074 | { | |
1075 | if (flow->n_actions) { | |
1076 | memset(&flow->actions[0], 0xcc, sizeof flow->actions[0]); | |
1077 | } | |
1078 | } |