]>
Commit | Line | Data |
---|---|---|
96fba48f | 1 | /* |
1a6f1e2a | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
96fba48f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpif.h" | |
19 | ||
20 | #include <assert.h> | |
21 | #include <ctype.h> | |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
b90fa799 | 26 | #include <linux/types.h> |
96fba48f | 27 | #include <linux/ethtool.h> |
e9e28be3 | 28 | #include <linux/rtnetlink.h> |
96fba48f BP |
29 | #include <linux/sockios.h> |
30 | #include <stdlib.h> | |
31 | #include <sys/ioctl.h> | |
10dcf8de | 32 | #include <sys/stat.h> |
96fba48f BP |
33 | #include <unistd.h> |
34 | ||
35 | #include "dpif-provider.h" | |
3abc4a1a | 36 | #include "netdev.h" |
96fba48f BP |
37 | #include "ofpbuf.h" |
38 | #include "poll-loop.h" | |
559843ed | 39 | #include "rtnetlink.h" |
54825e09 | 40 | #include "shash.h" |
e9e28be3 | 41 | #include "svec.h" |
96fba48f BP |
42 | #include "util.h" |
43 | ||
44 | #include "vlog.h" | |
45 | #define THIS_MODULE VLM_dpif_linux | |
46 | ||
47 | /* Datapath interface for the openvswitch Linux kernel module. */ | |
48 | struct dpif_linux { | |
49 | struct dpif dpif; | |
50 | int fd; | |
e9e28be3 | 51 | |
d3d22744 BP |
52 | /* Used by dpif_linux_get_all_names(). */ |
53 | char *local_ifname; | |
54 | int minor; | |
55 | ||
e9e28be3 BP |
56 | /* Change notification. */ |
57 | int local_ifindex; /* Ifindex of local port. */ | |
54825e09 | 58 | struct shash changed_ports; /* Ports that have changed. */ |
46097491 | 59 | struct rtnetlink_notifier port_notifier; |
8b61709d | 60 | bool change_error; |
96fba48f BP |
61 | }; |
62 | ||
63 | static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); | |
64 | ||
65 | static int do_ioctl(const struct dpif *, int cmd, const void *arg); | |
66 | static int lookup_minor(const char *name, int *minor); | |
e9e28be3 | 67 | static int finish_open(struct dpif *, const char *local_ifname); |
57aaff8a | 68 | static int get_openvswitch_major(void); |
96fba48f BP |
69 | static int create_minor(const char *name, int minor, struct dpif **dpifp); |
70 | static int open_minor(int minor, struct dpif **dpifp); | |
71 | static int make_openvswitch_device(int minor, char **fnp); | |
46097491 | 72 | static void dpif_linux_port_changed(const struct rtnetlink_change *, |
e9e28be3 | 73 | void *dpif); |
96fba48f BP |
74 | |
75 | static struct dpif_linux * | |
76 | dpif_linux_cast(const struct dpif *dpif) | |
77 | { | |
78 | dpif_assert_class(dpif, &dpif_linux_class); | |
79 | return CONTAINER_OF(dpif, struct dpif_linux, dpif); | |
80 | } | |
81 | ||
d3d22744 BP |
82 | static int |
83 | dpif_linux_enumerate(struct svec *all_dps) | |
84 | { | |
57aaff8a | 85 | int major; |
d3d22744 BP |
86 | int error; |
87 | int i; | |
88 | ||
57aaff8a JP |
89 | /* Check that the Open vSwitch module is loaded. */ |
90 | major = get_openvswitch_major(); | |
91 | if (major < 0) { | |
92 | return -major; | |
93 | } | |
94 | ||
d3d22744 BP |
95 | error = 0; |
96 | for (i = 0; i < ODP_MAX; i++) { | |
97 | struct dpif *dpif; | |
98 | char devname[16]; | |
99 | int retval; | |
100 | ||
101 | sprintf(devname, "dp%d", i); | |
1a6f1e2a | 102 | retval = dpif_open(devname, "system", &dpif); |
d3d22744 BP |
103 | if (!retval) { |
104 | svec_add(all_dps, devname); | |
999401aa | 105 | dpif_uninit(dpif, true); |
d3d22744 BP |
106 | } else if (retval != ENODEV && !error) { |
107 | error = retval; | |
108 | } | |
109 | } | |
110 | return error; | |
111 | } | |
112 | ||
96fba48f | 113 | static int |
c69ee87c | 114 | dpif_linux_open(const char *name, const char *type OVS_UNUSED, bool create, |
96fba48f BP |
115 | struct dpif **dpifp) |
116 | { | |
117 | int minor; | |
118 | ||
be2c418b JP |
119 | minor = !strncmp(name, "dp", 2) |
120 | && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1; | |
96fba48f BP |
121 | if (create) { |
122 | if (minor >= 0) { | |
1a6f1e2a | 123 | return create_minor(name, minor, dpifp); |
96fba48f BP |
124 | } else { |
125 | /* Scan for unused minor number. */ | |
126 | for (minor = 0; minor < ODP_MAX; minor++) { | |
1a6f1e2a | 127 | int error = create_minor(name, minor, dpifp); |
96fba48f BP |
128 | if (error != EBUSY) { |
129 | return error; | |
130 | } | |
131 | } | |
132 | ||
133 | /* All datapath numbers in use. */ | |
134 | return ENOBUFS; | |
135 | } | |
136 | } else { | |
137 | struct dpif_linux *dpif; | |
e9e28be3 | 138 | struct odp_port port; |
96fba48f BP |
139 | int error; |
140 | ||
141 | if (minor < 0) { | |
1a6f1e2a | 142 | error = lookup_minor(name, &minor); |
96fba48f BP |
143 | if (error) { |
144 | return error; | |
145 | } | |
146 | } | |
147 | ||
148 | error = open_minor(minor, dpifp); | |
149 | if (error) { | |
150 | return error; | |
151 | } | |
152 | dpif = dpif_linux_cast(*dpifp); | |
153 | ||
e9e28be3 BP |
154 | /* We need the local port's ifindex for the poll function. Start by |
155 | * getting the local port's name. */ | |
156 | memset(&port, 0, sizeof port); | |
157 | port.port = ODPP_LOCAL; | |
158 | if (ioctl(dpif->fd, ODP_PORT_QUERY, &port)) { | |
159 | error = errno; | |
96fba48f BP |
160 | if (error != ENODEV) { |
161 | VLOG_WARN("%s: probe returned unexpected error: %s", | |
162 | dpif_name(*dpifp), strerror(error)); | |
163 | } | |
999401aa | 164 | dpif_uninit(*dpifp, true); |
e9e28be3 | 165 | return error; |
96fba48f | 166 | } |
e9e28be3 BP |
167 | |
168 | /* Then use that to finish up opening. */ | |
169 | return finish_open(&dpif->dpif, port.devname); | |
96fba48f BP |
170 | } |
171 | } | |
172 | ||
173 | static void | |
174 | dpif_linux_close(struct dpif *dpif_) | |
175 | { | |
176 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
46097491 | 177 | rtnetlink_notifier_unregister(&dpif->port_notifier); |
54825e09 | 178 | shash_destroy(&dpif->changed_ports); |
d3d22744 | 179 | free(dpif->local_ifname); |
96fba48f BP |
180 | close(dpif->fd); |
181 | free(dpif); | |
182 | } | |
183 | ||
d3d22744 BP |
184 | static int |
185 | dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names) | |
186 | { | |
187 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
188 | ||
189 | svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor)); | |
190 | svec_add(all_names, dpif->local_ifname); | |
191 | return 0; | |
192 | } | |
193 | ||
96fba48f | 194 | static int |
7dab847a | 195 | dpif_linux_destroy(struct dpif *dpif_) |
96fba48f | 196 | { |
3abc4a1a JG |
197 | struct odp_port *ports; |
198 | size_t n_ports; | |
199 | int err; | |
200 | int i; | |
201 | ||
202 | err = dpif_port_list(dpif_, &ports, &n_ports); | |
203 | if (err) { | |
204 | return err; | |
205 | } | |
206 | ||
207 | for (i = 0; i < n_ports; i++) { | |
208 | if (ports[i].port != ODPP_LOCAL) { | |
209 | err = do_ioctl(dpif_, ODP_VPORT_DEL, ports[i].devname); | |
210 | if (err) { | |
211 | VLOG_WARN_RL(&error_rl, "%s: error deleting port %s (%s)", | |
212 | dpif_name(dpif_), ports[i].devname, strerror(err)); | |
213 | } | |
214 | } | |
215 | } | |
216 | ||
217 | free(ports); | |
218 | ||
96fba48f BP |
219 | return do_ioctl(dpif_, ODP_DP_DESTROY, NULL); |
220 | } | |
221 | ||
222 | static int | |
223 | dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats) | |
224 | { | |
72b06300 | 225 | memset(stats, 0, sizeof *stats); |
96fba48f BP |
226 | return do_ioctl(dpif_, ODP_DP_STATS, stats); |
227 | } | |
228 | ||
229 | static int | |
230 | dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp) | |
231 | { | |
232 | int drop_frags; | |
233 | int error; | |
234 | ||
235 | error = do_ioctl(dpif_, ODP_GET_DROP_FRAGS, &drop_frags); | |
236 | if (!error) { | |
237 | *drop_fragsp = drop_frags & 1; | |
238 | } | |
239 | return error; | |
240 | } | |
241 | ||
242 | static int | |
243 | dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) | |
244 | { | |
245 | int drop_frags_int = drop_frags; | |
246 | return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int); | |
247 | } | |
248 | ||
249 | static int | |
250 | dpif_linux_port_add(struct dpif *dpif_, const char *devname, uint16_t flags, | |
251 | uint16_t *port_no) | |
252 | { | |
253 | struct odp_port port; | |
254 | int error; | |
255 | ||
256 | memset(&port, 0, sizeof port); | |
257 | strncpy(port.devname, devname, sizeof port.devname); | |
258 | port.flags = flags; | |
f2459fe7 | 259 | error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port); |
96fba48f BP |
260 | if (!error) { |
261 | *port_no = port.port; | |
262 | } | |
263 | return error; | |
264 | } | |
265 | ||
266 | static int | |
267 | dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) | |
268 | { | |
269 | int tmp = port_no; | |
3abc4a1a JG |
270 | int err; |
271 | struct odp_port port; | |
272 | ||
273 | err = dpif_port_query_by_number(dpif_, port_no, &port); | |
274 | if (err) { | |
275 | return err; | |
276 | } | |
277 | ||
278 | err = do_ioctl(dpif_, ODP_PORT_DETACH, &tmp); | |
279 | if (err) { | |
280 | return err; | |
281 | } | |
282 | ||
283 | if (!netdev_is_open(port.devname)) { | |
284 | /* Try deleting the port if no one has it open. This shouldn't | |
285 | * actually be necessary unless the config changed while we weren't | |
286 | * running but it won't hurt anything if the port is already gone. */ | |
287 | do_ioctl(dpif_, ODP_VPORT_DEL, port.devname); | |
288 | } | |
289 | ||
290 | return 0; | |
96fba48f BP |
291 | } |
292 | ||
293 | static int | |
294 | dpif_linux_port_query_by_number(const struct dpif *dpif_, uint16_t port_no, | |
295 | struct odp_port *port) | |
296 | { | |
297 | memset(port, 0, sizeof *port); | |
298 | port->port = port_no; | |
299 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
300 | } | |
301 | ||
302 | static int | |
303 | dpif_linux_port_query_by_name(const struct dpif *dpif_, const char *devname, | |
304 | struct odp_port *port) | |
305 | { | |
306 | memset(port, 0, sizeof *port); | |
307 | strncpy(port->devname, devname, sizeof port->devname); | |
308 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
309 | } | |
310 | ||
311 | static int | |
312 | dpif_linux_flow_flush(struct dpif *dpif_) | |
313 | { | |
314 | return do_ioctl(dpif_, ODP_FLOW_FLUSH, NULL); | |
315 | } | |
316 | ||
317 | static int | |
318 | dpif_linux_port_list(const struct dpif *dpif_, struct odp_port *ports, int n) | |
319 | { | |
320 | struct odp_portvec pv; | |
321 | int error; | |
322 | ||
323 | pv.ports = ports; | |
324 | pv.n_ports = n; | |
325 | error = do_ioctl(dpif_, ODP_PORT_LIST, &pv); | |
326 | return error ? -error : pv.n_ports; | |
327 | } | |
328 | ||
e9e28be3 BP |
329 | static int |
330 | dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) | |
331 | { | |
332 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
e9e28be3 | 333 | |
8b61709d BP |
334 | if (dpif->change_error) { |
335 | dpif->change_error = false; | |
54825e09 | 336 | shash_clear(&dpif->changed_ports); |
8b61709d | 337 | return ENOBUFS; |
54825e09 BP |
338 | } else if (!shash_is_empty(&dpif->changed_ports)) { |
339 | struct shash_node *node = shash_first(&dpif->changed_ports); | |
340 | *devnamep = xstrdup(node->name); | |
341 | shash_delete(&dpif->changed_ports, node); | |
8b61709d | 342 | return 0; |
e9e28be3 | 343 | } else { |
8b61709d | 344 | return EAGAIN; |
e9e28be3 | 345 | } |
e9e28be3 BP |
346 | } |
347 | ||
348 | static void | |
349 | dpif_linux_port_poll_wait(const struct dpif *dpif_) | |
350 | { | |
351 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
54825e09 | 352 | if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { |
e9e28be3 BP |
353 | poll_immediate_wake(); |
354 | } else { | |
46097491 | 355 | rtnetlink_notifier_wait(); |
e9e28be3 BP |
356 | } |
357 | } | |
358 | ||
96fba48f BP |
359 | static int |
360 | dpif_linux_port_group_get(const struct dpif *dpif_, int group, | |
361 | uint16_t ports[], int n) | |
362 | { | |
363 | struct odp_port_group pg; | |
364 | int error; | |
365 | ||
366 | assert(n <= UINT16_MAX); | |
367 | pg.group = group; | |
368 | pg.ports = ports; | |
369 | pg.n_ports = n; | |
370 | error = do_ioctl(dpif_, ODP_PORT_GROUP_GET, &pg); | |
371 | return error ? -error : pg.n_ports; | |
372 | } | |
373 | ||
374 | static int | |
375 | dpif_linux_port_group_set(struct dpif *dpif_, int group, | |
376 | const uint16_t ports[], int n) | |
377 | { | |
378 | struct odp_port_group pg; | |
379 | ||
380 | assert(n <= UINT16_MAX); | |
381 | pg.group = group; | |
382 | pg.ports = (uint16_t *) ports; | |
383 | pg.n_ports = n; | |
384 | return do_ioctl(dpif_, ODP_PORT_GROUP_SET, &pg); | |
385 | } | |
386 | ||
387 | static int | |
388 | dpif_linux_flow_get(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
389 | { | |
390 | struct odp_flowvec fv; | |
391 | fv.flows = flows; | |
392 | fv.n_flows = n; | |
393 | return do_ioctl(dpif_, ODP_FLOW_GET, &fv); | |
394 | } | |
395 | ||
396 | static int | |
397 | dpif_linux_flow_put(struct dpif *dpif_, struct odp_flow_put *put) | |
398 | { | |
399 | return do_ioctl(dpif_, ODP_FLOW_PUT, put); | |
400 | } | |
401 | ||
402 | static int | |
403 | dpif_linux_flow_del(struct dpif *dpif_, struct odp_flow *flow) | |
404 | { | |
405 | return do_ioctl(dpif_, ODP_FLOW_DEL, flow); | |
406 | } | |
407 | ||
408 | static int | |
409 | dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
410 | { | |
411 | struct odp_flowvec fv; | |
412 | int error; | |
413 | ||
414 | fv.flows = flows; | |
415 | fv.n_flows = n; | |
416 | error = do_ioctl(dpif_, ODP_FLOW_LIST, &fv); | |
417 | return error ? -error : fv.n_flows; | |
418 | } | |
419 | ||
420 | static int | |
421 | dpif_linux_execute(struct dpif *dpif_, uint16_t in_port, | |
422 | const union odp_action actions[], int n_actions, | |
423 | const struct ofpbuf *buf) | |
424 | { | |
425 | struct odp_execute execute; | |
426 | memset(&execute, 0, sizeof execute); | |
427 | execute.in_port = in_port; | |
428 | execute.actions = (union odp_action *) actions; | |
429 | execute.n_actions = n_actions; | |
430 | execute.data = buf->data; | |
431 | execute.length = buf->size; | |
432 | return do_ioctl(dpif_, ODP_EXECUTE, &execute); | |
433 | } | |
434 | ||
435 | static int | |
436 | dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask) | |
437 | { | |
438 | return do_ioctl(dpif_, ODP_GET_LISTEN_MASK, listen_mask); | |
439 | } | |
440 | ||
441 | static int | |
442 | dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) | |
443 | { | |
444 | return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask); | |
445 | } | |
446 | ||
72b06300 BP |
447 | static int |
448 | dpif_linux_get_sflow_probability(const struct dpif *dpif_, | |
449 | uint32_t *probability) | |
450 | { | |
451 | return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability); | |
452 | } | |
453 | ||
454 | static int | |
455 | dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) | |
456 | { | |
457 | return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability); | |
458 | } | |
459 | ||
96fba48f BP |
460 | static int |
461 | dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) | |
462 | { | |
463 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
464 | struct ofpbuf *buf; | |
465 | int retval; | |
466 | int error; | |
467 | ||
43253595 BP |
468 | buf = ofpbuf_new(65536 + DPIF_RECV_MSG_PADDING); |
469 | ofpbuf_reserve(buf, DPIF_RECV_MSG_PADDING); | |
96fba48f BP |
470 | retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); |
471 | if (retval < 0) { | |
472 | error = errno; | |
473 | if (error != EAGAIN) { | |
474 | VLOG_WARN_RL(&error_rl, "%s: read failed: %s", | |
475 | dpif_name(dpif_), strerror(error)); | |
476 | } | |
477 | } else if (retval >= sizeof(struct odp_msg)) { | |
478 | struct odp_msg *msg = buf->data; | |
479 | if (msg->length <= retval) { | |
480 | buf->size += retval; | |
481 | *bufp = buf; | |
482 | return 0; | |
483 | } else { | |
484 | VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " | |
d65349ea | 485 | "from %"PRIu32" bytes to %d", |
96fba48f BP |
486 | dpif_name(dpif_), msg->length, retval); |
487 | error = ERANGE; | |
488 | } | |
489 | } else if (!retval) { | |
490 | VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_)); | |
491 | error = EPROTO; | |
492 | } else { | |
493 | VLOG_WARN_RL(&error_rl, | |
494 | "%s: discarding too-short message (%d bytes)", | |
495 | dpif_name(dpif_), retval); | |
496 | error = ERANGE; | |
497 | } | |
498 | ||
499 | *bufp = NULL; | |
500 | ofpbuf_delete(buf); | |
501 | return error; | |
502 | } | |
503 | ||
504 | static void | |
505 | dpif_linux_recv_wait(struct dpif *dpif_) | |
506 | { | |
507 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
508 | poll_fd_wait(dpif->fd, POLLIN); | |
509 | } | |
510 | ||
511 | const struct dpif_class dpif_linux_class = { | |
1a6f1e2a | 512 | "system", |
8b61709d BP |
513 | NULL, |
514 | NULL, | |
d3d22744 | 515 | dpif_linux_enumerate, |
96fba48f BP |
516 | dpif_linux_open, |
517 | dpif_linux_close, | |
d3d22744 | 518 | dpif_linux_get_all_names, |
7dab847a | 519 | dpif_linux_destroy, |
96fba48f BP |
520 | dpif_linux_get_stats, |
521 | dpif_linux_get_drop_frags, | |
522 | dpif_linux_set_drop_frags, | |
523 | dpif_linux_port_add, | |
524 | dpif_linux_port_del, | |
525 | dpif_linux_port_query_by_number, | |
526 | dpif_linux_port_query_by_name, | |
527 | dpif_linux_port_list, | |
e9e28be3 BP |
528 | dpif_linux_port_poll, |
529 | dpif_linux_port_poll_wait, | |
96fba48f BP |
530 | dpif_linux_port_group_get, |
531 | dpif_linux_port_group_set, | |
532 | dpif_linux_flow_get, | |
533 | dpif_linux_flow_put, | |
534 | dpif_linux_flow_del, | |
535 | dpif_linux_flow_flush, | |
536 | dpif_linux_flow_list, | |
537 | dpif_linux_execute, | |
538 | dpif_linux_recv_get_mask, | |
539 | dpif_linux_recv_set_mask, | |
72b06300 BP |
540 | dpif_linux_get_sflow_probability, |
541 | dpif_linux_set_sflow_probability, | |
96fba48f BP |
542 | dpif_linux_recv, |
543 | dpif_linux_recv_wait, | |
544 | }; | |
545 | \f | |
/* Device-number helpers used by the functions below. */
static int get_major(const char *target);
static int get_openvswitch_major(void);
96fba48f BP |
548 | |
549 | static int | |
550 | do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) | |
551 | { | |
552 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
553 | return ioctl(dpif->fd, cmd, arg) ? errno : 0; | |
554 | } | |
555 | ||
556 | static int | |
a165b67e | 557 | lookup_minor(const char *name, int *minorp) |
96fba48f BP |
558 | { |
559 | struct ethtool_drvinfo drvinfo; | |
a165b67e | 560 | int minor, port_no; |
96fba48f BP |
561 | struct ifreq ifr; |
562 | int error; | |
563 | int sock; | |
564 | ||
565 | sock = socket(AF_INET, SOCK_DGRAM, 0); | |
566 | if (sock < 0) { | |
567 | VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno)); | |
568 | error = errno; | |
569 | goto error; | |
570 | } | |
571 | ||
572 | memset(&ifr, 0, sizeof ifr); | |
573 | strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); | |
574 | ifr.ifr_data = (caddr_t) &drvinfo; | |
575 | ||
576 | memset(&drvinfo, 0, sizeof drvinfo); | |
577 | drvinfo.cmd = ETHTOOL_GDRVINFO; | |
578 | if (ioctl(sock, SIOCETHTOOL, &ifr)) { | |
579 | VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno)); | |
580 | error = errno; | |
581 | goto error_close_sock; | |
582 | } | |
583 | ||
584 | if (strcmp(drvinfo.driver, "openvswitch")) { | |
585 | VLOG_WARN("%s is not an openvswitch device", name); | |
586 | error = EOPNOTSUPP; | |
587 | goto error_close_sock; | |
588 | } | |
589 | ||
a165b67e BP |
590 | if (sscanf(drvinfo.bus_info, "%d.%d", &minor, &port_no) != 2) { |
591 | VLOG_WARN("%s ethtool bus_info has unexpected format", name); | |
96fba48f BP |
592 | error = EPROTOTYPE; |
593 | goto error_close_sock; | |
a165b67e BP |
594 | } else if (port_no != ODPP_LOCAL) { |
595 | /* This is an Open vSwitch device but not the local port. We | |
596 | * intentionally support only using the name of the local port as the | |
597 | * name of a datapath; otherwise, it would be too difficult to | |
598 | * enumerate all the names of a datapath. */ | |
599 | error = EOPNOTSUPP; | |
600 | goto error_close_sock; | |
96fba48f BP |
601 | } |
602 | ||
a165b67e | 603 | *minorp = minor; |
96fba48f BP |
604 | close(sock); |
605 | return 0; | |
606 | ||
607 | error_close_sock: | |
608 | close(sock); | |
609 | error: | |
610 | return error; | |
611 | } | |
612 | ||
613 | static int | |
614 | make_openvswitch_device(int minor, char **fnp) | |
615 | { | |
96fba48f | 616 | const char dirname[] = "/dev/net"; |
57aaff8a JP |
617 | int major; |
618 | dev_t dev; | |
96fba48f BP |
619 | struct stat s; |
620 | char fn[128]; | |
621 | ||
8334b477 BP |
622 | *fnp = NULL; |
623 | ||
57aaff8a JP |
624 | major = get_openvswitch_major(); |
625 | if (major < 0) { | |
626 | return -major; | |
627 | } | |
628 | dev = makedev(major, minor); | |
629 | ||
96fba48f BP |
630 | sprintf(fn, "%s/dp%d", dirname, minor); |
631 | if (!stat(fn, &s)) { | |
632 | if (!S_ISCHR(s.st_mode)) { | |
633 | VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", | |
634 | fn); | |
635 | } else if (s.st_rdev != dev) { | |
636 | VLOG_WARN_RL(&error_rl, | |
f17d7bd8 | 637 | "%s is device %u:%u but should be %u:%u, fixing", |
96fba48f BP |
638 | fn, major(s.st_rdev), minor(s.st_rdev), |
639 | major(dev), minor(dev)); | |
640 | } else { | |
641 | goto success; | |
642 | } | |
643 | if (unlink(fn)) { | |
644 | VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", | |
645 | fn, strerror(errno)); | |
646 | return errno; | |
647 | } | |
648 | } else if (errno == ENOENT) { | |
649 | if (stat(dirname, &s)) { | |
650 | if (errno == ENOENT) { | |
651 | if (mkdir(dirname, 0755)) { | |
652 | VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", | |
653 | dirname, strerror(errno)); | |
654 | return errno; | |
655 | } | |
656 | } else { | |
657 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", | |
658 | dirname, strerror(errno)); | |
659 | return errno; | |
660 | } | |
661 | } | |
662 | } else { | |
663 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); | |
664 | return errno; | |
665 | } | |
666 | ||
667 | /* The device needs to be created. */ | |
668 | if (mknod(fn, S_IFCHR | 0700, dev)) { | |
669 | VLOG_WARN_RL(&error_rl, | |
670 | "%s: creating character device %u:%u failed (%s)", | |
671 | fn, major(dev), minor(dev), strerror(errno)); | |
672 | return errno; | |
673 | } | |
674 | ||
675 | success: | |
676 | *fnp = xstrdup(fn); | |
677 | return 0; | |
678 | } | |
679 | ||
/* Returns the major device number of the Open vSwitch device, or a negative
 * errno value if it cannot be determined.
 *
 * A successful lookup is remembered.  A failed one is not, so the lookup is
 * repeated on the next call. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}
691 | ||
/* Searches the character-device section of /proc/devices for a device named
 * 'target'.
 *
 * Returns its major number if found.  Otherwise returns a negative errno
 * value: the fopen() error if the file cannot be opened, or -ENODEV if
 * 'target' is not listed. */
static int
get_major(const char *target)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        /* Save errno before logging, since logging may change it. */
        int error = errno;
        VLOG_ERR("opening %s failed (%s)", fn, strerror(error));
        return -error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;

        if (!strncmp(line, "Character", 9)
            || line[strspn(line, " \t\r\n")] == '\0') {
            /* Section header or blank line: nothing to do. */
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
            /* Require both fields.  sscanf() returns EOF or 1 when 'name' is
             * not converted, and 'name' must not be read in that case. */
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else {
            static bool warned;
            if (!warned) {
                VLOG_WARN("%s:%d: syntax error", fn, ln);
            }
            warned = true;
        }
    }

    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
    return -ENODEV;
}
734 | ||
e9e28be3 BP |
735 | static int |
736 | finish_open(struct dpif *dpif_, const char *local_ifname) | |
737 | { | |
738 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
999401aa | 739 | dpif->local_ifname = xstrdup(local_ifname); |
e9e28be3 BP |
740 | dpif->local_ifindex = if_nametoindex(local_ifname); |
741 | if (!dpif->local_ifindex) { | |
742 | int error = errno; | |
999401aa | 743 | dpif_uninit(dpif_, true); |
e9e28be3 BP |
744 | VLOG_WARN("could not get ifindex of %s device: %s", |
745 | local_ifname, strerror(errno)); | |
746 | return error; | |
747 | } | |
748 | return 0; | |
749 | } | |
750 | ||
96fba48f BP |
751 | static int |
752 | create_minor(const char *name, int minor, struct dpif **dpifp) | |
753 | { | |
754 | int error = open_minor(minor, dpifp); | |
755 | if (!error) { | |
756 | error = do_ioctl(*dpifp, ODP_DP_CREATE, name); | |
e9e28be3 BP |
757 | if (!error) { |
758 | error = finish_open(*dpifp, name); | |
759 | } else { | |
999401aa | 760 | dpif_uninit(*dpifp, true); |
96fba48f BP |
761 | } |
762 | } | |
763 | return error; | |
764 | } | |
765 | ||
766 | static int | |
767 | open_minor(int minor, struct dpif **dpifp) | |
768 | { | |
769 | int error; | |
770 | char *fn; | |
771 | int fd; | |
772 | ||
773 | error = make_openvswitch_device(minor, &fn); | |
774 | if (error) { | |
775 | return error; | |
776 | } | |
777 | ||
778 | fd = open(fn, O_RDONLY | O_NONBLOCK); | |
779 | if (fd >= 0) { | |
e9e28be3 | 780 | struct dpif_linux *dpif = xmalloc(sizeof *dpif); |
46097491 BP |
781 | error = rtnetlink_notifier_register(&dpif->port_notifier, |
782 | dpif_linux_port_changed, dpif); | |
e9e28be3 BP |
783 | if (!error) { |
784 | char *name; | |
785 | ||
786 | name = xasprintf("dp%d", minor); | |
787 | dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); | |
788 | free(name); | |
789 | ||
790 | dpif->fd = fd; | |
d3d22744 BP |
791 | dpif->local_ifname = NULL; |
792 | dpif->minor = minor; | |
e9e28be3 | 793 | dpif->local_ifindex = 0; |
54825e09 | 794 | shash_init(&dpif->changed_ports); |
8b61709d | 795 | dpif->change_error = false; |
e9e28be3 BP |
796 | *dpifp = &dpif->dpif; |
797 | } else { | |
798 | free(dpif); | |
799 | } | |
96fba48f BP |
800 | } else { |
801 | error = errno; | |
802 | VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); | |
803 | } | |
804 | free(fn); | |
805 | ||
806 | return error; | |
807 | } | |
e9e28be3 BP |
808 | |
809 | static void | |
46097491 | 810 | dpif_linux_port_changed(const struct rtnetlink_change *change, void *dpif_) |
e9e28be3 BP |
811 | { |
812 | struct dpif_linux *dpif = dpif_; | |
813 | ||
8b61709d BP |
814 | if (change) { |
815 | if (change->master_ifindex == dpif->local_ifindex | |
816 | && (change->nlmsg_type == RTM_NEWLINK | |
817 | || change->nlmsg_type == RTM_DELLINK)) | |
818 | { | |
819 | /* Our datapath changed, either adding a new port or deleting an | |
820 | * existing one. */ | |
54825e09 | 821 | shash_add_once(&dpif->changed_ports, change->ifname, NULL); |
e9e28be3 | 822 | } |
8b61709d BP |
823 | } else { |
824 | dpif->change_error = true; | |
e9e28be3 BP |
825 | } |
826 | } |