]>
Commit | Line | Data |
---|---|---|
96fba48f | 1 | /* |
1a6f1e2a | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
96fba48f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpif.h" | |
19 | ||
20 | #include <assert.h> | |
21 | #include <ctype.h> | |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
b90fa799 | 26 | #include <linux/types.h> |
96fba48f | 27 | #include <linux/ethtool.h> |
aae51f53 | 28 | #include <linux/pkt_sched.h> |
e9e28be3 | 29 | #include <linux/rtnetlink.h> |
96fba48f BP |
30 | #include <linux/sockios.h> |
31 | #include <stdlib.h> | |
32 | #include <sys/ioctl.h> | |
10dcf8de | 33 | #include <sys/stat.h> |
96fba48f BP |
34 | #include <unistd.h> |
35 | ||
36 | #include "dpif-provider.h" | |
3abc4a1a | 37 | #include "netdev.h" |
96fba48f BP |
38 | #include "ofpbuf.h" |
39 | #include "poll-loop.h" | |
559843ed | 40 | #include "rtnetlink.h" |
54825e09 | 41 | #include "shash.h" |
e9e28be3 | 42 | #include "svec.h" |
96fba48f | 43 | #include "util.h" |
96fba48f | 44 | #include "vlog.h" |
5136ce49 | 45 | |
/* NOTE(review): presumably registers "dpif_linux" as the logging module name
 * used by the VLOG_*() calls in this file -- confirm against vlog.h. */
VLOG_DEFINE_THIS_MODULE(dpif_linux);
96fba48f BP |
47 | |
/* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_linux {
    struct dpif dpif;           /* Generic part; dpif_linux_cast() recovers
                                 * the enclosing struct from it. */
    int fd;                     /* Datapath device fd, used for ioctl() and
                                 * read(); closed in dpif_linux_close(). */

    /* Used by dpif_linux_get_all_names(). */
    char *local_ifname;         /* Heap copy made in finish_open(); freed in
                                 * dpif_linux_close(). */
    int minor;                  /* Datapath number N, as in "dpN". */

    /* Change notification. */
    int local_ifindex;          /* Ifindex of local port. */
    struct shash changed_ports; /* Ports that have changed. */
    struct rtnetlink_notifier port_notifier; /* Registered in open_minor()
                                              * to call
                                              * dpif_linux_port_changed(). */
    bool change_error;          /* Set when the notifier callback receives a
                                 * null change; makes dpif_linux_port_poll()
                                 * return ENOBUFS once. */
};
63 | ||
/* Rate limiter shared by the VLOG_WARN_RL() calls in this file. */
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);

/* Forward declarations for helpers defined after the dpif_linux_class
 * table. */
static int do_ioctl(const struct dpif *, int cmd, const void *arg);
static int lookup_minor(const char *name, int *minor);
static int finish_open(struct dpif *, const char *local_ifname);
static int get_openvswitch_major(void);
static int create_minor(const char *name, int minor, struct dpif **dpifp);
static int open_minor(int minor, struct dpif **dpifp);
static int make_openvswitch_device(int minor, char **fnp);
static void dpif_linux_port_changed(const struct rtnetlink_change *,
                                    void *dpif);
96fba48f BP |
75 | |
76 | static struct dpif_linux * | |
77 | dpif_linux_cast(const struct dpif *dpif) | |
78 | { | |
79 | dpif_assert_class(dpif, &dpif_linux_class); | |
80 | return CONTAINER_OF(dpif, struct dpif_linux, dpif); | |
81 | } | |
82 | ||
d3d22744 BP |
83 | static int |
84 | dpif_linux_enumerate(struct svec *all_dps) | |
85 | { | |
57aaff8a | 86 | int major; |
d3d22744 BP |
87 | int error; |
88 | int i; | |
89 | ||
57aaff8a JP |
90 | /* Check that the Open vSwitch module is loaded. */ |
91 | major = get_openvswitch_major(); | |
92 | if (major < 0) { | |
93 | return -major; | |
94 | } | |
95 | ||
d3d22744 BP |
96 | error = 0; |
97 | for (i = 0; i < ODP_MAX; i++) { | |
98 | struct dpif *dpif; | |
99 | char devname[16]; | |
100 | int retval; | |
101 | ||
102 | sprintf(devname, "dp%d", i); | |
1a6f1e2a | 103 | retval = dpif_open(devname, "system", &dpif); |
d3d22744 BP |
104 | if (!retval) { |
105 | svec_add(all_dps, devname); | |
999401aa | 106 | dpif_uninit(dpif, true); |
d3d22744 BP |
107 | } else if (retval != ENODEV && !error) { |
108 | error = retval; | |
109 | } | |
110 | } | |
111 | return error; | |
112 | } | |
113 | ||
96fba48f | 114 | static int |
c69ee87c | 115 | dpif_linux_open(const char *name, const char *type OVS_UNUSED, bool create, |
96fba48f BP |
116 | struct dpif **dpifp) |
117 | { | |
118 | int minor; | |
119 | ||
be2c418b JP |
120 | minor = !strncmp(name, "dp", 2) |
121 | && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1; | |
96fba48f BP |
122 | if (create) { |
123 | if (minor >= 0) { | |
1a6f1e2a | 124 | return create_minor(name, minor, dpifp); |
96fba48f BP |
125 | } else { |
126 | /* Scan for unused minor number. */ | |
127 | for (minor = 0; minor < ODP_MAX; minor++) { | |
1a6f1e2a | 128 | int error = create_minor(name, minor, dpifp); |
96fba48f BP |
129 | if (error != EBUSY) { |
130 | return error; | |
131 | } | |
132 | } | |
133 | ||
134 | /* All datapath numbers in use. */ | |
135 | return ENOBUFS; | |
136 | } | |
137 | } else { | |
138 | struct dpif_linux *dpif; | |
e9e28be3 | 139 | struct odp_port port; |
96fba48f BP |
140 | int error; |
141 | ||
142 | if (minor < 0) { | |
1a6f1e2a | 143 | error = lookup_minor(name, &minor); |
96fba48f BP |
144 | if (error) { |
145 | return error; | |
146 | } | |
147 | } | |
148 | ||
149 | error = open_minor(minor, dpifp); | |
150 | if (error) { | |
151 | return error; | |
152 | } | |
153 | dpif = dpif_linux_cast(*dpifp); | |
154 | ||
e9e28be3 BP |
155 | /* We need the local port's ifindex for the poll function. Start by |
156 | * getting the local port's name. */ | |
157 | memset(&port, 0, sizeof port); | |
158 | port.port = ODPP_LOCAL; | |
159 | if (ioctl(dpif->fd, ODP_PORT_QUERY, &port)) { | |
160 | error = errno; | |
96fba48f BP |
161 | if (error != ENODEV) { |
162 | VLOG_WARN("%s: probe returned unexpected error: %s", | |
163 | dpif_name(*dpifp), strerror(error)); | |
164 | } | |
999401aa | 165 | dpif_uninit(*dpifp, true); |
e9e28be3 | 166 | return error; |
96fba48f | 167 | } |
e9e28be3 BP |
168 | |
169 | /* Then use that to finish up opening. */ | |
170 | return finish_open(&dpif->dpif, port.devname); | |
96fba48f BP |
171 | } |
172 | } | |
173 | ||
174 | static void | |
175 | dpif_linux_close(struct dpif *dpif_) | |
176 | { | |
177 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
46097491 | 178 | rtnetlink_notifier_unregister(&dpif->port_notifier); |
54825e09 | 179 | shash_destroy(&dpif->changed_ports); |
d3d22744 | 180 | free(dpif->local_ifname); |
96fba48f BP |
181 | close(dpif->fd); |
182 | free(dpif); | |
183 | } | |
184 | ||
d3d22744 BP |
185 | static int |
186 | dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names) | |
187 | { | |
188 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
189 | ||
190 | svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor)); | |
191 | svec_add(all_names, dpif->local_ifname); | |
192 | return 0; | |
193 | } | |
194 | ||
96fba48f | 195 | static int |
7dab847a | 196 | dpif_linux_destroy(struct dpif *dpif_) |
96fba48f | 197 | { |
3abc4a1a JG |
198 | struct odp_port *ports; |
199 | size_t n_ports; | |
200 | int err; | |
201 | int i; | |
202 | ||
203 | err = dpif_port_list(dpif_, &ports, &n_ports); | |
204 | if (err) { | |
205 | return err; | |
206 | } | |
207 | ||
208 | for (i = 0; i < n_ports; i++) { | |
209 | if (ports[i].port != ODPP_LOCAL) { | |
210 | err = do_ioctl(dpif_, ODP_VPORT_DEL, ports[i].devname); | |
211 | if (err) { | |
212 | VLOG_WARN_RL(&error_rl, "%s: error deleting port %s (%s)", | |
213 | dpif_name(dpif_), ports[i].devname, strerror(err)); | |
214 | } | |
215 | } | |
216 | } | |
217 | ||
218 | free(ports); | |
219 | ||
96fba48f BP |
220 | return do_ioctl(dpif_, ODP_DP_DESTROY, NULL); |
221 | } | |
222 | ||
223 | static int | |
224 | dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats) | |
225 | { | |
72b06300 | 226 | memset(stats, 0, sizeof *stats); |
96fba48f BP |
227 | return do_ioctl(dpif_, ODP_DP_STATS, stats); |
228 | } | |
229 | ||
230 | static int | |
231 | dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp) | |
232 | { | |
233 | int drop_frags; | |
234 | int error; | |
235 | ||
236 | error = do_ioctl(dpif_, ODP_GET_DROP_FRAGS, &drop_frags); | |
237 | if (!error) { | |
238 | *drop_fragsp = drop_frags & 1; | |
239 | } | |
240 | return error; | |
241 | } | |
242 | ||
243 | static int | |
244 | dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) | |
245 | { | |
246 | int drop_frags_int = drop_frags; | |
247 | return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int); | |
248 | } | |
249 | ||
250 | static int | |
251 | dpif_linux_port_add(struct dpif *dpif_, const char *devname, uint16_t flags, | |
252 | uint16_t *port_no) | |
253 | { | |
254 | struct odp_port port; | |
255 | int error; | |
256 | ||
257 | memset(&port, 0, sizeof port); | |
258 | strncpy(port.devname, devname, sizeof port.devname); | |
259 | port.flags = flags; | |
f2459fe7 | 260 | error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port); |
96fba48f BP |
261 | if (!error) { |
262 | *port_no = port.port; | |
263 | } | |
264 | return error; | |
265 | } | |
266 | ||
267 | static int | |
268 | dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) | |
269 | { | |
270 | int tmp = port_no; | |
3abc4a1a JG |
271 | int err; |
272 | struct odp_port port; | |
273 | ||
274 | err = dpif_port_query_by_number(dpif_, port_no, &port); | |
275 | if (err) { | |
276 | return err; | |
277 | } | |
278 | ||
279 | err = do_ioctl(dpif_, ODP_PORT_DETACH, &tmp); | |
280 | if (err) { | |
281 | return err; | |
282 | } | |
283 | ||
284 | if (!netdev_is_open(port.devname)) { | |
285 | /* Try deleting the port if no one has it open. This shouldn't | |
286 | * actually be necessary unless the config changed while we weren't | |
287 | * running but it won't hurt anything if the port is already gone. */ | |
288 | do_ioctl(dpif_, ODP_VPORT_DEL, port.devname); | |
289 | } | |
290 | ||
291 | return 0; | |
96fba48f BP |
292 | } |
293 | ||
294 | static int | |
295 | dpif_linux_port_query_by_number(const struct dpif *dpif_, uint16_t port_no, | |
296 | struct odp_port *port) | |
297 | { | |
298 | memset(port, 0, sizeof *port); | |
299 | port->port = port_no; | |
300 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
301 | } | |
302 | ||
303 | static int | |
304 | dpif_linux_port_query_by_name(const struct dpif *dpif_, const char *devname, | |
305 | struct odp_port *port) | |
306 | { | |
307 | memset(port, 0, sizeof *port); | |
308 | strncpy(port->devname, devname, sizeof port->devname); | |
309 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
310 | } | |
311 | ||
312 | static int | |
313 | dpif_linux_flow_flush(struct dpif *dpif_) | |
314 | { | |
315 | return do_ioctl(dpif_, ODP_FLOW_FLUSH, NULL); | |
316 | } | |
317 | ||
318 | static int | |
319 | dpif_linux_port_list(const struct dpif *dpif_, struct odp_port *ports, int n) | |
320 | { | |
321 | struct odp_portvec pv; | |
322 | int error; | |
323 | ||
324 | pv.ports = ports; | |
325 | pv.n_ports = n; | |
326 | error = do_ioctl(dpif_, ODP_PORT_LIST, &pv); | |
327 | return error ? -error : pv.n_ports; | |
328 | } | |
329 | ||
e9e28be3 BP |
330 | static int |
331 | dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) | |
332 | { | |
333 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
e9e28be3 | 334 | |
8b61709d BP |
335 | if (dpif->change_error) { |
336 | dpif->change_error = false; | |
54825e09 | 337 | shash_clear(&dpif->changed_ports); |
8b61709d | 338 | return ENOBUFS; |
54825e09 BP |
339 | } else if (!shash_is_empty(&dpif->changed_ports)) { |
340 | struct shash_node *node = shash_first(&dpif->changed_ports); | |
4f222648 | 341 | *devnamep = shash_steal(&dpif->changed_ports, node); |
8b61709d | 342 | return 0; |
e9e28be3 | 343 | } else { |
8b61709d | 344 | return EAGAIN; |
e9e28be3 | 345 | } |
e9e28be3 BP |
346 | } |
347 | ||
348 | static void | |
349 | dpif_linux_port_poll_wait(const struct dpif *dpif_) | |
350 | { | |
351 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
54825e09 | 352 | if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { |
e9e28be3 BP |
353 | poll_immediate_wake(); |
354 | } else { | |
46097491 | 355 | rtnetlink_notifier_wait(); |
e9e28be3 BP |
356 | } |
357 | } | |
358 | ||
96fba48f BP |
359 | static int |
360 | dpif_linux_flow_get(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
361 | { | |
362 | struct odp_flowvec fv; | |
363 | fv.flows = flows; | |
364 | fv.n_flows = n; | |
365 | return do_ioctl(dpif_, ODP_FLOW_GET, &fv); | |
366 | } | |
367 | ||
368 | static int | |
369 | dpif_linux_flow_put(struct dpif *dpif_, struct odp_flow_put *put) | |
370 | { | |
371 | return do_ioctl(dpif_, ODP_FLOW_PUT, put); | |
372 | } | |
373 | ||
374 | static int | |
375 | dpif_linux_flow_del(struct dpif *dpif_, struct odp_flow *flow) | |
376 | { | |
377 | return do_ioctl(dpif_, ODP_FLOW_DEL, flow); | |
378 | } | |
379 | ||
380 | static int | |
381 | dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
382 | { | |
383 | struct odp_flowvec fv; | |
384 | int error; | |
385 | ||
386 | fv.flows = flows; | |
387 | fv.n_flows = n; | |
388 | error = do_ioctl(dpif_, ODP_FLOW_LIST, &fv); | |
389 | return error ? -error : fv.n_flows; | |
390 | } | |
391 | ||
392 | static int | |
f1588b1f | 393 | dpif_linux_execute(struct dpif *dpif_, |
96fba48f BP |
394 | const union odp_action actions[], int n_actions, |
395 | const struct ofpbuf *buf) | |
396 | { | |
397 | struct odp_execute execute; | |
398 | memset(&execute, 0, sizeof execute); | |
96fba48f BP |
399 | execute.actions = (union odp_action *) actions; |
400 | execute.n_actions = n_actions; | |
401 | execute.data = buf->data; | |
402 | execute.length = buf->size; | |
403 | return do_ioctl(dpif_, ODP_EXECUTE, &execute); | |
404 | } | |
405 | ||
406 | static int | |
407 | dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask) | |
408 | { | |
409 | return do_ioctl(dpif_, ODP_GET_LISTEN_MASK, listen_mask); | |
410 | } | |
411 | ||
412 | static int | |
413 | dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) | |
414 | { | |
415 | return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask); | |
416 | } | |
417 | ||
72b06300 BP |
418 | static int |
419 | dpif_linux_get_sflow_probability(const struct dpif *dpif_, | |
420 | uint32_t *probability) | |
421 | { | |
422 | return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability); | |
423 | } | |
424 | ||
425 | static int | |
426 | dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) | |
427 | { | |
428 | return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability); | |
429 | } | |
430 | ||
aae51f53 BP |
431 | static int |
432 | dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
433 | uint32_t queue_id, uint32_t *priority) | |
434 | { | |
435 | if (queue_id < 0xf000) { | |
17ee3c1f | 436 | *priority = TC_H_MAKE(1 << 16, queue_id + 1); |
aae51f53 BP |
437 | return 0; |
438 | } else { | |
439 | return EINVAL; | |
440 | } | |
441 | } | |
442 | ||
96fba48f BP |
443 | static int |
444 | dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) | |
445 | { | |
446 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
447 | struct ofpbuf *buf; | |
448 | int retval; | |
449 | int error; | |
450 | ||
68efcbec | 451 | buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING); |
96fba48f BP |
452 | retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); |
453 | if (retval < 0) { | |
454 | error = errno; | |
455 | if (error != EAGAIN) { | |
456 | VLOG_WARN_RL(&error_rl, "%s: read failed: %s", | |
457 | dpif_name(dpif_), strerror(error)); | |
458 | } | |
459 | } else if (retval >= sizeof(struct odp_msg)) { | |
460 | struct odp_msg *msg = buf->data; | |
461 | if (msg->length <= retval) { | |
462 | buf->size += retval; | |
463 | *bufp = buf; | |
464 | return 0; | |
465 | } else { | |
466 | VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " | |
d65349ea | 467 | "from %"PRIu32" bytes to %d", |
96fba48f BP |
468 | dpif_name(dpif_), msg->length, retval); |
469 | error = ERANGE; | |
470 | } | |
471 | } else if (!retval) { | |
472 | VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_)); | |
473 | error = EPROTO; | |
474 | } else { | |
475 | VLOG_WARN_RL(&error_rl, | |
476 | "%s: discarding too-short message (%d bytes)", | |
477 | dpif_name(dpif_), retval); | |
478 | error = ERANGE; | |
479 | } | |
480 | ||
481 | *bufp = NULL; | |
482 | ofpbuf_delete(buf); | |
483 | return error; | |
484 | } | |
485 | ||
486 | static void | |
487 | dpif_linux_recv_wait(struct dpif *dpif_) | |
488 | { | |
489 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
490 | poll_fd_wait(dpif->fd, POLLIN); | |
491 | } | |
492 | ||
/* The datapath class implemented by this file.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of struct dpif_class, which is declared outside this file
 * (NOTE(review): presumably in dpif-provider.h -- confirm there before
 * reordering or inserting entries). */
const struct dpif_class dpif_linux_class = {
    "system",                   /* Same string that dpif_linux_enumerate()
                                 * passes to dpif_open() as the type. */
    NULL,                       /* NOTE(review): unimplemented slot; see
                                 * struct dpif_class for its meaning. */
    NULL,                       /* NOTE(review): unimplemented slot; see
                                 * struct dpif_class for its meaning. */
    dpif_linux_enumerate,
    dpif_linux_open,
    dpif_linux_close,
    dpif_linux_get_all_names,
    dpif_linux_destroy,
    dpif_linux_get_stats,
    dpif_linux_get_drop_frags,
    dpif_linux_set_drop_frags,
    dpif_linux_port_add,
    dpif_linux_port_del,
    dpif_linux_port_query_by_number,
    dpif_linux_port_query_by_name,
    dpif_linux_port_list,
    dpif_linux_port_poll,
    dpif_linux_port_poll_wait,
    dpif_linux_flow_get,
    dpif_linux_flow_put,
    dpif_linux_flow_del,
    dpif_linux_flow_flush,
    dpif_linux_flow_list,
    dpif_linux_execute,
    dpif_linux_recv_get_mask,
    dpif_linux_recv_set_mask,
    dpif_linux_get_sflow_probability,
    dpif_linux_set_sflow_probability,
    dpif_linux_queue_to_priority,
    dpif_linux_recv,
    dpif_linux_recv_wait,
};
526 | \f | |
/* Forward declarations for the device-number helpers defined below.
 * get_openvswitch_major() is also declared near the top of this file; the
 * repeated declaration is harmless. */
static int get_openvswitch_major(void);
static int get_major(const char *target);
96fba48f BP |
529 | |
530 | static int | |
531 | do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) | |
532 | { | |
533 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
534 | return ioctl(dpif->fd, cmd, arg) ? errno : 0; | |
535 | } | |
536 | ||
537 | static int | |
a165b67e | 538 | lookup_minor(const char *name, int *minorp) |
96fba48f BP |
539 | { |
540 | struct ethtool_drvinfo drvinfo; | |
a165b67e | 541 | int minor, port_no; |
96fba48f BP |
542 | struct ifreq ifr; |
543 | int error; | |
544 | int sock; | |
545 | ||
546 | sock = socket(AF_INET, SOCK_DGRAM, 0); | |
547 | if (sock < 0) { | |
548 | VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno)); | |
549 | error = errno; | |
550 | goto error; | |
551 | } | |
552 | ||
553 | memset(&ifr, 0, sizeof ifr); | |
554 | strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); | |
555 | ifr.ifr_data = (caddr_t) &drvinfo; | |
556 | ||
557 | memset(&drvinfo, 0, sizeof drvinfo); | |
558 | drvinfo.cmd = ETHTOOL_GDRVINFO; | |
559 | if (ioctl(sock, SIOCETHTOOL, &ifr)) { | |
560 | VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno)); | |
561 | error = errno; | |
562 | goto error_close_sock; | |
563 | } | |
564 | ||
565 | if (strcmp(drvinfo.driver, "openvswitch")) { | |
566 | VLOG_WARN("%s is not an openvswitch device", name); | |
567 | error = EOPNOTSUPP; | |
568 | goto error_close_sock; | |
569 | } | |
570 | ||
a165b67e BP |
571 | if (sscanf(drvinfo.bus_info, "%d.%d", &minor, &port_no) != 2) { |
572 | VLOG_WARN("%s ethtool bus_info has unexpected format", name); | |
96fba48f BP |
573 | error = EPROTOTYPE; |
574 | goto error_close_sock; | |
a165b67e BP |
575 | } else if (port_no != ODPP_LOCAL) { |
576 | /* This is an Open vSwitch device but not the local port. We | |
577 | * intentionally support only using the name of the local port as the | |
578 | * name of a datapath; otherwise, it would be too difficult to | |
579 | * enumerate all the names of a datapath. */ | |
580 | error = EOPNOTSUPP; | |
581 | goto error_close_sock; | |
96fba48f BP |
582 | } |
583 | ||
a165b67e | 584 | *minorp = minor; |
96fba48f BP |
585 | close(sock); |
586 | return 0; | |
587 | ||
588 | error_close_sock: | |
589 | close(sock); | |
590 | error: | |
591 | return error; | |
592 | } | |
593 | ||
594 | static int | |
595 | make_openvswitch_device(int minor, char **fnp) | |
596 | { | |
96fba48f | 597 | const char dirname[] = "/dev/net"; |
57aaff8a JP |
598 | int major; |
599 | dev_t dev; | |
96fba48f BP |
600 | struct stat s; |
601 | char fn[128]; | |
602 | ||
8334b477 BP |
603 | *fnp = NULL; |
604 | ||
57aaff8a JP |
605 | major = get_openvswitch_major(); |
606 | if (major < 0) { | |
607 | return -major; | |
608 | } | |
609 | dev = makedev(major, minor); | |
610 | ||
96fba48f BP |
611 | sprintf(fn, "%s/dp%d", dirname, minor); |
612 | if (!stat(fn, &s)) { | |
613 | if (!S_ISCHR(s.st_mode)) { | |
614 | VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", | |
615 | fn); | |
616 | } else if (s.st_rdev != dev) { | |
617 | VLOG_WARN_RL(&error_rl, | |
f17d7bd8 | 618 | "%s is device %u:%u but should be %u:%u, fixing", |
96fba48f BP |
619 | fn, major(s.st_rdev), minor(s.st_rdev), |
620 | major(dev), minor(dev)); | |
621 | } else { | |
622 | goto success; | |
623 | } | |
624 | if (unlink(fn)) { | |
625 | VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", | |
626 | fn, strerror(errno)); | |
627 | return errno; | |
628 | } | |
629 | } else if (errno == ENOENT) { | |
630 | if (stat(dirname, &s)) { | |
631 | if (errno == ENOENT) { | |
632 | if (mkdir(dirname, 0755)) { | |
633 | VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", | |
634 | dirname, strerror(errno)); | |
635 | return errno; | |
636 | } | |
637 | } else { | |
638 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", | |
639 | dirname, strerror(errno)); | |
640 | return errno; | |
641 | } | |
642 | } | |
643 | } else { | |
644 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); | |
645 | return errno; | |
646 | } | |
647 | ||
648 | /* The device needs to be created. */ | |
649 | if (mknod(fn, S_IFCHR | 0700, dev)) { | |
650 | VLOG_WARN_RL(&error_rl, | |
651 | "%s: creating character device %u:%u failed (%s)", | |
652 | fn, major(dev), minor(dev), strerror(errno)); | |
653 | return errno; | |
654 | } | |
655 | ||
656 | success: | |
657 | *fnp = xstrdup(fn); | |
658 | return 0; | |
659 | } | |
660 | ||
57aaff8a JP |
/* Returns the major device number of the Open vSwitch device, or a negative
 * errno value if it cannot be determined.
 *
 * A successful lookup is cached in a function-local static.  A failed lookup
 * is retried on the next call. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}
672 | ||
/* Searches the character-device section of /proc/devices for the device
 * named 'target'.
 *
 * Returns the device's major number if found.  Otherwise returns a negative
 * errno value: the negated errno from fopen() if the file cannot be opened,
 * or -ENODEV if 'target' is not listed. */
static int
get_major(const char *target)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        /* Save errno before logging, which may overwrite it. */
        int error = errno;

        VLOG_ERR("opening %s failed (%s)", fn, strerror(error));
        return -error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;

        if (!strncmp(line, "Character", 9)
            || line[strspn(line, " \t\r\n")] == '\0') {
            /* Section header or blank line: nothing to do.  Blank lines must
             * be filtered here because sscanf() returns EOF for them. */
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
            /* Requiring both conversions guarantees 'name' is initialized
             * before it is compared. */
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else {
            VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
        }
    }

    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
    return -ENODEV;
}
711 | ||
e9e28be3 BP |
712 | static int |
713 | finish_open(struct dpif *dpif_, const char *local_ifname) | |
714 | { | |
715 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
999401aa | 716 | dpif->local_ifname = xstrdup(local_ifname); |
e9e28be3 BP |
717 | dpif->local_ifindex = if_nametoindex(local_ifname); |
718 | if (!dpif->local_ifindex) { | |
719 | int error = errno; | |
999401aa | 720 | dpif_uninit(dpif_, true); |
e9e28be3 BP |
721 | VLOG_WARN("could not get ifindex of %s device: %s", |
722 | local_ifname, strerror(errno)); | |
723 | return error; | |
724 | } | |
725 | return 0; | |
726 | } | |
727 | ||
96fba48f BP |
728 | static int |
729 | create_minor(const char *name, int minor, struct dpif **dpifp) | |
730 | { | |
731 | int error = open_minor(minor, dpifp); | |
732 | if (!error) { | |
733 | error = do_ioctl(*dpifp, ODP_DP_CREATE, name); | |
e9e28be3 BP |
734 | if (!error) { |
735 | error = finish_open(*dpifp, name); | |
736 | } else { | |
999401aa | 737 | dpif_uninit(*dpifp, true); |
96fba48f BP |
738 | } |
739 | } | |
740 | return error; | |
741 | } | |
742 | ||
743 | static int | |
744 | open_minor(int minor, struct dpif **dpifp) | |
745 | { | |
746 | int error; | |
747 | char *fn; | |
748 | int fd; | |
749 | ||
750 | error = make_openvswitch_device(minor, &fn); | |
751 | if (error) { | |
752 | return error; | |
753 | } | |
754 | ||
755 | fd = open(fn, O_RDONLY | O_NONBLOCK); | |
756 | if (fd >= 0) { | |
e9e28be3 | 757 | struct dpif_linux *dpif = xmalloc(sizeof *dpif); |
46097491 BP |
758 | error = rtnetlink_notifier_register(&dpif->port_notifier, |
759 | dpif_linux_port_changed, dpif); | |
e9e28be3 BP |
760 | if (!error) { |
761 | char *name; | |
762 | ||
763 | name = xasprintf("dp%d", minor); | |
764 | dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); | |
765 | free(name); | |
766 | ||
767 | dpif->fd = fd; | |
d3d22744 BP |
768 | dpif->local_ifname = NULL; |
769 | dpif->minor = minor; | |
e9e28be3 | 770 | dpif->local_ifindex = 0; |
54825e09 | 771 | shash_init(&dpif->changed_ports); |
8b61709d | 772 | dpif->change_error = false; |
e9e28be3 BP |
773 | *dpifp = &dpif->dpif; |
774 | } else { | |
775 | free(dpif); | |
776 | } | |
96fba48f BP |
777 | } else { |
778 | error = errno; | |
779 | VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); | |
780 | } | |
781 | free(fn); | |
782 | ||
783 | return error; | |
784 | } | |
e9e28be3 BP |
785 | |
786 | static void | |
46097491 | 787 | dpif_linux_port_changed(const struct rtnetlink_change *change, void *dpif_) |
e9e28be3 BP |
788 | { |
789 | struct dpif_linux *dpif = dpif_; | |
790 | ||
8b61709d BP |
791 | if (change) { |
792 | if (change->master_ifindex == dpif->local_ifindex | |
793 | && (change->nlmsg_type == RTM_NEWLINK | |
794 | || change->nlmsg_type == RTM_DELLINK)) | |
795 | { | |
796 | /* Our datapath changed, either adding a new port or deleting an | |
797 | * existing one. */ | |
54825e09 | 798 | shash_add_once(&dpif->changed_ports, change->ifname, NULL); |
e9e28be3 | 799 | } |
8b61709d BP |
800 | } else { |
801 | dpif->change_error = true; | |
e9e28be3 BP |
802 | } |
803 | } |