]>
Commit | Line | Data |
---|---|---|
96fba48f | 1 | /* |
1a6f1e2a | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
96fba48f BP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpif.h" | |
19 | ||
20 | #include <assert.h> | |
21 | #include <ctype.h> | |
22 | #include <errno.h> | |
23 | #include <fcntl.h> | |
24 | #include <inttypes.h> | |
25 | #include <net/if.h> | |
b90fa799 | 26 | #include <linux/types.h> |
96fba48f | 27 | #include <linux/ethtool.h> |
aae51f53 | 28 | #include <linux/pkt_sched.h> |
e9e28be3 | 29 | #include <linux/rtnetlink.h> |
96fba48f BP |
30 | #include <linux/sockios.h> |
31 | #include <stdlib.h> | |
32 | #include <sys/ioctl.h> | |
10dcf8de | 33 | #include <sys/stat.h> |
96fba48f BP |
34 | #include <unistd.h> |
35 | ||
36 | #include "dpif-provider.h" | |
3abc4a1a | 37 | #include "netdev.h" |
96fba48f BP |
38 | #include "ofpbuf.h" |
39 | #include "poll-loop.h" | |
559843ed | 40 | #include "rtnetlink.h" |
54825e09 | 41 | #include "shash.h" |
e9e28be3 | 42 | #include "svec.h" |
96fba48f | 43 | #include "util.h" |
96fba48f | 44 | #include "vlog.h" |
5136ce49 BP |
45 | |
46 | VLOG_DEFINE_THIS_MODULE(dpif_linux) | |
96fba48f BP |
47 | |
48 | /* Datapath interface for the openvswitch Linux kernel module. */ | |
49 | struct dpif_linux { | |
50 | struct dpif dpif; | |
51 | int fd; | |
e9e28be3 | 52 | |
d3d22744 BP |
53 | /* Used by dpif_linux_get_all_names(). */ |
54 | char *local_ifname; | |
55 | int minor; | |
56 | ||
e9e28be3 BP |
57 | /* Change notification. */ |
58 | int local_ifindex; /* Ifindex of local port. */ | |
54825e09 | 59 | struct shash changed_ports; /* Ports that have changed. */ |
46097491 | 60 | struct rtnetlink_notifier port_notifier; |
8b61709d | 61 | bool change_error; |
96fba48f BP |
62 | }; |
63 | ||
64 | static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); | |
65 | ||
66 | static int do_ioctl(const struct dpif *, int cmd, const void *arg); | |
67 | static int lookup_minor(const char *name, int *minor); | |
e9e28be3 | 68 | static int finish_open(struct dpif *, const char *local_ifname); |
57aaff8a | 69 | static int get_openvswitch_major(void); |
96fba48f BP |
70 | static int create_minor(const char *name, int minor, struct dpif **dpifp); |
71 | static int open_minor(int minor, struct dpif **dpifp); | |
72 | static int make_openvswitch_device(int minor, char **fnp); | |
46097491 | 73 | static void dpif_linux_port_changed(const struct rtnetlink_change *, |
e9e28be3 | 74 | void *dpif); |
96fba48f BP |
75 | |
76 | static struct dpif_linux * | |
77 | dpif_linux_cast(const struct dpif *dpif) | |
78 | { | |
79 | dpif_assert_class(dpif, &dpif_linux_class); | |
80 | return CONTAINER_OF(dpif, struct dpif_linux, dpif); | |
81 | } | |
82 | ||
d3d22744 BP |
83 | static int |
84 | dpif_linux_enumerate(struct svec *all_dps) | |
85 | { | |
57aaff8a | 86 | int major; |
d3d22744 BP |
87 | int error; |
88 | int i; | |
89 | ||
57aaff8a JP |
90 | /* Check that the Open vSwitch module is loaded. */ |
91 | major = get_openvswitch_major(); | |
92 | if (major < 0) { | |
93 | return -major; | |
94 | } | |
95 | ||
d3d22744 BP |
96 | error = 0; |
97 | for (i = 0; i < ODP_MAX; i++) { | |
98 | struct dpif *dpif; | |
99 | char devname[16]; | |
100 | int retval; | |
101 | ||
102 | sprintf(devname, "dp%d", i); | |
1a6f1e2a | 103 | retval = dpif_open(devname, "system", &dpif); |
d3d22744 BP |
104 | if (!retval) { |
105 | svec_add(all_dps, devname); | |
999401aa | 106 | dpif_uninit(dpif, true); |
d3d22744 BP |
107 | } else if (retval != ENODEV && !error) { |
108 | error = retval; | |
109 | } | |
110 | } | |
111 | return error; | |
112 | } | |
113 | ||
96fba48f | 114 | static int |
c69ee87c | 115 | dpif_linux_open(const char *name, const char *type OVS_UNUSED, bool create, |
96fba48f BP |
116 | struct dpif **dpifp) |
117 | { | |
118 | int minor; | |
119 | ||
be2c418b JP |
120 | minor = !strncmp(name, "dp", 2) |
121 | && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1; | |
96fba48f BP |
122 | if (create) { |
123 | if (minor >= 0) { | |
1a6f1e2a | 124 | return create_minor(name, minor, dpifp); |
96fba48f BP |
125 | } else { |
126 | /* Scan for unused minor number. */ | |
127 | for (minor = 0; minor < ODP_MAX; minor++) { | |
1a6f1e2a | 128 | int error = create_minor(name, minor, dpifp); |
96fba48f BP |
129 | if (error != EBUSY) { |
130 | return error; | |
131 | } | |
132 | } | |
133 | ||
134 | /* All datapath numbers in use. */ | |
135 | return ENOBUFS; | |
136 | } | |
137 | } else { | |
138 | struct dpif_linux *dpif; | |
e9e28be3 | 139 | struct odp_port port; |
96fba48f BP |
140 | int error; |
141 | ||
142 | if (minor < 0) { | |
1a6f1e2a | 143 | error = lookup_minor(name, &minor); |
96fba48f BP |
144 | if (error) { |
145 | return error; | |
146 | } | |
147 | } | |
148 | ||
149 | error = open_minor(minor, dpifp); | |
150 | if (error) { | |
151 | return error; | |
152 | } | |
153 | dpif = dpif_linux_cast(*dpifp); | |
154 | ||
e9e28be3 BP |
155 | /* We need the local port's ifindex for the poll function. Start by |
156 | * getting the local port's name. */ | |
157 | memset(&port, 0, sizeof port); | |
158 | port.port = ODPP_LOCAL; | |
159 | if (ioctl(dpif->fd, ODP_PORT_QUERY, &port)) { | |
160 | error = errno; | |
96fba48f BP |
161 | if (error != ENODEV) { |
162 | VLOG_WARN("%s: probe returned unexpected error: %s", | |
163 | dpif_name(*dpifp), strerror(error)); | |
164 | } | |
999401aa | 165 | dpif_uninit(*dpifp, true); |
e9e28be3 | 166 | return error; |
96fba48f | 167 | } |
e9e28be3 BP |
168 | |
169 | /* Then use that to finish up opening. */ | |
170 | return finish_open(&dpif->dpif, port.devname); | |
96fba48f BP |
171 | } |
172 | } | |
173 | ||
174 | static void | |
175 | dpif_linux_close(struct dpif *dpif_) | |
176 | { | |
177 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
46097491 | 178 | rtnetlink_notifier_unregister(&dpif->port_notifier); |
54825e09 | 179 | shash_destroy(&dpif->changed_ports); |
d3d22744 | 180 | free(dpif->local_ifname); |
96fba48f BP |
181 | close(dpif->fd); |
182 | free(dpif); | |
183 | } | |
184 | ||
d3d22744 BP |
185 | static int |
186 | dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names) | |
187 | { | |
188 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
189 | ||
190 | svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor)); | |
191 | svec_add(all_names, dpif->local_ifname); | |
192 | return 0; | |
193 | } | |
194 | ||
96fba48f | 195 | static int |
7dab847a | 196 | dpif_linux_destroy(struct dpif *dpif_) |
96fba48f | 197 | { |
3abc4a1a JG |
198 | struct odp_port *ports; |
199 | size_t n_ports; | |
200 | int err; | |
201 | int i; | |
202 | ||
203 | err = dpif_port_list(dpif_, &ports, &n_ports); | |
204 | if (err) { | |
205 | return err; | |
206 | } | |
207 | ||
208 | for (i = 0; i < n_ports; i++) { | |
209 | if (ports[i].port != ODPP_LOCAL) { | |
210 | err = do_ioctl(dpif_, ODP_VPORT_DEL, ports[i].devname); | |
211 | if (err) { | |
212 | VLOG_WARN_RL(&error_rl, "%s: error deleting port %s (%s)", | |
213 | dpif_name(dpif_), ports[i].devname, strerror(err)); | |
214 | } | |
215 | } | |
216 | } | |
217 | ||
218 | free(ports); | |
219 | ||
96fba48f BP |
220 | return do_ioctl(dpif_, ODP_DP_DESTROY, NULL); |
221 | } | |
222 | ||
223 | static int | |
224 | dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats) | |
225 | { | |
72b06300 | 226 | memset(stats, 0, sizeof *stats); |
96fba48f BP |
227 | return do_ioctl(dpif_, ODP_DP_STATS, stats); |
228 | } | |
229 | ||
230 | static int | |
231 | dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp) | |
232 | { | |
233 | int drop_frags; | |
234 | int error; | |
235 | ||
236 | error = do_ioctl(dpif_, ODP_GET_DROP_FRAGS, &drop_frags); | |
237 | if (!error) { | |
238 | *drop_fragsp = drop_frags & 1; | |
239 | } | |
240 | return error; | |
241 | } | |
242 | ||
243 | static int | |
244 | dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) | |
245 | { | |
246 | int drop_frags_int = drop_frags; | |
247 | return do_ioctl(dpif_, ODP_SET_DROP_FRAGS, &drop_frags_int); | |
248 | } | |
249 | ||
250 | static int | |
251 | dpif_linux_port_add(struct dpif *dpif_, const char *devname, uint16_t flags, | |
252 | uint16_t *port_no) | |
253 | { | |
254 | struct odp_port port; | |
255 | int error; | |
256 | ||
257 | memset(&port, 0, sizeof port); | |
258 | strncpy(port.devname, devname, sizeof port.devname); | |
259 | port.flags = flags; | |
f2459fe7 | 260 | error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port); |
96fba48f BP |
261 | if (!error) { |
262 | *port_no = port.port; | |
263 | } | |
264 | return error; | |
265 | } | |
266 | ||
267 | static int | |
268 | dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) | |
269 | { | |
270 | int tmp = port_no; | |
3abc4a1a JG |
271 | int err; |
272 | struct odp_port port; | |
273 | ||
274 | err = dpif_port_query_by_number(dpif_, port_no, &port); | |
275 | if (err) { | |
276 | return err; | |
277 | } | |
278 | ||
279 | err = do_ioctl(dpif_, ODP_PORT_DETACH, &tmp); | |
280 | if (err) { | |
281 | return err; | |
282 | } | |
283 | ||
284 | if (!netdev_is_open(port.devname)) { | |
285 | /* Try deleting the port if no one has it open. This shouldn't | |
286 | * actually be necessary unless the config changed while we weren't | |
287 | * running but it won't hurt anything if the port is already gone. */ | |
288 | do_ioctl(dpif_, ODP_VPORT_DEL, port.devname); | |
289 | } | |
290 | ||
291 | return 0; | |
96fba48f BP |
292 | } |
293 | ||
294 | static int | |
295 | dpif_linux_port_query_by_number(const struct dpif *dpif_, uint16_t port_no, | |
296 | struct odp_port *port) | |
297 | { | |
298 | memset(port, 0, sizeof *port); | |
299 | port->port = port_no; | |
300 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
301 | } | |
302 | ||
303 | static int | |
304 | dpif_linux_port_query_by_name(const struct dpif *dpif_, const char *devname, | |
305 | struct odp_port *port) | |
306 | { | |
307 | memset(port, 0, sizeof *port); | |
308 | strncpy(port->devname, devname, sizeof port->devname); | |
309 | return do_ioctl(dpif_, ODP_PORT_QUERY, port); | |
310 | } | |
311 | ||
312 | static int | |
313 | dpif_linux_flow_flush(struct dpif *dpif_) | |
314 | { | |
315 | return do_ioctl(dpif_, ODP_FLOW_FLUSH, NULL); | |
316 | } | |
317 | ||
318 | static int | |
319 | dpif_linux_port_list(const struct dpif *dpif_, struct odp_port *ports, int n) | |
320 | { | |
321 | struct odp_portvec pv; | |
322 | int error; | |
323 | ||
324 | pv.ports = ports; | |
325 | pv.n_ports = n; | |
326 | error = do_ioctl(dpif_, ODP_PORT_LIST, &pv); | |
327 | return error ? -error : pv.n_ports; | |
328 | } | |
329 | ||
e9e28be3 BP |
330 | static int |
331 | dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) | |
332 | { | |
333 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
e9e28be3 | 334 | |
8b61709d BP |
335 | if (dpif->change_error) { |
336 | dpif->change_error = false; | |
54825e09 | 337 | shash_clear(&dpif->changed_ports); |
8b61709d | 338 | return ENOBUFS; |
54825e09 BP |
339 | } else if (!shash_is_empty(&dpif->changed_ports)) { |
340 | struct shash_node *node = shash_first(&dpif->changed_ports); | |
341 | *devnamep = xstrdup(node->name); | |
342 | shash_delete(&dpif->changed_ports, node); | |
8b61709d | 343 | return 0; |
e9e28be3 | 344 | } else { |
8b61709d | 345 | return EAGAIN; |
e9e28be3 | 346 | } |
e9e28be3 BP |
347 | } |
348 | ||
349 | static void | |
350 | dpif_linux_port_poll_wait(const struct dpif *dpif_) | |
351 | { | |
352 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
54825e09 | 353 | if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) { |
e9e28be3 BP |
354 | poll_immediate_wake(); |
355 | } else { | |
46097491 | 356 | rtnetlink_notifier_wait(); |
e9e28be3 BP |
357 | } |
358 | } | |
359 | ||
96fba48f BP |
360 | static int |
361 | dpif_linux_port_group_get(const struct dpif *dpif_, int group, | |
362 | uint16_t ports[], int n) | |
363 | { | |
364 | struct odp_port_group pg; | |
365 | int error; | |
366 | ||
367 | assert(n <= UINT16_MAX); | |
368 | pg.group = group; | |
369 | pg.ports = ports; | |
370 | pg.n_ports = n; | |
371 | error = do_ioctl(dpif_, ODP_PORT_GROUP_GET, &pg); | |
372 | return error ? -error : pg.n_ports; | |
373 | } | |
374 | ||
375 | static int | |
376 | dpif_linux_port_group_set(struct dpif *dpif_, int group, | |
377 | const uint16_t ports[], int n) | |
378 | { | |
379 | struct odp_port_group pg; | |
380 | ||
381 | assert(n <= UINT16_MAX); | |
382 | pg.group = group; | |
383 | pg.ports = (uint16_t *) ports; | |
384 | pg.n_ports = n; | |
385 | return do_ioctl(dpif_, ODP_PORT_GROUP_SET, &pg); | |
386 | } | |
387 | ||
388 | static int | |
389 | dpif_linux_flow_get(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
390 | { | |
391 | struct odp_flowvec fv; | |
392 | fv.flows = flows; | |
393 | fv.n_flows = n; | |
394 | return do_ioctl(dpif_, ODP_FLOW_GET, &fv); | |
395 | } | |
396 | ||
397 | static int | |
398 | dpif_linux_flow_put(struct dpif *dpif_, struct odp_flow_put *put) | |
399 | { | |
400 | return do_ioctl(dpif_, ODP_FLOW_PUT, put); | |
401 | } | |
402 | ||
403 | static int | |
404 | dpif_linux_flow_del(struct dpif *dpif_, struct odp_flow *flow) | |
405 | { | |
406 | return do_ioctl(dpif_, ODP_FLOW_DEL, flow); | |
407 | } | |
408 | ||
409 | static int | |
410 | dpif_linux_flow_list(const struct dpif *dpif_, struct odp_flow flows[], int n) | |
411 | { | |
412 | struct odp_flowvec fv; | |
413 | int error; | |
414 | ||
415 | fv.flows = flows; | |
416 | fv.n_flows = n; | |
417 | error = do_ioctl(dpif_, ODP_FLOW_LIST, &fv); | |
418 | return error ? -error : fv.n_flows; | |
419 | } | |
420 | ||
421 | static int | |
422 | dpif_linux_execute(struct dpif *dpif_, uint16_t in_port, | |
423 | const union odp_action actions[], int n_actions, | |
424 | const struct ofpbuf *buf) | |
425 | { | |
426 | struct odp_execute execute; | |
427 | memset(&execute, 0, sizeof execute); | |
428 | execute.in_port = in_port; | |
429 | execute.actions = (union odp_action *) actions; | |
430 | execute.n_actions = n_actions; | |
431 | execute.data = buf->data; | |
432 | execute.length = buf->size; | |
433 | return do_ioctl(dpif_, ODP_EXECUTE, &execute); | |
434 | } | |
435 | ||
436 | static int | |
437 | dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask) | |
438 | { | |
439 | return do_ioctl(dpif_, ODP_GET_LISTEN_MASK, listen_mask); | |
440 | } | |
441 | ||
442 | static int | |
443 | dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) | |
444 | { | |
445 | return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask); | |
446 | } | |
447 | ||
72b06300 BP |
448 | static int |
449 | dpif_linux_get_sflow_probability(const struct dpif *dpif_, | |
450 | uint32_t *probability) | |
451 | { | |
452 | return do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability); | |
453 | } | |
454 | ||
455 | static int | |
456 | dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) | |
457 | { | |
458 | return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, &probability); | |
459 | } | |
460 | ||
aae51f53 BP |
461 | static int |
462 | dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, | |
463 | uint32_t queue_id, uint32_t *priority) | |
464 | { | |
465 | if (queue_id < 0xf000) { | |
17ee3c1f | 466 | *priority = TC_H_MAKE(1 << 16, queue_id + 1); |
aae51f53 BP |
467 | return 0; |
468 | } else { | |
469 | return EINVAL; | |
470 | } | |
471 | } | |
472 | ||
96fba48f BP |
473 | static int |
474 | dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp) | |
475 | { | |
476 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
477 | struct ofpbuf *buf; | |
478 | int retval; | |
479 | int error; | |
480 | ||
68efcbec | 481 | buf = ofpbuf_new_with_headroom(65536, DPIF_RECV_MSG_PADDING); |
96fba48f BP |
482 | retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf)); |
483 | if (retval < 0) { | |
484 | error = errno; | |
485 | if (error != EAGAIN) { | |
486 | VLOG_WARN_RL(&error_rl, "%s: read failed: %s", | |
487 | dpif_name(dpif_), strerror(error)); | |
488 | } | |
489 | } else if (retval >= sizeof(struct odp_msg)) { | |
490 | struct odp_msg *msg = buf->data; | |
491 | if (msg->length <= retval) { | |
492 | buf->size += retval; | |
493 | *bufp = buf; | |
494 | return 0; | |
495 | } else { | |
496 | VLOG_WARN_RL(&error_rl, "%s: discarding message truncated " | |
d65349ea | 497 | "from %"PRIu32" bytes to %d", |
96fba48f BP |
498 | dpif_name(dpif_), msg->length, retval); |
499 | error = ERANGE; | |
500 | } | |
501 | } else if (!retval) { | |
502 | VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_)); | |
503 | error = EPROTO; | |
504 | } else { | |
505 | VLOG_WARN_RL(&error_rl, | |
506 | "%s: discarding too-short message (%d bytes)", | |
507 | dpif_name(dpif_), retval); | |
508 | error = ERANGE; | |
509 | } | |
510 | ||
511 | *bufp = NULL; | |
512 | ofpbuf_delete(buf); | |
513 | return error; | |
514 | } | |
515 | ||
516 | static void | |
517 | dpif_linux_recv_wait(struct dpif *dpif_) | |
518 | { | |
519 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
520 | poll_fd_wait(dpif->fd, POLLIN); | |
521 | } | |
522 | ||
523 | const struct dpif_class dpif_linux_class = { | |
1a6f1e2a | 524 | "system", |
8b61709d BP |
525 | NULL, |
526 | NULL, | |
d3d22744 | 527 | dpif_linux_enumerate, |
96fba48f BP |
528 | dpif_linux_open, |
529 | dpif_linux_close, | |
d3d22744 | 530 | dpif_linux_get_all_names, |
7dab847a | 531 | dpif_linux_destroy, |
96fba48f BP |
532 | dpif_linux_get_stats, |
533 | dpif_linux_get_drop_frags, | |
534 | dpif_linux_set_drop_frags, | |
535 | dpif_linux_port_add, | |
536 | dpif_linux_port_del, | |
537 | dpif_linux_port_query_by_number, | |
538 | dpif_linux_port_query_by_name, | |
539 | dpif_linux_port_list, | |
e9e28be3 BP |
540 | dpif_linux_port_poll, |
541 | dpif_linux_port_poll_wait, | |
96fba48f BP |
542 | dpif_linux_port_group_get, |
543 | dpif_linux_port_group_set, | |
544 | dpif_linux_flow_get, | |
545 | dpif_linux_flow_put, | |
546 | dpif_linux_flow_del, | |
547 | dpif_linux_flow_flush, | |
548 | dpif_linux_flow_list, | |
549 | dpif_linux_execute, | |
550 | dpif_linux_recv_get_mask, | |
551 | dpif_linux_recv_set_mask, | |
72b06300 BP |
552 | dpif_linux_get_sflow_probability, |
553 | dpif_linux_set_sflow_probability, | |
aae51f53 | 554 | dpif_linux_queue_to_priority, |
96fba48f BP |
555 | dpif_linux_recv, |
556 | dpif_linux_recv_wait, | |
557 | }; | |
558 | \f | |
/* Device-number helpers defined below. */
static int get_openvswitch_major(void);
static int get_major(const char *target);
96fba48f BP |
561 | |
562 | static int | |
563 | do_ioctl(const struct dpif *dpif_, int cmd, const void *arg) | |
564 | { | |
565 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
566 | return ioctl(dpif->fd, cmd, arg) ? errno : 0; | |
567 | } | |
568 | ||
569 | static int | |
a165b67e | 570 | lookup_minor(const char *name, int *minorp) |
96fba48f BP |
571 | { |
572 | struct ethtool_drvinfo drvinfo; | |
a165b67e | 573 | int minor, port_no; |
96fba48f BP |
574 | struct ifreq ifr; |
575 | int error; | |
576 | int sock; | |
577 | ||
578 | sock = socket(AF_INET, SOCK_DGRAM, 0); | |
579 | if (sock < 0) { | |
580 | VLOG_WARN("socket(AF_INET) failed: %s", strerror(errno)); | |
581 | error = errno; | |
582 | goto error; | |
583 | } | |
584 | ||
585 | memset(&ifr, 0, sizeof ifr); | |
586 | strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name); | |
587 | ifr.ifr_data = (caddr_t) &drvinfo; | |
588 | ||
589 | memset(&drvinfo, 0, sizeof drvinfo); | |
590 | drvinfo.cmd = ETHTOOL_GDRVINFO; | |
591 | if (ioctl(sock, SIOCETHTOOL, &ifr)) { | |
592 | VLOG_WARN("ioctl(SIOCETHTOOL) failed: %s", strerror(errno)); | |
593 | error = errno; | |
594 | goto error_close_sock; | |
595 | } | |
596 | ||
597 | if (strcmp(drvinfo.driver, "openvswitch")) { | |
598 | VLOG_WARN("%s is not an openvswitch device", name); | |
599 | error = EOPNOTSUPP; | |
600 | goto error_close_sock; | |
601 | } | |
602 | ||
a165b67e BP |
603 | if (sscanf(drvinfo.bus_info, "%d.%d", &minor, &port_no) != 2) { |
604 | VLOG_WARN("%s ethtool bus_info has unexpected format", name); | |
96fba48f BP |
605 | error = EPROTOTYPE; |
606 | goto error_close_sock; | |
a165b67e BP |
607 | } else if (port_no != ODPP_LOCAL) { |
608 | /* This is an Open vSwitch device but not the local port. We | |
609 | * intentionally support only using the name of the local port as the | |
610 | * name of a datapath; otherwise, it would be too difficult to | |
611 | * enumerate all the names of a datapath. */ | |
612 | error = EOPNOTSUPP; | |
613 | goto error_close_sock; | |
96fba48f BP |
614 | } |
615 | ||
a165b67e | 616 | *minorp = minor; |
96fba48f BP |
617 | close(sock); |
618 | return 0; | |
619 | ||
620 | error_close_sock: | |
621 | close(sock); | |
622 | error: | |
623 | return error; | |
624 | } | |
625 | ||
626 | static int | |
627 | make_openvswitch_device(int minor, char **fnp) | |
628 | { | |
96fba48f | 629 | const char dirname[] = "/dev/net"; |
57aaff8a JP |
630 | int major; |
631 | dev_t dev; | |
96fba48f BP |
632 | struct stat s; |
633 | char fn[128]; | |
634 | ||
8334b477 BP |
635 | *fnp = NULL; |
636 | ||
57aaff8a JP |
637 | major = get_openvswitch_major(); |
638 | if (major < 0) { | |
639 | return -major; | |
640 | } | |
641 | dev = makedev(major, minor); | |
642 | ||
96fba48f BP |
643 | sprintf(fn, "%s/dp%d", dirname, minor); |
644 | if (!stat(fn, &s)) { | |
645 | if (!S_ISCHR(s.st_mode)) { | |
646 | VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", | |
647 | fn); | |
648 | } else if (s.st_rdev != dev) { | |
649 | VLOG_WARN_RL(&error_rl, | |
f17d7bd8 | 650 | "%s is device %u:%u but should be %u:%u, fixing", |
96fba48f BP |
651 | fn, major(s.st_rdev), minor(s.st_rdev), |
652 | major(dev), minor(dev)); | |
653 | } else { | |
654 | goto success; | |
655 | } | |
656 | if (unlink(fn)) { | |
657 | VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", | |
658 | fn, strerror(errno)); | |
659 | return errno; | |
660 | } | |
661 | } else if (errno == ENOENT) { | |
662 | if (stat(dirname, &s)) { | |
663 | if (errno == ENOENT) { | |
664 | if (mkdir(dirname, 0755)) { | |
665 | VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", | |
666 | dirname, strerror(errno)); | |
667 | return errno; | |
668 | } | |
669 | } else { | |
670 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", | |
671 | dirname, strerror(errno)); | |
672 | return errno; | |
673 | } | |
674 | } | |
675 | } else { | |
676 | VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); | |
677 | return errno; | |
678 | } | |
679 | ||
680 | /* The device needs to be created. */ | |
681 | if (mknod(fn, S_IFCHR | 0700, dev)) { | |
682 | VLOG_WARN_RL(&error_rl, | |
683 | "%s: creating character device %u:%u failed (%s)", | |
684 | fn, major(dev), minor(dev), strerror(errno)); | |
685 | return errno; | |
686 | } | |
687 | ||
688 | success: | |
689 | *fnp = xstrdup(fn); | |
690 | return 0; | |
691 | } | |
692 | ||
57aaff8a JP |
/* Returns the major device number of the Open vSwitch device, or a negative
 * errno value if it cannot be determined.
 *
 * A successful lookup is cached for later calls; a failed one is retried on
 * the next call. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}
704 | ||
/* Searches the character-device section of /proc/devices for the driver
 * named 'target'.
 *
 * Returns the driver's major number if it is listed.  Otherwise returns a
 * negative errno value: the fopen() error if /proc/devices cannot be
 * opened, or -ENODEV if 'target' is not listed. */
static int
get_major(const char *target)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        /* Save errno before logging, which may overwrite it. */
        int error = errno;

        VLOG_ERR("opening %s failed (%s)", fn, strerror(error));
        return -error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;
        int n_fields;

        if (!strncmp(line, "Character", 9)) {
            /* Section header: nothing to do. */
            continue;
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        }

        /* Only use 'major' and 'name' when both were converted.  sscanf()
         * returns EOF for an empty or all-whitespace line and 1 for a line
         * with a number only; either would leave 'name' uninitialized. */
        n_fields = sscanf(line, "%d %63s", &major, name);
        if (n_fields == 2) {
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else if (n_fields != EOF) {
            /* Malformed line: warn once.  Blank lines are skipped
             * silently. */
            static bool warned;
            if (!warned) {
                VLOG_WARN("%s:%d: syntax error", fn, ln);
            }
            warned = true;
        }
    }

    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
    return -ENODEV;
}
747 | ||
e9e28be3 BP |
748 | static int |
749 | finish_open(struct dpif *dpif_, const char *local_ifname) | |
750 | { | |
751 | struct dpif_linux *dpif = dpif_linux_cast(dpif_); | |
999401aa | 752 | dpif->local_ifname = xstrdup(local_ifname); |
e9e28be3 BP |
753 | dpif->local_ifindex = if_nametoindex(local_ifname); |
754 | if (!dpif->local_ifindex) { | |
755 | int error = errno; | |
999401aa | 756 | dpif_uninit(dpif_, true); |
e9e28be3 BP |
757 | VLOG_WARN("could not get ifindex of %s device: %s", |
758 | local_ifname, strerror(errno)); | |
759 | return error; | |
760 | } | |
761 | return 0; | |
762 | } | |
763 | ||
96fba48f BP |
764 | static int |
765 | create_minor(const char *name, int minor, struct dpif **dpifp) | |
766 | { | |
767 | int error = open_minor(minor, dpifp); | |
768 | if (!error) { | |
769 | error = do_ioctl(*dpifp, ODP_DP_CREATE, name); | |
e9e28be3 BP |
770 | if (!error) { |
771 | error = finish_open(*dpifp, name); | |
772 | } else { | |
999401aa | 773 | dpif_uninit(*dpifp, true); |
96fba48f BP |
774 | } |
775 | } | |
776 | return error; | |
777 | } | |
778 | ||
779 | static int | |
780 | open_minor(int minor, struct dpif **dpifp) | |
781 | { | |
782 | int error; | |
783 | char *fn; | |
784 | int fd; | |
785 | ||
786 | error = make_openvswitch_device(minor, &fn); | |
787 | if (error) { | |
788 | return error; | |
789 | } | |
790 | ||
791 | fd = open(fn, O_RDONLY | O_NONBLOCK); | |
792 | if (fd >= 0) { | |
e9e28be3 | 793 | struct dpif_linux *dpif = xmalloc(sizeof *dpif); |
46097491 BP |
794 | error = rtnetlink_notifier_register(&dpif->port_notifier, |
795 | dpif_linux_port_changed, dpif); | |
e9e28be3 BP |
796 | if (!error) { |
797 | char *name; | |
798 | ||
799 | name = xasprintf("dp%d", minor); | |
800 | dpif_init(&dpif->dpif, &dpif_linux_class, name, minor, minor); | |
801 | free(name); | |
802 | ||
803 | dpif->fd = fd; | |
d3d22744 BP |
804 | dpif->local_ifname = NULL; |
805 | dpif->minor = minor; | |
e9e28be3 | 806 | dpif->local_ifindex = 0; |
54825e09 | 807 | shash_init(&dpif->changed_ports); |
8b61709d | 808 | dpif->change_error = false; |
e9e28be3 BP |
809 | *dpifp = &dpif->dpif; |
810 | } else { | |
811 | free(dpif); | |
812 | } | |
96fba48f BP |
813 | } else { |
814 | error = errno; | |
815 | VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); | |
816 | } | |
817 | free(fn); | |
818 | ||
819 | return error; | |
820 | } | |
e9e28be3 BP |
821 | |
822 | static void | |
46097491 | 823 | dpif_linux_port_changed(const struct rtnetlink_change *change, void *dpif_) |
e9e28be3 BP |
824 | { |
825 | struct dpif_linux *dpif = dpif_; | |
826 | ||
8b61709d BP |
827 | if (change) { |
828 | if (change->master_ifindex == dpif->local_ifindex | |
829 | && (change->nlmsg_type == RTM_NEWLINK | |
830 | || change->nlmsg_type == RTM_DELLINK)) | |
831 | { | |
832 | /* Our datapath changed, either adding a new port or deleting an | |
833 | * existing one. */ | |
54825e09 | 834 | shash_add_once(&dpif->changed_ports, change->ifname, NULL); |
e9e28be3 | 835 | } |
8b61709d BP |
836 | } else { |
837 | dpif->change_error = true; | |
e9e28be3 BP |
838 | } |
839 | } |