]>
Commit | Line | Data |
---|---|---|
7f1e1c7f GS |
1 | /* |
2 | * Copyright (c) 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "socket-util.h" | |
19 | #include <errno.h> | |
20 | #include <fcntl.h> | |
21 | #include <net/if.h> | |
22 | #include <sys/ioctl.h> | |
23 | #include <sys/types.h> | |
24 | #include <sys/stat.h> | |
25 | #include <sys/un.h> | |
2d5cfef8 | 26 | #include <sys/wait.h> |
7f1e1c7f GS |
27 | #include <unistd.h> |
28 | #include "fatal-signal.h" | |
29 | #include "random.h" | |
30 | #include "util.h" | |
31 | #include "vlog.h" | |
32 | ||
33 | VLOG_DEFINE_THIS_MODULE(socket_util_unix); | |
34 | ||
/* Maintaining code behind #ifdefs is painful because it has to be built in
 * every configuration to be checked.  This file therefore compiles all of
 * its code on every target and tests "if (LINUX)" at run time instead of
 * using "#ifdef __linux__". */
#if defined(__linux__)
#define LINUX 1
#else
#define LINUX 0
#endif

/* Fall back to a no-op flag value when the platform does not provide
 * O_DIRECTORY. */
#if !defined(O_DIRECTORY)
#define O_DIRECTORY 0
#endif

/* Maximum length of the sun_path member in a struct sockaddr_un, excluding
 * space for a null terminator. */
#define MAX_UN_LEN (sizeof(((struct sockaddr_un *) 0)->sun_path) - 1)
51 | ||
/* Creates a pipe with pipe() and stores its two file descriptors in 'fds'.
 * If pipe() fails, reports the errno description through VLOG_FATAL. */
void
xpipe(int fds[2])
{
    int retval = pipe(fds);

    if (retval != 0) {
        VLOG_FATAL("failed to create pipe (%s)", ovs_strerror(errno));
    }
}
59 | ||
/* Creates a pipe with xpipe() and then passes each of its two file
 * descriptors, read end first, to xset_nonblocking(). */
void
xpipe_nonblocking(int fds[2])
{
    int end;

    xpipe(fds);
    for (end = 0; end < 2; end++) {
        xset_nonblocking(fds[end]);
    }
}
67 | ||
68 | /* Drain all the data currently in the receive queue of a datagram socket (and | |
69 | * possibly additional data). There is no way to know how many packets are in | |
70 | * the receive queue, but we do know that the total number of bytes queued does | |
71 | * not exceed the receive buffer size, so we pull packets until none are left | |
72 | * or we've read that many bytes. */ | |
73 | int | |
74 | drain_rcvbuf(int fd) | |
75 | { | |
76 | int rcvbuf; | |
77 | ||
78 | rcvbuf = get_socket_rcvbuf(fd); | |
79 | if (rcvbuf < 0) { | |
80 | return -rcvbuf; | |
81 | } | |
82 | ||
83 | while (rcvbuf > 0) { | |
84 | /* In Linux, specifying MSG_TRUNC in the flags argument causes the | |
85 | * datagram length to be returned, even if that is longer than the | |
86 | * buffer provided. Thus, we can use a 1-byte buffer to discard the | |
87 | * incoming datagram and still be able to account how many bytes were | |
88 | * removed from the receive buffer. | |
89 | * | |
90 | * On other Unix-like OSes, MSG_TRUNC has no effect in the flags | |
91 | * argument. */ | |
92 | char buffer[LINUX ? 1 : 2048]; | |
93 | ssize_t n_bytes = recv(fd, buffer, sizeof buffer, | |
94 | MSG_TRUNC | MSG_DONTWAIT); | |
95 | if (n_bytes <= 0 || n_bytes >= rcvbuf) { | |
96 | break; | |
97 | } | |
98 | rcvbuf -= n_bytes; | |
99 | } | |
100 | return 0; | |
101 | } | |
102 | ||
103 | /* Attempts to shorten 'name' by opening a file descriptor for the directory | |
104 | * part of the name and indirecting through /proc/self/fd/<dirfd>/<basename>. | |
105 | * On systems with Linux-like /proc, this works as long as <basename> isn't too | |
106 | * long. | |
107 | * | |
108 | * On success, returns 0 and stores the short name in 'short_name' and a | |
109 | * directory file descriptor to eventually be closed in '*dirfpd'. */ | |
110 | static int | |
111 | shorten_name_via_proc(const char *name, char short_name[MAX_UN_LEN + 1], | |
112 | int *dirfdp) | |
113 | { | |
114 | char *dir, *base; | |
115 | int dirfd; | |
116 | int len; | |
117 | ||
118 | if (!LINUX) { | |
119 | return ENAMETOOLONG; | |
120 | } | |
121 | ||
122 | dir = dir_name(name); | |
123 | dirfd = open(dir, O_DIRECTORY | O_RDONLY); | |
124 | if (dirfd < 0) { | |
125 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
126 | int error = errno; | |
127 | ||
128 | VLOG_WARN_RL(&rl, "%s: open failed (%s)", dir, ovs_strerror(error)); | |
129 | free(dir); | |
130 | ||
131 | return error; | |
132 | } | |
133 | free(dir); | |
134 | ||
135 | base = base_name(name); | |
136 | len = snprintf(short_name, MAX_UN_LEN + 1, | |
137 | "/proc/self/fd/%d/%s", dirfd, base); | |
138 | free(base); | |
139 | ||
140 | if (len >= 0 && len <= MAX_UN_LEN) { | |
141 | *dirfdp = dirfd; | |
142 | return 0; | |
143 | } else { | |
144 | close(dirfd); | |
145 | return ENAMETOOLONG; | |
146 | } | |
147 | } | |
148 | ||
149 | /* Attempts to shorten 'name' by creating a symlink for the directory part of | |
150 | * the name and indirecting through <symlink>/<basename>. This works on | |
151 | * systems that support symlinks, as long as <basename> isn't too long. | |
152 | * | |
153 | * On success, returns 0 and stores the short name in 'short_name' and the | |
154 | * symbolic link to eventually delete in 'linkname'. */ | |
155 | static int | |
156 | shorten_name_via_symlink(const char *name, char short_name[MAX_UN_LEN + 1], | |
157 | char linkname[MAX_UN_LEN + 1]) | |
158 | { | |
159 | char *abs, *dir, *base; | |
160 | const char *tmpdir; | |
161 | int error; | |
162 | int i; | |
163 | ||
164 | abs = abs_file_name(NULL, name); | |
165 | dir = dir_name(abs); | |
166 | base = base_name(abs); | |
167 | free(abs); | |
168 | ||
169 | tmpdir = getenv("TMPDIR"); | |
170 | if (tmpdir == NULL) { | |
171 | tmpdir = "/tmp"; | |
172 | } | |
173 | ||
174 | for (i = 0; i < 1000; i++) { | |
175 | int len; | |
176 | ||
177 | len = snprintf(linkname, MAX_UN_LEN + 1, | |
178 | "%s/ovs-un-c-%"PRIu32, tmpdir, random_uint32()); | |
179 | error = (len < 0 || len > MAX_UN_LEN ? ENAMETOOLONG | |
180 | : symlink(dir, linkname) ? errno | |
181 | : 0); | |
182 | if (error != EEXIST) { | |
183 | break; | |
184 | } | |
185 | } | |
186 | ||
187 | if (!error) { | |
188 | int len; | |
189 | ||
190 | fatal_signal_add_file_to_unlink(linkname); | |
191 | ||
192 | len = snprintf(short_name, MAX_UN_LEN + 1, "%s/%s", linkname, base); | |
193 | if (len < 0 || len > MAX_UN_LEN) { | |
194 | fatal_signal_unlink_file_now(linkname); | |
195 | error = ENAMETOOLONG; | |
196 | } | |
197 | } | |
198 | ||
199 | if (error) { | |
200 | linkname[0] = '\0'; | |
201 | } | |
202 | free(dir); | |
203 | free(base); | |
204 | ||
205 | return error; | |
206 | } | |
207 | ||
208 | /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in | |
209 | * '*un_len' the size of the sockaddr_un. | |
210 | * | |
211 | * Returns 0 on success, otherwise a positive errno value. | |
212 | * | |
213 | * Uses '*dirfdp' and 'linkname' to store references to data when the caller no | |
214 | * longer needs to use 'un'. On success, freeing these references with | |
215 | * free_sockaddr_un() is mandatory to avoid a leak; on failure, freeing them is | |
216 | * unnecessary but harmless. */ | |
217 | static int | |
218 | make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len, | |
219 | int *dirfdp, char linkname[MAX_UN_LEN + 1]) | |
220 | { | |
221 | char short_name[MAX_UN_LEN + 1]; | |
222 | ||
223 | *dirfdp = -1; | |
224 | linkname[0] = '\0'; | |
225 | if (strlen(name) > MAX_UN_LEN) { | |
226 | /* 'name' is too long to fit in a sockaddr_un. Try a workaround. */ | |
227 | int error = shorten_name_via_proc(name, short_name, dirfdp); | |
228 | if (error == ENAMETOOLONG) { | |
229 | error = shorten_name_via_symlink(name, short_name, linkname); | |
230 | } | |
231 | if (error) { | |
232 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
233 | ||
234 | VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum " | |
235 | "%"PRIuSIZE" bytes", name, MAX_UN_LEN); | |
236 | return error; | |
237 | } | |
238 | ||
239 | name = short_name; | |
240 | } | |
241 | ||
242 | un->sun_family = AF_UNIX; | |
243 | ovs_strzcpy(un->sun_path, name, sizeof un->sun_path); | |
244 | *un_len = (offsetof(struct sockaddr_un, sun_path) | |
245 | + strlen (un->sun_path) + 1); | |
246 | return 0; | |
247 | } | |
248 | ||
/* Releases what make_sockaddr_un() handed back: closes 'dirfd' unless it is
 * negative, and unlinks 'linkname' unless it is the empty string. */
static void
free_sockaddr_un(int dirfd, const char *linkname)
{
    const int have_dirfd = dirfd >= 0;
    const int have_link = linkname[0] != '\0';

    if (have_dirfd) {
        close(dirfd);
    }
    if (have_link) {
        fatal_signal_unlink_file_now(linkname);
    }
}
260 | ||
261 | /* Binds Unix domain socket 'fd' to a file with permissions 0700. */ | |
262 | static int | |
263 | bind_unix_socket(int fd, struct sockaddr *sun, socklen_t sun_len) | |
264 | { | |
2d5cfef8 BP |
265 | const mode_t mode = 0700; |
266 | if (LINUX) { | |
267 | /* On Linux, the fd's permissions become the file's permissions. | |
268 | * fchmod() does not affect other files, like umask() does. */ | |
269 | if (fchmod(fd, mode)) { | |
270 | return errno; | |
271 | } | |
272 | ||
273 | /* Must be after fchmod(). */ | |
274 | if (bind(fd, sun, sun_len)) { | |
275 | return errno; | |
276 | } | |
277 | return 0; | |
278 | } else { | |
279 | /* On FreeBSD and NetBSD, only the umask affects permissions. The | |
280 | * umask is process-wide rather than thread-specific, so we have to use | |
281 | * a subprocess for safety. */ | |
282 | pid_t pid = fork(); | |
283 | ||
284 | if (!pid) { | |
285 | umask(mode ^ 0777); | |
286 | _exit(bind(fd, sun, sun_len) ? errno : 0); | |
287 | } else if (pid > 0) { | |
288 | int status; | |
289 | int error; | |
290 | ||
291 | do { | |
292 | error = waitpid(pid, &status, 0) < 0 ? errno : 0; | |
293 | } while (error == EINTR); | |
294 | ||
295 | return (error ? error | |
296 | : WIFEXITED(status) ? WEXITSTATUS(status) | |
297 | : WIFSIGNALED(status) ? EINTR | |
298 | : ECHILD /* WTF? */); | |
299 | } else { | |
300 | return errno; | |
301 | } | |
302 | } | |
7f1e1c7f GS |
303 | } |
304 | ||
305 | /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or | |
306 | * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and | |
307 | * connected to '*connect_path' (if 'connect_path' is non-null). If 'nonblock' | |
308 | * is true, the socket is made non-blocking. | |
309 | * | |
310 | * Returns the socket's fd if successful, otherwise a negative errno value. */ | |
311 | int | |
312 | make_unix_socket(int style, bool nonblock, | |
313 | const char *bind_path, const char *connect_path) | |
314 | { | |
315 | int error; | |
316 | int fd; | |
317 | ||
318 | fd = socket(PF_UNIX, style, 0); | |
319 | if (fd < 0) { | |
320 | return -errno; | |
321 | } | |
322 | ||
323 | /* Set nonblocking mode right away, if we want it. This prevents blocking | |
324 | * in connect(), if connect_path != NULL. (In turn, that's a corner case: | |
325 | * it will only happen if style is SOCK_STREAM or SOCK_SEQPACKET, and only | |
326 | * if a backlog of un-accepted connections has built up in the kernel.) */ | |
327 | if (nonblock) { | |
328 | error = set_nonblocking(fd); | |
329 | if (error) { | |
330 | goto error; | |
331 | } | |
332 | } | |
333 | ||
334 | if (bind_path) { | |
335 | char linkname[MAX_UN_LEN + 1]; | |
336 | struct sockaddr_un un; | |
337 | socklen_t un_len; | |
338 | int dirfd; | |
339 | ||
340 | if (unlink(bind_path) && errno != ENOENT) { | |
341 | VLOG_WARN("unlinking \"%s\": %s\n", | |
342 | bind_path, ovs_strerror(errno)); | |
343 | } | |
344 | fatal_signal_add_file_to_unlink(bind_path); | |
345 | ||
346 | error = make_sockaddr_un(bind_path, &un, &un_len, &dirfd, linkname); | |
347 | if (!error) { | |
348 | error = bind_unix_socket(fd, (struct sockaddr *) &un, un_len); | |
349 | } | |
350 | free_sockaddr_un(dirfd, linkname); | |
351 | ||
352 | if (error) { | |
353 | goto error; | |
354 | } | |
355 | } | |
356 | ||
357 | if (connect_path) { | |
358 | char linkname[MAX_UN_LEN + 1]; | |
359 | struct sockaddr_un un; | |
360 | socklen_t un_len; | |
361 | int dirfd; | |
362 | ||
363 | error = make_sockaddr_un(connect_path, &un, &un_len, &dirfd, linkname); | |
364 | if (!error | |
365 | && connect(fd, (struct sockaddr*) &un, un_len) | |
366 | && errno != EINPROGRESS) { | |
367 | error = errno; | |
368 | } | |
369 | free_sockaddr_un(dirfd, linkname); | |
370 | ||
371 | if (error) { | |
372 | goto error; | |
373 | } | |
374 | } | |
375 | ||
376 | return fd; | |
377 | ||
378 | error: | |
379 | if (error == EAGAIN) { | |
380 | error = EPROTO; | |
381 | } | |
382 | if (bind_path) { | |
383 | fatal_signal_unlink_file_now(bind_path); | |
384 | } | |
385 | close(fd); | |
386 | return -error; | |
387 | } | |
388 | ||
/* Given 'sun_len', the length of a struct sockaddr_un, returns the number of
 * bytes it holds beyond the start of the sun_path member, or 0 if 'sun_len'
 * does not even reach sun_path. */
int
get_unix_name_len(socklen_t sun_len)
{
    const size_t path_offset = offsetof(struct sockaddr_un, sun_path);

    if (sun_len < path_offset) {
        return 0;
    }
    return sun_len - path_offset;
}
396 | ||
397 | /* Calls ioctl() on an AF_INET sock, passing the specified 'command' and | |
398 | * 'arg'. Returns 0 if successful, otherwise a positive errno value. */ | |
399 | int | |
400 | af_inet_ioctl(unsigned long int command, const void *arg) | |
401 | { | |
402 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
403 | static int sock; | |
404 | ||
405 | if (ovsthread_once_start(&once)) { | |
406 | sock = socket(AF_INET, SOCK_DGRAM, 0); | |
407 | if (sock < 0) { | |
408 | int error = sock_errno(); | |
409 | VLOG_ERR("failed to create inet socket: %s", sock_strerror(error)); | |
410 | sock = -error; | |
411 | } | |
412 | ovsthread_once_done(&once); | |
413 | } | |
414 | ||
415 | return (sock < 0 ? -sock | |
416 | : ioctl(sock, command, arg) == -1 ? errno | |
417 | : 0); | |
418 | } | |
419 | ||
420 | int | |
421 | af_inet_ifreq_ioctl(const char *name, struct ifreq *ifr, unsigned long int cmd, | |
422 | const char *cmd_name) | |
423 | { | |
424 | int error; | |
425 | ||
426 | ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name); | |
427 | error = af_inet_ioctl(cmd, ifr); | |
428 | if (error) { | |
429 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); | |
430 | VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name, | |
431 | ovs_strerror(error)); | |
432 | } | |
433 | return error; | |
434 | } |