]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2008, 2009 Nicira Networks. | |
3 | * | |
4 | * Permission to use, copy, modify, and/or distribute this software for any | |
5 | * purpose with or without fee is hereby granted, provided that the above | |
6 | * copyright notice and this permission notice appear in all copies. | |
7 | * | |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "socket-util.h" | |
19 | #include <arpa/inet.h> | |
20 | #include <errno.h> | |
21 | #include <fcntl.h> | |
22 | #include <netdb.h> | |
23 | #include <poll.h> | |
24 | #include <stddef.h> | |
25 | #include <stdio.h> | |
26 | #include <string.h> | |
27 | #include <sys/resource.h> | |
28 | #include <sys/un.h> | |
29 | #include <unistd.h> | |
30 | #include "fatal-signal.h" | |
31 | #include "util.h" | |
32 | ||
33 | #include "vlog.h" | |
34 | #define THIS_MODULE VLM_socket_util | |
35 | ||
/* Sets 'fd' to non-blocking mode by adding O_NONBLOCK to its current file
 * status flags.  Returns 0 if successful, otherwise a positive errno value.
 *
 * A failure is also logged.  errno is captured immediately after the failing
 * fcntl() call because the logging call is not guaranteed to leave errno
 * untouched, and the caller must receive the value that fcntl() reported. */
int
set_nonblocking(int fd)
{
    int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        int error = errno;
        VLOG_ERR("fcntl(F_GETFL) failed: %s", strerror(error));
        return error;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        int error = errno;
        VLOG_ERR("fcntl(F_SETFL) failed: %s", strerror(error));
        return error;
    }

    return 0;
}
54 | ||
/* Returns the maximum valid FD value, plus 1.
 *
 * The value is taken from the soft RLIMIT_NOFILE limit the first time this
 * function is called and cached for later calls.  If the limit cannot be
 * read, or it is one of the special RLIM_* values, a warning is logged and
 * 1024 is used instead. */
int
get_max_fds(void)
{
    static int cached_limit = -1;
    struct rlimit limit;

    if (cached_limit >= 0) {
        return cached_limit;
    }

    if (getrlimit(RLIMIT_NOFILE, &limit)
        || limit.rlim_cur == RLIM_INFINITY
        || limit.rlim_cur == RLIM_SAVED_MAX
        || limit.rlim_cur == RLIM_SAVED_CUR) {
        VLOG_WARN("failed to obtain fd limit, defaulting to 1024");
        cached_limit = 1024;
    } else {
        cached_limit = limit.rlim_cur;
    }
    return cached_limit;
}
74 | ||
75 | /* Translates 'host_name', which may be a DNS name or an IP address, into a | |
76 | * numeric IP address in '*addr'. Returns 0 if successful, otherwise a | |
77 | * positive errno value. */ | |
78 | int | |
79 | lookup_ip(const char *host_name, struct in_addr *addr) | |
80 | { | |
81 | if (!inet_aton(host_name, addr)) { | |
82 | struct hostent *he = gethostbyname(host_name); | |
83 | if (he == NULL) { | |
84 | struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
85 | VLOG_ERR_RL(&rl, "gethostbyname(%s): %s", host_name, | |
86 | (h_errno == HOST_NOT_FOUND ? "host not found" | |
87 | : h_errno == TRY_AGAIN ? "try again" | |
88 | : h_errno == NO_RECOVERY ? "non-recoverable error" | |
89 | : h_errno == NO_ADDRESS ? "no address" | |
90 | : "unknown error")); | |
91 | return ENOENT; | |
92 | } | |
93 | addr->s_addr = *(uint32_t *) he->h_addr; | |
94 | } | |
95 | return 0; | |
96 | } | |
97 | ||
98 | /* Returns the error condition associated with socket 'fd' and resets the | |
99 | * socket's error status. */ | |
100 | int | |
101 | get_socket_error(int fd) | |
102 | { | |
103 | int error; | |
104 | socklen_t len = sizeof(error); | |
105 | if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0) { | |
106 | struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10); | |
107 | error = errno; | |
108 | VLOG_ERR_RL(&rl, "getsockopt(SO_ERROR): %s", strerror(error)); | |
109 | } | |
110 | return error; | |
111 | } | |
112 | ||
113 | int | |
114 | check_connection_completion(int fd) | |
115 | { | |
116 | struct pollfd pfd; | |
117 | int retval; | |
118 | ||
119 | pfd.fd = fd; | |
120 | pfd.events = POLLOUT; | |
121 | do { | |
122 | retval = poll(&pfd, 1, 0); | |
123 | } while (retval < 0 && errno == EINTR); | |
124 | if (retval == 1) { | |
125 | return get_socket_error(fd); | |
126 | } else if (retval < 0) { | |
127 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10); | |
128 | VLOG_ERR_RL(&rl, "poll: %s", strerror(errno)); | |
129 | return errno; | |
130 | } else { | |
131 | return EAGAIN; | |
132 | } | |
133 | } | |
134 | ||
135 | /* Drain all the data currently in the receive queue of a datagram socket (and | |
136 | * possibly additional data). There is no way to know how many packets are in | |
137 | * the receive queue, but we do know that the total number of bytes queued does | |
138 | * not exceed the receive buffer size, so we pull packets until none are left | |
139 | * or we've read that many bytes. */ | |
140 | int | |
141 | drain_rcvbuf(int fd) | |
142 | { | |
143 | socklen_t rcvbuf_len; | |
144 | size_t rcvbuf; | |
145 | ||
146 | rcvbuf_len = sizeof rcvbuf; | |
147 | if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &rcvbuf_len) < 0) { | |
148 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10); | |
149 | VLOG_ERR_RL(&rl, "getsockopt(SO_RCVBUF) failed: %s", strerror(errno)); | |
150 | return errno; | |
151 | } | |
152 | while (rcvbuf > 0) { | |
153 | /* In Linux, specifying MSG_TRUNC in the flags argument causes the | |
154 | * datagram length to be returned, even if that is longer than the | |
155 | * buffer provided. Thus, we can use a 1-byte buffer to discard the | |
156 | * incoming datagram and still be able to account how many bytes were | |
157 | * removed from the receive buffer. | |
158 | * | |
159 | * On other Unix-like OSes, MSG_TRUNC has no effect in the flags | |
160 | * argument. */ | |
161 | #ifdef __linux__ | |
162 | #define BUFFER_SIZE 1 | |
163 | #else | |
164 | #define BUFFER_SIZE 2048 | |
165 | #endif | |
166 | char buffer[BUFFER_SIZE]; | |
167 | ssize_t n_bytes = recv(fd, buffer, sizeof buffer, | |
168 | MSG_TRUNC | MSG_DONTWAIT); | |
169 | if (n_bytes <= 0 || n_bytes >= rcvbuf) { | |
170 | break; | |
171 | } | |
172 | rcvbuf -= n_bytes; | |
173 | } | |
174 | return 0; | |
175 | } | |
176 | ||
/* Reads and discards up to 'n_packets' datagrams from 'fd', one read() call
 * per datagram, stopping early as soon as a read() returns no data or fails.
 * ('fd' should therefore be in non-blocking mode.) */
void
drain_fd(int fd, size_t n_packets)
{
    /* A 1-byte buffer would do in most circumstances.  The larger size is
     * defensive against the possibility that we someday want to use a Linux
     * tap device without TUN_NO_PI, in which case a buffer smaller than
     * sizeof(struct tun_pi) will give EINVAL on read. */
    char scratch[128];
    size_t remaining = n_packets;

    while (remaining > 0 && read(fd, scratch, sizeof scratch) > 0) {
        remaining--;
    }
}
194 | ||
/* Stores in '*un' a sockaddr_un that refers to file 'name'.  Stores in
 * '*un_len' the size of the sockaddr_un: the offset of 'sun_path' plus the
 * length of the stored path including its null terminator.
 *
 * A 'name' too long for 'sun_path' is silently truncated to fit.  The unused
 * tail of 'sun_path' is filled with null bytes. */
static void
make_sockaddr_un(const char *name, struct sockaddr_un* un, socklen_t *un_len)
{
    const size_t capacity = sizeof un->sun_path;
    size_t path_len = strlen(name);

    /* Leave room for the terminator. */
    if (path_len > capacity - 1) {
        path_len = capacity - 1;
    }

    un->sun_family = AF_UNIX;
    memcpy(un->sun_path, name, path_len);
    memset(un->sun_path + path_len, '\0', capacity - path_len);

    *un_len = offsetof(struct sockaddr_un, sun_path) + path_len + 1;
}
206 | ||
207 | /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or | |
208 | * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and | |
209 | * connected to '*connect_path' (if 'connect_path' is non-null). If 'nonblock' | |
210 | * is true, the socket is made non-blocking. If 'passcred' is true, the socket | |
211 | * is configured to receive SCM_CREDENTIALS control messages. | |
212 | * | |
213 | * Returns the socket's fd if successful, otherwise a negative errno value. */ | |
214 | int | |
215 | make_unix_socket(int style, bool nonblock, bool passcred UNUSED, | |
216 | const char *bind_path, const char *connect_path) | |
217 | { | |
218 | int error; | |
219 | int fd; | |
220 | ||
221 | fd = socket(PF_UNIX, style, 0); | |
222 | if (fd < 0) { | |
223 | return -errno; | |
224 | } | |
225 | ||
226 | /* Set nonblocking mode right away, if we want it. This prevents blocking | |
227 | * in connect(), if connect_path != NULL. (In turn, that's a corner case: | |
228 | * it will only happen if style is SOCK_STREAM or SOCK_SEQPACKET, and only | |
229 | * if a backlog of un-accepted connections has built up in the kernel.) */ | |
230 | if (nonblock) { | |
231 | int flags = fcntl(fd, F_GETFL, 0); | |
232 | if (flags == -1) { | |
233 | goto error; | |
234 | } | |
235 | if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) { | |
236 | goto error; | |
237 | } | |
238 | } | |
239 | ||
240 | if (bind_path) { | |
241 | struct sockaddr_un un; | |
242 | socklen_t un_len; | |
243 | make_sockaddr_un(bind_path, &un, &un_len); | |
244 | if (unlink(un.sun_path) && errno != ENOENT) { | |
245 | VLOG_WARN("unlinking \"%s\": %s\n", un.sun_path, strerror(errno)); | |
246 | } | |
247 | fatal_signal_add_file_to_unlink(bind_path); | |
248 | if (bind(fd, (struct sockaddr*) &un, un_len) | |
249 | || fchmod(fd, S_IRWXU)) { | |
250 | goto error; | |
251 | } | |
252 | } | |
253 | ||
254 | if (connect_path) { | |
255 | struct sockaddr_un un; | |
256 | socklen_t un_len; | |
257 | make_sockaddr_un(connect_path, &un, &un_len); | |
258 | if (connect(fd, (struct sockaddr*) &un, un_len) | |
259 | && errno != EINPROGRESS) { | |
260 | goto error; | |
261 | } | |
262 | } | |
263 | ||
264 | #ifdef SCM_CREDENTIALS | |
265 | if (passcred) { | |
266 | int enable = 1; | |
267 | if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable))) { | |
268 | goto error; | |
269 | } | |
270 | } | |
271 | #endif | |
272 | ||
273 | return fd; | |
274 | ||
275 | error: | |
276 | if (bind_path) { | |
277 | fatal_signal_remove_file_to_unlink(bind_path); | |
278 | } | |
279 | error = errno; | |
280 | close(fd); | |
281 | return -error; | |
282 | } | |
283 | ||
/* Given 'sun_len', the total length of a sockaddr_un, returns the number of
 * bytes that belong to its 'sun_path' member.  Returns 0 if 'sun_len' is too
 * short to reach 'sun_path' at all. */
int
get_unix_name_len(socklen_t sun_len)
{
    const size_t path_offset = offsetof(struct sockaddr_un, sun_path);

    if (sun_len < path_offset) {
        return 0;
    }
    return sun_len - path_offset;
}
291 | ||
/* Guesses a netmask for IPv4 address 'ip' from its leading bits, following
 * the historical classful scheme.  'ip' is in network byte order, and so is
 * the returned mask.
 *
 *   leading bit  0   -> 255.0.0.0      (Class A)
 *   leading bits 10  -> 255.255.0.0    (Class B)
 *   leading bits 110 -> 255.255.255.0  (Class C)
 *   anything else    -> 0.0.0.0 */
uint32_t
guess_netmask(uint32_t ip)
{
    const uint32_t host_ip = ntohl(ip);
    uint32_t host_mask;

    if ((host_ip & 0x80000000u) == 0) {
        host_mask = 0xff000000;             /* Class A */
    } else if ((host_ip & 0xc0000000u) == 0x80000000u) {
        host_mask = 0xffff0000;             /* Class B */
    } else if ((host_ip & 0xe0000000u) == 0xc0000000u) {
        host_mask = 0xffffff00;             /* Class C */
    } else {
        host_mask = 0;                      /* ??? */
    }
    return htonl(host_mask);
}
301 | ||
/* Reads exactly 'size' bytes from 'fd' into 'p_', retrying after short reads
 * and after reads interrupted by a signal (EINTR).
 *
 * '*bytes_read' is set to the number of bytes actually stored in 'p_',
 * whether or not the call succeeds.
 *
 * Returns 0 if all 'size' bytes were read, EOF if end of file was reached
 * first, otherwise a positive errno value. */
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
    uint8_t *dst = p_;
    size_t remaining = size;

    *bytes_read = 0;
    while (remaining > 0) {
        ssize_t n = read(fd, dst, remaining);

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return errno;
        }
        if (n == 0) {
            return EOF;
        }

        *bytes_read += n;
        dst += n;
        remaining -= n;
    }
    return 0;
}
322 | ||
/* Writes all 'size' bytes from 'p_' to 'fd', retrying after short writes and
 * after writes interrupted by a signal (EINTR).
 *
 * '*bytes_written' is set to the number of bytes actually written, whether
 * or not the call succeeds.
 *
 * Returns 0 if all 'size' bytes were written, EPROTO (after logging a
 * warning) if write() unexpectedly returned 0, otherwise a positive errno
 * value. */
int
write_fully(int fd, const void *p_, size_t size, size_t *bytes_written)
{
    const uint8_t *src = p_;
    size_t remaining = size;

    *bytes_written = 0;
    while (remaining > 0) {
        ssize_t n = write(fd, src, remaining);

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return errno;
        }
        if (n == 0) {
            VLOG_WARN("write returned 0");
            return EPROTO;
        }

        *bytes_written += n;
        src += n;
        remaining -= n;
    }
    return 0;
}