]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Daniel Lezcano <daniel.lezcano at free.fr> | |
8 | * Dwight Engen <dwight.engen@oracle.com> | |
9 | * | |
10 | * This library is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * This library is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with this library; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | */ | |
24 | ||
25 | #ifndef _GNU_SOURCE | |
26 | #define _GNU_SOURCE 1 | |
27 | #endif | |
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <net/if.h>
#include <netinet/in.h>
#include <poll.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
45 | ||
46 | #include "af_unix.h" | |
47 | #include "config.h" | |
48 | #include "error.h" | |
49 | #include "log.h" | |
50 | #include "lxclock.h" | |
51 | #include "macro.h" | |
52 | #include "memory_utils.h" | |
53 | #include "monitor.h" | |
54 | #include "state.h" | |
55 | #include "utils.h" | |
56 | ||
57 | #ifndef HAVE_STRLCPY | |
58 | #include "include/strlcpy.h" | |
59 | #endif | |
60 | ||
61 | lxc_log_define(monitor, lxc); | |
62 | ||
63 | /* routines used by monitor publishers (containers) */ | |
/* Compute the path of the monitor fifo belonging to @lxcpath.
 *
 * The path written to @fifo_path has the form
 * "<rundir>/lxc/<lxcpath>/monitor-fifo". If @do_mkdirp is non-zero the
 * directory "<rundir>/lxc/<lxcpath>" is created (mode 0755) beforehand.
 *
 * Returns 0 on success, -1 if the run directory could not be obtained or if
 * a path does not fit into @fifo_path_sz bytes, and the negative return value
 * of mkdir_p() if the directory could not be created.
 */
int lxc_monitor_fifo_name(const char *lxcpath, char *fifo_path, size_t fifo_path_sz,
			  int do_mkdirp)
{
	int status = -1;
	int written;
	char *rundir = get_rundir();

	if (rundir == NULL)
		return -1;

	if (do_mkdirp != 0) {
		/* @fifo_path temporarily holds the directory name. */
		written = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s", rundir, lxcpath);
		if (written < 0 || (size_t)written >= fifo_path_sz) {
			ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo", rundir, lxcpath);
			goto out;
		}

		status = mkdir_p(fifo_path, 0755);
		if (status < 0) {
			ERROR("Unable to create monitor fifo directory %s", fifo_path);
			goto out;
		}
	}

	written = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s/monitor-fifo", rundir, lxcpath);
	if (written < 0 || (size_t)written >= fifo_path_sz) {
		ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo", rundir, lxcpath);
		status = -1;
		goto out;
	}

	status = 0;

out:
	/* get_rundir() hands us ownership of the string. */
	free(rundir);
	return status;
}
97 | ||
98 | static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath) | |
99 | { | |
100 | int fd,ret; | |
101 | char fifo_path[PATH_MAX]; | |
102 | ||
103 | BUILD_BUG_ON(sizeof(*msg) > PIPE_BUF); /* write not guaranteed atomic */ | |
104 | ||
105 | ret = lxc_monitor_fifo_name(lxcpath, fifo_path, sizeof(fifo_path), 0); | |
106 | if (ret < 0) | |
107 | return; | |
108 | ||
109 | /* Open the fifo nonblock in case the monitor is dead, we don't want the | |
110 | * open to wait for a reader since it may never come. | |
111 | */ | |
112 | fd = open(fifo_path, O_WRONLY | O_NONBLOCK); | |
113 | if (fd < 0) { | |
114 | /* It is normal for this open() to fail with ENXIO when there is | |
115 | * no monitor running, so we don't log it. | |
116 | */ | |
117 | if (errno == ENXIO || errno == ENOENT) | |
118 | return; | |
119 | ||
120 | SYSWARN("Failed to open fifo to send message"); | |
121 | return; | |
122 | } | |
123 | ||
124 | if (fcntl(fd, F_SETFL, O_WRONLY) < 0) { | |
125 | close(fd); | |
126 | return; | |
127 | } | |
128 | ||
129 | ret = lxc_write_nointr(fd, msg, sizeof(*msg)); | |
130 | if (ret != sizeof(*msg)) { | |
131 | close(fd); | |
132 | SYSERROR("Failed to write to monitor fifo \"%s\"", fifo_path); | |
133 | return; | |
134 | } | |
135 | ||
136 | close(fd); | |
137 | } | |
138 | ||
139 | void lxc_monitor_send_state(const char *name, lxc_state_t state, | |
140 | const char *lxcpath) | |
141 | { | |
142 | struct lxc_msg msg = {.type = lxc_msg_state, .value = state}; | |
143 | ||
144 | (void)strlcpy(msg.name, name, sizeof(msg.name)); | |
145 | lxc_monitor_fifo_send(&msg, lxcpath); | |
146 | } | |
147 | ||
148 | void lxc_monitor_send_exit_code(const char *name, int exit_code, | |
149 | const char *lxcpath) | |
150 | { | |
151 | struct lxc_msg msg = {.type = lxc_msg_exit_code, .value = exit_code}; | |
152 | ||
153 | (void)strlcpy(msg.name, name, sizeof(msg.name)); | |
154 | lxc_monitor_fifo_send(&msg, lxcpath); | |
155 | } | |
156 | ||
157 | /* routines used by monitor subscribers (lxc-monitor) */ | |
/* Close a monitor file descriptor.
 *
 * Returns whatever close() returns for @fd.
 */
int lxc_monitor_close(int fd)
{
	int rc = close(fd);

	return rc;
}
162 | ||
163 | /* Enforces \0-termination for the abstract unix socket. This is not required | |
164 | * but allows us to print it out. | |
165 | * | |
166 | * Older version of liblxc only allowed for 105 bytes to be used for the | |
167 | * abstract unix domain socket name because the code for our abstract unix | |
168 | * socket handling performed invalid checks. Since we \0-terminate we could now | |
169 | * have a maximum of 106 chars. But to not break backwards compatibility we keep | |
170 | * the limit at 105. | |
171 | */ | |
172 | int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) | |
173 | { | |
174 | __do_free char *path = NULL; | |
175 | size_t len; | |
176 | int ret; | |
177 | uint64_t hash; | |
178 | ||
179 | /* addr.sun_path is only 108 bytes, so we hash the full name and | |
180 | * then append as much of the name as we can fit. | |
181 | */ | |
182 | memset(addr, 0, sizeof(*addr)); | |
183 | addr->sun_family = AF_UNIX; | |
184 | ||
185 | /* strlen("lxc/") + strlen("/monitor-sock") + 1 = 18 */ | |
186 | len = strlen(lxcpath) + 18; | |
187 | path = must_realloc(NULL, len); | |
188 | ret = snprintf(path, len, "lxc/%s/monitor-sock", lxcpath); | |
189 | if (ret < 0 || (size_t)ret >= len) { | |
190 | ERROR("Failed to create name for monitor socket"); | |
191 | return -1; | |
192 | } | |
193 | ||
194 | /* Note: snprintf() will \0-terminate addr->sun_path on the 106th byte | |
195 | * and so the abstract socket name has 105 "meaningful" characters. This | |
196 | * is absolutely intentional. For further info read the comment for this | |
197 | * function above! | |
198 | */ | |
199 | len = sizeof(addr->sun_path) - 1; | |
200 | hash = fnv_64a_buf(path, ret, FNV1A_64_INIT); | |
201 | ret = snprintf(addr->sun_path, len, "@lxc/%016" PRIx64 "/%s", hash, lxcpath); | |
202 | if (ret < 0) { | |
203 | ERROR("Failed to create hashed name for monitor socket"); | |
204 | goto on_error; | |
205 | } else if ((size_t)ret >= len) { | |
206 | errno = ENAMETOOLONG; | |
207 | SYSERROR("The name of monitor socket too long (%d bytes)", ret); | |
208 | goto on_error; | |
209 | } | |
210 | ||
211 | /* replace @ with \0 */ | |
212 | addr->sun_path[0] = '\0'; | |
213 | INFO("Using monitor socket name \"%s\" (length of socket name %zu must be <= %zu)", &addr->sun_path[1], strlen(&addr->sun_path[1]), sizeof(addr->sun_path) - 3); | |
214 | ||
215 | return 0; | |
216 | ||
217 | on_error: | |
218 | return -1; | |
219 | } | |
220 | ||
/* Connect to the monitor socket of @lxcpath.
 *
 * A connection that is refused (ECONNREFUSED) is retried after a short,
 * increasing delay. Any other failure ends the attempts immediately.
 *
 * Returns the connected file descriptor on success and -1 on failure.
 */
int lxc_monitor_open(const char *lxcpath)
{
	int delays_ms[] = {10, 50, 100};
	const size_t max_attempts = sizeof(delays_ms) / sizeof(delays_ms[0]);
	struct sockaddr_un addr;
	size_t attempt = 0;
	int sock;

	if (lxc_monitor_sock_name(lxcpath, &addr) < 0)
		return -1;

	DEBUG("Opening monitor socket %s with len %zu", &addr.sun_path[1], strlen(&addr.sun_path[1]));

	do {
		sock = lxc_abstract_unix_connect(addr.sun_path);

		/* Stop on success and on every error other than a refused
		 * connection.
		 */
		if (sock != -1 || errno != ECONNREFUSED)
			break;

		SYSERROR("Failed to connect to monitor socket. Retrying in %d ms", delays_ms[attempt]);
		usleep(delays_ms[attempt] * 1000);
	} while (++attempt < max_attempts);

	if (sock < 0) {
		SYSERROR("Failed to connect to monitor socket");
		return -1;
	}

	return sock;
}
249 | ||
250 | int lxc_monitor_read_fdset(struct pollfd *fds, nfds_t nfds, struct lxc_msg *msg, | |
251 | int timeout) | |
252 | { | |
253 | long i; | |
254 | int ret; | |
255 | ||
256 | ret = poll(fds, nfds, timeout * 1000); | |
257 | if (ret == -1) | |
258 | return -1; | |
259 | else if (ret == 0) | |
260 | return -2; /* timed out */ | |
261 | ||
262 | /* Only read from the first ready fd, the others will remain ready for | |
263 | * when this routine is called again. | |
264 | */ | |
265 | for (i = 0; i < nfds; i++) { | |
266 | if (fds[i].revents != 0) { | |
267 | fds[i].revents = 0; | |
268 | ret = recv(fds[i].fd, msg, sizeof(*msg), 0); | |
269 | if (ret <= 0) { | |
270 | SYSERROR("Failed to receive message. Did monitord die?"); | |
271 | return -1; | |
272 | } | |
273 | return ret; | |
274 | } | |
275 | } | |
276 | ||
277 | SYSERROR("No ready fd found"); | |
278 | ||
279 | return -1; | |
280 | } | |
281 | ||
/* Read one monitor message from @fd into @msg, waiting at most @timeout
 * seconds for data to arrive.
 *
 * Thin wrapper around lxc_monitor_read_fdset() for a single descriptor. The
 * return value is passed through unchanged.
 */
int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN | POLLPRI,
		.revents = 0,
	};

	return lxc_monitor_read_fdset(&pfd, 1, msg, timeout);
}
292 | ||
/* Read one monitor message from @fd into @msg without a time limit.
 *
 * Equivalent to lxc_monitor_read_timeout() with a timeout of -1.
 */
int lxc_monitor_read(int fd, struct lxc_msg *msg)
{
	const int no_timeout = -1;

	return lxc_monitor_read_timeout(fd, msg, no_timeout);
}
297 | ||
298 | #define LXC_MONITORD_PATH LIBEXECDIR "/lxc/lxc-monitord" | |
299 | ||
300 | /* Used to spawn a monitord either on startup of a daemon container, or when | |
301 | * lxc-monitor starts. | |
302 | */ | |
303 | int lxc_monitord_spawn(const char *lxcpath) | |
304 | { | |
305 | int ret; | |
306 | int pipefd[2]; | |
307 | char pipefd_str[INTTYPE_TO_STRLEN(int)]; | |
308 | pid_t pid1, pid2; | |
309 | ||
310 | char *const args[] = { | |
311 | LXC_MONITORD_PATH, | |
312 | (char *)lxcpath, | |
313 | pipefd_str, | |
314 | NULL, | |
315 | }; | |
316 | ||
317 | /* double fork to avoid zombies when monitord exits */ | |
318 | pid1 = fork(); | |
319 | if (pid1 < 0) { | |
320 | SYSERROR("Failed to fork()"); | |
321 | return -1; | |
322 | } | |
323 | ||
324 | if (pid1) { | |
325 | DEBUG("Going to wait for pid %d", pid1); | |
326 | ||
327 | if (waitpid(pid1, NULL, 0) != pid1) | |
328 | return -1; | |
329 | ||
330 | DEBUG("Finished waiting on pid %d", pid1); | |
331 | return 0; | |
332 | } | |
333 | ||
334 | if (pipe(pipefd) < 0) { | |
335 | SYSERROR("Failed to create pipe"); | |
336 | _exit(EXIT_FAILURE); | |
337 | } | |
338 | ||
339 | pid2 = fork(); | |
340 | if (pid2 < 0) { | |
341 | SYSERROR("Failed to fork()"); | |
342 | _exit(EXIT_FAILURE); | |
343 | } | |
344 | ||
345 | if (pid2) { | |
346 | DEBUG("Trying to sync with child process"); | |
347 | char c; | |
348 | /* Wait for daemon to create socket. */ | |
349 | close(pipefd[1]); | |
350 | ||
351 | /* Sync with child, we're ignoring the return from read | |
352 | * because regardless if it works or not, either way we've | |
353 | * synced with the child process. the if-empty-statement | |
354 | * construct is to quiet the warn-unused-result warning. | |
355 | */ | |
356 | if (lxc_read_nointr(pipefd[0], &c, 1)) | |
357 | ; | |
358 | ||
359 | close(pipefd[0]); | |
360 | ||
361 | DEBUG("Successfully synced with child process"); | |
362 | _exit(EXIT_SUCCESS); | |
363 | } | |
364 | ||
365 | if (setsid() < 0) { | |
366 | SYSERROR("Failed to setsid()"); | |
367 | _exit(EXIT_FAILURE); | |
368 | } | |
369 | ||
370 | lxc_check_inherited(NULL, true, &pipefd[1], 1); | |
371 | if (null_stdfds() < 0) { | |
372 | SYSERROR("Failed to dup2() standard file descriptors to /dev/null"); | |
373 | _exit(EXIT_FAILURE); | |
374 | } | |
375 | ||
376 | close(pipefd[0]); | |
377 | ||
378 | ret = snprintf(pipefd_str, sizeof(pipefd_str), "%d", pipefd[1]); | |
379 | if (ret < 0 || ret >= sizeof(pipefd_str)) { | |
380 | ERROR("Failed to create pid argument to pass to monitord"); | |
381 | _exit(EXIT_FAILURE); | |
382 | } | |
383 | ||
384 | DEBUG("Using pipe file descriptor %d for monitord", pipefd[1]); | |
385 | ||
386 | execvp(args[0], args); | |
387 | SYSERROR("Failed to exec lxc-monitord"); | |
388 | ||
389 | _exit(EXIT_FAILURE); | |
390 | } |