]>
Commit | Line | Data |
---|---|---|
0ad19a3f | 1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
9afe19d6 | 7 | * Daniel Lezcano <daniel.lezcano at free.fr> |
e51d4895 | 8 | * Dwight Engen <dwight.engen@oracle.com> |
0ad19a3f | 9 | * |
10 | * This library is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * This library is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with this library; if not, write to the Free Software | |
250b1eec | 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0ad19a3f | 23 | */ |
e51d4895 | 24 | |
0ad19a3f | 25 | #include <errno.h> |
0ad19a3f | 26 | #include <fcntl.h> |
b45c7011 | 27 | #include <inttypes.h> |
292b1d17 CB |
28 | #include <poll.h> |
29 | #include <stddef.h> | |
b45c7011 | 30 | #include <stdint.h> |
292b1d17 CB |
31 | #include <stdio.h> |
32 | #include <stdlib.h> | |
33 | #include <string.h> | |
34 | #include <unistd.h> | |
35 | #include <net/if.h> | |
36 | #include <netinet/in.h> | |
0ad19a3f | 37 | #include <sys/param.h> |
0ad19a3f | 38 | #include <sys/socket.h> |
292b1d17 CB |
39 | #include <sys/stat.h> |
40 | #include <sys/types.h> | |
e51d4895 | 41 | #include <sys/wait.h> |
b113348e | 42 | |
292b1d17 | 43 | #include "config.h" |
31c53c2e | 44 | #include "af_unix.h" |
292b1d17 | 45 | #include "error.h" |
f2363e38 ÇO |
46 | #include "log.h" |
47 | #include "lxclock.h" | |
f2363e38 | 48 | #include "monitor.h" |
292b1d17 | 49 | #include "state.h" |
f2363e38 | 50 | #include "utils.h" |
36eb9bde CLG |
51 | |
52 | lxc_log_define(lxc_monitor, lxc); | |
0ad19a3f | 53 | |
e51d4895 | 54 | /* routines used by monitor publishers (containers) */ |
9e60f51d DE |
/* Build the path of the monitor fifo for @lxcpath into @fifo_path.
 *
 * The resulting path has the form "<rundir>/lxc/<lxcpath>/monitor-fifo",
 * where <rundir> is whatever get_rundir() returns. When @do_mkdirp is
 * non-zero, the directory "<rundir>/lxc/<lxcpath>" is created with mode 0755
 * before the final path is written.
 *
 * Returns 0 on success, -1 if get_rundir() fails or a path does not fit into
 * @fifo_path_sz bytes, and the negative return value of mkdir_p() if the
 * directory could not be created.
 */
int lxc_monitor_fifo_name(const char *lxcpath, char *fifo_path, size_t fifo_path_sz,
			  int do_mkdirp)
{
	int rc = -1;
	int n;
	char *rundir = get_rundir();

	if (!rundir)
		return -1;

	if (do_mkdirp) {
		/* Directory part only; reuses the caller's buffer as scratch. */
		n = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s", rundir, lxcpath);
		if (n < 0 || (size_t)n >= fifo_path_sz) {
			ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo.", rundir, lxcpath);
			goto out;
		}

		n = mkdir_p(fifo_path, 0755);
		if (n < 0) {
			ERROR("Unable to create monitor fifo directory %s.", fifo_path);
			rc = n;
			goto out;
		}
	}

	n = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s/monitor-fifo", rundir, lxcpath);
	if (n < 0 || (size_t)n >= fifo_path_sz) {
		ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo.", rundir, lxcpath);
		goto out;
	}

	rc = 0;

out:
	/* rundir is owned by us on every path past the NULL check. */
	free(rundir);
	return rc;
}
88 | ||
e51d4895 | 89 | static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath) |
0ad19a3f | 90 | { |
e51d4895 DE |
91 | int fd,ret; |
92 | char fifo_path[PATH_MAX]; | |
93 | ||
94 | BUILD_BUG_ON(sizeof(*msg) > PIPE_BUF); /* write not guaranteed atomic */ | |
9e60f51d DE |
95 | |
96 | ret = lxc_monitor_fifo_name(lxcpath, fifo_path, sizeof(fifo_path), 0); | |
97 | if (ret < 0) | |
9123e471 | 98 | return; |
80f41298 | 99 | |
292b1d17 CB |
100 | /* Open the fifo nonblock in case the monitor is dead, we don't want the |
101 | * open to wait for a reader since it may never come. | |
8bf1e61e | 102 | */ |
292b1d17 | 103 | fd = open(fifo_path, O_WRONLY | O_NONBLOCK); |
e51d4895 | 104 | if (fd < 0) { |
292b1d17 CB |
105 | /* It is normal for this open() to fail with ENXIO when there is |
106 | * no monitor running, so we don't log it. | |
e51d4895 | 107 | */ |
292b1d17 CB |
108 | if (errno == ENXIO) |
109 | return; | |
110 | ||
111 | WARN("Failed to open fifo to send message: %s.", strerror(errno)); | |
31c53c2e | 112 | return; |
e51d4895 | 113 | } |
0ad19a3f | 114 | |
92ffb6d8 DE |
115 | if (fcntl(fd, F_SETFL, O_WRONLY) < 0) { |
116 | close(fd); | |
8bf1e61e | 117 | return; |
92ffb6d8 | 118 | } |
8bf1e61e | 119 | |
e51d4895 DE |
120 | ret = write(fd, msg, sizeof(*msg)); |
121 | if (ret != sizeof(*msg)) { | |
e8b9ac8f | 122 | close(fd); |
292b1d17 | 123 | SYSERROR("Failed to write to monitor fifo \"%s\".", fifo_path); |
e51d4895 DE |
124 | return; |
125 | } | |
0ad19a3f | 126 | |
127 | close(fd); | |
128 | } | |
129 | ||
292b1d17 CB |
130 | void lxc_monitor_send_state(const char *name, lxc_state_t state, |
131 | const char *lxcpath) | |
eae6543d | 132 | { |
292b1d17 | 133 | struct lxc_msg msg = {.type = lxc_msg_state, .value = state}; |
80f41298 | 134 | strncpy(msg.name, name, sizeof(msg.name)); |
f3bc28bd | 135 | msg.name[sizeof(msg.name) - 1] = 0; |
eae6543d | 136 | |
e51d4895 | 137 | lxc_monitor_fifo_send(&msg, lxcpath); |
0ad19a3f | 138 | } |
139 | ||
292b1d17 CB |
140 | void lxc_monitor_send_exit_code(const char *name, int exit_code, |
141 | const char *lxcpath) | |
1787abca | 142 | { |
292b1d17 | 143 | struct lxc_msg msg = {.type = lxc_msg_exit_code, .value = exit_code}; |
1787abca JTLB |
144 | strncpy(msg.name, name, sizeof(msg.name)); |
145 | msg.name[sizeof(msg.name) - 1] = 0; | |
146 | ||
147 | lxc_monitor_fifo_send(&msg, lxcpath); | |
148 | } | |
149 | ||
e51d4895 DE |
150 | /* routines used by monitor subscribers (lxc-monitor) */ |
/* Close a monitor file descriptor; returns whatever close() returns. */
int lxc_monitor_close(int fd)
{
	int rc;

	rc = close(fd);
	return rc;
}
155 | ||
fcaef9c7 CB |
156 | /* Enforces \0-termination for the abstract unix socket. This is not required |
157 | * but allows us to print it out. | |
158 | * | |
159 | * Older version of liblxc only allowed for 105 bytes to be used for the | |
160 | * abstract unix domain socket name because the code for our abstract unix | |
161 | * socket handling performed invalid checks. Since we \0-terminate we could now | |
162 | * have a maximum of 106 chars. But to not break backwards compatibility we keep | |
163 | * the limit at 105. | |
164 | */ | |
e51d4895 DE |
165 | int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) { |
166 | size_t len; | |
167 | int ret; | |
073135ba | 168 | char *path; |
b45c7011 | 169 | uint64_t hash; |
e51d4895 | 170 | |
b45c7011 DE |
171 | /* addr.sun_path is only 108 bytes, so we hash the full name and |
172 | * then append as much of the name as we can fit. | |
9123e471 | 173 | */ |
e51d4895 DE |
174 | memset(addr, 0, sizeof(*addr)); |
175 | addr->sun_family = AF_UNIX; | |
292b1d17 | 176 | |
fcaef9c7 | 177 | /* strlen("lxc/") + strlen("/monitor-sock") + 1 = 18 */ |
073135ba SH |
178 | len = strlen(lxcpath) + 18; |
179 | path = alloca(len); | |
180 | ret = snprintf(path, len, "lxc/%s/monitor-sock", lxcpath); | |
292b1d17 | 181 | if (ret < 0 || (size_t)ret >= len) { |
fcaef9c7 | 182 | ERROR("failed to create name for monitor socket"); |
9e60f51d DE |
183 | return -1; |
184 | } | |
9e60f51d | 185 | |
fcaef9c7 CB |
186 | /* Note: snprintf() will \0-terminate addr->sun_path on the 106th byte |
187 | * and so the abstract socket name has 105 "meaningful" characters. This | |
188 | * is absolutely intentional. For further info read the comment for this | |
189 | * function above! | |
190 | */ | |
073135ba | 191 | len = sizeof(addr->sun_path) - 1; |
b45c7011 | 192 | hash = fnv_64a_buf(path, ret, FNV1A_64_INIT); |
fcaef9c7 CB |
193 | ret = snprintf(addr->sun_path, len, "@lxc/%016" PRIx64 "/%s", hash, lxcpath); |
194 | if (ret < 0) { | |
195 | ERROR("failed to create hashed name for monitor socket"); | |
9123e471 | 196 | return -1; |
fcaef9c7 | 197 | } |
292b1d17 | 198 | |
fcaef9c7 CB |
199 | /* replace @ with \0 */ |
200 | addr->sun_path[0] = '\0'; | |
201 | INFO("using monitor socket name \"%s\" (length of socket name %zu must be <= %zu)", &addr->sun_path[1], strlen(&addr->sun_path[1]), sizeof(addr->sun_path) - 3); | |
292b1d17 | 202 | |
e51d4895 DE |
203 | return 0; |
204 | } | |
0ad19a3f | 205 | |
e51d4895 DE |
/* Connect to the monitor socket belonging to @lxcpath.
 *
 * The connection attempt is repeated with a short backoff for as long as it
 * fails with ECONNREFUSED; any other failure ends the attempts immediately.
 *
 * Returns the connected file descriptor on success. On failure returns -1 if
 * the socket name could not be built, otherwise a negative errno value
 * (-ENAMETOOLONG for an over-long name, or the errno of the failed connect).
 */
int lxc_monitor_open(const char *lxcpath)
{
	struct sockaddr_un addr;
	int fd = -1;
	int saved_errno = 0;
	size_t retry;
	size_t len;
	int backoff_ms[] = {10, 50, 100};

	if (lxc_monitor_sock_name(lxcpath, &addr) < 0)
		return -1;

	/* No socket() call here: the fd we hand back is the one returned by
	 * lxc_abstract_unix_connect(), so a socket created up front would only
	 * be overwritten and leaked.
	 */
	len = strlen(&addr.sun_path[1]);
	DEBUG("opening monitor socket %s with len %zu", &addr.sun_path[1], len);
	if (len >= sizeof(addr.sun_path) - 1) {
		errno = ENAMETOOLONG;
		ERROR("name of monitor socket too long (%zu bytes): %s", len, strerror(ENAMETOOLONG));
		return -ENAMETOOLONG;
	}

	for (retry = 0; retry < sizeof(backoff_ms) / sizeof(backoff_ms[0]); retry++) {
		fd = lxc_abstract_unix_connect(addr.sun_path);
		if (fd >= 0)
			break;

		/* Remember why we failed before logging/sleeping can change errno. */
		saved_errno = errno;
		if (saved_errno != ECONNREFUSED)
			break;

		ERROR("Failed to connect to monitor socket. Retrying in %d ms: %s", backoff_ms[retry], strerror(saved_errno));
		usleep(backoff_ms[retry] * 1000);
	}

	if (fd < 0) {
		ERROR("Failed to connect to monitor socket: %s.", strerror(saved_errno));
		/* Never return 0 here: it would look like a valid descriptor. */
		return saved_errno > 0 ? -saved_errno : -1;
	}

	return fd;
}
254 | ||
2366b8a7 | 255 | int lxc_monitor_read_fdset(struct pollfd *fds, nfds_t nfds, struct lxc_msg *msg, |
8d06bd13 | 256 | int timeout) |
0ad19a3f | 257 | { |
2366b8a7 SH |
258 | long i; |
259 | int ret; | |
0ad19a3f | 260 | |
2366b8a7 | 261 | ret = poll(fds, nfds, timeout * 1000); |
8d06bd13 | 262 | if (ret == -1) |
75b1e198 | 263 | return -1; |
8d06bd13 DE |
264 | else if (ret == 0) |
265 | return -2; // timed out | |
266 | ||
292b1d17 CB |
267 | /* Only read from the first ready fd, the others will remain ready for |
268 | * when this routine is called again. | |
8d06bd13 DE |
269 | */ |
270 | for (i = 0; i < nfds; i++) { | |
2366b8a7 SH |
271 | if (fds[i].revents != 0) { |
272 | fds[i].revents = 0; | |
273 | ret = recv(fds[i].fd, msg, sizeof(*msg), 0); | |
8d06bd13 | 274 | if (ret <= 0) { |
292b1d17 | 275 | SYSERROR("Failed to receive message. Did monitord die?: %s.", strerror(errno)); |
8d06bd13 DE |
276 | return -1; |
277 | } | |
278 | return ret; | |
279 | } | |
0ad19a3f | 280 | } |
292b1d17 CB |
281 | |
282 | SYSERROR("No ready fd found."); | |
283 | ||
8d06bd13 DE |
284 | return -1; |
285 | } | |
286 | ||
/* Read one monitor message from @fd into @msg, waiting at most @timeout.
 *
 * Thin single-descriptor wrapper around lxc_monitor_read_fdset(); @timeout and
 * the return value are passed through unchanged.
 */
int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN | POLLPRI,
		.revents = 0,
	};

	return lxc_monitor_read_fdset(&pfd, 1, msg, timeout);
}
297 | ||
72d0e1cb SG |
/* Read one monitor message from @fd into @msg.
 *
 * Equivalent to lxc_monitor_read_timeout() with a timeout of -1.
 */
int lxc_monitor_read(int fd, struct lxc_msg *msg)
{
	const int no_timeout = -1;

	return lxc_monitor_read_timeout(fd, msg, no_timeout);
}
302 | ||
45e854dc | 303 | #define LXC_MONITORD_PATH LIBEXECDIR "/lxc/lxc-monitord" |
e51d4895 | 304 | |
292b1d17 CB |
305 | /* Used to spawn a monitord either on startup of a daemon container, or when |
306 | * lxc-monitor starts. | |
e51d4895 DE |
307 | */ |
308 | int lxc_monitord_spawn(const char *lxcpath) | |
0ad19a3f | 309 | { |
487b14b6 | 310 | int ret; |
e51d4895 | 311 | int pipefd[2]; |
eab15c1e | 312 | char pipefd_str[LXC_NUMSTRLEN64]; |
487b14b6 | 313 | pid_t pid1, pid2; |
e51d4895 | 314 | |
292b1d17 CB |
315 | char *const args[] = { |
316 | LXC_MONITORD_PATH, | |
317 | (char *)lxcpath, | |
318 | pipefd_str, | |
319 | NULL, | |
e51d4895 DE |
320 | }; |
321 | ||
322 | /* double fork to avoid zombies when monitord exits */ | |
323 | pid1 = fork(); | |
324 | if (pid1 < 0) { | |
292b1d17 | 325 | SYSERROR("Failed to fork()."); |
e51d4895 DE |
326 | return -1; |
327 | } | |
328 | ||
329 | if (pid1) { | |
292b1d17 | 330 | DEBUG("Going to wait for pid %d.", pid1); |
f2bbe86d DE |
331 | if (waitpid(pid1, NULL, 0) != pid1) |
332 | return -1; | |
487b14b6 | 333 | DEBUG("Finished waiting on pid %d.", pid1); |
e51d4895 DE |
334 | return 0; |
335 | } | |
336 | ||
337 | if (pipe(pipefd) < 0) { | |
292b1d17 | 338 | SYSERROR("Failed to create pipe."); |
e51d4895 DE |
339 | exit(EXIT_FAILURE); |
340 | } | |
341 | ||
342 | pid2 = fork(); | |
343 | if (pid2 < 0) { | |
292b1d17 | 344 | SYSERROR("Failed to fork()."); |
e51d4895 DE |
345 | exit(EXIT_FAILURE); |
346 | } | |
292b1d17 | 347 | |
e51d4895 | 348 | if (pid2) { |
5cc0f22d | 349 | DEBUG("Trying to sync with child process."); |
e51d4895 | 350 | char c; |
292b1d17 | 351 | /* Wait for daemon to create socket. */ |
e51d4895 | 352 | close(pipefd[1]); |
292b1d17 CB |
353 | |
354 | /* Sync with child, we're ignoring the return from read | |
e51d4895 DE |
355 | * because regardless if it works or not, either way we've |
356 | * synced with the child process. the if-empty-statement | |
357 | * construct is to quiet the warn-unused-result warning. | |
358 | */ | |
8f47bc3f SG |
359 | if (read(pipefd[0], &c, 1)) |
360 | ; | |
292b1d17 | 361 | |
e51d4895 | 362 | close(pipefd[0]); |
292b1d17 | 363 | |
073000e2 | 364 | DEBUG("Successfully synced with child process."); |
e51d4895 DE |
365 | exit(EXIT_SUCCESS); |
366 | } | |
367 | ||
e51d4895 | 368 | if (setsid() < 0) { |
292b1d17 | 369 | SYSERROR("Failed to setsid()."); |
e51d4895 DE |
370 | exit(EXIT_FAILURE); |
371 | } | |
292b1d17 | 372 | |
d2cf4c37 | 373 | lxc_check_inherited(NULL, true, pipefd[1]); |
aec1ea62 CB |
374 | if (null_stdfds() < 0) { |
375 | SYSERROR("Failed to dup2() standard file descriptors to /dev/null."); | |
69aeabac | 376 | exit(EXIT_FAILURE); |
aec1ea62 | 377 | } |
292b1d17 | 378 | |
e51d4895 | 379 | close(pipefd[0]); |
292b1d17 | 380 | |
eab15c1e CB |
381 | ret = snprintf(pipefd_str, LXC_NUMSTRLEN64, "%d", pipefd[1]); |
382 | if (ret < 0 || ret >= LXC_NUMSTRLEN64) { | |
aec1ea62 | 383 | ERROR("Failed to create pid argument to pass to monitord."); |
487b14b6 | 384 | exit(EXIT_FAILURE); |
aec1ea62 | 385 | } |
487b14b6 CB |
386 | |
387 | DEBUG("Using pipe file descriptor %d for monitord.", pipefd[1]); | |
292b1d17 | 388 | |
e51d4895 | 389 | execvp(args[0], args); |
00cccc8b | 390 | SYSERROR("failed to exec lxc-monitord"); |
292b1d17 | 391 | |
e51d4895 | 392 | exit(EXIT_FAILURE); |
0ad19a3f | 393 | } |