]>
Commit | Line | Data |
---|---|---|
0ad19a3f | 1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
9afe19d6 | 7 | * Daniel Lezcano <daniel.lezcano at free.fr> |
e51d4895 | 8 | * Dwight Engen <dwight.engen@oracle.com> |
0ad19a3f | 9 | * |
10 | * This library is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License as published by the Free Software Foundation; either | |
13 | * version 2.1 of the License, or (at your option) any later version. | |
14 | * | |
15 | * This library is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * Lesser General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU Lesser General Public | |
21 | * License along with this library; if not, write to the Free Software | |
250b1eec | 22 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0ad19a3f | 23 | */ |
e51d4895 | 24 | |
0ad19a3f | 25 | #include <stdio.h> |
26 | #include <errno.h> | |
27 | #include <unistd.h> | |
28 | #include <string.h> | |
29 | #include <stdlib.h> | |
aae93dd3 | 30 | #include <stddef.h> |
0ad19a3f | 31 | #include <fcntl.h> |
b45c7011 DE |
32 | #include <inttypes.h> |
33 | #include <stdint.h> | |
0ad19a3f | 34 | #include <sys/types.h> |
35 | #include <sys/stat.h> | |
36 | #include <sys/param.h> | |
0ad19a3f | 37 | #include <sys/socket.h> |
e51d4895 | 38 | #include <sys/wait.h> |
0ad19a3f | 39 | #include <netinet/in.h> |
40 | #include <net/if.h> | |
b113348e | 41 | |
e2bcd7db | 42 | #include "error.h" |
31c53c2e | 43 | #include "af_unix.h" |
00b3c2e2 | 44 | |
36eb9bde | 45 | #include <lxc/log.h> |
9e60f51d | 46 | #include <lxc/lxclock.h> |
00b3c2e2 CLG |
47 | #include <lxc/state.h> |
48 | #include <lxc/monitor.h> | |
e51d4895 | 49 | #include <lxc/utils.h> |
36eb9bde CLG |
50 | |
51 | lxc_log_define(lxc_monitor, lxc); | |
0ad19a3f | 52 | |
e51d4895 | 53 | /* routines used by monitor publishers (containers) */ |
9e60f51d DE |
/*
 * Build the path of the monitor fifo belonging to lxcpath.
 *
 * The result written to fifo_path has the form
 * "<rundir>/lxc/<lxcpath>/monitor-fifo", where <rundir> is the value
 * returned by get_rundir().
 *
 * lxcpath:      container path the fifo belongs to
 * fifo_path:    output buffer
 * fifo_path_sz: size of fifo_path in bytes
 * do_mkdirp:    when non-zero, "<rundir>/lxc/<lxcpath>" is first passed to
 *               mkdir_p() with mode 0755; fifo_path is used as scratch
 *               space for that directory name
 *
 * Returns 0 on success, -1 when a path does not fit into fifo_path, or the
 * negative value returned by mkdir_p() when it fails.
 */
int lxc_monitor_fifo_name(const char *lxcpath, char *fifo_path, size_t fifo_path_sz,
			  int do_mkdirp)
{
	const char *rundir = get_rundir();
	int n;

	if (do_mkdirp) {
		int rc;

		n = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s", rundir, lxcpath);
		if (n < 0 || (size_t)n >= fifo_path_sz) {
			ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo", rundir, lxcpath);
			return -1;
		}

		/* directory creation is bracketed by the process lock */
		process_lock();
		rc = mkdir_p(fifo_path, 0755);
		process_unlock();
		if (rc < 0) {
			ERROR("unable to create monitor fifo dir %s", fifo_path);
			return rc;
		}
	}

	n = snprintf(fifo_path, fifo_path_sz, "%s/lxc/%s/monitor-fifo", rundir, lxcpath);
	if (n < 0 || (size_t)n >= fifo_path_sz) {
		ERROR("rundir/lxcpath (%s/%s) too long for monitor fifo", rundir, lxcpath);
		return -1;
	}

	return 0;
}
82 | ||
e51d4895 | 83 | static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath) |
0ad19a3f | 84 | { |
e51d4895 DE |
85 | int fd,ret; |
86 | char fifo_path[PATH_MAX]; | |
87 | ||
88 | BUILD_BUG_ON(sizeof(*msg) > PIPE_BUF); /* write not guaranteed atomic */ | |
9e60f51d DE |
89 | |
90 | ret = lxc_monitor_fifo_name(lxcpath, fifo_path, sizeof(fifo_path), 0); | |
91 | if (ret < 0) | |
9123e471 | 92 | return; |
80f41298 | 93 | |
025ed0f3 | 94 | process_lock(); |
e51d4895 | 95 | fd = open(fifo_path, O_WRONLY); |
025ed0f3 | 96 | process_unlock(); |
e51d4895 DE |
97 | if (fd < 0) { |
98 | /* it is normal for this open to fail when there is no monitor | |
99 | * running, so we don't log it | |
100 | */ | |
31c53c2e | 101 | return; |
e51d4895 | 102 | } |
0ad19a3f | 103 | |
e51d4895 DE |
104 | ret = write(fd, msg, sizeof(*msg)); |
105 | if (ret != sizeof(*msg)) { | |
025ed0f3 | 106 | process_lock(); |
e8b9ac8f | 107 | close(fd); |
025ed0f3 | 108 | process_unlock(); |
e51d4895 DE |
109 | SYSERROR("failed to write monitor fifo %s", fifo_path); |
110 | return; | |
111 | } | |
0ad19a3f | 112 | |
025ed0f3 | 113 | process_lock(); |
0ad19a3f | 114 | close(fd); |
025ed0f3 | 115 | process_unlock(); |
0ad19a3f | 116 | } |
117 | ||
9123e471 | 118 | void lxc_monitor_send_state(const char *name, lxc_state_t state, const char *lxcpath) |
eae6543d | 119 | { |
120 | struct lxc_msg msg = { .type = lxc_msg_state, | |
121 | .value = state }; | |
80f41298 | 122 | strncpy(msg.name, name, sizeof(msg.name)); |
f3bc28bd | 123 | msg.name[sizeof(msg.name) - 1] = 0; |
eae6543d | 124 | |
e51d4895 | 125 | lxc_monitor_fifo_send(&msg, lxcpath); |
0ad19a3f | 126 | } |
127 | ||
e51d4895 DE |
128 | |
129 | /* routines used by monitor subscribers (lxc-monitor) */ | |
/*
 * Close a monitor descriptor.
 *
 * The close() call is bracketed by process_lock()/process_unlock().
 * Returns whatever close() returned.
 */
int lxc_monitor_close(int fd)
{
	int rc;

	process_lock();
	rc = close(fd);
	process_unlock();

	return rc;
}
139 | ||
b45c7011 DE |
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 */
#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)

/*
 * 64-bit FNV-1a hash over a byte buffer.
 *
 * buf:  bytes to hash (not modified)
 * len:  number of bytes in buf
 * hval: starting hash value; pass FNV1A_64_INIT for a fresh hash, or a
 *       previous result to continue hashing
 *
 * Returns the updated hash value.
 */
static uint64_t fnv_64a_buf(const void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bp = buf;
	const unsigned char *end = bp + len;

	while (bp < end) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)*bp++;

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3, the same value as the shift-add form)
		 */
		hval *= (uint64_t)0x100000001b3ULL;
	}

	return hval;
}
164 | ||
e51d4895 DE |
165 | int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) { |
166 | size_t len; | |
167 | int ret; | |
b45c7011 DE |
168 | char *sockname = &addr->sun_path[1]; |
169 | char path[PATH_MAX+18]; | |
170 | uint64_t hash; | |
e51d4895 | 171 | |
b45c7011 DE |
172 | /* addr.sun_path is only 108 bytes, so we hash the full name and |
173 | * then append as much of the name as we can fit. | |
9123e471 | 174 | */ |
e51d4895 DE |
175 | memset(addr, 0, sizeof(*addr)); |
176 | addr->sun_family = AF_UNIX; | |
177 | len = sizeof(addr->sun_path) - 1; | |
b45c7011 DE |
178 | ret = snprintf(path, sizeof(path), "lxc/%s/monitor-sock", lxcpath); |
179 | if (ret < 0 || ret >= sizeof(path)) { | |
180 | ERROR("lxcpath %s too long for monitor unix socket", lxcpath); | |
9e60f51d DE |
181 | return -1; |
182 | } | |
9e60f51d | 183 | |
b45c7011 DE |
184 | hash = fnv_64a_buf(path, ret, FNV1A_64_INIT); |
185 | ret = snprintf(sockname, len, "lxc/%016" PRIx64 "/%s", hash, lxcpath); | |
186 | if (ret < 0) | |
9123e471 | 187 | return -1; |
b45c7011 DE |
188 | sockname[sizeof(addr->sun_path)-2] = '\0'; |
189 | INFO("using monitor sock name %s", sockname); | |
e51d4895 DE |
190 | return 0; |
191 | } | |
0ad19a3f | 192 | |
e51d4895 DE |
/*
 * Connect to the monitor socket of lxcpath.
 *
 * The address comes from lxc_monitor_sock_name(). A connect() that fails
 * with ECONNREFUSED is retried, sleeping 10, 50 and 100 ms after the
 * respective attempt; any other result ends the loop immediately.
 *
 * Returns the connected descriptor on success, or -1 on failure. On failure
 * after the socket was created, errno holds the cause (ENAMETOOLONG or the
 * errno of the last connect()); it is preserved across logging and cleanup.
 */
int lxc_monitor_open(const char *lxcpath)
{
	static const int backoff_ms[] = {10, 50, 100};
	struct sockaddr_un addr;
	int fd, ret = -1;
	int saved_errno = 0;
	size_t retry;
	size_t len;

	if (lxc_monitor_sock_name(lxcpath, &addr) < 0)
		return -1;

	process_lock();
	fd = socket(PF_UNIX, SOCK_STREAM, 0);
	process_unlock();
	if (fd < 0) {
		ERROR("socket : %s", strerror(errno));
		return -1;
	}

	/* the name starts at sun_path[1]; +1 accounts for the leading byte */
	len = strlen(&addr.sun_path[1]) + 1;
	if (len >= sizeof(addr.sun_path) - 1) {
		ret = -1;
		saved_errno = ENAMETOOLONG;
		goto err1;
	}

	for (retry = 0; retry < sizeof(backoff_ms)/sizeof(backoff_ms[0]); retry++) {
		ret = connect(fd, (struct sockaddr *)&addr, offsetof(struct sockaddr_un, sun_path) + len);
		/* remember the cause now: logging and usleep() may clobber errno */
		saved_errno = errno;
		if (ret == 0 || saved_errno != ECONNREFUSED)
			break;
		ERROR("connect : backing off %d", backoff_ms[retry]);
		usleep(backoff_ms[retry] * 1000);
	}

	if (ret < 0) {
		ERROR("connect : %s", strerror(saved_errno));
		goto err1;
	}
	return fd;

err1:
	process_lock();
	close(fd);
	process_unlock();
	/* close() may have overwritten errno; hand the real cause to the caller */
	errno = saved_errno;
	return ret;
}
237 | ||
8d06bd13 DE |
238 | int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg, |
239 | int timeout) | |
0ad19a3f | 240 | { |
8d06bd13 DE |
241 | struct timeval tval,*tv = NULL; |
242 | int ret,i; | |
72d0e1cb SG |
243 | |
244 | if (timeout != -1) { | |
8d06bd13 DE |
245 | tv = &tval; |
246 | tv->tv_sec = timeout; | |
247 | tv->tv_usec = 0; | |
72d0e1cb | 248 | } |
0ad19a3f | 249 | |
8d06bd13 DE |
250 | ret = select(nfds, rfds, NULL, NULL, tv); |
251 | if (ret == -1) | |
75b1e198 | 252 | return -1; |
8d06bd13 DE |
253 | else if (ret == 0) |
254 | return -2; // timed out | |
255 | ||
256 | /* only read from the first ready fd, the others will remain ready | |
257 | * for when this routine is called again | |
258 | */ | |
259 | for (i = 0; i < nfds; i++) { | |
260 | if (FD_ISSET(i, rfds)) { | |
261 | ret = recv(i, msg, sizeof(*msg), 0); | |
262 | if (ret <= 0) { | |
263 | SYSERROR("client failed to recv (monitord died?) %s", | |
264 | strerror(errno)); | |
265 | return -1; | |
266 | } | |
267 | return ret; | |
268 | } | |
0ad19a3f | 269 | } |
8d06bd13 DE |
270 | SYSERROR("no ready fd found?"); |
271 | return -1; | |
272 | } | |
273 | ||
/*
 * Read one monitor message from fd, waiting at most timeout seconds
 * (-1 waits without a timeout).
 *
 * Returns what lxc_monitor_read_fdset() returns for a set containing only
 * fd. A descriptor that cannot be stored in an fd_set (negative, or not
 * below FD_SETSIZE) yields -1 with errno set to EBADF.
 */
int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout)
{
	fd_set rfds;

	/* FD_SET() on an out-of-range descriptor is undefined behaviour */
	if (fd < 0 || fd >= FD_SETSIZE) {
		ERROR("monitor fd %d can not be used with select", fd);
		errno = EBADF;
		return -1;
	}

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);

	return lxc_monitor_read_fdset(&rfds, fd + 1, msg, timeout);
}
283 | ||
72d0e1cb SG |
/*
 * Read one monitor message from fd without a timeout.
 *
 * Thin wrapper around lxc_monitor_read_timeout() with a timeout of -1;
 * returns its result unchanged.
 */
int lxc_monitor_read(int fd, struct lxc_msg *msg)
{
	const int no_timeout = -1;

	return lxc_monitor_read_timeout(fd, msg, no_timeout);
}
288 | ||
e51d4895 DE |
289 | |
290 | ||
/* used to spawn a monitord either on startup of a daemon container, or when
 * lxc-monitor starts
 *
 * Three processes are involved:
 *  - the caller forks a first child and waits for it to exit;
 *  - the first child creates a pipe, forks again and exits once a read on
 *    the pipe returns;
 *  - the second child starts a new session, points stdin/stdout/stderr at
 *    /dev/null and executes "lxc-monitord <lxcpath> <pipe write fd>".
 *
 * Returns 0 in the caller once the first child has been reaped, -1 if the
 * first fork or the waitpid fails. The children never return from here.
 */
int lxc_monitord_spawn(const char *lxcpath)
{
	pid_t pid1, pid2;
	int pipefd[2];
	int len;
	char pipefd_str[11];

	char * const args[] = {
		"lxc-monitord",
		(char *)lxcpath,
		pipefd_str,
		NULL,
	};

	/* double fork to avoid zombies when monitord exits */
	pid1 = fork();
	if (pid1 < 0) {
		SYSERROR("failed to fork");
		return -1;
	}

	if (pid1) {
		if (waitpid(pid1, NULL, 0) != pid1)
			return -1;
		return 0;
	}

	process_unlock(); // we're no longer sharing
	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		exit(EXIT_FAILURE);
	}

	pid2 = fork();
	if (pid2 < 0) {
		SYSERROR("failed to fork");
		exit(EXIT_FAILURE);
	}
	if (pid2) {
		char c;
		/* wait for daemon to create socket */
		close(pipefd[1]);
		/* sync with child, we're ignoring the return from read
		 * because regardless if it works or not, either way we've
		 * synced with the child process. the if-empty-statement
		 * construct is to quiet the warn-unused-result warning.
		 */
		if (read(pipefd[0], &c, 1))
			;
		close(pipefd[0]);
		exit(EXIT_SUCCESS);
	}

	umask(0);
	if (setsid() < 0) {
		SYSERROR("failed to setsid");
		exit(EXIT_FAILURE);
	}
	close(0);
	close(1);
	close(2);
	open("/dev/null", O_RDONLY);
	open("/dev/null", O_RDWR);
	open("/dev/null", O_RDWR);
	close(pipefd[0]);

	/* bounded formatting: never write past pipefd_str */
	len = snprintf(pipefd_str, sizeof(pipefd_str), "%d", pipefd[1]);
	if (len < 0 || (size_t)len >= sizeof(pipefd_str))
		exit(EXIT_FAILURE);

	execvp(args[0], args);
	/* only reached when execvp() failed */
	exit(EXIT_FAILURE);
}