]> git.proxmox.com Git - systemd.git/blame - src/journal/journald-server.c
Imported Upstream version 217
[systemd.git] / src / journal / journald-server.c
CommitLineData
663996b3
MS
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/signalfd.h>
23#include <sys/ioctl.h>
24#include <linux/sockios.h>
25#include <sys/statvfs.h>
26#include <sys/mman.h>
27#include <sys/timerfd.h>
28
29#include <libudev.h>
663996b3 30
60f067b4
JS
31#include "sd-journal.h"
32#include "sd-messages.h"
33#include "sd-daemon.h"
663996b3
MS
34#include "fileio.h"
35#include "mkdir.h"
36#include "hashmap.h"
37#include "journal-file.h"
38#include "socket-util.h"
39#include "cgroup-util.h"
40#include "list.h"
663996b3
MS
41#include "missing.h"
42#include "conf-parser.h"
60f067b4 43#include "selinux-util.h"
663996b3
MS
44#include "journal-internal.h"
45#include "journal-vacuum.h"
46#include "journal-authenticate.h"
663996b3
MS
47#include "journald-rate-limit.h"
48#include "journald-kmsg.h"
49#include "journald-syslog.h"
50#include "journald-stream.h"
51#include "journald-console.h"
52#include "journald-native.h"
60f067b4 53#include "journald-server.h"
663996b3
MS
54
55#ifdef HAVE_ACL
56#include <sys/acl.h>
57#include <acl/libacl.h>
58#include "acl-util.h"
59#endif
60
61#ifdef HAVE_SELINUX
62#include <selinux/selinux.h>
63#endif
64
/* Upper bound on the number of per-user journal files kept open at the
 * same time; find_journal() closes older ones once this is reached. */
#define USER_JOURNALS_MAX 1024

/* Built-in default values.
 * NOTE(review): presumably used when the configuration does not override
 * them -- the consumers are outside this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached available_space() result stays valid before the
 * journal directory is scanned again. */
#define RECHECK_AVAILABLE_SPACE_USEC (30*USEC_PER_SEC)
73
60f067b4 74static const char* const storage_table[_STORAGE_MAX] = {
663996b3
MS
75 [STORAGE_AUTO] = "auto",
76 [STORAGE_VOLATILE] = "volatile",
77 [STORAGE_PERSISTENT] = "persistent",
78 [STORAGE_NONE] = "none"
79};
80
81DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
82DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
83
60f067b4
JS
84static const char* const split_mode_table[_SPLIT_MAX] = {
85 [SPLIT_LOGIN] = "login",
663996b3 86 [SPLIT_UID] = "uid",
60f067b4 87 [SPLIT_NONE] = "none",
663996b3
MS
88};
89
90DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
91DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
92
14228c0d 93static uint64_t available_space(Server *s, bool verbose) {
663996b3
MS
94 char ids[33];
95 _cleanup_free_ char *p = NULL;
663996b3
MS
96 sd_id128_t machine;
97 struct statvfs ss;
14228c0d 98 uint64_t sum = 0, ss_avail = 0, avail = 0;
663996b3
MS
99 int r;
100 _cleanup_closedir_ DIR *d = NULL;
101 usec_t ts;
14228c0d 102 const char *f;
663996b3
MS
103 JournalMetrics *m;
104
105 ts = now(CLOCK_MONOTONIC);
106
14228c0d
MB
107 if (s->cached_available_space_timestamp + RECHECK_AVAILABLE_SPACE_USEC > ts
108 && !verbose)
663996b3
MS
109 return s->cached_available_space;
110
111 r = sd_id128_get_machine(&machine);
112 if (r < 0)
113 return 0;
114
115 if (s->system_journal) {
116 f = "/var/log/journal/";
117 m = &s->system_metrics;
118 } else {
119 f = "/run/log/journal/";
120 m = &s->runtime_metrics;
121 }
122
123 assert(m);
124
125 p = strappend(f, sd_id128_to_string(machine, ids));
126 if (!p)
127 return 0;
128
129 d = opendir(p);
130 if (!d)
131 return 0;
132
133 if (fstatvfs(dirfd(d), &ss) < 0)
134 return 0;
135
136 for (;;) {
137 struct stat st;
138 struct dirent *de;
663996b3 139
60f067b4
JS
140 errno = 0;
141 de = readdir(d);
142 if (!de && errno != 0)
143 return 0;
663996b3
MS
144
145 if (!de)
146 break;
147
148 if (!endswith(de->d_name, ".journal") &&
149 !endswith(de->d_name, ".journal~"))
150 continue;
151
152 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
153 continue;
154
155 if (!S_ISREG(st.st_mode))
156 continue;
157
158 sum += (uint64_t) st.st_blocks * 512UL;
159 }
160
663996b3
MS
161 ss_avail = ss.f_bsize * ss.f_bavail;
162
60f067b4
JS
163 /* If we reached a high mark, we will always allow this much
164 * again, unless usage goes above max_use. This watermark
165 * value is cached so that we don't give up space on pressure,
166 * but hover below the maximum usage. */
167
168 if (m->use < sum)
169 m->use = sum;
170
171 avail = LESS_BY(ss_avail, m->keep_free);
172
173 s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
663996b3
MS
174 s->cached_available_space_timestamp = ts;
175
14228c0d
MB
176 if (verbose) {
177 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
178 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
179
180 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
60f067b4
JS
181 "%s journal is using %s (max allowed %s, "
182 "trying to leave %s free of %s available → current limit %s).",
14228c0d
MB
183 s->system_journal ? "Permanent" : "Runtime",
184 format_bytes(fb1, sizeof(fb1), sum),
185 format_bytes(fb2, sizeof(fb2), m->max_use),
186 format_bytes(fb3, sizeof(fb3), m->keep_free),
187 format_bytes(fb4, sizeof(fb4), ss_avail),
60f067b4 188 format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
14228c0d 189 }
663996b3 190
14228c0d 191 return s->cached_available_space;
663996b3
MS
192}
193
194void server_fix_perms(Server *s, JournalFile *f, uid_t uid) {
195 int r;
196#ifdef HAVE_ACL
197 acl_t acl;
198 acl_entry_t entry;
199 acl_permset_t permset;
200#endif
201
202 assert(f);
203
14228c0d 204 r = fchmod(f->fd, 0640);
663996b3 205 if (r < 0)
14228c0d 206 log_warning("Failed to fix access mode on %s, ignoring: %s", f->path, strerror(-r));
663996b3
MS
207
208#ifdef HAVE_ACL
e842803a 209 if (uid <= SYSTEM_UID_MAX)
663996b3
MS
210 return;
211
212 acl = acl_get_fd(f->fd);
213 if (!acl) {
214 log_warning("Failed to read ACL on %s, ignoring: %m", f->path);
215 return;
216 }
217
218 r = acl_find_uid(acl, uid, &entry);
219 if (r <= 0) {
220
221 if (acl_create_entry(&acl, &entry) < 0 ||
222 acl_set_tag_type(entry, ACL_USER) < 0 ||
223 acl_set_qualifier(entry, &uid) < 0) {
224 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
225 goto finish;
226 }
227 }
228
14228c0d
MB
229 /* We do not recalculate the mask unconditionally here,
230 * so that the fchmod() mask above stays intact. */
663996b3 231 if (acl_get_permset(entry, &permset) < 0 ||
14228c0d
MB
232 acl_add_perm(permset, ACL_READ) < 0 ||
233 calc_acl_mask_if_needed(&acl) < 0) {
663996b3
MS
234 log_warning("Failed to patch ACL on %s, ignoring: %m", f->path);
235 goto finish;
236 }
237
238 if (acl_set_fd(f->fd, acl) < 0)
239 log_warning("Failed to set ACL on %s, ignoring: %m", f->path);
240
241finish:
242 acl_free(acl);
243#endif
244}
245
246static JournalFile* find_journal(Server *s, uid_t uid) {
14228c0d 247 _cleanup_free_ char *p = NULL;
663996b3
MS
248 int r;
249 JournalFile *f;
250 sd_id128_t machine;
251
252 assert(s);
253
254 /* We split up user logs only on /var, not on /run. If the
255 * runtime file is open, we write to it exclusively, in order
256 * to guarantee proper order as soon as we flush /run to
257 * /var and close the runtime file. */
258
259 if (s->runtime_journal)
260 return s->runtime_journal;
261
60f067b4 262 if (uid <= SYSTEM_UID_MAX)
663996b3
MS
263 return s->system_journal;
264
265 r = sd_id128_get_machine(&machine);
266 if (r < 0)
267 return s->system_journal;
268
5eef597e 269 f = ordered_hashmap_get(s->user_journals, UINT32_TO_PTR(uid));
663996b3
MS
270 if (f)
271 return f;
272
60f067b4
JS
273 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
274 SD_ID128_FORMAT_VAL(machine), uid) < 0)
663996b3
MS
275 return s->system_journal;
276
5eef597e 277 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
663996b3 278 /* Too many open? Then let's close one */
5eef597e 279 f = ordered_hashmap_steal_first(s->user_journals);
663996b3
MS
280 assert(f);
281 journal_file_close(f);
282 }
283
14228c0d 284 r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &f);
663996b3
MS
285 if (r < 0)
286 return s->system_journal;
287
288 server_fix_perms(s, f, uid);
289
5eef597e 290 r = ordered_hashmap_put(s->user_journals, UINT32_TO_PTR(uid), f);
663996b3
MS
291 if (r < 0) {
292 journal_file_close(f);
293 return s->system_journal;
294 }
295
296 return f;
297}
298
60f067b4
JS
299static int do_rotate(Server *s, JournalFile **f, const char* name,
300 bool seal, uint32_t uid) {
301 int r;
302 assert(s);
303
304 if (!*f)
305 return -EINVAL;
306
307 r = journal_file_rotate(f, s->compress, seal);
308 if (r < 0)
309 if (*f)
310 log_error("Failed to rotate %s: %s",
311 (*f)->path, strerror(-r));
312 else
313 log_error("Failed to create new %s journal: %s",
314 name, strerror(-r));
315 else
316 server_fix_perms(s, *f, uid);
317 return r;
318}
319
663996b3
MS
320void server_rotate(Server *s) {
321 JournalFile *f;
322 void *k;
323 Iterator i;
324 int r;
325
326 log_debug("Rotating...");
327
60f067b4
JS
328 do_rotate(s, &s->runtime_journal, "runtime", false, 0);
329 do_rotate(s, &s->system_journal, "system", s->seal, 0);
663996b3 330
5eef597e 331 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
60f067b4
JS
332 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UINT32(k));
333 if (r >= 0)
5eef597e 334 ordered_hashmap_replace(s->user_journals, k, f);
60f067b4
JS
335 else if (!f)
336 /* Old file has been closed and deallocated */
5eef597e 337 ordered_hashmap_remove(s->user_journals, k);
663996b3
MS
338 }
339}
340
341void server_sync(Server *s) {
342 JournalFile *f;
343 void *k;
344 Iterator i;
345 int r;
346
663996b3
MS
347 if (s->system_journal) {
348 r = journal_file_set_offline(s->system_journal);
349 if (r < 0)
350 log_error("Failed to sync system journal: %s", strerror(-r));
351 }
352
5eef597e 353 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
663996b3
MS
354 r = journal_file_set_offline(f);
355 if (r < 0)
356 log_error("Failed to sync user journal: %s", strerror(-r));
357 }
358
60f067b4
JS
359 if (s->sync_event_source) {
360 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
361 if (r < 0)
362 log_error("Failed to disable sync timer source: %s", strerror(-r));
363 }
663996b3
MS
364
365 s->sync_scheduled = false;
366}
367
60f067b4
JS
368static void do_vacuum(Server *s, char *ids, JournalFile *f, const char* path,
369 JournalMetrics *metrics) {
370 char *p;
371 int r;
372
373 if (!f)
374 return;
375
376 p = strappenda(path, ids);
377 r = journal_directory_vacuum(p, metrics->max_use, s->max_retention_usec, &s->oldest_file_usec);
378 if (r < 0 && r != -ENOENT)
379 log_error("Failed to vacuum %s: %s", p, strerror(-r));
380}
381
663996b3 382void server_vacuum(Server *s) {
663996b3
MS
383 char ids[33];
384 sd_id128_t machine;
385 int r;
386
387 log_debug("Vacuuming...");
388
389 s->oldest_file_usec = 0;
390
391 r = sd_id128_get_machine(&machine);
392 if (r < 0) {
393 log_error("Failed to get machine ID: %s", strerror(-r));
394 return;
395 }
663996b3
MS
396 sd_id128_to_string(machine, ids);
397
60f067b4
JS
398 do_vacuum(s, ids, s->system_journal, "/var/log/journal/", &s->system_metrics);
399 do_vacuum(s, ids, s->runtime_journal, "/run/log/journal/", &s->runtime_metrics);
663996b3 400
60f067b4
JS
401 s->cached_available_space_timestamp = 0;
402}
663996b3 403
60f067b4
JS
404static void server_cache_machine_id(Server *s) {
405 sd_id128_t id;
406 int r;
663996b3 407
60f067b4 408 assert(s);
663996b3 409
60f067b4
JS
410 r = sd_id128_get_machine(&id);
411 if (r < 0)
412 return;
413
414 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
415}
416
417static void server_cache_boot_id(Server *s) {
418 sd_id128_t id;
419 int r;
420
421 assert(s);
422
423 r = sd_id128_get_boot(&id);
424 if (r < 0)
425 return;
426
427 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
428}
429
430static void server_cache_hostname(Server *s) {
431 _cleanup_free_ char *t = NULL;
432 char *x;
433
434 assert(s);
435
436 t = gethostname_malloc();
437 if (!t)
438 return;
439
440 x = strappend("_HOSTNAME=", t);
441 if (!x)
442 return;
443
444 free(s->hostname_field);
445 s->hostname_field = x;
663996b3
MS
446}
447
448bool shall_try_append_again(JournalFile *f, int r) {
449
450 /* -E2BIG Hit configured limit
451 -EFBIG Hit fs limit
452 -EDQUOT Quota limit hit
453 -ENOSPC Disk full
454 -EHOSTDOWN Other machine
455 -EBUSY Unclean shutdown
456 -EPROTONOSUPPORT Unsupported feature
457 -EBADMSG Corrupted
458 -ENODATA Truncated
459 -ESHUTDOWN Already archived */
460
461 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
462 log_debug("%s: Allocation limit reached, rotating.", f->path);
463 else if (r == -EHOSTDOWN)
464 log_info("%s: Journal file from other machine, rotating.", f->path);
465 else if (r == -EBUSY)
466 log_info("%s: Unclean shutdown, rotating.", f->path);
467 else if (r == -EPROTONOSUPPORT)
468 log_info("%s: Unsupported feature, rotating.", f->path);
469 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
470 log_warning("%s: Journal file corrupted, rotating.", f->path);
471 else
472 return false;
473
474 return true;
475}
476
14228c0d 477static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
663996b3
MS
478 JournalFile *f;
479 bool vacuumed = false;
480 int r;
481
482 assert(s);
483 assert(iovec);
484 assert(n > 0);
485
486 f = find_journal(s, uid);
487 if (!f)
488 return;
489
490 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
491 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
492 server_rotate(s);
493 server_vacuum(s);
494 vacuumed = true;
495
496 f = find_journal(s, uid);
497 if (!f)
498 return;
499 }
500
501 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
502 if (r >= 0) {
14228c0d 503 server_schedule_sync(s, priority);
663996b3
MS
504 return;
505 }
506
507 if (vacuumed || !shall_try_append_again(f, r)) {
14228c0d
MB
508 size_t size = 0;
509 unsigned i;
510 for (i = 0; i < n; i++)
511 size += iovec[i].iov_len;
512
513 log_error("Failed to write entry (%d items, %zu bytes), ignoring: %s", n, size, strerror(-r));
663996b3
MS
514 return;
515 }
516
517 server_rotate(s);
518 server_vacuum(s);
519
520 f = find_journal(s, uid);
521 if (!f)
522 return;
523
524 log_debug("Retrying write.");
525 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
14228c0d
MB
526 if (r < 0) {
527 size_t size = 0;
528 unsigned i;
529 for (i = 0; i < n; i++)
530 size += iovec[i].iov_len;
531
532 log_error("Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %s", n, size, strerror(-r));
533 } else
534 server_schedule_sync(s, priority);
663996b3
MS
535}
536
537static void dispatch_message_real(
538 Server *s,
539 struct iovec *iovec, unsigned n, unsigned m,
540 struct ucred *ucred,
541 struct timeval *tv,
542 const char *label, size_t label_len,
14228c0d
MB
543 const char *unit_id,
544 int priority,
545 pid_t object_pid) {
663996b3 546
14228c0d 547 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
663996b3
MS
548 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
549 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
550 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
551 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
14228c0d
MB
552 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
553 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
554 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
555 uid_t object_uid;
556 gid_t object_gid;
557 char *x;
663996b3
MS
558 int r;
559 char *t, *c;
560 uid_t realuid = 0, owner = 0, journal_uid;
561 bool owner_valid = false;
562#ifdef HAVE_AUDIT
14228c0d
MB
563 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
564 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
565 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
566 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
663996b3
MS
567
568 uint32_t audit;
569 uid_t loginuid;
570#endif
571
572 assert(s);
573 assert(iovec);
574 assert(n > 0);
14228c0d 575 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
663996b3
MS
576
577 if (ucred) {
578 realuid = ucred->uid;
579
60f067b4 580 sprintf(pid, "_PID="PID_FMT, ucred->pid);
663996b3
MS
581 IOVEC_SET_STRING(iovec[n++], pid);
582
60f067b4 583 sprintf(uid, "_UID="UID_FMT, ucred->uid);
663996b3
MS
584 IOVEC_SET_STRING(iovec[n++], uid);
585
60f067b4 586 sprintf(gid, "_GID="GID_FMT, ucred->gid);
663996b3
MS
587 IOVEC_SET_STRING(iovec[n++], gid);
588
589 r = get_process_comm(ucred->pid, &t);
590 if (r >= 0) {
14228c0d 591 x = strappenda("_COMM=", t);
663996b3 592 free(t);
14228c0d 593 IOVEC_SET_STRING(iovec[n++], x);
663996b3
MS
594 }
595
596 r = get_process_exe(ucred->pid, &t);
597 if (r >= 0) {
14228c0d 598 x = strappenda("_EXE=", t);
663996b3 599 free(t);
14228c0d 600 IOVEC_SET_STRING(iovec[n++], x);
663996b3
MS
601 }
602
603 r = get_process_cmdline(ucred->pid, 0, false, &t);
604 if (r >= 0) {
14228c0d 605 x = strappenda("_CMDLINE=", t);
663996b3 606 free(t);
14228c0d
MB
607 IOVEC_SET_STRING(iovec[n++], x);
608 }
609
610 r = get_process_capeff(ucred->pid, &t);
611 if (r >= 0) {
612 x = strappenda("_CAP_EFFECTIVE=", t);
613 free(t);
614 IOVEC_SET_STRING(iovec[n++], x);
663996b3
MS
615 }
616
617#ifdef HAVE_AUDIT
618 r = audit_session_from_pid(ucred->pid, &audit);
619 if (r >= 0) {
60f067b4 620 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
663996b3
MS
621 IOVEC_SET_STRING(iovec[n++], audit_session);
622 }
623
624 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
625 if (r >= 0) {
60f067b4 626 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
663996b3
MS
627 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
628 }
629#endif
630
60f067b4 631 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
663996b3 632 if (r >= 0) {
14228c0d
MB
633 char *session = NULL;
634
635 x = strappenda("_SYSTEMD_CGROUP=", c);
636 IOVEC_SET_STRING(iovec[n++], x);
663996b3
MS
637
638 r = cg_path_get_session(c, &t);
639 if (r >= 0) {
640 session = strappenda("_SYSTEMD_SESSION=", t);
641 free(t);
642 IOVEC_SET_STRING(iovec[n++], session);
643 }
644
645 if (cg_path_get_owner_uid(c, &owner) >= 0) {
646 owner_valid = true;
647
60f067b4 648 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
663996b3
MS
649 IOVEC_SET_STRING(iovec[n++], owner_uid);
650 }
651
652 if (cg_path_get_unit(c, &t) >= 0) {
14228c0d 653 x = strappenda("_SYSTEMD_UNIT=", t);
663996b3 654 free(t);
14228c0d
MB
655 IOVEC_SET_STRING(iovec[n++], x);
656 } else if (unit_id && !session) {
657 x = strappenda("_SYSTEMD_UNIT=", unit_id);
658 IOVEC_SET_STRING(iovec[n++], x);
659 }
660
661 if (cg_path_get_user_unit(c, &t) >= 0) {
662 x = strappenda("_SYSTEMD_USER_UNIT=", t);
663996b3 663 free(t);
14228c0d
MB
664 IOVEC_SET_STRING(iovec[n++], x);
665 } else if (unit_id && session) {
666 x = strappenda("_SYSTEMD_USER_UNIT=", unit_id);
667 IOVEC_SET_STRING(iovec[n++], x);
668 }
663996b3 669
14228c0d
MB
670 if (cg_path_get_slice(c, &t) >= 0) {
671 x = strappenda("_SYSTEMD_SLICE=", t);
672 free(t);
673 IOVEC_SET_STRING(iovec[n++], x);
674 }
663996b3
MS
675
676 free(c);
60f067b4
JS
677 } else if (unit_id) {
678 x = strappenda("_SYSTEMD_UNIT=", unit_id);
679 IOVEC_SET_STRING(iovec[n++], x);
663996b3
MS
680 }
681
682#ifdef HAVE_SELINUX
5eef597e 683 if (mac_selinux_use()) {
60f067b4
JS
684 if (label) {
685 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
663996b3 686
60f067b4
JS
687 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
688 IOVEC_SET_STRING(iovec[n++], x);
689 } else {
690 security_context_t con;
663996b3 691
60f067b4
JS
692 if (getpidcon(ucred->pid, &con) >= 0) {
693 x = strappenda("_SELINUX_CONTEXT=", con);
663996b3 694
60f067b4
JS
695 freecon(con);
696 IOVEC_SET_STRING(iovec[n++], x);
697 }
663996b3
MS
698 }
699 }
700#endif
701 }
14228c0d
MB
702 assert(n <= m);
703
704 if (object_pid) {
705 r = get_process_uid(object_pid, &object_uid);
706 if (r >= 0) {
60f067b4 707 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
14228c0d
MB
708 IOVEC_SET_STRING(iovec[n++], o_uid);
709 }
710
711 r = get_process_gid(object_pid, &object_gid);
712 if (r >= 0) {
60f067b4 713 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
14228c0d
MB
714 IOVEC_SET_STRING(iovec[n++], o_gid);
715 }
716
717 r = get_process_comm(object_pid, &t);
718 if (r >= 0) {
719 x = strappenda("OBJECT_COMM=", t);
720 free(t);
721 IOVEC_SET_STRING(iovec[n++], x);
722 }
723
724 r = get_process_exe(object_pid, &t);
725 if (r >= 0) {
726 x = strappenda("OBJECT_EXE=", t);
727 free(t);
728 IOVEC_SET_STRING(iovec[n++], x);
729 }
730
731 r = get_process_cmdline(object_pid, 0, false, &t);
732 if (r >= 0) {
733 x = strappenda("OBJECT_CMDLINE=", t);
734 free(t);
735 IOVEC_SET_STRING(iovec[n++], x);
736 }
737
738#ifdef HAVE_AUDIT
739 r = audit_session_from_pid(object_pid, &audit);
740 if (r >= 0) {
60f067b4 741 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
14228c0d
MB
742 IOVEC_SET_STRING(iovec[n++], o_audit_session);
743 }
744
745 r = audit_loginuid_from_pid(object_pid, &loginuid);
746 if (r >= 0) {
60f067b4 747 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
14228c0d
MB
748 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
749 }
750#endif
751
60f067b4 752 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
14228c0d
MB
753 if (r >= 0) {
754 x = strappenda("OBJECT_SYSTEMD_CGROUP=", c);
755 IOVEC_SET_STRING(iovec[n++], x);
756
757 r = cg_path_get_session(c, &t);
758 if (r >= 0) {
759 x = strappenda("OBJECT_SYSTEMD_SESSION=", t);
760 free(t);
761 IOVEC_SET_STRING(iovec[n++], x);
762 }
763
764 if (cg_path_get_owner_uid(c, &owner) >= 0) {
60f067b4 765 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
14228c0d
MB
766 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
767 }
768
769 if (cg_path_get_unit(c, &t) >= 0) {
770 x = strappenda("OBJECT_SYSTEMD_UNIT=", t);
771 free(t);
772 IOVEC_SET_STRING(iovec[n++], x);
773 }
774
775 if (cg_path_get_user_unit(c, &t) >= 0) {
776 x = strappenda("OBJECT_SYSTEMD_USER_UNIT=", t);
777 free(t);
778 IOVEC_SET_STRING(iovec[n++], x);
779 }
780
781 free(c);
782 }
783 }
784 assert(n <= m);
663996b3
MS
785
786 if (tv) {
787 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
788 IOVEC_SET_STRING(iovec[n++], source_time);
789 }
790
791 /* Note that strictly speaking storing the boot id here is
792 * redundant since the entry includes this in-line
793 * anyway. However, we need this indexed, too. */
60f067b4
JS
794 if (!isempty(s->boot_id_field))
795 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
663996b3 796
60f067b4
JS
797 if (!isempty(s->machine_id_field))
798 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
663996b3 799
60f067b4
JS
800 if (!isempty(s->hostname_field))
801 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
663996b3
MS
802
803 assert(n <= m);
804
805 if (s->split_mode == SPLIT_UID && realuid > 0)
806 /* Split up strictly by any UID */
807 journal_uid = realuid;
808 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
e842803a
MB
809 /* Split up by login UIDs. We do this only if the
810 * realuid is not root, in order not to accidentally
811 * leak privileged information to the user that is
812 * logged by a privileged process that is part of an
813 * unprivileged session.*/
663996b3
MS
814 journal_uid = owner;
815 else
816 journal_uid = 0;
817
14228c0d 818 write_to_journal(s, journal_uid, iovec, n, priority);
663996b3
MS
819}
820
821void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
822 char mid[11 + 32 + 1];
823 char buffer[16 + LINE_MAX + 1];
824 struct iovec iovec[N_IOVEC_META_FIELDS + 4];
825 int n = 0;
826 va_list ap;
827 struct ucred ucred = {};
828
829 assert(s);
830 assert(format);
831
832 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
833 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
834
835 memcpy(buffer, "MESSAGE=", 8);
836 va_start(ap, format);
837 vsnprintf(buffer + 8, sizeof(buffer) - 8, format, ap);
838 va_end(ap);
839 char_array_0(buffer);
840 IOVEC_SET_STRING(iovec[n++], buffer);
841
842 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
843 snprintf(mid, sizeof(mid), MESSAGE_ID(message_id));
844 char_array_0(mid);
845 IOVEC_SET_STRING(iovec[n++], mid);
846 }
847
848 ucred.pid = getpid();
849 ucred.uid = getuid();
850 ucred.gid = getgid();
851
14228c0d 852 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
663996b3
MS
853}
854
855void server_dispatch_message(
856 Server *s,
857 struct iovec *iovec, unsigned n, unsigned m,
858 struct ucred *ucred,
859 struct timeval *tv,
860 const char *label, size_t label_len,
861 const char *unit_id,
14228c0d
MB
862 int priority,
863 pid_t object_pid) {
663996b3
MS
864
865 int rl, r;
866 _cleanup_free_ char *path = NULL;
867 char *c;
868
869 assert(s);
870 assert(iovec || n == 0);
871
872 if (n == 0)
873 return;
874
875 if (LOG_PRI(priority) > s->max_level_store)
876 return;
877
14228c0d
MB
878 /* Stop early in case the information will not be stored
879 * in a journal. */
880 if (s->storage == STORAGE_NONE)
881 return;
882
663996b3
MS
883 if (!ucred)
884 goto finish;
885
60f067b4 886 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
663996b3
MS
887 if (r < 0)
888 goto finish;
889
890 /* example: /user/lennart/3/foobar
891 * /system/dbus.service/foobar
892 *
893 * So let's cut of everything past the third /, since that is
894 * where user directories start */
895
896 c = strchr(path, '/');
897 if (c) {
898 c = strchr(c+1, '/');
899 if (c) {
900 c = strchr(c+1, '/');
901 if (c)
902 *c = 0;
903 }
904 }
905
906 rl = journal_rate_limit_test(s->rate_limit, path,
14228c0d 907 priority & LOG_PRIMASK, available_space(s, false));
663996b3
MS
908
909 if (rl == 0)
910 return;
911
912 /* Write a suppression message if we suppressed something */
913 if (rl > 1)
914 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
915 "Suppressed %u messages from %s", rl - 1, path);
916
917finish:
14228c0d 918 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
663996b3
MS
919}
920
921
5eef597e 922static int system_journal_open(Server *s, bool flush_requested) {
663996b3
MS
923 int r;
924 char *fn;
925 sd_id128_t machine;
926 char ids[33];
927
928 r = sd_id128_get_machine(&machine);
14228c0d
MB
929 if (r < 0) {
930 log_error("Failed to get machine id: %s", strerror(-r));
663996b3 931 return r;
14228c0d 932 }
663996b3
MS
933
934 sd_id128_to_string(machine, ids);
935
936 if (!s->system_journal &&
937 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
5eef597e
MP
938 (flush_requested
939 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
663996b3
MS
940
941 /* If in auto mode: first try to create the machine
942 * path, but not the prefix.
943 *
944 * If in persistent mode: create /var/log/journal and
945 * the machine path */
946
947 if (s->storage == STORAGE_PERSISTENT)
948 (void) mkdir("/var/log/journal/", 0755);
949
14228c0d 950 fn = strappenda("/var/log/journal/", ids);
663996b3 951 (void) mkdir(fn, 0755);
663996b3 952
14228c0d 953 fn = strappenda(fn, "/system.journal");
663996b3 954 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, s->seal, &s->system_metrics, s->mmap, NULL, &s->system_journal);
663996b3 955
14228c0d 956 if (r >= 0)
663996b3 957 server_fix_perms(s, s->system_journal, 0);
14228c0d 958 else if (r < 0) {
663996b3
MS
959 if (r != -ENOENT && r != -EROFS)
960 log_warning("Failed to open system journal: %s", strerror(-r));
961
962 r = 0;
963 }
964 }
965
966 if (!s->runtime_journal &&
967 (s->storage != STORAGE_NONE)) {
968
969 fn = strjoin("/run/log/journal/", ids, "/system.journal", NULL);
970 if (!fn)
971 return -ENOMEM;
972
973 if (s->system_journal) {
974
975 /* Try to open the runtime journal, but only
976 * if it already exists, so that we can flush
977 * it into the system journal */
978
979 r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
980 free(fn);
981
982 if (r < 0) {
983 if (r != -ENOENT)
984 log_warning("Failed to open runtime journal: %s", strerror(-r));
985
986 r = 0;
987 }
988
989 } else {
990
991 /* OK, we really need the runtime journal, so create
992 * it if necessary. */
993
60f067b4
JS
994 (void) mkdir("/run/log", 0755);
995 (void) mkdir("/run/log/journal", 0755);
996 (void) mkdir_parents(fn, 0750);
997
663996b3
MS
998 r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
999 free(fn);
1000
1001 if (r < 0) {
1002 log_error("Failed to open runtime journal: %s", strerror(-r));
1003 return r;
1004 }
1005 }
1006
14228c0d 1007 if (s->runtime_journal)
663996b3 1008 server_fix_perms(s, s->runtime_journal, 0);
663996b3
MS
1009 }
1010
14228c0d
MB
1011 available_space(s, true);
1012
663996b3
MS
1013 return r;
1014}
1015
1016int server_flush_to_var(Server *s) {
663996b3
MS
1017 sd_id128_t machine;
1018 sd_journal *j = NULL;
60f067b4
JS
1019 char ts[FORMAT_TIMESPAN_MAX];
1020 usec_t start;
1021 unsigned n = 0;
1022 int r;
663996b3
MS
1023
1024 assert(s);
1025
1026 if (s->storage != STORAGE_AUTO &&
1027 s->storage != STORAGE_PERSISTENT)
1028 return 0;
1029
1030 if (!s->runtime_journal)
1031 return 0;
1032
5eef597e 1033 system_journal_open(s, true);
663996b3
MS
1034
1035 if (!s->system_journal)
1036 return 0;
1037
1038 log_debug("Flushing to /var...");
1039
60f067b4
JS
1040 start = now(CLOCK_MONOTONIC);
1041
663996b3 1042 r = sd_id128_get_machine(&machine);
14228c0d 1043 if (r < 0)
663996b3 1044 return r;
663996b3
MS
1045
1046 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1047 if (r < 0) {
1048 log_error("Failed to read runtime journal: %s", strerror(-r));
1049 return r;
1050 }
1051
1052 sd_journal_set_data_threshold(j, 0);
1053
1054 SD_JOURNAL_FOREACH(j) {
1055 Object *o = NULL;
1056 JournalFile *f;
1057
1058 f = j->current_file;
1059 assert(f && f->current_offset > 0);
1060
60f067b4
JS
1061 n++;
1062
663996b3
MS
1063 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1064 if (r < 0) {
1065 log_error("Can't read entry: %s", strerror(-r));
1066 goto finish;
1067 }
1068
1069 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1070 if (r >= 0)
1071 continue;
1072
1073 if (!shall_try_append_again(s->system_journal, r)) {
1074 log_error("Can't write entry: %s", strerror(-r));
1075 goto finish;
1076 }
1077
1078 server_rotate(s);
1079 server_vacuum(s);
1080
1081 if (!s->system_journal) {
1082 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1083 r = -EIO;
1084 goto finish;
1085 }
1086
1087 log_debug("Retrying write.");
1088 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1089 if (r < 0) {
1090 log_error("Can't write entry: %s", strerror(-r));
1091 goto finish;
1092 }
1093 }
1094
1095finish:
1096 journal_file_post_change(s->system_journal);
1097
1098 journal_file_close(s->runtime_journal);
1099 s->runtime_journal = NULL;
1100
1101 if (r >= 0)
1102 rm_rf("/run/log/journal", false, true, false);
1103
1104 sd_journal_close(j);
1105
60f067b4
JS
1106 server_driver_message(s, SD_ID128_NULL, "Time spent on flushing to /var is %s for %u entries.", format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0), n);
1107
663996b3
MS
1108 return r;
1109}
1110
60f067b4
JS
1111int process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1112 Server *s = userdata;
1113
663996b3 1114 assert(s);
60f067b4
JS
1115 assert(fd == s->native_fd || fd == s->syslog_fd);
1116
1117 if (revents != EPOLLIN) {
1118 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1119 return -EIO;
1120 }
1121
1122 for (;;) {
1123 struct ucred *ucred = NULL;
1124 struct timeval *tv = NULL;
1125 struct cmsghdr *cmsg;
1126 char *label = NULL;
1127 size_t label_len = 0;
1128 struct iovec iovec;
1129
1130 union {
1131 struct cmsghdr cmsghdr;
1132
1133 /* We use NAME_MAX space for the SELinux label
1134 * here. The kernel currently enforces no
1135 * limit, but according to suggestions from
1136 * the SELinux people this will change and it
1137 * will probably be identical to NAME_MAX. For
1138 * now we use that, but this should be updated
1139 * one day when the final limit is known.*/
1140 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1141 CMSG_SPACE(sizeof(struct timeval)) +
1142 CMSG_SPACE(sizeof(int)) + /* fd */
1143 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1144 } control = {};
1145 struct msghdr msghdr = {
1146 .msg_iov = &iovec,
1147 .msg_iovlen = 1,
1148 .msg_control = &control,
1149 .msg_controllen = sizeof(control),
1150 };
663996b3 1151
663996b3 1152 ssize_t n;
60f067b4
JS
1153 int v;
1154 int *fds = NULL;
1155 unsigned n_fds = 0;
663996b3 1156
60f067b4
JS
1157 if (ioctl(fd, SIOCINQ, &v) < 0) {
1158 log_error("SIOCINQ failed: %m");
1159 return -errno;
663996b3
MS
1160 }
1161
60f067b4
JS
1162 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, LINE_MAX + (size_t) v))
1163 return log_oom();
663996b3 1164
60f067b4
JS
1165 iovec.iov_base = s->buffer;
1166 iovec.iov_len = s->buffer_size;
663996b3 1167
60f067b4
JS
1168 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1169 if (n < 0) {
663996b3 1170 if (errno == EINTR || errno == EAGAIN)
60f067b4 1171 return 0;
663996b3 1172
60f067b4 1173 log_error("recvmsg() failed: %m");
663996b3
MS
1174 return -errno;
1175 }
1176
60f067b4
JS
1177 for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
1178
1179 if (cmsg->cmsg_level == SOL_SOCKET &&
1180 cmsg->cmsg_type == SCM_CREDENTIALS &&
1181 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1182 ucred = (struct ucred*) CMSG_DATA(cmsg);
1183 else if (cmsg->cmsg_level == SOL_SOCKET &&
1184 cmsg->cmsg_type == SCM_SECURITY) {
1185 label = (char*) CMSG_DATA(cmsg);
1186 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1187 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1188 cmsg->cmsg_type == SO_TIMESTAMP &&
1189 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1190 tv = (struct timeval*) CMSG_DATA(cmsg);
1191 else if (cmsg->cmsg_level == SOL_SOCKET &&
1192 cmsg->cmsg_type == SCM_RIGHTS) {
1193 fds = (int*) CMSG_DATA(cmsg);
1194 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1195 }
663996b3
MS
1196 }
1197
60f067b4
JS
1198 if (fd == s->syslog_fd) {
1199 if (n > 0 && n_fds == 0) {
1200 s->buffer[n] = 0;
1201 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1202 } else if (n_fds > 0)
1203 log_warning("Got file descriptors via syslog socket. Ignoring.");
663996b3 1204
60f067b4
JS
1205 } else {
1206 if (n > 0 && n_fds == 0)
1207 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1208 else if (n == 0 && n_fds == 1)
1209 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1210 else if (n_fds > 0)
1211 log_warning("Got too many file descriptors via native socket. Ignoring.");
663996b3
MS
1212 }
1213
60f067b4
JS
1214 close_many(fds, n_fds);
1215 }
1216}
663996b3 1217
60f067b4
JS
1218static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1219 Server *s = userdata;
663996b3 1220
60f067b4 1221 assert(s);
663996b3 1222
60f067b4 1223 log_info("Received request to flush runtime journal from PID %"PRIu32, si->ssi_pid);
663996b3 1224
60f067b4
JS
1225 server_flush_to_var(s);
1226 server_sync(s);
5eef597e
MP
1227 server_vacuum(s);
1228
1229 touch("/run/systemd/journal/flushed");
663996b3 1230
60f067b4
JS
1231 return 0;
1232}
663996b3 1233
60f067b4
JS
1234static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1235 Server *s = userdata;
663996b3 1236
60f067b4 1237 assert(s);
663996b3 1238
60f067b4
JS
1239 log_info("Received request to rotate journal from PID %"PRIu32, si->ssi_pid);
1240 server_rotate(s);
1241 server_vacuum(s);
663996b3 1242
60f067b4
JS
1243 return 0;
1244}
663996b3 1245
60f067b4
JS
1246static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1247 Server *s = userdata;
663996b3 1248
60f067b4 1249 assert(s);
663996b3 1250
60f067b4 1251 log_received_signal(LOG_INFO, si);
663996b3 1252
60f067b4 1253 sd_event_exit(s->event, 0);
663996b3
MS
1254 return 0;
1255}
1256
60f067b4 1257static int setup_signals(Server *s) {
663996b3 1258 sigset_t mask;
60f067b4 1259 int r;
663996b3
MS
1260
1261 assert(s);
1262
1263 assert_se(sigemptyset(&mask) == 0);
1264 sigset_add_many(&mask, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, -1);
1265 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1266
60f067b4
JS
1267 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1268 if (r < 0)
1269 return r;
1270
1271 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1272 if (r < 0)
1273 return r;
663996b3 1274
60f067b4
JS
1275 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1276 if (r < 0)
1277 return r;
663996b3 1278
60f067b4
JS
1279 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1280 if (r < 0)
1281 return r;
663996b3
MS
1282
1283 return 0;
1284}
1285
1286static int server_parse_proc_cmdline(Server *s) {
1287 _cleanup_free_ char *line = NULL;
5eef597e 1288 const char *w, *state;
663996b3 1289 size_t l;
60f067b4 1290 int r;
663996b3 1291
60f067b4
JS
1292 r = proc_cmdline(&line);
1293 if (r < 0)
663996b3 1294 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
60f067b4 1295 if (r <= 0)
663996b3 1296 return 0;
663996b3
MS
1297
1298 FOREACH_WORD_QUOTED(w, l, line, state) {
1299 _cleanup_free_ char *word;
1300
1301 word = strndup(w, l);
1302 if (!word)
1303 return -ENOMEM;
1304
1305 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1306 r = parse_boolean(word + 35);
1307 if (r < 0)
1308 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1309 else
1310 s->forward_to_syslog = r;
1311 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1312 r = parse_boolean(word + 33);
1313 if (r < 0)
1314 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1315 else
1316 s->forward_to_kmsg = r;
1317 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1318 r = parse_boolean(word + 36);
1319 if (r < 0)
1320 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1321 else
1322 s->forward_to_console = r;
60f067b4
JS
1323 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1324 r = parse_boolean(word + 33);
1325 if (r < 0)
1326 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1327 else
1328 s->forward_to_wall = r;
663996b3
MS
1329 } else if (startswith(word, "systemd.journald"))
1330 log_warning("Invalid systemd.journald parameter. Ignoring.");
1331 }
5eef597e 1332 /* do not warn about state here, since probably systemd already did */
663996b3
MS
1333
1334 return 0;
1335}
1336
1337static int server_parse_config_file(Server *s) {
663996b3
MS
1338 assert(s);
1339
5eef597e
MP
1340 return config_parse(NULL, "/etc/systemd/journald.conf", NULL,
1341 "Journal\0",
1342 config_item_perf_lookup, journald_gperf_lookup,
1343 false, false, true, s);
663996b3
MS
1344}
1345
60f067b4
JS
1346static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1347 Server *s = userdata;
663996b3
MS
1348
1349 assert(s);
1350
60f067b4 1351 server_sync(s);
663996b3
MS
1352 return 0;
1353}
1354
14228c0d 1355int server_schedule_sync(Server *s, int priority) {
663996b3
MS
1356 int r;
1357
1358 assert(s);
1359
14228c0d
MB
1360 if (priority <= LOG_CRIT) {
1361 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1362 server_sync(s);
1363 return 0;
1364 }
1365
663996b3
MS
1366 if (s->sync_scheduled)
1367 return 0;
1368
60f067b4
JS
1369 if (s->sync_interval_usec > 0) {
1370 usec_t when;
1371
1372 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1373 if (r < 0)
1374 return r;
1375
1376 when += s->sync_interval_usec;
1377
1378 if (!s->sync_event_source) {
1379 r = sd_event_add_time(
1380 s->event,
1381 &s->sync_event_source,
1382 CLOCK_MONOTONIC,
1383 when, 0,
1384 server_dispatch_sync, s);
1385 if (r < 0)
1386 return r;
14228c0d 1387
60f067b4
JS
1388 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1389 } else {
1390 r = sd_event_source_set_time(s->sync_event_source, when);
1391 if (r < 0)
1392 return r;
663996b3 1393
60f067b4
JS
1394 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1395 }
663996b3 1396 if (r < 0)
60f067b4
JS
1397 return r;
1398
1399 s->sync_scheduled = true;
663996b3
MS
1400 }
1401
60f067b4
JS
1402 return 0;
1403}
1404
1405static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1406 Server *s = userdata;
1407
1408 assert(s);
1409
1410 server_cache_hostname(s);
1411 return 0;
1412}
1413
1414static int server_open_hostname(Server *s) {
1415 int r;
1416
1417 assert(s);
1418
1419 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1420 if (s->hostname_fd < 0) {
1421 log_error("Failed to open /proc/sys/kernel/hostname: %m");
1422 return -errno;
1423 }
1424
1425 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1426 if (r < 0) {
1427 /* kernels prior to 3.2 don't support polling this file. Ignore
1428 * the failure. */
1429 if (r == -EPERM) {
1430 log_warning("Failed to register hostname fd in event loop: %s. Ignoring.",
1431 strerror(-r));
1432 s->hostname_fd = safe_close(s->hostname_fd);
1433 return 0;
1434 }
1435
1436 log_error("Failed to register hostname fd in event loop: %s", strerror(-r));
1437 return r;
1438 }
1439
1440 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1441 if (r < 0) {
1442 log_error("Failed to adjust priority of host name event source: %s", strerror(-r));
1443 return r;
1444 }
663996b3
MS
1445
1446 return 0;
1447}
1448
1449int server_init(Server *s) {
1450 int n, r, fd;
1451
1452 assert(s);
1453
1454 zero(*s);
60f067b4 1455 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->hostname_fd = -1;
663996b3
MS
1456 s->compress = true;
1457 s->seal = true;
1458
1459 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1460 s->sync_scheduled = false;
1461
1462 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1463 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1464
60f067b4 1465 s->forward_to_wall = true;
663996b3 1466
e842803a
MB
1467 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1468
663996b3
MS
1469 s->max_level_store = LOG_DEBUG;
1470 s->max_level_syslog = LOG_DEBUG;
1471 s->max_level_kmsg = LOG_NOTICE;
1472 s->max_level_console = LOG_INFO;
60f067b4 1473 s->max_level_wall = LOG_EMERG;
663996b3
MS
1474
1475 memset(&s->system_metrics, 0xFF, sizeof(s->system_metrics));
1476 memset(&s->runtime_metrics, 0xFF, sizeof(s->runtime_metrics));
1477
1478 server_parse_config_file(s);
1479 server_parse_proc_cmdline(s);
1480 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
5eef597e
MP
1481 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1482 s->rate_limit_interval, s->rate_limit_burst);
663996b3
MS
1483 s->rate_limit_interval = s->rate_limit_burst = 0;
1484 }
1485
1486 mkdir_p("/run/systemd/journal", 0755);
1487
5eef597e 1488 s->user_journals = ordered_hashmap_new(NULL);
663996b3
MS
1489 if (!s->user_journals)
1490 return log_oom();
1491
1492 s->mmap = mmap_cache_new();
1493 if (!s->mmap)
1494 return log_oom();
1495
60f067b4
JS
1496 r = sd_event_default(&s->event);
1497 if (r < 0) {
1498 log_error("Failed to create event loop: %s", strerror(-r));
1499 return r;
663996b3
MS
1500 }
1501
60f067b4
JS
1502 sd_event_set_watchdog(s->event, true);
1503
663996b3
MS
1504 n = sd_listen_fds(true);
1505 if (n < 0) {
1506 log_error("Failed to read listening file descriptors from environment: %s", strerror(-n));
1507 return n;
1508 }
1509
1510 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1511
1512 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1513
1514 if (s->native_fd >= 0) {
1515 log_error("Too many native sockets passed.");
1516 return -EINVAL;
1517 }
1518
1519 s->native_fd = fd;
1520
1521 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1522
1523 if (s->stdout_fd >= 0) {
1524 log_error("Too many stdout sockets passed.");
1525 return -EINVAL;
1526 }
1527
1528 s->stdout_fd = fd;
1529
60f067b4
JS
1530 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1531 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
663996b3
MS
1532
1533 if (s->syslog_fd >= 0) {
1534 log_error("Too many /dev/log sockets passed.");
1535 return -EINVAL;
1536 }
1537
1538 s->syslog_fd = fd;
1539
1540 } else {
1541 log_error("Unknown socket passed.");
1542 return -EINVAL;
1543 }
1544 }
1545
1546 r = server_open_syslog_socket(s);
1547 if (r < 0)
1548 return r;
1549
1550 r = server_open_native_socket(s);
1551 if (r < 0)
1552 return r;
1553
1554 r = server_open_stdout_socket(s);
1555 if (r < 0)
1556 return r;
1557
1558 r = server_open_dev_kmsg(s);
1559 if (r < 0)
1560 return r;
1561
1562 r = server_open_kernel_seqnum(s);
1563 if (r < 0)
1564 return r;
1565
60f067b4 1566 r = server_open_hostname(s);
663996b3
MS
1567 if (r < 0)
1568 return r;
1569
60f067b4 1570 r = setup_signals(s);
663996b3
MS
1571 if (r < 0)
1572 return r;
1573
1574 s->udev = udev_new();
1575 if (!s->udev)
1576 return -ENOMEM;
1577
60f067b4 1578 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
663996b3
MS
1579 if (!s->rate_limit)
1580 return -ENOMEM;
1581
60f067b4
JS
1582 r = cg_get_root_path(&s->cgroup_root);
1583 if (r < 0)
1584 return r;
1585
1586 server_cache_hostname(s);
1587 server_cache_boot_id(s);
1588 server_cache_machine_id(s);
1589
5eef597e 1590 r = system_journal_open(s, false);
663996b3
MS
1591 if (r < 0)
1592 return r;
1593
1594 return 0;
1595}
1596
1597void server_maybe_append_tags(Server *s) {
1598#ifdef HAVE_GCRYPT
1599 JournalFile *f;
1600 Iterator i;
1601 usec_t n;
1602
1603 n = now(CLOCK_REALTIME);
1604
1605 if (s->system_journal)
1606 journal_file_maybe_append_tag(s->system_journal, n);
1607
5eef597e 1608 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
663996b3
MS
1609 journal_file_maybe_append_tag(f, n);
1610#endif
1611}
1612
1613void server_done(Server *s) {
1614 JournalFile *f;
1615 assert(s);
1616
1617 while (s->stdout_streams)
1618 stdout_stream_free(s->stdout_streams);
1619
1620 if (s->system_journal)
1621 journal_file_close(s->system_journal);
1622
1623 if (s->runtime_journal)
1624 journal_file_close(s->runtime_journal);
1625
5eef597e 1626 while ((f = ordered_hashmap_steal_first(s->user_journals)))
663996b3
MS
1627 journal_file_close(f);
1628
5eef597e 1629 ordered_hashmap_free(s->user_journals);
663996b3 1630
60f067b4
JS
1631 sd_event_source_unref(s->syslog_event_source);
1632 sd_event_source_unref(s->native_event_source);
1633 sd_event_source_unref(s->stdout_event_source);
1634 sd_event_source_unref(s->dev_kmsg_event_source);
1635 sd_event_source_unref(s->sync_event_source);
1636 sd_event_source_unref(s->sigusr1_event_source);
1637 sd_event_source_unref(s->sigusr2_event_source);
1638 sd_event_source_unref(s->sigterm_event_source);
1639 sd_event_source_unref(s->sigint_event_source);
1640 sd_event_source_unref(s->hostname_event_source);
1641 sd_event_unref(s->event);
1642
1643 safe_close(s->syslog_fd);
1644 safe_close(s->native_fd);
1645 safe_close(s->stdout_fd);
1646 safe_close(s->dev_kmsg_fd);
1647 safe_close(s->hostname_fd);
663996b3
MS
1648
1649 if (s->rate_limit)
1650 journal_rate_limit_free(s->rate_limit);
1651
1652 if (s->kernel_seqnum)
1653 munmap(s->kernel_seqnum, sizeof(uint64_t));
1654
1655 free(s->buffer);
1656 free(s->tty_path);
60f067b4 1657 free(s->cgroup_root);
663996b3
MS
1658
1659 if (s->mmap)
1660 mmap_cache_unref(s->mmap);
1661
1662 if (s->udev)
1663 udev_unref(s->udev);
1664}