]> git.proxmox.com Git - mirror_lxc.git/blame_incremental - src/lxc/rexec.c
github: Update for main branch
[mirror_lxc.git] / src / lxc / rexec.c
... / ...
CommitLineData
1/* liblxcapi
2 *
3 * Copyright © 2019 Christian Brauner <christian.brauner@ubuntu.com>.
4 * Copyright © 2019 Canonical Ltd.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this library; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#ifndef _GNU_SOURCE
22#define _GNU_SOURCE 1
23#endif
24#include <errno.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29
30#include "config.h"
31#include "file_utils.h"
32#include "macro.h"
33#include "memory_utils.h"
34#include "raw_syscalls.h"
35#include "string_utils.h"
36#include "syscall_wrappers.h"
37
38#if IS_BIONIC
39#include "../include/fexecve.h"
40#endif
41
/*
 * Seal set put on the memfd copy of the binary before it is executed. A
 * /proc/self/exe that reports exactly this set is treated as the sealed copy.
 */
#define LXC_MEMFD_REXEC_SEALS \
	(F_SEAL_WRITE | F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL)
44
/*
 * Split a buffer of back-to-back NUL-terminated strings into a
 * NULL-terminated pointer vector.
 *
 * @data        buffer holding the strings
 * @data_length number of bytes in @data
 * @output      receives the newly allocated vector; *output must be NULL on
 *              entry
 *
 * The vector entries point into @data, nothing is copied, so @data has to
 * outlive the vector.
 *
 * Returns the number of strings found (0 for an empty buffer, in which case
 * *output is a vector holding only the NULL terminator). Returns -1 if an
 * argument is invalid or if the last string is not NUL-terminated inside the
 * buffer; *output is left untouched in that case.
 */
static int push_vargs(char *data, int data_length, char ***output)
{
	int num = 0;
	char *cur = data;
	char *end;
	char **args;

	if (!data || !output || *output || data_length < 0)
		return -1;

	end = data + data_length;

	/* Room for the terminating NULL entry. */
	args = must_realloc(NULL, sizeof(*args));

	while (cur < end) {
		/*
		 * Look for the terminator inside the buffer only. A plain
		 * strlen() would run past the end if the final string is
		 * unterminated.
		 */
		char *nul = memchr(cur, '\0', (size_t)(end - cur));

		if (!nul) {
			free(args);
			return -1;
		}

		num++;
		args = must_realloc(args, (num + 1) * sizeof(*args));
		args[num - 1] = cur;
		cur = nul + 1;
	}

	args[num] = NULL;
	*output = args;
	return num;
}
65
66static int parse_argv(char ***argv)
67{
68 __do_free char *cmdline = NULL;
69 int ret;
70 size_t cmdline_size;
71
72 cmdline = file_to_buf("/proc/self/cmdline", &cmdline_size);
73 if (!cmdline)
74 return -1;
75
76 ret = push_vargs(cmdline, cmdline_size, argv);
77 if (ret <= 0)
78 return -1;
79
80 move_ptr(cmdline);
81 return 0;
82}
83
84static int is_memfd(void)
85{
86 __do_close_prot_errno int fd = -EBADF;
87 int seals;
88
89 fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
90 if (fd < 0)
91 return -ENOTRECOVERABLE;
92
93 seals = fcntl(fd, F_GET_SEALS);
94 if (seals < 0) {
95 struct stat s = {0};
96
97 if (fstat(fd, &s) == 0)
98 return (s.st_nlink == 0);
99
100 return -EINVAL;
101 }
102
103 return seals == LXC_MEMFD_REXEC_SEALS;
104}
105
106static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name)
107{
108 __do_close_prot_errno int execfd = -EBADF, fd = -EBADF, memfd = -EBADF,
109 tmpfd = -EBADF;
110 int ret;
111 ssize_t bytes_sent = 0;
112 struct stat st = {0};
113
114 memfd = memfd_create(memfd_name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
115 if (memfd < 0) {
116 char template[PATH_MAX];
117
118 ret = snprintf(template, sizeof(template),
119 P_tmpdir "/.%s_XXXXXX", memfd_name);
120 if (ret < 0 || (size_t)ret >= sizeof(template))
121 return;
122
123 tmpfd = lxc_make_tmpfile(template, true);
124 if (tmpfd < 0)
125 return;
126
127 ret = fchmod(tmpfd, 0700);
128 if (ret)
129 return;
130 }
131
132 fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
133 if (fd < 0)
134 return;
135
136 /* sendfile() handles up to 2GB. */
137 ret = fstat(fd, &st);
138 if (ret)
139 return;
140
141 while (bytes_sent < st.st_size) {
142 ssize_t sent;
143
144 sent = lxc_sendfile_nointr(memfd >= 0 ? memfd : tmpfd, fd, NULL,
145 st.st_size - bytes_sent);
146 if (sent < 0) {
147 /* Fallback to shoveling data between kernel- and
148 * userspace.
149 */
150 lseek(fd, 0, SEEK_SET);
151 if (fd_to_fd(fd, memfd >= 0 ? memfd : tmpfd))
152 break;
153
154 return;
155 }
156 bytes_sent += sent;
157 }
158 close_prot_errno_disarm(fd);
159
160 if (memfd >= 0) {
161 if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS))
162 return;
163
164 execfd = memfd;
165 } else {
166 char procfd[LXC_PROC_PID_FD_LEN];
167
168 ret = snprintf(procfd, sizeof(procfd), "/proc/self/fd/%d", tmpfd);
169 if (ret < 0 || (size_t)ret >= sizeof(procfd))
170 return;
171
172 execfd = open(procfd, O_PATH | O_CLOEXEC);
173 close_prot_errno_disarm(tmpfd);
174
175 }
176 if (execfd < 0)
177 return;
178
179 fexecve(execfd, argv, envp);
180}
181
/*
 * Get cheap access to the environment. POSIX leaves it to the program to
 * declare environ itself.
 */
extern char **environ;

/*
 * Re-execute the current binary from a sealed in-memory copy unless it
 * already runs from one.
 *
 * @memfd_name name to give the memory file
 *
 * Returns 0 if the process already runs from the protected copy. On a
 * successful re-exec this function does not return. Returns -1 after
 * printing a message to stderr if the state of the binary cannot be
 * determined, the command line cannot be parsed or the re-exec fails.
 */
int lxc_rexec(const char *memfd_name)
{
	int ret;
	char **argv = NULL;

	ret = is_memfd();
	if (ret == -ENOTRECOVERABLE) {
		fprintf(stderr,
			"%s - Failed to determine whether this is a memfd\n",
			strerror(errno));
		return -1;
	}

	if (ret > 0)
		return 0;

	/* Any other result (0 or another error) means a re-exec is attempted. */
	ret = parse_argv(&argv);
	if (ret < 0) {
		fprintf(stderr,
			"%s - Failed to parse command line parameters\n",
			strerror(errno));
		return -1;
	}

	lxc_rexec_as_memfd(argv, environ, memfd_name);
	fprintf(stderr, "%s - Failed to rexec as memfd\n", strerror(errno));

	/*
	 * Still here, so the exec failed. The vector entries point into the
	 * buffer read from /proc/self/cmdline, which starts at argv[0];
	 * release both the buffer and the vector.
	 */
	free(argv[0]);
	free(argv);
	return -1;
}
215
/**
 * Library constructor: when LXC_MEMFD_REXEC is set in the environment, copy
 * whichever binary loaded liblxc into a memory file and re-execute it from
 * there. This prevents attacks that corrupt the host binary through the
 * /proc/self/exe symlink when host and container share a user namespace or
 * use an identity id mapping: CVE-2019-5736.
 *
 * The process is terminated if the re-execution fails.
 */
__attribute__((constructor)) static void liblxc_rexec(void)
{
	/* Opt-in only: nothing to do unless the variable is present. */
	if (!getenv("LXC_MEMFD_REXEC"))
		return;

	if (lxc_rexec("liblxc") == 0)
		return;

	fprintf(stderr, "Failed to re-execute liblxc via memory file descriptor\n");
	_exit(EXIT_FAILURE);
}
229}