]>
Commit | Line | Data |
---|---|---|
1507f512 MR |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright IBM Corporation, 2021 | |
4 | * | |
5 | * Author: Mike Rapoport <rppt@linux.ibm.com> | |
6 | */ | |
7 | ||
8 | #include <linux/mm.h> | |
9 | #include <linux/fs.h> | |
10 | #include <linux/swap.h> | |
11 | #include <linux/mount.h> | |
12 | #include <linux/memfd.h> | |
13 | #include <linux/bitops.h> | |
14 | #include <linux/printk.h> | |
15 | #include <linux/pagemap.h> | |
16 | #include <linux/syscalls.h> | |
17 | #include <linux/pseudo_fs.h> | |
18 | #include <linux/secretmem.h> | |
19 | #include <linux/set_memory.h> | |
20 | #include <linux/sched/signal.h> | |
11086054 | 21 | #include <linux/refcount.h> |
1507f512 MR |
22 | |
23 | #include <uapi/linux/magic.h> | |
24 | ||
25 | #include <asm/tlbflush.h> | |
26 | ||
27 | #include "internal.h" | |
28 | ||
29 | #undef pr_fmt | |
30 | #define pr_fmt(fmt) "secretmem: " fmt | |
31 | ||
32 | /* | |
33 | * Define mode and flag masks to allow validation of the system call | |
34 | * parameters. | |
35 | */ | |
36 | #define SECRETMEM_MODE_MASK (0x0) | |
37 | #define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK | |
38 | ||
39 | static bool secretmem_enable __ro_after_init; | |
40 | module_param_named(enable, secretmem_enable, bool, 0400); | |
41 | MODULE_PARM_DESC(secretmem_enable, | |
42 | "Enable secretmem and memfd_secret(2) system call"); | |
43 | ||
11086054 | 44 | static refcount_t secretmem_users; |
9a436f8f MR |
45 | |
46 | bool secretmem_active(void) | |
47 | { | |
11086054 | 48 | return !!refcount_read(&secretmem_users); |
9a436f8f MR |
49 | } |
50 | ||
1507f512 MR |
51 | static vm_fault_t secretmem_fault(struct vm_fault *vmf) |
52 | { | |
53 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | |
54 | struct inode *inode = file_inode(vmf->vma->vm_file); | |
55 | pgoff_t offset = vmf->pgoff; | |
56 | gfp_t gfp = vmf->gfp_mask; | |
57 | unsigned long addr; | |
58 | struct page *page; | |
59 | int err; | |
60 | ||
61 | if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) | |
62 | return vmf_error(-EINVAL); | |
63 | ||
64 | retry: | |
65 | page = find_lock_page(mapping, offset); | |
66 | if (!page) { | |
67 | page = alloc_page(gfp | __GFP_ZERO); | |
68 | if (!page) | |
69 | return VM_FAULT_OOM; | |
70 | ||
71 | err = set_direct_map_invalid_noflush(page); | |
72 | if (err) { | |
73 | put_page(page); | |
74 | return vmf_error(err); | |
75 | } | |
76 | ||
77 | __SetPageUptodate(page); | |
78 | err = add_to_page_cache_lru(page, mapping, offset, gfp); | |
79 | if (unlikely(err)) { | |
80 | put_page(page); | |
81 | /* | |
82 | * If a split of large page was required, it | |
83 | * already happened when we marked the page invalid | |
84 | * which guarantees that this call won't fail | |
85 | */ | |
86 | set_direct_map_default_noflush(page); | |
87 | if (err == -EEXIST) | |
88 | goto retry; | |
89 | ||
90 | return vmf_error(err); | |
91 | } | |
92 | ||
93 | addr = (unsigned long)page_address(page); | |
94 | flush_tlb_kernel_range(addr, addr + PAGE_SIZE); | |
95 | } | |
96 | ||
97 | vmf->page = page; | |
98 | return VM_FAULT_LOCKED; | |
99 | } | |
100 | ||
101 | static const struct vm_operations_struct secretmem_vm_ops = { | |
102 | .fault = secretmem_fault, | |
103 | }; | |
104 | ||
9a436f8f MR |
105 | static int secretmem_release(struct inode *inode, struct file *file) |
106 | { | |
11086054 | 107 | refcount_dec(&secretmem_users); |
9a436f8f MR |
108 | return 0; |
109 | } | |
110 | ||
1507f512 MR |
111 | static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) |
112 | { | |
113 | unsigned long len = vma->vm_end - vma->vm_start; | |
114 | ||
115 | if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) | |
116 | return -EINVAL; | |
117 | ||
118 | if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len)) | |
119 | return -EAGAIN; | |
120 | ||
121 | vma->vm_flags |= VM_LOCKED | VM_DONTDUMP; | |
122 | vma->vm_ops = &secretmem_vm_ops; | |
123 | ||
124 | return 0; | |
125 | } | |
126 | ||
127 | bool vma_is_secretmem(struct vm_area_struct *vma) | |
128 | { | |
129 | return vma->vm_ops == &secretmem_vm_ops; | |
130 | } | |
131 | ||
132 | static const struct file_operations secretmem_fops = { | |
9a436f8f | 133 | .release = secretmem_release, |
1507f512 MR |
134 | .mmap = secretmem_mmap, |
135 | }; | |
136 | ||
137 | static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode) | |
138 | { | |
139 | return false; | |
140 | } | |
141 | ||
142 | static int secretmem_migratepage(struct address_space *mapping, | |
143 | struct page *newpage, struct page *page, | |
144 | enum migrate_mode mode) | |
145 | { | |
146 | return -EBUSY; | |
147 | } | |
148 | ||
/*
 * secretmem_freepage - ->freepage() hook.
 *
 * Resets the page's direct map entry to the default and then zeroes the
 * page contents.
 */
static void secretmem_freepage(struct page *page)
{
	/* The direct map entry is restored first, then the page is cleared. */
	set_direct_map_default_noflush(page);
	clear_highpage(page);
}
154 | ||
155 | const struct address_space_operations secretmem_aops = { | |
af642374 | 156 | .set_page_dirty = __set_page_dirty_no_writeback, |
1507f512 MR |
157 | .freepage = secretmem_freepage, |
158 | .migratepage = secretmem_migratepage, | |
159 | .isolate_page = secretmem_isolate_page, | |
160 | }; | |
161 | ||
162 | static struct vfsmount *secretmem_mnt; | |
163 | ||
164 | static struct file *secretmem_file_create(unsigned long flags) | |
165 | { | |
166 | struct file *file = ERR_PTR(-ENOMEM); | |
167 | struct inode *inode; | |
168 | ||
169 | inode = alloc_anon_inode(secretmem_mnt->mnt_sb); | |
170 | if (IS_ERR(inode)) | |
171 | return ERR_CAST(inode); | |
172 | ||
173 | file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", | |
174 | O_RDWR, &secretmem_fops); | |
175 | if (IS_ERR(file)) | |
176 | goto err_free_inode; | |
177 | ||
178 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | |
179 | mapping_set_unevictable(inode->i_mapping); | |
180 | ||
181 | inode->i_mapping->a_ops = &secretmem_aops; | |
182 | ||
183 | /* pretend we are a normal file with zero size */ | |
184 | inode->i_mode |= S_IFREG; | |
185 | inode->i_size = 0; | |
186 | ||
187 | return file; | |
188 | ||
189 | err_free_inode: | |
190 | iput(inode); | |
191 | return file; | |
192 | } | |
193 | ||
194 | SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) | |
195 | { | |
196 | struct file *file; | |
197 | int fd, err; | |
198 | ||
199 | /* make sure local flags do not confict with global fcntl.h */ | |
200 | BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); | |
201 | ||
202 | if (!secretmem_enable) | |
203 | return -ENOSYS; | |
204 | ||
205 | if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) | |
206 | return -EINVAL; | |
207 | ||
208 | fd = get_unused_fd_flags(flags & O_CLOEXEC); | |
209 | if (fd < 0) | |
210 | return fd; | |
211 | ||
212 | file = secretmem_file_create(flags); | |
213 | if (IS_ERR(file)) { | |
214 | err = PTR_ERR(file); | |
215 | goto err_put_fd; | |
216 | } | |
217 | ||
218 | file->f_flags |= O_LARGEFILE; | |
219 | ||
220 | fd_install(fd, file); | |
11086054 | 221 | refcount_inc(&secretmem_users); |
1507f512 MR |
222 | return fd; |
223 | ||
224 | err_put_fd: | |
225 | put_unused_fd(fd); | |
226 | return err; | |
227 | } | |
228 | ||
229 | static int secretmem_init_fs_context(struct fs_context *fc) | |
230 | { | |
231 | return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; | |
232 | } | |
233 | ||
234 | static struct file_system_type secretmem_fs = { | |
235 | .name = "secretmem", | |
236 | .init_fs_context = secretmem_init_fs_context, | |
237 | .kill_sb = kill_anon_super, | |
238 | }; | |
239 | ||
240 | static int secretmem_init(void) | |
241 | { | |
242 | int ret = 0; | |
243 | ||
244 | if (!secretmem_enable) | |
245 | return ret; | |
246 | ||
247 | secretmem_mnt = kern_mount(&secretmem_fs); | |
248 | if (IS_ERR(secretmem_mnt)) | |
249 | ret = PTR_ERR(secretmem_mnt); | |
250 | ||
251 | /* prevent secretmem mappings from ever getting PROT_EXEC */ | |
252 | secretmem_mnt->mnt_flags |= MNT_NOEXEC; | |
253 | ||
254 | return ret; | |
255 | } | |
256 | fs_initcall(secretmem_init); |