]>
Commit | Line | Data |
---|---|---|
2aae950b | 1 | /* |
2aae950b AK |
2 | * Copyright 2007 Andi Kleen, SUSE Labs. |
3 | * Subject to the GPL, v.2 | |
1c0c1b93 AL |
4 | * |
5 | * This contains most of the x86 vDSO kernel-side code. | |
2aae950b AK |
6 | */ |
7 | #include <linux/mm.h> | |
4e950f6f | 8 | #include <linux/err.h> |
2aae950b | 9 | #include <linux/sched.h> |
5a0e3ad6 | 10 | #include <linux/slab.h> |
2aae950b AK |
11 | #include <linux/init.h> |
12 | #include <linux/random.h> | |
3fa89ca7 | 13 | #include <linux/elf.h> |
d4f829dd | 14 | #include <linux/cpu.h> |
2aae950b AK |
15 | #include <asm/vgtod.h> |
16 | #include <asm/proto.h> | |
7f3646aa | 17 | #include <asm/vdso.h> |
1c0c1b93 | 18 | #include <asm/vvar.h> |
aafade24 | 19 | #include <asm/page.h> |
18d0a6fd | 20 | #include <asm/hpet.h> |
d4f829dd | 21 | #include <asm/desc.h> |
2aae950b | 22 | |
#if defined(CONFIG_X86_64)
/*
 * Non-zero allows the 64-bit and x32 vDSO images to be mapped into new
 * processes.  Defaults to enabled; overwritten by the "vdso=" boot
 * parameter handler in this file.
 */
unsigned int __read_mostly vdso64_enabled = 1;
#endif
1a21d4e0 | 26 | |
6f121e54 | 27 | void __init init_vdso_image(const struct vdso_image *image) |
1a21d4e0 | 28 | { |
1a21d4e0 | 29 | int i; |
6f121e54 | 30 | int npages = (image->size) / PAGE_SIZE; |
1a21d4e0 | 31 | |
6f121e54 AL |
32 | BUG_ON(image->size % PAGE_SIZE != 0); |
33 | for (i = 0; i < npages; i++) | |
a62c34bd AL |
34 | image->text_mapping.pages[i] = |
35 | virt_to_page(image->data + i*PAGE_SIZE); | |
1a21d4e0 | 36 | |
6f121e54 AL |
37 | apply_alternatives((struct alt_instr *)(image->data + image->alt), |
38 | (struct alt_instr *)(image->data + image->alt + | |
39 | image->alt_len)); | |
1a21d4e0 | 40 | } |
1b3f2a72 | 41 | |
2aae950b AK |
42 | struct linux_binprm; |
43 | ||
44 | /* Put the vdso above the (randomized) stack with another randomized offset. | |
45 | This way there is no hole in the middle of address space. | |
46 | To save memory make sure it is still in the same PTE as the stack top. | |
18d0a6fd AL |
47 | This doesn't give that many random bits. |
48 | ||
49 | Only used for the 64-bit and x32 vdsos. */ | |
2aae950b AK |
50 | static unsigned long vdso_addr(unsigned long start, unsigned len) |
51 | { | |
d093601b JB |
52 | #ifdef CONFIG_X86_32 |
53 | return 0; | |
54 | #else | |
2aae950b AK |
55 | unsigned long addr, end; |
56 | unsigned offset; | |
57 | end = (start + PMD_SIZE - 1) & PMD_MASK; | |
d9517346 IM |
58 | if (end >= TASK_SIZE_MAX) |
59 | end = TASK_SIZE_MAX; | |
2aae950b AK |
60 | end -= len; |
61 | /* This loses some more bits than a modulo, but is cheaper */ | |
62 | offset = get_random_int() & (PTRS_PER_PTE - 1); | |
63 | addr = start + (offset << PAGE_SHIFT); | |
64 | if (addr >= end) | |
65 | addr = end; | |
dfb09f9b BP |
66 | |
67 | /* | |
68 | * page-align it here so that get_unmapped_area doesn't | |
69 | * align it wrongfully again to the next page. addr can come in 4K | |
70 | * unaligned here as a result of stack start randomization. | |
71 | */ | |
72 | addr = PAGE_ALIGN(addr); | |
f9902472 | 73 | addr = align_vdso_addr(addr); |
dfb09f9b | 74 | |
2aae950b | 75 | return addr; |
d093601b | 76 | #endif |
2aae950b AK |
77 | } |
78 | ||
18d0a6fd | 79 | static int map_vdso(const struct vdso_image *image, bool calculate_addr) |
2aae950b AK |
80 | { |
81 | struct mm_struct *mm = current->mm; | |
18d0a6fd | 82 | struct vm_area_struct *vma; |
e6577a7c | 83 | unsigned long addr, text_start; |
18d0a6fd | 84 | int ret = 0; |
1e844fb4 | 85 | static struct page *no_pages[] = {NULL}; |
a62c34bd AL |
86 | static struct vm_special_mapping vvar_mapping = { |
87 | .name = "[vvar]", | |
88 | .pages = no_pages, | |
89 | }; | |
2aae950b | 90 | |
18d0a6fd AL |
91 | if (calculate_addr) { |
92 | addr = vdso_addr(current->mm->start_stack, | |
e6577a7c | 93 | image->size - image->sym_vvar_start); |
18d0a6fd AL |
94 | } else { |
95 | addr = 0; | |
96 | } | |
2aae950b AK |
97 | |
98 | down_write(&mm->mmap_sem); | |
18d0a6fd | 99 | |
e6577a7c AL |
100 | addr = get_unmapped_area(NULL, addr, |
101 | image->size - image->sym_vvar_start, 0, 0); | |
2aae950b AK |
102 | if (IS_ERR_VALUE(addr)) { |
103 | ret = addr; | |
104 | goto up_fail; | |
105 | } | |
106 | ||
e6577a7c AL |
107 | text_start = addr - image->sym_vvar_start; |
108 | current->mm->context.vdso = (void __user *)text_start; | |
f7b6eb3f | 109 | |
18d0a6fd AL |
110 | /* |
111 | * MAYWRITE to allow gdb to COW and set breakpoints | |
112 | */ | |
a62c34bd | 113 | vma = _install_special_mapping(mm, |
e6577a7c | 114 | text_start, |
a62c34bd AL |
115 | image->size, |
116 | VM_READ|VM_EXEC| | |
117 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | |
118 | &image->text_mapping); | |
18d0a6fd | 119 | |
a62c34bd AL |
120 | if (IS_ERR(vma)) { |
121 | ret = PTR_ERR(vma); | |
18d0a6fd | 122 | goto up_fail; |
a62c34bd | 123 | } |
18d0a6fd AL |
124 | |
125 | vma = _install_special_mapping(mm, | |
e6577a7c AL |
126 | addr, |
127 | -image->sym_vvar_start, | |
ac379835 | 128 | VM_READ|VM_MAYREAD, |
a62c34bd | 129 | &vvar_mapping); |
18d0a6fd AL |
130 | |
131 | if (IS_ERR(vma)) { | |
132 | ret = PTR_ERR(vma); | |
2aae950b | 133 | goto up_fail; |
f7b6eb3f | 134 | } |
2aae950b | 135 | |
18d0a6fd AL |
136 | if (image->sym_vvar_page) |
137 | ret = remap_pfn_range(vma, | |
e6577a7c | 138 | text_start + image->sym_vvar_page, |
18d0a6fd AL |
139 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, |
140 | PAGE_SIZE, | |
141 | PAGE_READONLY); | |
142 | ||
143 | if (ret) | |
144 | goto up_fail; | |
145 | ||
146 | #ifdef CONFIG_HPET_TIMER | |
147 | if (hpet_address && image->sym_hpet_page) { | |
148 | ret = io_remap_pfn_range(vma, | |
e6577a7c | 149 | text_start + image->sym_hpet_page, |
18d0a6fd AL |
150 | hpet_address >> PAGE_SHIFT, |
151 | PAGE_SIZE, | |
152 | pgprot_noncached(PAGE_READONLY)); | |
153 | ||
154 | if (ret) | |
155 | goto up_fail; | |
156 | } | |
157 | #endif | |
158 | ||
2aae950b | 159 | up_fail: |
18d0a6fd AL |
160 | if (ret) |
161 | current->mm->context.vdso = NULL; | |
162 | ||
2aae950b AK |
163 | up_write(&mm->mmap_sem); |
164 | return ret; | |
165 | } | |
166 | ||
18d0a6fd AL |
167 | #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) |
168 | static int load_vdso32(void) | |
169 | { | |
170 | int ret; | |
171 | ||
172 | if (vdso32_enabled != 1) /* Other values all mean "disabled" */ | |
173 | return 0; | |
174 | ||
175 | ret = map_vdso(selected_vdso32, false); | |
176 | if (ret) | |
177 | return ret; | |
178 | ||
179 | if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) | |
180 | current_thread_info()->sysenter_return = | |
181 | current->mm->context.vdso + | |
182 | selected_vdso32->sym_VDSO32_SYSENTER_RETURN; | |
183 | ||
184 | return 0; | |
185 | } | |
186 | #endif | |
187 | ||
188 | #ifdef CONFIG_X86_64 | |
1a21d4e0 L |
189 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
190 | { | |
18d0a6fd AL |
191 | if (!vdso64_enabled) |
192 | return 0; | |
193 | ||
194 | return map_vdso(&vdso_image_64, true); | |
1a21d4e0 L |
195 | } |
196 | ||
18d0a6fd AL |
197 | #ifdef CONFIG_COMPAT |
/*
 * Compat variant: tasks flagged TIF_X32 (when x32 support is built in) get
 * the x32 vDSO, subject to the same vdso64_enabled switch as the 64-bit
 * image; every other compat task gets the 32-bit vDSO via load_vdso32().
 * bprm and uses_interp are not used.
 */
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return vdso64_enabled ? map_vdso(&vdso_image_x32, true) : 0;
#endif

	return load_vdso32();
}
212 | #endif | |
213 | #else | |
/*
 * 32-bit kernel variant: the only vDSO available is the 32-bit one, so
 * simply hand off to load_vdso32().  bprm and uses_interp are not used.
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	int err = load_vdso32();

	return err;
}
218 | #endif | |
219 | ||
18d0a6fd | 220 | #ifdef CONFIG_X86_64 |
2aae950b AK |
221 | static __init int vdso_setup(char *s) |
222 | { | |
3d7ee969 | 223 | vdso64_enabled = simple_strtoul(s, NULL, 0); |
2aae950b AK |
224 | return 0; |
225 | } | |
226 | __setup("vdso=", vdso_setup); | |
b4b541a6 | 227 | #endif |
d4f829dd AL |
228 | |
229 | #ifdef CONFIG_X86_64 | |
1c0c1b93 | 230 | static void vgetcpu_cpu_init(void *arg) |
d4f829dd | 231 | { |
1c0c1b93 | 232 | int cpu = smp_processor_id(); |
a92f101b | 233 | struct desc_struct d = { }; |
d4f829dd AL |
234 | unsigned long node = 0; |
235 | #ifdef CONFIG_NUMA | |
236 | node = cpu_to_node(cpu); | |
237 | #endif | |
238 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | |
239 | write_rdtscp_aux((node << 12) | cpu); | |
240 | ||
241 | /* | |
25880156 AL |
242 | * Store cpu number in limit so that it can be loaded |
243 | * quickly in user space in vgetcpu. (12 bits for the CPU | |
244 | * and 8 bits for the node) | |
d4f829dd | 245 | */ |
a92f101b AM |
246 | d.limit0 = cpu | ((node & 0xf) << 12); |
247 | d.limit = node >> 4; | |
248 | d.type = 5; /* RO data, expand down, accessed */ | |
249 | d.dpl = 3; /* Visible to user code */ | |
250 | d.s = 1; /* Not a system segment */ | |
251 | d.p = 1; /* Present */ | |
252 | d.d = 1; /* 32-bit */ | |
d4f829dd AL |
253 | |
254 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | |
255 | } | |
256 | ||
d4f829dd | 257 | static int |
1c0c1b93 | 258 | vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg) |
d4f829dd AL |
259 | { |
260 | long cpu = (long)arg; | |
261 | ||
262 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | |
1c0c1b93 | 263 | smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); |
d4f829dd AL |
264 | |
265 | return NOTIFY_DONE; | |
266 | } | |
267 | ||
1c0c1b93 | 268 | static int __init init_vdso(void) |
d4f829dd | 269 | { |
1c0c1b93 AL |
270 | init_vdso_image(&vdso_image_64); |
271 | ||
272 | #ifdef CONFIG_X86_X32_ABI | |
273 | init_vdso_image(&vdso_image_x32); | |
274 | #endif | |
275 | ||
d4f829dd AL |
276 | cpu_notifier_register_begin(); |
277 | ||
1c0c1b93 | 278 | on_each_cpu(vgetcpu_cpu_init, NULL, 1); |
d4f829dd | 279 | /* notifier priority > KVM */ |
1c0c1b93 | 280 | __hotcpu_notifier(vgetcpu_cpu_notifier, 30); |
d4f829dd AL |
281 | |
282 | cpu_notifier_register_done(); | |
283 | ||
284 | return 0; | |
285 | } | |
1c0c1b93 AL |
286 | subsys_initcall(init_vdso); |
287 | #endif /* CONFIG_X86_64 */ |