]>
Commit | Line | Data |
---|---|---|
1e5db223 RN |
/*
 * umip.c Emulation for instructions protected by the Intel User-Mode
 * Instruction Prevention feature
 *
 * Copyright (c) 2017, Intel Corporation.
 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
 */
8 | ||
9 | #include <linux/uaccess.h> | |
10 | #include <asm/umip.h> | |
11 | #include <asm/traps.h> | |
12 | #include <asm/insn.h> | |
13 | #include <asm/insn-eval.h> | |
c6a960bb RN |
14 | #include <linux/ratelimit.h> |
15 | ||
16 | #undef pr_fmt | |
17 | #define pr_fmt(fmt) "umip: " fmt | |
1e5db223 RN |
18 | |
/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
 *
 * The User-Mode Instruction Prevention feature present in recent Intel
 * processors prevents a group of instructions (sgdt, sidt, sldt, smsw, and
 * str) from being executed with CPL > 0. If one of them is attempted at
 * CPL > 0, a general protection fault is issued.
 *
 * Rather than relaying to user space the general protection fault caused by
 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
 * can be trapped and the result of such instructions emulated to provide dummy
 * values. This allows us to both preserve the current kernel behavior and not
 * reveal the system resources that UMIP intends to protect (i.e., the
 * locations of the global descriptor and interrupt descriptor tables, the
 * segment selectors of the local descriptor table, the value of the task state
 * register and the contents of the CR0 register).
 *
 * This emulation is needed because certain applications (e.g., WineHQ and
 * DOSEMU2) rely on this subset of instructions to function.
 *
 * The instructions protected by UMIP can be split in two groups: those which
 * return a kernel memory address (sgdt and sidt) and those which return a
 * value (sldt, str and smsw).
 *
 * For the instructions that return a kernel memory address, applications
 * such as WineHQ rely on the result being located in the kernel memory space,
 * not on the actual location of the table. The result is emulated as a
 * hard-coded value that lies close to the top of the kernel memory. The limits
 * for the GDT and the IDT are set to zero.
 *
 * Given that sldt and str are not commonly used in programs that run on WineHQ
 * or DOSEMU2, they are not emulated.
 *
 * The instruction smsw is emulated to return the value that the register CR0
 * has at boot time as set in head_32.
 *
 * Also, emulation is provided only for 32-bit processes; 64-bit processes
 * that attempt to use the instructions that UMIP protects will receive the
 * SIGSEGV signal issued as a consequence of the general protection fault.
 *
 * Care is taken to appropriately emulate the results when segmentation is
 * used. That is, rather than relying on USER_DS and USER_CS, the function
 * insn_get_addr_ref() inspects the segment descriptor pointed to by the
 * registers in pt_regs. This ensures that we correctly obtain the segment
 * base address and the address and operand sizes even if the user space
 * application uses a local descriptor table.
 */
65 | ||
/*
 * Dummy descriptor table base addresses handed out by the emulation of SGDT
 * and SIDT, respectively, instead of the real locations of the tables.
 */
#define UMIP_DUMMY_GDT_BASE 0xfffe0000
#define UMIP_DUMMY_IDT_BASE 0xffff0000

/*
 * SGDT and SIDT store the contents of the global descriptor table register
 * and of the interrupt descriptor table register, respectively. Their
 * destination is a memory operand of X+2 bytes: 2 bytes hold the table limit
 * and X bytes hold the table base address. Emulation is only provided for
 * 32-bit processes, for which X is 4.
 */
#define UMIP_GDT_IDT_BASE_SIZE 4
#define UMIP_GDT_IDT_LIMIT_SIZE 2

/*
 * Identifiers of the UMIP-protected instructions. They are returned by
 * identify_insn() and index the umip_insns[] name table.
 */
enum {
	UMIP_INST_SGDT = 0,	/* 0F 01 /0 */
	UMIP_INST_SIDT = 1,	/* 0F 01 /1 */
	UMIP_INST_SMSW = 2,	/* 0F 01 /4 */
	UMIP_INST_SLDT = 3,	/* 0F 00 /0 */
	UMIP_INST_STR  = 4,	/* 0F 00 /1 */
};
1e5db223 | 84 | |
fd11a649 RN |
85 | const char * const umip_insns[5] = { |
86 | [UMIP_INST_SGDT] = "SGDT", | |
87 | [UMIP_INST_SIDT] = "SIDT", | |
88 | [UMIP_INST_SMSW] = "SMSW", | |
89 | [UMIP_INST_SLDT] = "SLDT", | |
90 | [UMIP_INST_STR] = "STR", | |
91 | }; | |
92 | ||
93 | #define umip_pr_err(regs, fmt, ...) \ | |
94 | umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) | |
95 | #define umip_pr_warning(regs, fmt, ...) \ | |
96 | umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) | |
97 | ||
98 | /** | |
99 | * umip_printk() - Print a rate-limited message | |
100 | * @regs: Register set with the context in which the warning is printed | |
101 | * @log_level: Kernel log level to print the message | |
102 | * @fmt: The text string to print | |
103 | * | |
104 | * Print the text contained in @fmt. The print rate is limited to bursts of 5 | |
105 | * messages every two minutes. The purpose of this customized version of | |
106 | * printk() is to print messages when user space processes use any of the | |
107 | * UMIP-protected instructions. Thus, the printed text is prepended with the | |
108 | * task name and process ID number of the current task as well as the | |
109 | * instruction and stack pointers in @regs as seen when entering kernel mode. | |
110 | * | |
111 | * Returns: | |
112 | * | |
113 | * None. | |
114 | */ | |
115 | static __printf(3, 4) | |
116 | void umip_printk(const struct pt_regs *regs, const char *log_level, | |
117 | const char *fmt, ...) | |
118 | { | |
119 | /* Bursts of 5 messages every two minutes */ | |
120 | static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); | |
121 | struct task_struct *tsk = current; | |
122 | struct va_format vaf; | |
123 | va_list args; | |
124 | ||
125 | if (!__ratelimit(&ratelimit)) | |
126 | return; | |
127 | ||
128 | va_start(args, fmt); | |
129 | vaf.fmt = fmt; | |
130 | vaf.va = &args; | |
131 | printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm, | |
132 | task_pid_nr(tsk), regs->ip, regs->sp, &vaf); | |
133 | va_end(args); | |
134 | } | |
135 | ||
1e5db223 RN |
136 | /** |
137 | * identify_insn() - Identify a UMIP-protected instruction | |
138 | * @insn: Instruction structure with opcode and ModRM byte. | |
139 | * | |
140 | * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected | |
141 | * instruction that can be emulated. | |
142 | * | |
143 | * Returns: | |
144 | * | |
145 | * On success, a constant identifying a specific UMIP-protected instruction that | |
146 | * can be emulated. | |
147 | * | |
148 | * -EINVAL on error or when not an UMIP-protected instruction that can be | |
149 | * emulated. | |
150 | */ | |
151 | static int identify_insn(struct insn *insn) | |
152 | { | |
153 | /* By getting modrm we also get the opcode. */ | |
154 | insn_get_modrm(insn); | |
155 | ||
156 | if (!insn->modrm.nbytes) | |
157 | return -EINVAL; | |
158 | ||
159 | /* All the instructions of interest start with 0x0f. */ | |
160 | if (insn->opcode.bytes[0] != 0xf) | |
161 | return -EINVAL; | |
162 | ||
163 | if (insn->opcode.bytes[1] == 0x1) { | |
164 | switch (X86_MODRM_REG(insn->modrm.value)) { | |
165 | case 0: | |
166 | return UMIP_INST_SGDT; | |
167 | case 1: | |
168 | return UMIP_INST_SIDT; | |
169 | case 4: | |
170 | return UMIP_INST_SMSW; | |
171 | default: | |
172 | return -EINVAL; | |
173 | } | |
6e2a3064 RN |
174 | } else if (insn->opcode.bytes[1] == 0x0) { |
175 | if (X86_MODRM_REG(insn->modrm.value) == 0) | |
176 | return UMIP_INST_SLDT; | |
177 | else if (X86_MODRM_REG(insn->modrm.value) == 1) | |
178 | return UMIP_INST_STR; | |
179 | else | |
180 | return -EINVAL; | |
181 | } else { | |
182 | return -EINVAL; | |
1e5db223 | 183 | } |
1e5db223 RN |
184 | } |
185 | ||
186 | /** | |
187 | * emulate_umip_insn() - Emulate UMIP instructions and return dummy values | |
188 | * @insn: Instruction structure with operands | |
189 | * @umip_inst: A constant indicating the instruction to emulate | |
190 | * @data: Buffer into which the dummy result is stored | |
191 | * @data_size: Size of the emulated result | |
192 | * | |
193 | * Emulate an instruction protected by UMIP and provide a dummy result. The | |
194 | * result of the emulation is saved in @data. The size of the results depends | |
195 | * on both the instruction and type of operand (register vs memory address). | |
196 | * The size of the result is updated in @data_size. Caller is responsible | |
197 | * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE + | |
198 | * UMIP_GDT_IDT_LIMIT_SIZE bytes. | |
199 | * | |
200 | * Returns: | |
201 | * | |
202 | * 0 on success, -EINVAL on error while emulating. | |
203 | */ | |
204 | static int emulate_umip_insn(struct insn *insn, int umip_inst, | |
205 | unsigned char *data, int *data_size) | |
206 | { | |
207 | unsigned long dummy_base_addr, dummy_value; | |
208 | unsigned short dummy_limit = 0; | |
209 | ||
210 | if (!data || !data_size || !insn) | |
211 | return -EINVAL; | |
212 | /* | |
213 | * These two instructions return the base address and limit of the | |
214 | * global and interrupt descriptor table, respectively. According to the | |
215 | * Intel Software Development manual, the base address can be 24-bit, | |
216 | * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is | |
217 | * 16-bit, the returned value of the base address is supposed to be a | |
218 | * zero-extended 24-byte number. However, it seems that a 32-byte number | |
219 | * is always returned irrespective of the operand size. | |
220 | */ | |
221 | if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { | |
222 | /* SGDT and SIDT do not use registers operands. */ | |
223 | if (X86_MODRM_MOD(insn->modrm.value) == 3) | |
224 | return -EINVAL; | |
225 | ||
226 | if (umip_inst == UMIP_INST_SGDT) | |
227 | dummy_base_addr = UMIP_DUMMY_GDT_BASE; | |
228 | else | |
229 | dummy_base_addr = UMIP_DUMMY_IDT_BASE; | |
230 | ||
231 | *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE; | |
232 | ||
233 | memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE); | |
234 | memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); | |
235 | ||
236 | } else if (umip_inst == UMIP_INST_SMSW) { | |
237 | dummy_value = CR0_STATE; | |
238 | ||
239 | /* | |
240 | * Even though the CR0 register has 4 bytes, the number | |
241 | * of bytes to be copied in the result buffer is determined | |
242 | * by whether the operand is a register or a memory location. | |
243 | * If operand is a register, return as many bytes as the operand | |
244 | * size. If operand is memory, return only the two least | |
245 | * siginificant bytes of CR0. | |
246 | */ | |
247 | if (X86_MODRM_MOD(insn->modrm.value) == 3) | |
248 | *data_size = insn->opnd_bytes; | |
249 | else | |
250 | *data_size = 2; | |
251 | ||
252 | memcpy(data, &dummy_value, *data_size); | |
253 | /* STR and SLDT are not emulated */ | |
254 | } else { | |
255 | return -EINVAL; | |
256 | } | |
257 | ||
258 | return 0; | |
259 | } | |
260 | ||
c6a960bb RN |
261 | /** |
262 | * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR | |
263 | * @addr: Address that caused the signal | |
264 | * @regs: Register set containing the instruction pointer | |
265 | * | |
266 | * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is | |
267 | * intended to be used to provide a segmentation fault when the result of the | |
268 | * UMIP emulation could not be copied to the user space memory. | |
269 | * | |
270 | * Returns: none | |
271 | */ | |
272 | static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) | |
273 | { | |
274 | siginfo_t info; | |
275 | struct task_struct *tsk = current; | |
276 | ||
277 | tsk->thread.cr2 = (unsigned long)addr; | |
278 | tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; | |
279 | tsk->thread.trap_nr = X86_TRAP_PF; | |
280 | ||
281 | info.si_signo = SIGSEGV; | |
282 | info.si_errno = 0; | |
283 | info.si_code = SEGV_MAPERR; | |
284 | info.si_addr = addr; | |
285 | force_sig_info(SIGSEGV, &info, tsk); | |
286 | ||
287 | if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) | |
288 | return; | |
289 | ||
fd11a649 RN |
290 | umip_pr_err(regs, "segfault in emulation. error%x\n", |
291 | X86_PF_USER | X86_PF_WRITE); | |
c6a960bb RN |
292 | } |
293 | ||
1e5db223 RN |
294 | /** |
295 | * fixup_umip_exception() - Fixup a general protection fault caused by UMIP | |
296 | * @regs: Registers as saved when entering the #GP handler | |
297 | * | |
298 | * The instructions sgdt, sidt, str, smsw, sldt cause a general protection | |
299 | * fault if executed with CPL > 0 (i.e., from user space). If the offending | |
300 | * user-space process is not in long mode, this function fixes the exception | |
301 | * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not | |
302 | * fixed up. Also long mode user-space processes are not fixed up. | |
303 | * | |
304 | * If operands are memory addresses, results are copied to user-space memory as | |
305 | * indicated by the instruction pointed by eIP using the registers indicated in | |
306 | * the instruction operands. If operands are registers, results are copied into | |
307 | * the context that was saved when entering kernel mode. | |
308 | * | |
309 | * Returns: | |
310 | * | |
311 | * True if emulation was successful; false if not. | |
312 | */ | |
313 | bool fixup_umip_exception(struct pt_regs *regs) | |
314 | { | |
315 | int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst; | |
316 | unsigned long seg_base = 0, *reg_addr; | |
317 | /* 10 bytes is the maximum size of the result of UMIP instructions */ | |
318 | unsigned char dummy_data[10] = { 0 }; | |
319 | unsigned char buf[MAX_INSN_SIZE]; | |
320 | void __user *uaddr; | |
321 | struct insn insn; | |
e2a5dca7 | 322 | int seg_defs; |
1e5db223 RN |
323 | |
324 | if (!regs) | |
325 | return false; | |
326 | ||
1e5db223 RN |
327 | /* |
328 | * If not in user-space long mode, a custom code segment could be in | |
329 | * use. This is true in protected mode (if the process defined a local | |
330 | * descriptor table), or virtual-8086 mode. In most of the cases | |
331 | * seg_base will be zero as in USER_CS. | |
332 | */ | |
333 | if (!user_64bit_mode(regs)) | |
334 | seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); | |
335 | ||
336 | if (seg_base == -1L) | |
337 | return false; | |
338 | ||
339 | not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), | |
340 | sizeof(buf)); | |
341 | nr_copied = sizeof(buf) - not_copied; | |
342 | ||
343 | /* | |
344 | * The copy_from_user above could have failed if user code is protected | |
345 | * by a memory protection key. Give up on emulation in such a case. | |
346 | * Should we issue a page fault? | |
347 | */ | |
348 | if (!nr_copied) | |
349 | return false; | |
350 | ||
351 | insn_init(&insn, buf, nr_copied, user_64bit_mode(regs)); | |
352 | ||
353 | /* | |
354 | * Override the default operand and address sizes with what is specified | |
355 | * in the code segment descriptor. The instruction decoder only sets | |
356 | * the address size it to either 4 or 8 address bytes and does nothing | |
357 | * for the operand bytes. This OK for most of the cases, but we could | |
358 | * have special cases where, for instance, a 16-bit code segment | |
359 | * descriptor is used. | |
360 | * If there is an address override prefix, the instruction decoder | |
361 | * correctly updates these values, even for 16-bit defaults. | |
362 | */ | |
363 | seg_defs = insn_get_code_seg_params(regs); | |
364 | if (seg_defs == -EINVAL) | |
365 | return false; | |
366 | ||
367 | insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); | |
368 | insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); | |
369 | ||
370 | insn_get_length(&insn); | |
371 | if (nr_copied < insn.length) | |
372 | return false; | |
373 | ||
374 | umip_inst = identify_insn(&insn); | |
375 | if (umip_inst < 0) | |
376 | return false; | |
377 | ||
fd11a649 RN |
378 | umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", |
379 | umip_insns[umip_inst]); | |
380 | ||
6e2a3064 RN |
381 | /* Do not emulate SLDT, STR or user long mode processes. */ |
382 | if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) | |
383 | return false; | |
384 | ||
fd11a649 RN |
385 | umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); |
386 | ||
1e5db223 RN |
387 | if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) |
388 | return false; | |
389 | ||
390 | /* | |
391 | * If operand is a register, write result to the copy of the register | |
392 | * value that was pushed to the stack when entering into kernel mode. | |
393 | * Upon exit, the value we write will be restored to the actual hardware | |
394 | * register. | |
395 | */ | |
396 | if (X86_MODRM_MOD(insn.modrm.value) == 3) { | |
397 | reg_offset = insn_get_modrm_rm_off(&insn, regs); | |
398 | ||
399 | /* | |
400 | * Negative values are usually errors. In memory addressing, | |
401 | * the exception is -EDOM. Since we expect a register operand, | |
402 | * all negative values are errors. | |
403 | */ | |
404 | if (reg_offset < 0) | |
405 | return false; | |
406 | ||
407 | reg_addr = (unsigned long *)((unsigned long)regs + reg_offset); | |
408 | memcpy(reg_addr, dummy_data, dummy_data_size); | |
409 | } else { | |
410 | uaddr = insn_get_addr_ref(&insn, regs); | |
411 | if ((unsigned long)uaddr == -1L) | |
412 | return false; | |
413 | ||
414 | nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size); | |
c6a960bb RN |
415 | if (nr_copied > 0) { |
416 | /* | |
417 | * If copy fails, send a signal and tell caller that | |
418 | * fault was fixed up. | |
419 | */ | |
420 | force_sig_info_umip_fault(uaddr, regs); | |
421 | return true; | |
422 | } | |
1e5db223 RN |
423 | } |
424 | ||
425 | /* increase IP to let the program keep going */ | |
426 | regs->ip += insn.length; | |
427 | return true; | |
428 | } |