]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - tools/testing/selftests/x86/sigreturn.c
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 288
[mirror_ubuntu-jammy-kernel.git] / tools / testing / selftests / x86 / sigreturn.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
4 * Copyright (c) 2014-2015 Andrew Lutomirski
5 *
6 * This is a series of tests that exercises the sigreturn(2) syscall and
7 * the IRET / SYSRET paths in the kernel.
8 *
9 * For now, this focuses on the effects of unusual CS and SS values,
10 * and it has a bunch of tests to make sure that ESP/RSP is restored
11 * properly.
12 *
13 * The basic idea behind these tests is to raise(SIGUSR1) to create a
14 * sigcontext frame, plug in the values to be tested, and then return,
15 * which implicitly invokes sigreturn(2) and programs the user context
16 * as desired.
17 *
18 * For tests for which we expect sigreturn and the subsequent return to
19 * user mode to succeed, we return to a short trampoline that generates
20 * SIGTRAP so that the meat of the tests can be ordinary C code in a
21 * SIGTRAP handler.
22 *
 * The inner workings of each test are documented below.
24 *
25 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
26 */
27
28 #define _GNU_SOURCE
29
30 #include <sys/time.h>
31 #include <time.h>
32 #include <stdlib.h>
33 #include <sys/syscall.h>
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <inttypes.h>
38 #include <sys/mman.h>
39 #include <sys/signal.h>
40 #include <sys/ucontext.h>
41 #include <asm/ldt.h>
42 #include <err.h>
43 #include <setjmp.h>
44 #include <stddef.h>
45 #include <stdbool.h>
46 #include <sys/ptrace.h>
47 #include <sys/user.h>
48
49 /* Pull in AR_xyz defines. */
50 typedef unsigned int u32;
51 typedef unsigned short u16;
52 #include "../../../../arch/x86/include/asm/desc_defs.h"
53
54 /*
55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
56 * headers.
57 */
58 #ifdef __x86_64__
59 /*
60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
61 * kernels that save SS in the sigcontext. All kernels that set
62 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
63 * regardless of SS (i.e. they implement espfix).
64 *
65 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
66 * when delivering a signal that came from 64-bit code.
67 *
68 * Sigreturn restores SS as follows:
69 *
70 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
71 * saved CS is not 64-bit)
72 * new SS = saved SS (will fail IRET and signal if invalid)
73 * else
74 * new SS = a flat 32-bit data segment
75 */
76 #define UC_SIGCONTEXT_SS 0x2
77 #define UC_STRICT_RESTORE_SS 0x4
78 #endif
79
80 /*
81 * In principle, this test can run on Linux emulation layers (e.g.
82 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
83 * entries 0-5 for their own internal purposes, so start our LDT
84 * allocations above that reservation. (The tests don't pass on LX
85 * branded zones, but at least this lets them run.)
86 */
87 #define LDT_OFFSET 6
88
89 /* An aligned stack accessible through some of our segments. */
90 static unsigned char stack16[65536] __attribute__((aligned(4096)));
91
/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.
 */
asm (".pushsection .text\n\t"
     ".type int3, @function\n\t"
     ".align 4096\n\t"
     "int3:\n\t"
     /* Stash the live SS in ECX so the SIGTRAP handler can check it. */
     "mov %ss,%ecx\n\t"
     "int3\n\t"
     ".size int3, . - int3\n\t"
     /* Pad to a full page (with int3 bytes) so the 4095-byte segment
      * limit used for code16_sel/npcode32_sel covers only this code. */
     ".align 4096, 0xcc\n\t"
     ".popsection");
/* The trampoline page, addressable from C for base/limit arithmetic. */
extern char int3[4096];
107
108 /*
 * At startup, we prepare:
110 *
111 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
112 * descriptor or out of bounds).
113 * - code16_sel: A 16-bit LDT code segment pointing to int3.
114 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
115 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
116 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
117 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
118 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
119 * stack16.
120 *
121 * For no particularly good reason, xyz_sel is a selector value with the
122 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
123 * descriptor table. These variables will be zero if their respective
124 * segments could not be allocated.
125 */
126 static unsigned short ldt_nonexistent_sel;
127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
128
129 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
130
/* Builds a ring-3 GDT selector (TI=0, RPL=3) from a table index. */
static unsigned short GDT3(int idx)
{
	unsigned short sel = (unsigned short)(idx << 3);

	return sel | 3;
}
135
/* Builds a ring-3 LDT selector (TI=1, RPL=3) from a table index. */
static unsigned short LDT3(int idx)
{
	unsigned short sel = (unsigned short)(idx << 3);

	return sel | 7;
}
140
/* Our sigaltstack scratch space. */
/*
 * NOTE(review): on recent glibc with _GNU_SOURCE, SIGSTKSZ is no longer
 * a compile-time constant, which would reject this static array size —
 * confirm this still builds on current toolchains.
 */
static char altstack_data[SIGSTKSZ];
143
144 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
145 int flags)
146 {
147 struct sigaction sa;
148 memset(&sa, 0, sizeof(sa));
149 sa.sa_sigaction = handler;
150 sa.sa_flags = SA_SIGINFO | flags;
151 sigemptyset(&sa.sa_mask);
152 if (sigaction(sig, &sa, 0))
153 err(1, "sigaction");
154 }
155
156 static void clearhandler(int sig)
157 {
158 struct sigaction sa;
159 memset(&sa, 0, sizeof(sa));
160 sa.sa_handler = SIG_DFL;
161 sigemptyset(&sa.sa_mask);
162 if (sigaction(sig, &sa, 0))
163 err(1, "sigaction");
164 }
165
/*
 * Installs one LDT entry via modify_ldt(2) and stores the matching
 * ring-3 selector in *var; on failure, prints a note and stores 0 so
 * dependent tests know to skip themselves.
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	long ret = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));

	if (ret != 0) {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
		return;
	}

	*var = LDT3(desc->entry_number);
}
176
177 static void setup_ldt(void)
178 {
179 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
180 errx(1, "stack16 is too high\n");
181 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
182 errx(1, "int3 is too high\n");
183
184 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
185
186 const struct user_desc code16_desc = {
187 .entry_number = LDT_OFFSET + 0,
188 .base_addr = (unsigned long)int3,
189 .limit = 4095,
190 .seg_32bit = 0,
191 .contents = 2, /* Code, not conforming */
192 .read_exec_only = 0,
193 .limit_in_pages = 0,
194 .seg_not_present = 0,
195 .useable = 0
196 };
197 add_ldt(&code16_desc, &code16_sel, "code16");
198
199 const struct user_desc data16_desc = {
200 .entry_number = LDT_OFFSET + 1,
201 .base_addr = (unsigned long)stack16,
202 .limit = 0xffff,
203 .seg_32bit = 0,
204 .contents = 0, /* Data, grow-up */
205 .read_exec_only = 0,
206 .limit_in_pages = 0,
207 .seg_not_present = 0,
208 .useable = 0
209 };
210 add_ldt(&data16_desc, &data16_sel, "data16");
211
212 const struct user_desc npcode32_desc = {
213 .entry_number = LDT_OFFSET + 3,
214 .base_addr = (unsigned long)int3,
215 .limit = 4095,
216 .seg_32bit = 1,
217 .contents = 2, /* Code, not conforming */
218 .read_exec_only = 0,
219 .limit_in_pages = 0,
220 .seg_not_present = 1,
221 .useable = 0
222 };
223 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
224
225 const struct user_desc npdata32_desc = {
226 .entry_number = LDT_OFFSET + 4,
227 .base_addr = (unsigned long)stack16,
228 .limit = 0xffff,
229 .seg_32bit = 1,
230 .contents = 0, /* Data, grow-up */
231 .read_exec_only = 0,
232 .limit_in_pages = 0,
233 .seg_not_present = 1,
234 .useable = 0
235 };
236 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
237
238 struct user_desc gdt_data16_desc = {
239 .entry_number = -1,
240 .base_addr = (unsigned long)stack16,
241 .limit = 0xffff,
242 .seg_32bit = 0,
243 .contents = 0, /* Data, grow-up */
244 .read_exec_only = 0,
245 .limit_in_pages = 0,
246 .seg_not_present = 0,
247 .useable = 0
248 };
249
250 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
251 /*
252 * This probably indicates vulnerability to CVE-2014-8133.
253 * Merely getting here isn't definitive, though, and we'll
254 * diagnose the problem for real later on.
255 */
256 printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
257 gdt_data16_desc.entry_number);
258 gdt_data16_idx = gdt_data16_desc.entry_number;
259 } else {
260 printf("[OK]\tset_thread_area refused 16-bit data\n");
261 }
262
263 struct user_desc gdt_npdata32_desc = {
264 .entry_number = -1,
265 .base_addr = (unsigned long)stack16,
266 .limit = 0xffff,
267 .seg_32bit = 1,
268 .contents = 0, /* Data, grow-up */
269 .read_exec_only = 0,
270 .limit_in_pages = 0,
271 .seg_not_present = 1,
272 .useable = 0
273 };
274
275 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
276 /*
277 * As a hardening measure, newer kernels don't allow this.
278 */
279 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
280 gdt_npdata32_desc.entry_number);
281 gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
282 } else {
283 printf("[OK]\tset_thread_area refused 16-bit data\n");
284 }
285 }
286
/* State used by our signal handlers. */
/* Register snapshots: at SIGUSR1 entry, as requested, and as seen at int3. */
static gregset_t initial_regs, requested_regs, resulting_regs;

/* Instructions for the SIGUSR1 handler. */
/* CS/SS values the handler plugs into the sigcontext frame. */
static volatile unsigned short sig_cs, sig_ss;
/* Results recorded by the trap handler: signal, error code, trap number. */
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
#ifdef __x86_64__
/* If set, sigtrap() zeroes the saved SS to emulate DOSEMU's mode switch. */
static volatile sig_atomic_t sig_corrupt_final_ss;
#endif
296
/* Abstractions for some 32-bit vs 64-bit differences. */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
# define REG_CX REG_RCX

/*
 * View of the packed gregs[REG_CSGSFS] word: the fields below match
 * the order this code assumes when picking out CS and SS.
 */
struct selectors {
	unsigned short cs, gs, fs, ss;
};

/* Returns a writable pointer to the saved SS inside the sigcontext. */
static unsigned short *ssptr(ucontext_t *ctx)
{
	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
	return &sels->ss;
}

/* Returns a writable pointer to the saved CS inside the sigcontext. */
static unsigned short *csptr(ucontext_t *ctx)
{
	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
	return &sels->cs;
}
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
# define REG_CX REG_ECX

/* Returns a writable pointer to the saved SS inside the sigcontext. */
static greg_t *ssptr(ucontext_t *ctx)
{
	return &ctx->uc_mcontext.gregs[REG_SS];
}

/* Returns a writable pointer to the saved CS inside the sigcontext. */
static greg_t *csptr(ucontext_t *ctx)
{
	return &ctx->uc_mcontext.gregs[REG_CS];
}
#endif
333
/*
 * Checks a given selector for its code bitness or returns -1 if it's not
 * a usable code segment selector.
 */
int cs_bitness(unsigned short cs)
{
	uint32_t ar = 0, valid = 0;

	/* LAR sets ZF when the selector's access rights are loadable. */
	__asm__ ("lar %[cs], %[ar]\n\t"
		 "jnz 1f\n\t"
		 "mov $1, %[valid]\n\t"
		 "1:"
		 : [ar] "=r" (ar), [valid] "+rm" (valid)
		 : [cs] "r" (cs));

	if (!valid)
		return -1;

	if (!(ar & (1 << 11)))
		return -1;	/* Not a code segment. */

	bool l = (ar & (1 << 21)) != 0;		/* Long-mode bit. */
	bool db = (ar & (1 << 22)) != 0;	/* Default-size bit. */

	if (l)
		return db ? -1 : 64;	/* L together with D/B is reserved. */

	return db ? 32 : 16;
}
366
367 /*
368 * Checks a given selector for its code bitness or returns -1 if it's not
369 * a usable code segment selector.
370 */
371 bool is_valid_ss(unsigned short cs)
372 {
373 uint32_t valid = 0, ar;
374 asm ("lar %[cs], %[ar]\n\t"
375 "jnz 1f\n\t"
376 "mov $1, %[valid]\n\t"
377 "1:"
378 : [ar] "=r" (ar), [valid] "+rm" (valid)
379 : [cs] "r" (cs));
380
381 if (!valid)
382 return false;
383
384 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
385 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
386 return false;
387
388 return (ar & AR_P);
389 }
390
/* Number of errors in the current test case. */
static volatile sig_atomic_t nerrs;

/*
 * Common SS-related sigcontext checks run from every signal handler.
 * On 64-bit builds: verifies UC_SIGCONTEXT_SS is set, that
 * UC_STRICT_RESTORE_SS tracks whether the interrupted code was 64-bit,
 * and that a valid saved SS matches the live SS register.  Each failure
 * increments nerrs.  Compiles to a no-op on 32-bit builds.
 */
static void validate_signal_ss(int sig, ucontext_t *ctx)
{
#ifdef __x86_64__
	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);

	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
		nerrs++;

		/*
		 * This happens on Linux 4.1.  The rest will fail, too, so
		 * return now to reduce the noise.
		 */
		return;
	}

	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
		       sig);
		nerrs++;
	}

	if (is_valid_ss(*ssptr(ctx))) {
		/*
		 * DOSEMU was written before 64-bit sigcontext had SS, and
		 * it tries to figure out the signal source SS by looking at
		 * the physical register.  Make sure that keeps working.
		 */
		unsigned short hw_ss;
		asm ("mov %%ss, %0" : "=rm" (hw_ss));
		if (hw_ss != *ssptr(ctx)) {
			printf("[FAIL]\tHW SS didn't match saved SS\n");
			nerrs++;
		}
	}
#endif
}
432
/*
 * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
 * int3 trampoline.  Sets SP to a large known value so that we can see
 * whether the value round-trips back to user mode correctly.
 */
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	validate_signal_ss(sig, ctx);

	/* Snapshot the interrupted state so sigtrap() can restore it later. */
	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));

	/* Plug the CS/SS under test into the frame to be sigreturn'd. */
	*csptr(ctx) = sig_cs;
	*ssptr(ctx) = sig_ss;

	/*
	 * The 16-bit code segment is based at int3 itself, so inside that
	 * segment the trampoline lives at offset 0.
	 */
	ctx->uc_mcontext.gregs[REG_IP] =
		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
	/* Poison value: any truncation of either 32-bit half is visible. */
	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
	ctx->uc_mcontext.gregs[REG_CX] = 0;

	/* Record what we asked for; the trampoline copies SS into CX. */
	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
	requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */

	return;
}
459
460 /*
461 * Called after a successful sigreturn (via int3) or from a failed
462 * sigreturn (directly by kernel). Restores our state so that the
463 * original raise(SIGUSR1) returns.
464 */
465 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
466 {
467 ucontext_t *ctx = (ucontext_t*)ctx_void;
468
469 validate_signal_ss(sig, ctx);
470
471 sig_err = ctx->uc_mcontext.gregs[REG_ERR];
472 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
473
474 unsigned short ss;
475 asm ("mov %%ss,%0" : "=r" (ss));
476
477 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
478 if (asm_ss != sig_ss && sig == SIGTRAP) {
479 /* Sanity check failure. */
480 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
481 ss, *ssptr(ctx), (unsigned long long)asm_ss);
482 nerrs++;
483 }
484
485 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
486 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
487
488 #ifdef __x86_64__
489 if (sig_corrupt_final_ss) {
490 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
491 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
492 nerrs++;
493 } else {
494 /*
495 * DOSEMU transitions from 32-bit to 64-bit mode by
496 * adjusting sigcontext, and it requires that this work
497 * even if the saved SS is bogus.
498 */
499 printf("\tCorrupting SS on return to 64-bit mode\n");
500 *ssptr(ctx) = 0;
501 }
502 }
503 #endif
504
505 sig_trapped = sig;
506 }
507
508 #ifdef __x86_64__
/* Tests recovery if !UC_STRICT_RESTORE_SS */
/*
 * SIGUSR2 handler: confirms the kernel set UC_STRICT_RESTORE_SS for a
 * signal raised from 64-bit code, then simulates a legacy (pre-SS-aware)
 * frame by clearing the flag and zeroing the saved SS before returning.
 */
static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = (ucontext_t*)ctx_void;

	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
		nerrs++;
		return;	/* We can't do the rest. */
	}

	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
	*ssptr(ctx) = 0;

	/* Return.  The kernel should recover without sending another signal. */
}
525
526 static int test_nonstrict_ss(void)
527 {
528 clearhandler(SIGUSR1);
529 clearhandler(SIGTRAP);
530 clearhandler(SIGSEGV);
531 clearhandler(SIGILL);
532 sethandler(SIGUSR2, sigusr2, 0);
533
534 nerrs = 0;
535
536 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
537 raise(SIGUSR2);
538 if (!nerrs)
539 printf("[OK]\tIt worked\n");
540
541 return nerrs;
542 }
543 #endif
544
545 /* Finds a usable code segment of the requested bitness. */
546 int find_cs(int bitness)
547 {
548 unsigned short my_cs;
549
550 asm ("mov %%cs,%0" : "=r" (my_cs));
551
552 if (cs_bitness(my_cs) == bitness)
553 return my_cs;
554 if (cs_bitness(my_cs + (2 << 3)) == bitness)
555 return my_cs + (2 << 3);
556 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
557 return my_cs - (2 << 3);
558 if (cs_bitness(code16_sel) == bitness)
559 return code16_sel;
560
561 printf("[WARN]\tCould not find %d-bit CS\n", bitness);
562 return -1;
563 }
564
/*
 * Runs one expected-to-succeed sigreturn case: raises SIGUSR1, lets the
 * handler install a CS of cs_bits bitness plus either a 16-bit LDT SS,
 * the current SS, or force_ss (if != -1), returns through the int3
 * trampoline, and compares the register state seen by the SIGTRAP
 * handler against what was requested.  Returns the mismatch count.
 */
static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
{
	int cs = find_cs(cs_bits);
	if (cs == -1) {
		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
		       cs_bits, use_16bit_ss ? 16 : 32);
		return 0;
	}

	if (force_ss != -1) {
		sig_ss = force_ss;
	} else {
		if (use_16bit_ss) {
			if (!data16_sel) {
				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
				       cs_bits);
				return 0;
			}
			sig_ss = data16_sel;
		} else {
			/* Reuse our own (flat 32/64-bit) SS. */
			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
		}
	}

	sig_cs = cs;

	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
	       (sig_ss & 4) ? "" : ", GDT");

	raise(SIGUSR1);

	/*
	 * NOTE(review): this reset also discards any errors the signal
	 * handlers counted during raise() above (their [FAIL] lines still
	 * print) — confirm that's intended.
	 */
	nerrs = 0;

	/*
	 * Check that each register had an acceptable value when the
	 * int3 trampoline was invoked.
	 */
	for (int i = 0; i < NGREG; i++) {
		greg_t req = requested_regs[i], res = resulting_regs[i];

		if (i == REG_TRAPNO || i == REG_IP)
			continue;	/* don't care */

		if (i == REG_SP) {
			/*
			 * If we were using a 16-bit stack segment, then
			 * the kernel is a bit stuck: IRET only restores
			 * the low 16 bits of ESP/RSP if SS is 16-bit.
			 * The kernel uses a hack to restore bits 31:16,
			 * but that hack doesn't help with bits 63:32.
			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
			 * AMD CPUs, they leak the high bits of the kernel
			 * espfix64 stack pointer.  There's very little that
			 * the kernel can do about it.
			 *
			 * Similarly, if we are returning to a 32-bit context,
			 * the CPU will often lose the high 32 bits of RSP.
			 */

			if (res == req)
				continue;

			/* Losing only the high half outside 64-bit CS is tolerated. */
			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
				printf("[NOTE]\tSP: %llx -> %llx\n",
				       (unsigned long long)req,
				       (unsigned long long)res);
				continue;
			}

			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
			       (unsigned long long)requested_regs[i],
			       (unsigned long long)resulting_regs[i]);
			nerrs++;
			continue;
		}

		bool ignore_reg = false;
#if __i386__
		/* REG_UESP duplicates the stack pointer; REG_SP covers it. */
		if (i == REG_UESP)
			ignore_reg = true;
#else
		/* CS/GS/FS/SS are packed into one greg; compare piecewise. */
		if (i == REG_CSGSFS) {
			struct selectors *req_sels =
				(void *)&requested_regs[REG_CSGSFS];
			struct selectors *res_sels =
				(void *)&resulting_regs[REG_CSGSFS];
			if (req_sels->cs != res_sels->cs) {
				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
				       req_sels->cs, res_sels->cs);
				nerrs++;
			}

			if (req_sels->ss != res_sels->ss) {
				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
				       req_sels->ss, res_sels->ss);
				nerrs++;
			}

			continue;
		}
#endif

		/* Sanity check on the kernel */
		if (i == REG_CX && req != res) {
			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
			       (unsigned long long)req,
			       (unsigned long long)res);
			nerrs++;
			continue;
		}

		if (req != res && !ignore_reg) {
			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
			       i, (unsigned long long)req,
			       (unsigned long long)res);
			nerrs++;
		}
	}

	if (nerrs == 0)
		printf("[OK]\tall registers okay\n");

	return nerrs;
}
690
/*
 * Runs one expected-to-fault case: plugs in a CS (found by bitness, or
 * force_cs if != -1) and a bogus SS, raises SIGUSR1, and expects the
 * kernel's return to user mode to fault (delivering SIGSEGV/SIGBUS/
 * SIGILL, which main() routes to sigtrap()).  Decodes and prints the
 * fault.  Returns 0 if a fault was observed, 1 otherwise.
 */
static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
{
	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
	if (cs == -1)
		return 0;

	sig_cs = cs;
	sig_ss = ss;

	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
	       cs_bits, sig_cs, sig_ss);

	sig_trapped = 0;
	raise(SIGUSR1);
	if (sig_trapped) {
		char errdesc[32] = "";
		if (sig_err) {
			/*
			 * Decode the x86 error code: bit 0 = external
			 * event, bits 2:1 = descriptor table, bits 15:3 =
			 * selector index.
			 */
			const char *src = (sig_err & 1) ? " EXT" : "";
			const char *table;
			if ((sig_err & 0x6) == 0x0)
				table = "GDT";
			else if ((sig_err & 0x6) == 0x4)
				table = "LDT";
			else if ((sig_err & 0x6) == 0x2)
				table = "IDT";
			else
				table = "???";

			sprintf(errdesc, "%s%s index %d, ",
				table, src, sig_err >> 3);
		}

		/* Name the trap the kernel reported (#GP/#NP/#SS/IRET). */
		char trapname[32];
		if (sig_trapno == 13)
			strcpy(trapname, "GP");
		else if (sig_trapno == 11)
			strcpy(trapname, "NP");
		else if (sig_trapno == 12)
			strcpy(trapname, "SS");
		else if (sig_trapno == 32)
			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
		else
			sprintf(trapname, "%d", sig_trapno);

		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
		       trapname, (unsigned long)sig_err,
		       errdesc, strsignal(sig_trapped));
		return 0;
	} else {
		/*
		 * This also implicitly tests UC_STRICT_RESTORE_SS:
		 * We check that these signals set UC_STRICT_RESTORE_SS and,
		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
		 * then we won't get SIGSEGV.
		 */
		printf("[FAIL]\tDid not get SIGSEGV\n");
		return 1;
	}
}
750
/*
 * Entry point: sets up the test segments and an alternate signal stack,
 * runs all valid-sigreturn cases (accumulated into the exit status),
 * then the expected-to-fault cases, then the nonstrict-SS case on
 * 64-bit.  Exits 1 if any accumulated case failed, else 0.
 */
int main()
{
	int total_nerrs = 0;
	unsigned short my_cs, my_ss;

	/* Remember our own CS/SS for use as test inputs below. */
	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
	setup_ldt();

	/* Handlers run on this stack so corrupt SP values can't hurt them. */
	stack_t stack = {
		.ss_sp = altstack_data,
		.ss_size = SIGSTKSZ,
	};
	if (sigaltstack(&stack, NULL) != 0)
		err(1, "sigaltstack");

	sethandler(SIGUSR1, sigusr1, 0);
	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);

	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
	total_nerrs += test_valid_sigreturn(64, false, -1);
	total_nerrs += test_valid_sigreturn(32, false, -1);
	total_nerrs += test_valid_sigreturn(16, false, -1);

	/*
	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
	 *
	 * This catches the original missing-espfix-on-64-bit-kernels issue
	 * as well as CVE-2014-8134.
	 */
	total_nerrs += test_valid_sigreturn(64, true, -1);
	total_nerrs += test_valid_sigreturn(32, true, -1);
	total_nerrs += test_valid_sigreturn(16, true, -1);

	if (gdt_data16_idx) {
		/*
		 * For performance reasons, Linux skips espfix if SS points
		 * to the GDT.  If we were able to allocate a 16-bit SS in
		 * the GDT, see if it leaks parts of the kernel stack pointer.
		 *
		 * This tests for CVE-2014-8133.
		 */
		total_nerrs += test_valid_sigreturn(64, true,
						    GDT3(gdt_data16_idx));
		total_nerrs += test_valid_sigreturn(32, true,
						    GDT3(gdt_data16_idx));
		total_nerrs += test_valid_sigreturn(16, true,
						    GDT3(gdt_data16_idx));
	}

#ifdef __x86_64__
	/* Nasty ABI case: check SS corruption handling. */
	sig_corrupt_final_ss = 1;
	total_nerrs += test_valid_sigreturn(32, false, -1);
	total_nerrs += test_valid_sigreturn(32, true, -1);
	sig_corrupt_final_ss = 0;
#endif

	/*
	 * We're done testing valid sigreturn cases.  Now we test states
	 * for which sigreturn itself will succeed but the subsequent
	 * entry to user mode will fail.
	 *
	 * Depending on the failure mode and the kernel bitness, these
	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
	 */
	clearhandler(SIGTRAP);
	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */

	/*
	 * NOTE(review): the test_bad_iret() return values below are not
	 * added to total_nerrs, so they never affect the exit status —
	 * confirm that's intended.
	 */

	/* Easy failures: invalid SS, resulting in #GP(0) */
	test_bad_iret(64, ldt_nonexistent_sel, -1);
	test_bad_iret(32, ldt_nonexistent_sel, -1);
	test_bad_iret(16, ldt_nonexistent_sel, -1);

	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
	test_bad_iret(64, my_cs, -1);
	test_bad_iret(32, my_cs, -1);
	test_bad_iret(16, my_cs, -1);

	/* Try to return to a not-present code segment, triggering #NP(SS). */
	test_bad_iret(32, my_ss, npcode32_sel);

	/*
	 * Try to return to a not-present but otherwise valid data segment.
	 * This will cause IRET to fail with #SS on the espfix stack.  This
	 * exercises CVE-2014-9322.
	 *
	 * Note that, if espfix is enabled, 64-bit Linux will lose track
	 * of the actual cause of failure and report #GP(0) instead.
	 * This would be very difficult for Linux to avoid, because
	 * espfix64 causes IRET failures to be promoted to #DF, so the
	 * original exception frame is never pushed onto the stack.
	 */
	test_bad_iret(32, npdata32_sel, -1);

	/*
	 * Try to return to a not-present but otherwise valid data
	 * segment without invoking espfix.  Newer kernels don't allow
	 * this to happen in the first place.  On older kernels, though,
	 * this can trigger CVE-2014-9322.
	 */
	if (gdt_npdata32_idx)
		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);

#ifdef __x86_64__
	total_nerrs += test_nonstrict_ss();
#endif

	return total_nerrs ? 1 : 0;
}