]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/x86/include/asm/desc.h
x86/bugs, KVM: Support the combination of guest and host IBRS
[mirror_ubuntu-artful-kernel.git] / arch / x86 / include / asm / desc.h
CommitLineData
1965aae3
PA
1#ifndef _ASM_X86_DESC_H
2#define _ASM_X86_DESC_H
80fbb69a 3
80fbb69a
GOC
4#include <asm/desc_defs.h>
5#include <asm/ldt.h>
881c2975 6#include <asm/mmu.h>
69218e47 7#include <asm/fixmap.h>
bda9eb32
TG
8#include <asm/pgtable.h>
9#include <asm/cpu_entry_area.h>
9a3865b1 10
54cd0eac 11#include <linux/smp.h>
c6ae41e7 12#include <linux/percpu.h>
80fbb69a 13
9a3865b1
IM
14static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
15{
16 desc->limit0 = info->limit & 0x0ffff;
17
18 desc->base0 = (info->base_addr & 0x0000ffff);
19 desc->base1 = (info->base_addr & 0x00ff0000) >> 16;
20
21 desc->type = (info->read_exec_only ^ 1) << 1;
22 desc->type |= info->contents << 2;
f4b13d6f
TG
23 /* Set the ACCESS bit so it can be mapped RO */
24 desc->type |= 1;
9a3865b1
IM
25
26 desc->s = 1;
27 desc->dpl = 0x3;
28 desc->p = info->seg_not_present ^ 1;
29 desc->limit = (info->limit & 0xf0000) >> 16;
30 desc->avl = info->useable;
31 desc->d = info->seg_32bit;
32 desc->g = info->limit_in_pages;
33
34 desc->base2 = (info->base_addr & 0xff000000) >> 24;
64f53a04 35 /*
318f5a2a
AL
36 * Don't allow setting of the lm bit. It would confuse
37 * user_64bit_mode and would get overridden by sysret anyway.
64f53a04 38 */
9a3865b1 39 desc->l = 0;
80fbb69a
GOC
40}
41
881c2975
GOC
42extern struct desc_ptr idt_descr;
43extern gate_desc idt_table[];
404f6aac 44extern const struct desc_ptr debug_idt_descr;
629f4f9d 45extern gate_desc debug_idt_table[];
80fbb69a 46
a939098a
GC
47struct gdt_page {
48 struct desc_struct gdt[GDT_ENTRIES];
49} __attribute__((aligned(PAGE_SIZE)));
9a3865b1 50
9b8de747 51DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
a939098a 52
69218e47
TG
53/* Provide the original GDT */
54static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
a939098a
GC
55{
56 return per_cpu(gdt_page, cpu).gdt;
57}
58
69218e47
TG
59/* Provide the current original GDT */
60static inline struct desc_struct *get_current_gdt_rw(void)
61{
62 return this_cpu_ptr(&gdt_page)->gdt;
63}
64
69218e47
TG
65/* Provide the fixmap address of the remapped GDT */
66static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
67{
b17894f1 68 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
69218e47
TG
69}
70
69218e47
TG
/* Return the read-only GDT of the CPU this code is currently running on. */
static inline struct desc_struct *get_current_gdt_ro(void)
{
	int this_cpu = smp_processor_id();

	return get_cpu_gdt_ro(this_cpu);
}
76
aa4ea675
AL
77/* Provide the physical address of the GDT page. */
78static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
79{
80 return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
81}
82
507f90c9
GOC
83static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
84 unsigned dpl, unsigned ist, unsigned seg)
85{
587719b1
TG
86 gate->offset_low = (u16) func;
87 gate->bits.p = 1;
88 gate->bits.dpl = dpl;
89 gate->bits.zero = 0;
90 gate->bits.type = type;
91 gate->offset_middle = (u16) (func >> 16);
92#ifdef CONFIG_X86_64
9a3865b1 93 gate->segment = __KERNEL_CS;
587719b1
TG
94 gate->bits.ist = ist;
95 gate->reserved = 0;
96 gate->offset_high = (u32) (func >> 32);
54cd0eac 97#else
587719b1
TG
98 gate->segment = seg;
99 gate->bits.ist = 0;
54cd0eac 100#endif
587719b1 101}
54cd0eac 102
746ff60f
GOC
103static inline int desc_empty(const void *ptr)
104{
105 const u32 *desc = ptr;
9a3865b1 106
746ff60f
GOC
107 return !(desc[0] | desc[1]);
108}
109
54cd0eac
GOC
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
/*
 * Without paravirt, the descriptor-table operations map directly onto
 * the native_* helpers or onto the raw instruction.
 */
#define load_TR_desc()				native_load_tr_desc()
#define load_gdt(dtr)				native_load_gdt(dtr)
#define load_idt(dtr)				native_load_idt(dtr)
#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr)				native_store_gdt(dtr)
#define store_tr(tr)				(tr = native_store_tr())

#define load_TLS(t, cpu)			native_load_tls(t, cpu)
#define set_ldt					native_set_ldt

#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)

/* No-op when paravirt is disabled. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

/* No-op when paravirt is disabled. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif	/* CONFIG_PARAVIRT */
54cd0eac 137
8229d754
JSR
/* Store the LDT selector into @ldt with the sldt instruction. */
#define store_ldt(ldt)	asm("sldt %0" : "=m" (ldt))
139
9a3865b1 140static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
54cd0eac
GOC
141{
142 memcpy(&idt[entry], gate, sizeof(*gate));
143}
144
9a3865b1 145static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
54cd0eac
GOC
146{
147 memcpy(&ldt[entry], desc, 8);
148}
149
9a3865b1
IM
150static inline void
151native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
54cd0eac
GOC
152{
153 unsigned int size;
9a3865b1 154
54cd0eac 155 switch (type) {
9a3865b1
IM
156 case DESC_TSS: size = sizeof(tss_desc); break;
157 case DESC_LDT: size = sizeof(ldt_desc); break;
158 default: size = sizeof(*gdt); break;
54cd0eac 159 }
9a3865b1 160
54cd0eac
GOC
161 memcpy(&gdt[entry], desc, size);
162}
163
54cd0eac
GOC
164static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
165 unsigned long limit, unsigned char type,
166 unsigned char flags)
167{
168 desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
169 desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
c1773a16
JP
170 (limit & 0x000f0000) | ((type & 0xff) << 8) |
171 ((flags & 0xf) << 20);
54cd0eac
GOC
172 desc->p = 1;
173}
174
54cd0eac 175
587719b1
TG
/*
 * Fill the TSS/LDT descriptor at @d with base @addr, limit @size and
 * descriptor type @type.  On 64-bit the descriptor is a
 * struct ldttss_desc64; otherwise it is packed via pack_descriptor().
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr,
					 unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *sysdesc = d;

	/* Start from all-zero so every field not set below stays 0. */
	memset(sysdesc, 0, sizeof(*sysdesc));

	/* Base address, spread over four fields. */
	sysdesc->base0	= (u16) addr;
	sysdesc->base1	= (addr >> 16) & 0xFF;
	sysdesc->base2	= (addr >> 24) & 0xFF;
	sysdesc->base3	= (u32) (addr >> 32);

	/* Limit, spread over two fields. */
	sysdesc->limit0	= size & 0xFFFF;
	sysdesc->limit1	= (size >> 16) & 0xF;

	sysdesc->type	= type;
	sysdesc->p	= 1;
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}
196
7123a5de 197static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
c81c6ca4 198{
69218e47 199 struct desc_struct *d = get_cpu_gdt_rw(cpu);
c81c6ca4
GOC
200 tss_desc tss;
201
f6e0eba1 202 set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
4f53ab14 203 __KERNEL_TSS_LIMIT);
c81c6ca4
GOC
204 write_gdt_entry(d, entry, &tss, DESC_TSS);
205}
206
207#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
208
54cd0eac
GOC
209static inline void native_set_ldt(const void *addr, unsigned int entries)
210{
211 if (likely(entries == 0))
c1773a16 212 asm volatile("lldt %w0"::"q" (0));
54cd0eac
GOC
213 else {
214 unsigned cpu = smp_processor_id();
215 ldt_desc ldt;
216
5ac37f87
MK
217 set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
218 entries * LDT_ENTRY_SIZE - 1);
69218e47 219 write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
54cd0eac 220 &ldt, DESC_LDT);
c1773a16 221 asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
54cd0eac
GOC
222 }
223}
224
45fc8757
TG
225static inline void native_load_gdt(const struct desc_ptr *dtr)
226{
227 asm volatile("lgdt %0"::"m" (*dtr));
228}
229
230static inline void native_load_idt(const struct desc_ptr *dtr)
231{
232 asm volatile("lidt %0"::"m" (*dtr));
233}
234
235static inline void native_store_gdt(struct desc_ptr *dtr)
236{
237 asm volatile("sgdt %0":"=m" (*dtr));
238}
239
edf3ab00 240static inline void store_idt(struct desc_ptr *dtr)
45fc8757
TG
241{
242 asm volatile("sidt %0":"=m" (*dtr));
243}
244
245/*
246 * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
247 * a read-only remapping. To prevent a page fault, the GDT is switched to the
248 * original writeable version when needed.
249 */
250#ifdef CONFIG_X86_64
54cd0eac
GOC
251static inline void native_load_tr_desc(void)
252{
45fc8757
TG
253 struct desc_ptr gdt;
254 int cpu = raw_smp_processor_id();
255 bool restore = 0;
256 struct desc_struct *fixmap_gdt;
257
258 native_store_gdt(&gdt);
259 fixmap_gdt = get_cpu_gdt_ro(cpu);
260
261 /*
262 * If the current GDT is the read-only fixmap, swap to the original
263 * writeable version. Swap back at the end.
264 */
265 if (gdt.address == (unsigned long)fixmap_gdt) {
266 load_direct_gdt(cpu);
267 restore = 1;
268 }
54cd0eac 269 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
45fc8757
TG
270 if (restore)
271 load_fixmap_gdt(cpu);
272}
273#else
274static inline void native_load_tr_desc(void)
275{
276 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
277}
278#endif
279
280static inline unsigned long native_store_tr(void)
281{
282 unsigned long tr;
283
284 asm volatile("str %0":"=r" (tr));
285
286 return tr;
287}
288
289static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
290{
291 struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
292 unsigned int i;
293
294 for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
295 gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
54cd0eac
GOC
296}
297
b7ceaec1
AL
298DECLARE_PER_CPU(bool, __tss_limit_invalid);
299
b7ffc44d
AL
300static inline void force_reload_TR(void)
301{
69218e47 302 struct desc_struct *d = get_current_gdt_rw();
b7ffc44d
AL
303 tss_desc tss;
304
305 memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
306
307 /*
308 * LTR requires an available TSS, and the TSS is currently
309 * busy. Make it be available so that LTR will work.
310 */
311 tss.type = DESC_TSS;
312 write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
313
314 load_TR_desc();
b7ceaec1 315 this_cpu_write(__tss_limit_invalid, false);
b7ffc44d
AL
316}
317
b7ceaec1
AL
318/*
319 * Call this if you need the TSS limit to be correct, which should be the case
320 * if and only if you have TIF_IO_BITMAP set or you're switching to a task
321 * with TIF_IO_BITMAP set.
322 */
323static inline void refresh_tss_limit(void)
b7ffc44d
AL
324{
325 DEBUG_LOCKS_WARN_ON(preemptible());
326
b7ceaec1 327 if (unlikely(this_cpu_read(__tss_limit_invalid)))
b7ffc44d 328 force_reload_TR();
b7ffc44d
AL
329}
330
331/*
332 * If you do something evil that corrupts the cached TSS limit (I'm looking
333 * at you, VMX exits), call this function.
334 *
335 * The optimization here is that the TSS limit only matters for Linux if the
336 * IO bitmap is in use. If the TSS limit gets forced to its minimum value,
337 * everything works except that IO bitmap will be ignored and all CPL 3 IO
338 * instructions will #GP, which is exactly what we want for normal tasks.
339 */
340static inline void invalidate_tss_limit(void)
341{
342 DEBUG_LOCKS_WARN_ON(preemptible());
343
344 if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
345 force_reload_TR();
346 else
b7ceaec1 347 this_cpu_write(__tss_limit_invalid, true);
b7ffc44d
AL
348}
349
e30ab185
AL
/*
 * True when @info has the "empty" pattern: every field zero except
 * read_exec_only and seg_not_present, which must both be 1.
 * This intentionally ignores lm, since 32-bit apps don't have that field.
 */
#define LDT_empty(info)					\
	(!(info)->base_addr		&&		\
	 !(info)->limit			&&		\
	 !(info)->contents		&&		\
	 (info)->read_exec_only == 1	&&		\
	 !(info)->seg_32bit		&&		\
	 !(info)->limit_in_pages	&&		\
	 (info)->seg_not_present == 1	&&		\
	 !(info)->useable)
881c2975 360
3669ef9f
AL
361/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
362static inline bool LDT_zero(const struct user_desc *info)
363{
364 return (info->base_addr == 0 &&
365 info->limit == 0 &&
366 info->contents == 0 &&
367 info->read_exec_only == 0 &&
368 info->seg_32bit == 0 &&
369 info->limit_in_pages == 0 &&
370 info->seg_not_present == 0 &&
371 info->useable == 0);
372}
373
881c2975
GOC
374static inline void clear_LDT(void)
375{
376 set_ldt(NULL, 0);
377}
378
1bd5718c 379static inline unsigned long get_desc_base(const struct desc_struct *desc)
cc697852 380{
2c75910f 381 return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
cc697852 382}
1bd5718c 383
57594742
AM
384static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
385{
386 desc->base0 = base & 0xffff;
387 desc->base1 = (base >> 16) & 0xff;
388 desc->base2 = (base >> 24) & 0xff;
389}
390
1bd5718c
RM
391static inline unsigned long get_desc_limit(const struct desc_struct *desc)
392{
393 return desc->limit0 | (desc->limit << 16);
394}
395
57594742
AM
396static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
397{
398 desc->limit0 = limit & 0xffff;
399 desc->limit = (limit >> 16) & 0xf;
400}
401
228bdaa9
SR
#ifdef CONFIG_X86_64
/*
 * Install @addr as a DPL-0 interrupt gate without IST in slot @gate of
 * debug_idt_table.
 */
static inline void set_nmi_gate(int gate, void *addr)
{
	unsigned long handler = (unsigned long)addr;
	gate_desc desc;

	pack_gate(&desc, GATE_INTERRUPT, handler, 0, 0, __KERNEL_CS);
	write_idt_entry(debug_idt_table, gate, &desc);
}
#endif
411
cf910e83
SA
412#ifdef CONFIG_TRACING
413extern struct desc_ptr trace_idt_descr;
414extern gate_desc trace_idt_table[];
415static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
416{
417 write_idt_entry(trace_idt_table, entry, gate);
418}
25c74b10
SA
419
420static inline void _trace_set_gate(int gate, unsigned type, void *addr,
421 unsigned dpl, unsigned ist, unsigned seg)
422{
423 gate_desc s;
424
425 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
426 /*
427 * does not need to be atomic because it is only done once at
428 * setup time
429 */
430 write_trace_idt_entry(gate, &s);
431}
cf910e83
SA
432#else
433static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
434{
435}
25c74b10
SA
436
437#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
cf910e83
SA
438#endif
439
507f90c9 440static inline void _set_gate(int gate, unsigned type, void *addr,
c1773a16 441 unsigned dpl, unsigned ist, unsigned seg)
507f90c9
GOC
442{
443 gate_desc s;
9a3865b1 444
507f90c9
GOC
445 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
446 /*
447 * does not need to be atomic because it is only done once at
448 * setup time
449 */
450 write_idt_entry(idt_table, gate, &s);
cf910e83 451 write_trace_idt_entry(gate, &s);
507f90c9
GOC
452}
453
/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 *
 * Installs @addr as a DPL-0 interrupt gate for vector @n; BUGs if @n does
 * not fit in 0..0xFF.  The arguments are parenthesized so that the casts
 * apply to the whole argument expression rather than to its first operand.
 * Note that @n is evaluated twice.
 */
#define set_intr_gate_notrace(n, addr)					\
	do {								\
		BUG_ON((unsigned)(n) > 0xFF);				\
		_set_gate((n), GATE_INTERRUPT, (void *)(addr), 0, 0,	\
			  __KERNEL_CS);					\
	} while (0)
466
/*
 * Install @addr as the interrupt gate for vector @n and also hand the
 * handler named trace_<addr> to _trace_set_gate().  Because of the token
 * paste, @addr must be a plain identifier.
 */
#define set_intr_gate(n, addr)						\
	do {								\
		set_intr_gate_notrace(n, addr);				\
		_trace_set_gate(n, GATE_INTERRUPT,			\
				(void *)trace_##addr,			\
				0, 0, __KERNEL_CS);			\
	} while (0)
507f90c9 473
305b92a2 474extern int first_system_vector;
b77b881f
YL
/* used_vectors is a bitmap of the IRQ vectors that are not managed by the per-CPU vector_irq */
476extern unsigned long used_vectors[];
305b92a2
AM
477
478static inline void alloc_system_vector(int vector)
479{
b77b881f
YL
480 if (!test_bit(vector, used_vectors)) {
481 set_bit(vector, used_vectors);
305b92a2
AM
482 if (first_system_vector > vector)
483 first_system_vector = vector;
9a3865b1 484 } else {
305b92a2 485 BUG();
9a3865b1 486 }
305b92a2
AM
487}
488
cf910e83
SA
/* Reserve system vector @n, then install @addr as its interrupt gate. */
#define alloc_intr_gate(n, addr)					\
	do {								\
		alloc_system_vector(n);					\
		set_intr_gate(n, addr);					\
	} while (0)
494
507f90c9
GOC
495/*
496 * This routine sets up an interrupt gate at directory privilege level 3.
497 */
498static inline void set_system_intr_gate(unsigned int n, void *addr)
499{
500 BUG_ON((unsigned)n > 0xFF);
501 _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
502}
503
699d2937 504static inline void set_system_trap_gate(unsigned int n, void *addr)
507f90c9
GOC
505{
506 BUG_ON((unsigned)n > 0xFF);
699d2937 507 _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
507f90c9
GOC
508}
509
699d2937 510static inline void set_trap_gate(unsigned int n, void *addr)
507f90c9
GOC
511{
512 BUG_ON((unsigned)n > 0xFF);
699d2937 513 _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
507f90c9
GOC
514}
515
516static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
517{
518 BUG_ON((unsigned)n > 0xFF);
519 _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
520}
521
522static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
523{
524 BUG_ON((unsigned)n > 0xFF);
525 _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
526}
527
699d2937 528static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
507f90c9
GOC
529{
530 BUG_ON((unsigned)n > 0xFF);
531 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
532}
cc697852 533
629f4f9d
SA
534#ifdef CONFIG_X86_64
535DECLARE_PER_CPU(u32, debug_idt_ctr);
536static inline bool is_debug_idt_enabled(void)
537{
538 if (this_cpu_read(debug_idt_ctr))
539 return true;
540
541 return false;
542}
543
544static inline void load_debug_idt(void)
545{
546 load_idt((const struct desc_ptr *)&debug_idt_descr);
547}
548#else
549static inline bool is_debug_idt_enabled(void)
550{
551 return false;
552}
553
554static inline void load_debug_idt(void)
555{
556}
557#endif
558
cf910e83
SA
559#ifdef CONFIG_TRACING
560extern atomic_t trace_idt_ctr;
561static inline bool is_trace_idt_enabled(void)
562{
563 if (atomic_read(&trace_idt_ctr))
564 return true;
565
566 return false;
567}
568
569static inline void load_trace_idt(void)
570{
571 load_idt((const struct desc_ptr *)&trace_idt_descr);
572}
573#else
574static inline bool is_trace_idt_enabled(void)
575{
576 return false;
577}
578
579static inline void load_trace_idt(void)
580{
581}
582#endif
583
629f4f9d 584/*
2b4bc789 585 * The load_current_idt() must be called with interrupts disabled
629f4f9d 586 * to avoid races. That way the IDT will always be set back to the expected
2b4bc789
SRRH
587 * descriptor. It's also called when a CPU is being initialized, and
588 * that doesn't need to disable interrupts, as nothing should be
589 * bothering the CPU then.
629f4f9d
SA
590 */
591static inline void load_current_idt(void)
592{
629f4f9d
SA
593 if (is_debug_idt_enabled())
594 load_debug_idt();
cf910e83
SA
595 else if (is_trace_idt_enabled())
596 load_trace_idt();
629f4f9d
SA
597 else
598 load_idt((const struct desc_ptr *)&idt_descr);
629f4f9d 599}
1965aae3 600#endif /* _ASM_X86_DESC_H */