]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/x86/include/asm/desc.h
x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss
[mirror_ubuntu-artful-kernel.git] / arch / x86 / include / asm / desc.h
1 #ifndef _ASM_X86_DESC_H
2 #define _ASM_X86_DESC_H
3
4 #include <asm/desc_defs.h>
5 #include <asm/ldt.h>
6 #include <asm/mmu.h>
7 #include <asm/fixmap.h>
8
9 #include <linux/smp.h>
10 #include <linux/percpu.h>
11
12 static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
13 {
14 desc->limit0 = info->limit & 0x0ffff;
15
16 desc->base0 = (info->base_addr & 0x0000ffff);
17 desc->base1 = (info->base_addr & 0x00ff0000) >> 16;
18
19 desc->type = (info->read_exec_only ^ 1) << 1;
20 desc->type |= info->contents << 2;
21
22 desc->s = 1;
23 desc->dpl = 0x3;
24 desc->p = info->seg_not_present ^ 1;
25 desc->limit = (info->limit & 0xf0000) >> 16;
26 desc->avl = info->useable;
27 desc->d = info->seg_32bit;
28 desc->g = info->limit_in_pages;
29
30 desc->base2 = (info->base_addr & 0xff000000) >> 24;
31 /*
32 * Don't allow setting of the lm bit. It would confuse
33 * user_64bit_mode and would get overridden by sysret anyway.
34 */
35 desc->l = 0;
36 }
37
38 extern struct desc_ptr idt_descr;
39 extern gate_desc idt_table[];
40 extern const struct desc_ptr debug_idt_descr;
41 extern gate_desc debug_idt_table[];
42
43 struct gdt_page {
44 struct desc_struct gdt[GDT_ENTRIES];
45 } __attribute__((aligned(PAGE_SIZE)));
46
47 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
48
49 /* Provide the original GDT */
50 static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
51 {
52 return per_cpu(gdt_page, cpu).gdt;
53 }
54
55 /* Provide the current original GDT */
56 static inline struct desc_struct *get_current_gdt_rw(void)
57 {
58 return this_cpu_ptr(&gdt_page)->gdt;
59 }
60
61 /* Provide the fixmap address of the remapped GDT */
62 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
63 {
64 return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
65 }
66
/* Return the read-only (remapped) GDT of the CPU we are running on. */
static inline struct desc_struct *get_current_gdt_ro(void)
{
	int this_cpu = smp_processor_id();

	return get_cpu_gdt_ro(this_cpu);
}
72
73 /* Provide the physical address of the GDT page. */
74 static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
75 {
76 return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
77 }
78
79 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
80 unsigned dpl, unsigned ist, unsigned seg)
81 {
82 gate->offset_low = (u16) func;
83 gate->bits.p = 1;
84 gate->bits.dpl = dpl;
85 gate->bits.zero = 0;
86 gate->bits.type = type;
87 gate->offset_middle = (u16) (func >> 16);
88 #ifdef CONFIG_X86_64
89 gate->segment = __KERNEL_CS;
90 gate->bits.ist = ist;
91 gate->reserved = 0;
92 gate->offset_high = (u32) (func >> 32);
93 #else
94 gate->segment = seg;
95 gate->bits.ist = 0;
96 #endif
97 }
98
99 static inline int desc_empty(const void *ptr)
100 {
101 const u32 *desc = ptr;
102
103 return !(desc[0] | desc[1]);
104 }
105
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
/*
 * Without CONFIG_PARAVIRT the descriptor-table operations expand
 * directly to the native_*() helpers or to inline asm.
 */
#define load_TR_desc()				native_load_tr_desc()
#define load_gdt(dtr)				native_load_gdt(dtr)
#define load_idt(dtr)				native_load_idt(dtr)
#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr)				native_store_gdt(dtr)
#define store_tr(tr)				(tr = native_store_tr())

#define load_TLS(t, cpu)			native_load_tls(t, cpu)
#define set_ldt					native_set_ldt

#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)

/* Empty stub: nothing is done for LDT allocation in this configuration. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

/* Empty stub: nothing is done for LDT release in this configuration. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif	/* CONFIG_PARAVIRT */
133
/* Store the result of the SLDT instruction into the lvalue @ldt. */
#define store_ldt(ldt)	asm("sldt %0" : "=m"(ldt))
135
136 static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
137 {
138 memcpy(&idt[entry], gate, sizeof(*gate));
139 }
140
141 static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
142 {
143 memcpy(&ldt[entry], desc, 8);
144 }
145
146 static inline void
147 native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
148 {
149 unsigned int size;
150
151 switch (type) {
152 case DESC_TSS: size = sizeof(tss_desc); break;
153 case DESC_LDT: size = sizeof(ldt_desc); break;
154 default: size = sizeof(*gdt); break;
155 }
156
157 memcpy(&gdt[entry], desc, size);
158 }
159
160 static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
161 unsigned long limit, unsigned char type,
162 unsigned char flags)
163 {
164 desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
165 desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
166 (limit & 0x000f0000) | ((type & 0xff) << 8) |
167 ((flags & 0xf) << 20);
168 desc->p = 1;
169 }
170
171
/*
 * Fill the TSS/LDT descriptor at @d with base @addr, limit @size and
 * descriptor type @type.
 *
 * On CONFIG_X86_64 @d is treated as a struct ldttss_desc64, which is
 * zeroed before its fields are set.  Otherwise the descriptor is built
 * by pack_descriptor() with 0x80 OR'ed into @type and no flags.
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr,
					 unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *desc = d;

	memset(desc, 0, sizeof(*desc));

	/* Base address, split over four fields. */
	desc->base0	= (u16) addr;
	desc->base1	= (addr >> 16) & 0xFF;
	desc->base2	= (addr >> 24) & 0xFF;
	desc->base3	= (u32) (addr >> 32);

	/* Limit, split over two fields. */
	desc->limit0	= size & 0xFFFF;
	desc->limit1	= (size >> 16) & 0xF;

	desc->type	= type;
	desc->p		= 1;
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}
192
193 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
194 {
195 struct desc_struct *d = get_cpu_gdt_rw(cpu);
196 tss_desc tss;
197
198 set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
199 __KERNEL_TSS_LIMIT);
200 write_gdt_entry(d, entry, &tss, DESC_TSS);
201 }
202
203 #define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
204
205 static inline void native_set_ldt(const void *addr, unsigned int entries)
206 {
207 if (likely(entries == 0))
208 asm volatile("lldt %w0"::"q" (0));
209 else {
210 unsigned cpu = smp_processor_id();
211 ldt_desc ldt;
212
213 set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
214 entries * LDT_ENTRY_SIZE - 1);
215 write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
216 &ldt, DESC_LDT);
217 asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
218 }
219 }
220
221 static inline void native_load_gdt(const struct desc_ptr *dtr)
222 {
223 asm volatile("lgdt %0"::"m" (*dtr));
224 }
225
226 static inline void native_load_idt(const struct desc_ptr *dtr)
227 {
228 asm volatile("lidt %0"::"m" (*dtr));
229 }
230
231 static inline void native_store_gdt(struct desc_ptr *dtr)
232 {
233 asm volatile("sgdt %0":"=m" (*dtr));
234 }
235
236 static inline void store_idt(struct desc_ptr *dtr)
237 {
238 asm volatile("sidt %0":"=m" (*dtr));
239 }
240
241 /*
242 * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
243 * a read-only remapping. To prevent a page fault, the GDT is switched to the
244 * original writeable version when needed.
245 */
246 #ifdef CONFIG_X86_64
247 static inline void native_load_tr_desc(void)
248 {
249 struct desc_ptr gdt;
250 int cpu = raw_smp_processor_id();
251 bool restore = 0;
252 struct desc_struct *fixmap_gdt;
253
254 native_store_gdt(&gdt);
255 fixmap_gdt = get_cpu_gdt_ro(cpu);
256
257 /*
258 * If the current GDT is the read-only fixmap, swap to the original
259 * writeable version. Swap back at the end.
260 */
261 if (gdt.address == (unsigned long)fixmap_gdt) {
262 load_direct_gdt(cpu);
263 restore = 1;
264 }
265 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
266 if (restore)
267 load_fixmap_gdt(cpu);
268 }
269 #else
270 static inline void native_load_tr_desc(void)
271 {
272 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
273 }
274 #endif
275
276 static inline unsigned long native_store_tr(void)
277 {
278 unsigned long tr;
279
280 asm volatile("str %0":"=r" (tr));
281
282 return tr;
283 }
284
285 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
286 {
287 struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
288 unsigned int i;
289
290 for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
291 gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
292 }
293
294 DECLARE_PER_CPU(bool, __tss_limit_invalid);
295
296 static inline void force_reload_TR(void)
297 {
298 struct desc_struct *d = get_current_gdt_rw();
299 tss_desc tss;
300
301 memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
302
303 /*
304 * LTR requires an available TSS, and the TSS is currently
305 * busy. Make it be available so that LTR will work.
306 */
307 tss.type = DESC_TSS;
308 write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
309
310 load_TR_desc();
311 this_cpu_write(__tss_limit_invalid, false);
312 }
313
314 /*
315 * Call this if you need the TSS limit to be correct, which should be the case
316 * if and only if you have TIF_IO_BITMAP set or you're switching to a task
317 * with TIF_IO_BITMAP set.
318 */
319 static inline void refresh_tss_limit(void)
320 {
321 DEBUG_LOCKS_WARN_ON(preemptible());
322
323 if (unlikely(this_cpu_read(__tss_limit_invalid)))
324 force_reload_TR();
325 }
326
327 /*
328 * If you do something evil that corrupts the cached TSS limit (I'm looking
329 * at you, VMX exits), call this function.
330 *
331 * The optimization here is that the TSS limit only matters for Linux if the
332 * IO bitmap is in use. If the TSS limit gets forced to its minimum value,
333 * everything works except that IO bitmap will be ignored and all CPL 3 IO
334 * instructions will #GP, which is exactly what we want for normal tasks.
335 */
336 static inline void invalidate_tss_limit(void)
337 {
338 DEBUG_LOCKS_WARN_ON(preemptible());
339
340 if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
341 force_reload_TR();
342 else
343 this_cpu_write(__tss_limit_invalid, true);
344 }
345
/*
 * True when @info is the "empty" encoding: read_exec_only and
 * seg_not_present are both 1 and every other tested field is 0.
 * This intentionally ignores lm, since 32-bit apps don't have that field.
 */
#define LDT_empty(info)						\
	(!(info)->base_addr			&&		\
	 !(info)->limit				&&		\
	 !(info)->contents			&&		\
	 (info)->read_exec_only == 1		&&		\
	 !(info)->seg_32bit			&&		\
	 !(info)->limit_in_pages		&&		\
	 (info)->seg_not_present == 1		&&		\
	 !(info)->useable)
356
357 /* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
358 static inline bool LDT_zero(const struct user_desc *info)
359 {
360 return (info->base_addr == 0 &&
361 info->limit == 0 &&
362 info->contents == 0 &&
363 info->read_exec_only == 0 &&
364 info->seg_32bit == 0 &&
365 info->limit_in_pages == 0 &&
366 info->seg_not_present == 0 &&
367 info->useable == 0);
368 }
369
/* Install an LDT with no entries by calling set_ldt() with NULL and 0. */
static inline void clear_LDT(void)
{
	set_ldt(NULL, 0);
}
374
375 static inline unsigned long get_desc_base(const struct desc_struct *desc)
376 {
377 return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
378 }
379
380 static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
381 {
382 desc->base0 = base & 0xffff;
383 desc->base1 = (base >> 16) & 0xff;
384 desc->base2 = (base >> 24) & 0xff;
385 }
386
387 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
388 {
389 return desc->limit0 | (desc->limit << 16);
390 }
391
392 static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
393 {
394 desc->limit0 = limit & 0xffff;
395 desc->limit = (limit >> 16) & 0xf;
396 }
397
#ifdef CONFIG_X86_64
/*
 * Install @addr as a DPL 0, IST 0 interrupt gate in slot @gate of
 * debug_idt_table.
 */
static inline void set_nmi_gate(int gate, void *addr)
{
	gate_desc nmi_desc;

	pack_gate(&nmi_desc, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
	write_idt_entry(debug_idt_table, gate, &nmi_desc);
}
#endif
407
408 #ifdef CONFIG_TRACING
409 extern struct desc_ptr trace_idt_descr;
410 extern gate_desc trace_idt_table[];
411 static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
412 {
413 write_idt_entry(trace_idt_table, entry, gate);
414 }
415
416 static inline void _trace_set_gate(int gate, unsigned type, void *addr,
417 unsigned dpl, unsigned ist, unsigned seg)
418 {
419 gate_desc s;
420
421 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
422 /*
423 * does not need to be atomic because it is only done once at
424 * setup time
425 */
426 write_trace_idt_entry(gate, &s);
427 }
428 #else
429 static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
430 {
431 }
432
433 #define _trace_set_gate(gate, type, addr, dpl, ist, seg)
434 #endif
435
436 static inline void _set_gate(int gate, unsigned type, void *addr,
437 unsigned dpl, unsigned ist, unsigned seg)
438 {
439 gate_desc s;
440
441 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
442 /*
443 * does not need to be atomic because it is only done once at
444 * setup time
445 */
446 write_idt_entry(idt_table, gate, &s);
447 write_trace_idt_entry(gate, &s);
448 }
449
/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 *
 * Installs @addr as a DPL 0, IST 0 interrupt gate for vector @n and
 * BUG()s if @n is above 0xFF.  The macro arguments are parenthesized so
 * that the casts apply to the whole argument expression rather than
 * only to its first operand.  Note that @n is evaluated twice.
 */
#define set_intr_gate_notrace(n, addr)					\
	do {								\
		BUG_ON((unsigned)(n) > 0xFF);				\
		_set_gate((n), GATE_INTERRUPT, (void *)(addr), 0, 0,	\
			  __KERNEL_CS);					\
	} while (0)
462
/*
 * Install @addr as the interrupt gate for vector @n, and the symbol
 * trace_<addr> as the matching gate through _trace_set_gate().  @addr
 * must be a plain identifier because it is token-pasted.
 */
#define set_intr_gate(n, addr)						\
	do {								\
		set_intr_gate_notrace(n, addr);				\
		_trace_set_gate(n, GATE_INTERRUPT,			\
				(void *)trace_##addr,			\
				0, 0, __KERNEL_CS);			\
	} while (0)
469
470 extern int first_system_vector;
471 /* used_vectors is a bitmap of the IRQ vectors that are not managed by the per-CPU vector_irq */
472 extern unsigned long used_vectors[];
473
474 static inline void alloc_system_vector(int vector)
475 {
476 if (!test_bit(vector, used_vectors)) {
477 set_bit(vector, used_vectors);
478 if (first_system_vector > vector)
479 first_system_vector = vector;
480 } else {
481 BUG();
482 }
483 }
484
/* Reserve vector @n with alloc_system_vector(), then install @addr for it. */
#define alloc_intr_gate(n, addr)			\
	do {						\
		alloc_system_vector(n);			\
		set_intr_gate(n, addr);			\
	} while (0)
490
491 /*
492 * This routine sets up an interrupt gate at directory privilege level 3.
493 */
494 static inline void set_system_intr_gate(unsigned int n, void *addr)
495 {
496 BUG_ON((unsigned)n > 0xFF);
497 _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
498 }
499
500 static inline void set_system_trap_gate(unsigned int n, void *addr)
501 {
502 BUG_ON((unsigned)n > 0xFF);
503 _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
504 }
505
506 static inline void set_trap_gate(unsigned int n, void *addr)
507 {
508 BUG_ON((unsigned)n > 0xFF);
509 _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
510 }
511
512 static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
513 {
514 BUG_ON((unsigned)n > 0xFF);
515 _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
516 }
517
518 static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
519 {
520 BUG_ON((unsigned)n > 0xFF);
521 _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
522 }
523
524 static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
525 {
526 BUG_ON((unsigned)n > 0xFF);
527 _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
528 }
529
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);

/* True when this CPU's debug_idt_ctr is non-zero. */
static inline bool is_debug_idt_enabled(void)
{
	return this_cpu_read(debug_idt_ctr) ? true : false;
}

/* Load the IDT described by debug_idt_descr. */
static inline void load_debug_idt(void)
{
	load_idt((const struct desc_ptr *)&debug_idt_descr);
}
#else
/* Outside CONFIG_X86_64 the debug IDT is never enabled and loading is a no-op. */
static inline bool is_debug_idt_enabled(void)
{
	return false;
}

static inline void load_debug_idt(void)
{
}
#endif
554
#ifdef CONFIG_TRACING
extern atomic_t trace_idt_ctr;

/* True when trace_idt_ctr is non-zero. */
static inline bool is_trace_idt_enabled(void)
{
	return atomic_read(&trace_idt_ctr) ? true : false;
}

/* Load the IDT described by trace_idt_descr. */
static inline void load_trace_idt(void)
{
	load_idt((const struct desc_ptr *)&trace_idt_descr);
}
#else
/* Without CONFIG_TRACING the trace IDT is never enabled and loading is a no-op. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}

static inline void load_trace_idt(void)
{
}
#endif
579
580 /*
581 * The load_current_idt() must be called with interrupts disabled
582 * to avoid races. That way the IDT will always be set back to the expected
583 * descriptor. It's also called when a CPU is being initialized, and
584 * that doesn't need to disable interrupts, as nothing should be
585 * bothering the CPU then.
586 */
587 static inline void load_current_idt(void)
588 {
589 if (is_debug_idt_enabled())
590 load_debug_idt();
591 else if (is_trace_idt_enabled())
592 load_trace_idt();
593 else
594 load_idt((const struct desc_ptr *)&idt_descr);
595 }
596 #endif /* _ASM_X86_DESC_H */