1 /* By Ross Biro 1/23/92 */
3 * Pentium III FXSR, SSE support
4 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
10 #include <linux/kernel.h>
11 #include <linux/sched.h>
13 #include <linux/smp.h>
14 #include <linux/errno.h>
15 #include <linux/ptrace.h>
16 #include <linux/regset.h>
17 #include <linux/user.h>
18 #include <linux/elf.h>
19 #include <linux/security.h>
20 #include <linux/audit.h>
21 #include <linux/seccomp.h>
22 #include <linux/signal.h>
24 #include <asm/uaccess.h>
25 #include <asm/pgtable.h>
26 #include <asm/system.h>
27 #include <asm/processor.h>
29 #include <asm/debugreg.h>
32 #include <asm/prctl.h>
33 #include <asm/proto.h>
46 * does not yet catch signals sent when the child dies.
47 * in exit.c or in signal.c.
51 * Determines which flags the user has access to [1 = access, 0 = no access].
53 #define FLAG_MASK_32 ((unsigned long) \
54 (X86_EFLAGS_CF | X86_EFLAGS_PF | \
55 X86_EFLAGS_AF | X86_EFLAGS_ZF | \
56 X86_EFLAGS_SF | X86_EFLAGS_TF | \
57 X86_EFLAGS_DF | X86_EFLAGS_OF | \
58 X86_EFLAGS_RF | X86_EFLAGS_AC))
61 * Determines whether a value may be installed in a segment register.
63 static inline bool invalid_selector(u16 value
)
65 return unlikely(value
!= 0 && (value
& SEGMENT_RPL_MASK
) != USER_RPL
);
70 #define FLAG_MASK FLAG_MASK_32
72 static long *pt_regs_access(struct pt_regs
*regs
, unsigned long regno
)
74 BUILD_BUG_ON(offsetof(struct pt_regs
, bx
) != 0);
78 return ®s
->bx
+ regno
;
81 static u16
get_segment_reg(struct task_struct
*task
, unsigned long offset
)
84 * Returning the value truncates it to 16 bits.
87 if (offset
!= offsetof(struct user_regs_struct
, gs
))
88 retval
= *pt_regs_access(task_pt_regs(task
), offset
);
90 retval
= task
->thread
.gs
;
92 savesegment(gs
, retval
);
97 static int set_segment_reg(struct task_struct
*task
,
98 unsigned long offset
, u16 value
)
101 * The value argument was already truncated to 16 bits.
103 if (invalid_selector(value
))
107 * For %cs and %ss we cannot permit a null selector.
108 * We can permit a bogus selector as long as it has USER_RPL.
109 * Null selectors are fine for other segment registers, but
110 * we will never get back to user mode with invalid %cs or %ss
111 * and will take the trap in iret instead. Much code relies
112 * on user_mode() to distinguish a user trap frame (which can
113 * safely use invalid selectors) from a kernel trap frame.
116 case offsetof(struct user_regs_struct
, cs
):
117 case offsetof(struct user_regs_struct
, ss
):
118 if (unlikely(value
== 0))
122 *pt_regs_access(task_pt_regs(task
), offset
) = value
;
125 case offsetof(struct user_regs_struct
, gs
):
126 task
->thread
.gs
= value
;
129 * The user-mode %gs is not affected by
130 * kernel entry, so we must update the CPU.
132 loadsegment(gs
, value
);
138 static unsigned long debugreg_addr_limit(struct task_struct
*task
)
140 return TASK_SIZE
- 3;
143 #else /* CONFIG_X86_64 */
145 #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
147 static unsigned long *pt_regs_access(struct pt_regs
*regs
, unsigned long offset
)
149 BUILD_BUG_ON(offsetof(struct pt_regs
, r15
) != 0);
150 return ®s
->r15
+ (offset
/ sizeof(regs
->r15
));
153 static u16
get_segment_reg(struct task_struct
*task
, unsigned long offset
)
156 * Returning the value truncates it to 16 bits.
161 case offsetof(struct user_regs_struct
, fs
):
162 if (task
== current
) {
163 /* Older gas can't assemble movq %?s,%r?? */
164 asm("movl %%fs,%0" : "=r" (seg
));
167 return task
->thread
.fsindex
;
168 case offsetof(struct user_regs_struct
, gs
):
169 if (task
== current
) {
170 asm("movl %%gs,%0" : "=r" (seg
));
173 return task
->thread
.gsindex
;
174 case offsetof(struct user_regs_struct
, ds
):
175 if (task
== current
) {
176 asm("movl %%ds,%0" : "=r" (seg
));
179 return task
->thread
.ds
;
180 case offsetof(struct user_regs_struct
, es
):
181 if (task
== current
) {
182 asm("movl %%es,%0" : "=r" (seg
));
185 return task
->thread
.es
;
187 case offsetof(struct user_regs_struct
, cs
):
188 case offsetof(struct user_regs_struct
, ss
):
191 return *pt_regs_access(task_pt_regs(task
), offset
);
194 static int set_segment_reg(struct task_struct
*task
,
195 unsigned long offset
, u16 value
)
198 * The value argument was already truncated to 16 bits.
200 if (invalid_selector(value
))
204 case offsetof(struct user_regs_struct
,fs
):
206 * If this is setting fs as for normal 64-bit use but
207 * setting fs_base has implicitly changed it, leave it.
209 if ((value
== FS_TLS_SEL
&& task
->thread
.fsindex
== 0 &&
210 task
->thread
.fs
!= 0) ||
211 (value
== 0 && task
->thread
.fsindex
== FS_TLS_SEL
&&
212 task
->thread
.fs
== 0))
214 task
->thread
.fsindex
= value
;
216 loadsegment(fs
, task
->thread
.fsindex
);
218 case offsetof(struct user_regs_struct
,gs
):
220 * If this is setting gs as for normal 64-bit use but
221 * setting gs_base has implicitly changed it, leave it.
223 if ((value
== GS_TLS_SEL
&& task
->thread
.gsindex
== 0 &&
224 task
->thread
.gs
!= 0) ||
225 (value
== 0 && task
->thread
.gsindex
== GS_TLS_SEL
&&
226 task
->thread
.gs
== 0))
228 task
->thread
.gsindex
= value
;
230 load_gs_index(task
->thread
.gsindex
);
232 case offsetof(struct user_regs_struct
,ds
):
233 task
->thread
.ds
= value
;
235 loadsegment(ds
, task
->thread
.ds
);
237 case offsetof(struct user_regs_struct
,es
):
238 task
->thread
.es
= value
;
240 loadsegment(es
, task
->thread
.es
);
244 * Can't actually change these in 64-bit mode.
246 case offsetof(struct user_regs_struct
,cs
):
247 if (unlikely(value
== 0))
249 #ifdef CONFIG_IA32_EMULATION
250 if (test_tsk_thread_flag(task
, TIF_IA32
))
251 task_pt_regs(task
)->cs
= value
;
254 case offsetof(struct user_regs_struct
,ss
):
255 if (unlikely(value
== 0))
257 #ifdef CONFIG_IA32_EMULATION
258 if (test_tsk_thread_flag(task
, TIF_IA32
))
259 task_pt_regs(task
)->ss
= value
;
267 static unsigned long debugreg_addr_limit(struct task_struct
*task
)
269 #ifdef CONFIG_IA32_EMULATION
270 if (test_tsk_thread_flag(task
, TIF_IA32
))
271 return IA32_PAGE_OFFSET
- 3;
273 return TASK_SIZE64
- 7;
276 #endif /* CONFIG_X86_32 */
278 static unsigned long get_flags(struct task_struct
*task
)
280 unsigned long retval
= task_pt_regs(task
)->flags
;
283 * If the debugger set TF, hide it from the readout.
285 if (test_tsk_thread_flag(task
, TIF_FORCED_TF
))
286 retval
&= ~X86_EFLAGS_TF
;
291 static int set_flags(struct task_struct
*task
, unsigned long value
)
293 struct pt_regs
*regs
= task_pt_regs(task
);
296 * If the user value contains TF, mark that
297 * it was not "us" (the debugger) that set it.
298 * If not, make sure it stays set if we had.
300 if (value
& X86_EFLAGS_TF
)
301 clear_tsk_thread_flag(task
, TIF_FORCED_TF
);
302 else if (test_tsk_thread_flag(task
, TIF_FORCED_TF
))
303 value
|= X86_EFLAGS_TF
;
305 regs
->flags
= (regs
->flags
& ~FLAG_MASK
) | (value
& FLAG_MASK
);
310 static int putreg(struct task_struct
*child
,
311 unsigned long offset
, unsigned long value
)
314 case offsetof(struct user_regs_struct
, cs
):
315 case offsetof(struct user_regs_struct
, ds
):
316 case offsetof(struct user_regs_struct
, es
):
317 case offsetof(struct user_regs_struct
, fs
):
318 case offsetof(struct user_regs_struct
, gs
):
319 case offsetof(struct user_regs_struct
, ss
):
320 return set_segment_reg(child
, offset
, value
);
322 case offsetof(struct user_regs_struct
, flags
):
323 return set_flags(child
, value
);
327 * Orig_ax is really just a flag with small positive and
328 * negative values, so make sure to always sign-extend it
329 * from 32 bits so that it works correctly regardless of
330 * whether we come from a 32-bit environment or not.
332 case offsetof(struct user_regs_struct
, orig_ax
):
333 value
= (long) (s32
) value
;
336 case offsetof(struct user_regs_struct
,fs_base
):
337 if (value
>= TASK_SIZE_OF(child
))
340 * When changing the segment base, use do_arch_prctl
341 * to set either thread.fs or thread.fsindex and the
342 * corresponding GDT slot.
344 if (child
->thread
.fs
!= value
)
345 return do_arch_prctl(child
, ARCH_SET_FS
, value
);
347 case offsetof(struct user_regs_struct
,gs_base
):
349 * Exactly the same here as the %fs handling above.
351 if (value
>= TASK_SIZE_OF(child
))
353 if (child
->thread
.gs
!= value
)
354 return do_arch_prctl(child
, ARCH_SET_GS
, value
);
359 *pt_regs_access(task_pt_regs(child
), offset
) = value
;
363 static unsigned long getreg(struct task_struct
*task
, unsigned long offset
)
366 case offsetof(struct user_regs_struct
, cs
):
367 case offsetof(struct user_regs_struct
, ds
):
368 case offsetof(struct user_regs_struct
, es
):
369 case offsetof(struct user_regs_struct
, fs
):
370 case offsetof(struct user_regs_struct
, gs
):
371 case offsetof(struct user_regs_struct
, ss
):
372 return get_segment_reg(task
, offset
);
374 case offsetof(struct user_regs_struct
, flags
):
375 return get_flags(task
);
378 case offsetof(struct user_regs_struct
, fs_base
): {
380 * do_arch_prctl may have used a GDT slot instead of
381 * the MSR. To userland, it appears the same either
382 * way, except the %fs segment selector might not be 0.
384 unsigned int seg
= task
->thread
.fsindex
;
385 if (task
->thread
.fs
!= 0)
386 return task
->thread
.fs
;
388 asm("movl %%fs,%0" : "=r" (seg
));
389 if (seg
!= FS_TLS_SEL
)
391 return get_desc_base(&task
->thread
.tls_array
[FS_TLS
]);
393 case offsetof(struct user_regs_struct
, gs_base
): {
395 * Exactly the same here as the %fs handling above.
397 unsigned int seg
= task
->thread
.gsindex
;
398 if (task
->thread
.gs
!= 0)
399 return task
->thread
.gs
;
401 asm("movl %%gs,%0" : "=r" (seg
));
402 if (seg
!= GS_TLS_SEL
)
404 return get_desc_base(&task
->thread
.tls_array
[GS_TLS
]);
409 return *pt_regs_access(task_pt_regs(task
), offset
);
412 static int genregs_get(struct task_struct
*target
,
413 const struct user_regset
*regset
,
414 unsigned int pos
, unsigned int count
,
415 void *kbuf
, void __user
*ubuf
)
418 unsigned long *k
= kbuf
;
420 *k
++ = getreg(target
, pos
);
425 unsigned long __user
*u
= ubuf
;
427 if (__put_user(getreg(target
, pos
), u
++))
437 static int genregs_set(struct task_struct
*target
,
438 const struct user_regset
*regset
,
439 unsigned int pos
, unsigned int count
,
440 const void *kbuf
, const void __user
*ubuf
)
444 const unsigned long *k
= kbuf
;
445 while (count
> 0 && !ret
) {
446 ret
= putreg(target
, pos
, *k
++);
451 const unsigned long __user
*u
= ubuf
;
452 while (count
> 0 && !ret
) {
454 ret
= __get_user(word
, u
++);
457 ret
= putreg(target
, pos
, word
);
466 * This function is trivial and will be inlined by the compiler.
467 * Having it separates the implementation details of debug
468 * registers from the interface details of ptrace.
470 static unsigned long ptrace_get_debugreg(struct task_struct
*child
, int n
)
473 case 0: return child
->thread
.debugreg0
;
474 case 1: return child
->thread
.debugreg1
;
475 case 2: return child
->thread
.debugreg2
;
476 case 3: return child
->thread
.debugreg3
;
477 case 6: return child
->thread
.debugreg6
;
478 case 7: return child
->thread
.debugreg7
;
483 static int ptrace_set_debugreg(struct task_struct
*child
,
484 int n
, unsigned long data
)
488 if (unlikely(n
== 4 || n
== 5))
491 if (n
< 4 && unlikely(data
>= debugreg_addr_limit(child
)))
495 case 0: child
->thread
.debugreg0
= data
; break;
496 case 1: child
->thread
.debugreg1
= data
; break;
497 case 2: child
->thread
.debugreg2
= data
; break;
498 case 3: child
->thread
.debugreg3
= data
; break;
501 if ((data
& ~0xffffffffUL
) != 0)
503 child
->thread
.debugreg6
= data
;
508 * Sanity-check data. Take one half-byte at once with
509 * check = (val >> (16 + 4*i)) & 0xf. It contains the
510 * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
511 * 2 and 3 are LENi. Given a list of invalid values,
512 * we do mask |= 1 << invalid_value, so that
513 * (mask >> check) & 1 is a correct test for invalid
516 * R/Wi contains the type of the breakpoint /
517 * watchpoint, LENi contains the length of the watched
518 * data in the watchpoint case.
520 * The invalid values are:
521 * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
522 * - R/Wi == 0x10 (break on I/O reads or writes), so
524 * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
527 * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
529 * See the Intel Manual "System Programming Guide",
532 * Note that LENi == 0x10 is defined on x86_64 in long
533 * mode (i.e. even for 32-bit userspace software, but
534 * 64-bit kernel), so the x86_64 mask value is 0x5454.
535 * See the AMD manual no. 24593 (AMD64 System Programming)
538 #define DR7_MASK 0x5f54
540 #define DR7_MASK 0x5554
542 data
&= ~DR_CONTROL_RESERVED
;
543 for (i
= 0; i
< 4; i
++)
544 if ((DR7_MASK
>> ((data
>> (16 + 4*i
)) & 0xf)) & 1)
546 child
->thread
.debugreg7
= data
;
548 set_tsk_thread_flag(child
, TIF_DEBUG
);
550 clear_tsk_thread_flag(child
, TIF_DEBUG
);
557 #ifdef CONFIG_X86_PTRACE_BTS
/*
 * The configuration for a particular BTS hardware implementation.
 */
struct bts_configuration {
	/* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
	unsigned char sizeof_bts;

	/* the size of a field in the BTS record in bytes */
	unsigned char sizeof_field;

	/* a bitmask to enable/disable BTS in DEBUGCTL MSR */
	unsigned long debugctl_mask;
};
569 static struct bts_configuration bts_cfg
;
571 #define BTS_MAX_RECORD_SIZE (8 * 3)
575 * Branch Trace Store (BTS) uses the following format. Different
576 * architectures vary in the size of those fields.
577 * - source linear address
578 * - destination linear address
581 * Later architectures use 64bit pointers throughout, whereas earlier
582 * architectures use 32bit pointers in 32bit mode.
584 * We compute the base address for the first 8 fields based on:
585 * - the field size stored in the DS configuration
586 * - the relative field position
588 * In order to store additional information in the BTS buffer, we use
589 * a special source address to indicate that the record requires
590 * special interpretation.
592 * Netburst indicated via a bit in the flags field whether the branch
593 * was predicted; this is ignored.
601 bts_escape
= (unsigned long)-1,
603 bts_jiffies
= bts_flags
606 static inline unsigned long bts_get(const char *base
, enum bts_field field
)
608 base
+= (bts_cfg
.sizeof_field
* field
);
609 return *(unsigned long *)base
;
612 static inline void bts_set(char *base
, enum bts_field field
, unsigned long val
)
614 base
+= (bts_cfg
.sizeof_field
* field
);;
615 (*(unsigned long *)base
) = val
;
619 * Translate a BTS record from the raw format into the bts_struct format
621 * out (out): bts_struct interpretation
622 * raw: raw BTS record
624 static void ptrace_bts_translate_record(struct bts_struct
*out
, const void *raw
)
626 memset(out
, 0, sizeof(*out
));
627 if (bts_get(raw
, bts_from
) == bts_escape
) {
628 out
->qualifier
= bts_get(raw
, bts_qual
);
629 out
->variant
.jiffies
= bts_get(raw
, bts_jiffies
);
631 out
->qualifier
= BTS_BRANCH
;
632 out
->variant
.lbr
.from_ip
= bts_get(raw
, bts_from
);
633 out
->variant
.lbr
.to_ip
= bts_get(raw
, bts_to
);
637 static int ptrace_bts_read_record(struct task_struct
*child
, size_t index
,
638 struct bts_struct __user
*out
)
640 struct bts_struct ret
;
641 const void *bts_record
;
642 size_t bts_index
, bts_end
;
645 error
= ds_get_bts_end(child
, &bts_end
);
649 if (bts_end
<= index
)
652 error
= ds_get_bts_index(child
, &bts_index
);
656 /* translate the ptrace bts index into the ds bts index */
657 bts_index
+= bts_end
- (index
+ 1);
658 if (bts_end
<= bts_index
)
659 bts_index
-= bts_end
;
661 error
= ds_access_bts(child
, bts_index
, &bts_record
);
665 ptrace_bts_translate_record(&ret
, bts_record
);
667 if (copy_to_user(out
, &ret
, sizeof(ret
)))
673 static int ptrace_bts_drain(struct task_struct
*child
,
675 struct bts_struct __user
*out
)
677 struct bts_struct ret
;
678 const unsigned char *raw
;
682 error
= ds_get_bts_index(child
, &end
);
686 if (size
< (end
* sizeof(struct bts_struct
)))
689 error
= ds_access_bts(child
, 0, (const void **)&raw
);
693 for (i
= 0; i
< end
; i
++, out
++, raw
+= bts_cfg
.sizeof_bts
) {
694 ptrace_bts_translate_record(&ret
, raw
);
696 if (copy_to_user(out
, &ret
, sizeof(ret
)))
700 error
= ds_clear_bts(child
);
707 static void ptrace_bts_ovfl(struct task_struct
*child
)
709 send_sig(child
->thread
.bts_ovfl_signal
, child
, 0);
712 static int ptrace_bts_config(struct task_struct
*child
,
714 const struct ptrace_bts_config __user
*ucfg
)
716 struct ptrace_bts_config cfg
;
720 if (!bts_cfg
.sizeof_bts
)
724 if (cfg_size
< sizeof(cfg
))
728 if (copy_from_user(&cfg
, ucfg
, sizeof(cfg
)))
732 if ((cfg
.flags
& PTRACE_BTS_O_SIGNAL
) &&
733 !(cfg
.flags
& PTRACE_BTS_O_ALLOC
))
736 if (cfg
.flags
& PTRACE_BTS_O_ALLOC
) {
737 ds_ovfl_callback_t ovfl
= 0;
738 unsigned int sig
= 0;
740 /* we ignore the error in case we were not tracing child */
741 (void)ds_release_bts(child
);
743 if (cfg
.flags
& PTRACE_BTS_O_SIGNAL
) {
748 ovfl
= ptrace_bts_ovfl
;
751 error
= ds_request_bts(child
, /* base = */ 0, cfg
.size
, ovfl
);
755 child
->thread
.bts_ovfl_signal
= sig
;
759 if (!child
->thread
.ds_ctx
&& cfg
.flags
)
762 if (cfg
.flags
& PTRACE_BTS_O_TRACE
)
763 child
->thread
.debugctlmsr
|= bts_cfg
.debugctl_mask
;
765 child
->thread
.debugctlmsr
&= ~bts_cfg
.debugctl_mask
;
767 if (cfg
.flags
& PTRACE_BTS_O_SCHED
)
768 set_tsk_thread_flag(child
, TIF_BTS_TRACE_TS
);
770 clear_tsk_thread_flag(child
, TIF_BTS_TRACE_TS
);
775 if (child
->thread
.debugctlmsr
)
776 set_tsk_thread_flag(child
, TIF_DEBUGCTLMSR
);
778 clear_tsk_thread_flag(child
, TIF_DEBUGCTLMSR
);
783 child
->thread
.debugctlmsr
&= ~bts_cfg
.debugctl_mask
;
784 clear_tsk_thread_flag(child
, TIF_BTS_TRACE_TS
);
788 static int ptrace_bts_status(struct task_struct
*child
,
790 struct ptrace_bts_config __user
*ucfg
)
792 struct ptrace_bts_config cfg
;
794 const void *base
, *max
;
797 if (cfg_size
< sizeof(cfg
))
800 error
= ds_get_bts_end(child
, &end
);
804 error
= ds_access_bts(child
, /* index = */ 0, &base
);
808 error
= ds_access_bts(child
, /* index = */ end
, &max
);
812 memset(&cfg
, 0, sizeof(cfg
));
813 cfg
.size
= (max
- base
);
814 cfg
.signal
= child
->thread
.bts_ovfl_signal
;
815 cfg
.bts_size
= sizeof(struct bts_struct
);
818 cfg
.flags
|= PTRACE_BTS_O_SIGNAL
;
820 if (test_tsk_thread_flag(child
, TIF_DEBUGCTLMSR
) &&
821 child
->thread
.debugctlmsr
& bts_cfg
.debugctl_mask
)
822 cfg
.flags
|= PTRACE_BTS_O_TRACE
;
824 if (test_tsk_thread_flag(child
, TIF_BTS_TRACE_TS
))
825 cfg
.flags
|= PTRACE_BTS_O_SCHED
;
827 if (copy_to_user(ucfg
, &cfg
, sizeof(cfg
)))
833 static int ptrace_bts_write_record(struct task_struct
*child
,
834 const struct bts_struct
*in
)
836 unsigned char bts_record
[BTS_MAX_RECORD_SIZE
];
838 BUG_ON(BTS_MAX_RECORD_SIZE
< bts_cfg
.sizeof_bts
);
840 memset(bts_record
, 0, bts_cfg
.sizeof_bts
);
841 switch (in
->qualifier
) {
846 bts_set(bts_record
, bts_from
, in
->variant
.lbr
.from_ip
);
847 bts_set(bts_record
, bts_to
, in
->variant
.lbr
.to_ip
);
850 case BTS_TASK_ARRIVES
:
851 case BTS_TASK_DEPARTS
:
852 bts_set(bts_record
, bts_from
, bts_escape
);
853 bts_set(bts_record
, bts_qual
, in
->qualifier
);
854 bts_set(bts_record
, bts_jiffies
, in
->variant
.jiffies
);
861 /* The writing task will be the switched-to task on a context
862 * switch. It needs to write into the switched-from task's BTS
864 return ds_unchecked_write_bts(child
, bts_record
, bts_cfg
.sizeof_bts
);
867 void ptrace_bts_take_timestamp(struct task_struct
*tsk
,
868 enum bts_qualifier qualifier
)
870 struct bts_struct rec
= {
871 .qualifier
= qualifier
,
872 .variant
.jiffies
= jiffies_64
875 ptrace_bts_write_record(tsk
, &rec
);
878 static const struct bts_configuration bts_cfg_netburst
= {
879 .sizeof_bts
= sizeof(long) * 3,
880 .sizeof_field
= sizeof(long),
881 .debugctl_mask
= (1<<2)|(1<<3)|(1<<5)
884 static const struct bts_configuration bts_cfg_pentium_m
= {
885 .sizeof_bts
= sizeof(long) * 3,
886 .sizeof_field
= sizeof(long),
887 .debugctl_mask
= (1<<6)|(1<<7)
890 static const struct bts_configuration bts_cfg_core2
= {
893 .debugctl_mask
= (1<<6)|(1<<7)|(1<<9)
896 static inline void bts_configure(const struct bts_configuration
*cfg
)
901 void __cpuinit
ptrace_bts_init_intel(struct cpuinfo_x86
*c
)
905 switch (c
->x86_model
) {
907 case 0xE: /* Pentium M */
908 bts_configure(&bts_cfg_pentium_m
);
910 case 0xF: /* Core2 */
911 case 0x1C: /* Atom */
912 bts_configure(&bts_cfg_core2
);
915 /* sorry, don't know about them */
920 switch (c
->x86_model
) {
923 case 0x2: /* Netburst */
924 bts_configure(&bts_cfg_netburst
);
927 /* sorry, don't know about them */
932 /* sorry, don't know about them */
936 #endif /* CONFIG_X86_PTRACE_BTS */
/*
 * Called by kernel/ptrace.c when detaching.
 *
 * Make sure the single step bit is not set.
 *
 * Also clears TIF_SYSCALL_EMU where that flag exists and, with
 * CONFIG_X86_PTRACE_BTS, releases the child's BTS resources and
 * turns branch tracing off.
 */
void ptrace_disable(struct task_struct *child)
{
	user_disable_single_step(child);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
#ifdef CONFIG_X86_PTRACE_BTS
	/* The result is deliberately ignored. */
	(void)ds_release_bts(child);

	/*
	 * Drop the BTS bits from the saved DEBUGCTL value; the
	 * TIF_DEBUGCTLMSR flag is cleared only when no other bit remains.
	 */
	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
	if (!child->thread.debugctlmsr)
		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);

	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
#endif /* CONFIG_X86_PTRACE_BTS */
}
960 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
961 static const struct user_regset_view user_x86_32_view
; /* Initialized below. */
964 long arch_ptrace(struct task_struct
*child
, long request
, long addr
, long data
)
967 unsigned long __user
*datap
= (unsigned long __user
*)data
;
970 /* read the word at location addr in the USER area. */
971 case PTRACE_PEEKUSR
: {
975 if ((addr
& (sizeof(data
) - 1)) || addr
< 0 ||
976 addr
>= sizeof(struct user
))
979 tmp
= 0; /* Default return condition */
980 if (addr
< sizeof(struct user_regs_struct
))
981 tmp
= getreg(child
, addr
);
982 else if (addr
>= offsetof(struct user
, u_debugreg
[0]) &&
983 addr
<= offsetof(struct user
, u_debugreg
[7])) {
984 addr
-= offsetof(struct user
, u_debugreg
[0]);
985 tmp
= ptrace_get_debugreg(child
, addr
/ sizeof(data
));
987 ret
= put_user(tmp
, datap
);
991 case PTRACE_POKEUSR
: /* write the word at location addr in the USER area */
993 if ((addr
& (sizeof(data
) - 1)) || addr
< 0 ||
994 addr
>= sizeof(struct user
))
997 if (addr
< sizeof(struct user_regs_struct
))
998 ret
= putreg(child
, addr
, data
);
999 else if (addr
>= offsetof(struct user
, u_debugreg
[0]) &&
1000 addr
<= offsetof(struct user
, u_debugreg
[7])) {
1001 addr
-= offsetof(struct user
, u_debugreg
[0]);
1002 ret
= ptrace_set_debugreg(child
,
1003 addr
/ sizeof(data
), data
);
1007 case PTRACE_GETREGS
: /* Get all gp regs from the child. */
1008 return copy_regset_to_user(child
,
1009 task_user_regset_view(current
),
1011 0, sizeof(struct user_regs_struct
),
1014 case PTRACE_SETREGS
: /* Set all gp regs in the child. */
1015 return copy_regset_from_user(child
,
1016 task_user_regset_view(current
),
1018 0, sizeof(struct user_regs_struct
),
1021 case PTRACE_GETFPREGS
: /* Get the child FPU state. */
1022 return copy_regset_to_user(child
,
1023 task_user_regset_view(current
),
1025 0, sizeof(struct user_i387_struct
),
1028 case PTRACE_SETFPREGS
: /* Set the child FPU state. */
1029 return copy_regset_from_user(child
,
1030 task_user_regset_view(current
),
1032 0, sizeof(struct user_i387_struct
),
1035 #ifdef CONFIG_X86_32
1036 case PTRACE_GETFPXREGS
: /* Get the child extended FPU state. */
1037 return copy_regset_to_user(child
, &user_x86_32_view
,
1039 0, sizeof(struct user_fxsr_struct
),
1042 case PTRACE_SETFPXREGS
: /* Set the child extended FPU state. */
1043 return copy_regset_from_user(child
, &user_x86_32_view
,
1045 0, sizeof(struct user_fxsr_struct
),
1049 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1050 case PTRACE_GET_THREAD_AREA
:
1053 ret
= do_get_thread_area(child
, addr
,
1054 (struct user_desc __user
*) data
);
1057 case PTRACE_SET_THREAD_AREA
:
1060 ret
= do_set_thread_area(child
, addr
,
1061 (struct user_desc __user
*) data
, 0);
1065 #ifdef CONFIG_X86_64
1066 /* normal 64bit interface to access TLS data.
1067 Works just like arch_prctl, except that the arguments
1069 case PTRACE_ARCH_PRCTL
:
1070 ret
= do_arch_prctl(child
, data
, addr
);
1075 * These bits need more cooking - not enabled yet:
1077 #ifdef CONFIG_X86_PTRACE_BTS
1078 case PTRACE_BTS_CONFIG
:
1079 ret
= ptrace_bts_config
1080 (child
, data
, (struct ptrace_bts_config __user
*)addr
);
1083 case PTRACE_BTS_STATUS
:
1084 ret
= ptrace_bts_status
1085 (child
, data
, (struct ptrace_bts_config __user
*)addr
);
1088 case PTRACE_BTS_SIZE
:
1089 ret
= ds_get_bts_index(child
, /* pos = */ 0);
1092 case PTRACE_BTS_GET
:
1093 ret
= ptrace_bts_read_record
1094 (child
, data
, (struct bts_struct __user
*) addr
);
1097 case PTRACE_BTS_CLEAR
:
1098 ret
= ds_clear_bts(child
);
1101 case PTRACE_BTS_DRAIN
:
1102 ret
= ptrace_bts_drain
1103 (child
, data
, (struct bts_struct __user
*) addr
);
1105 #endif /* CONFIG_X86_PTRACE_BTS */
1108 ret
= ptrace_request(child
, request
, addr
, data
);
1115 #ifdef CONFIG_IA32_EMULATION
1117 #include <linux/compat.h>
1118 #include <linux/syscalls.h>
1119 #include <asm/ia32.h>
1120 #include <asm/user32.h>
1123 case offsetof(struct user32, regs.l): \
1124 regs->q = value; break
1127 case offsetof(struct user32, regs.rs): \
1128 return set_segment_reg(child, \
1129 offsetof(struct user_regs_struct, rs), \
1133 static int putreg32(struct task_struct
*child
, unsigned regno
, u32 value
)
1135 struct pt_regs
*regs
= task_pt_regs(child
);
1156 case offsetof(struct user32
, regs
.orig_eax
):
1158 * Sign-extend the value so that orig_eax = -1
1159 * causes (long)orig_ax < 0 tests to fire correctly.
1161 regs
->orig_ax
= (long) (s32
) value
;
1164 case offsetof(struct user32
, regs
.eflags
):
1165 return set_flags(child
, value
);
1167 case offsetof(struct user32
, u_debugreg
[0]) ...
1168 offsetof(struct user32
, u_debugreg
[7]):
1169 regno
-= offsetof(struct user32
, u_debugreg
[0]);
1170 return ptrace_set_debugreg(child
, regno
/ 4, value
);
1173 if (regno
> sizeof(struct user32
) || (regno
& 3))
1177 * Other dummy fields in the virtual user structure
1189 case offsetof(struct user32, regs.l): \
1190 *val = regs->q; break
1193 case offsetof(struct user32, regs.rs): \
1194 *val = get_segment_reg(child, \
1195 offsetof(struct user_regs_struct, rs)); \
1198 static int getreg32(struct task_struct
*child
, unsigned regno
, u32
*val
)
1200 struct pt_regs
*regs
= task_pt_regs(child
);
1218 R32(orig_eax
, orig_ax
);
1222 case offsetof(struct user32
, regs
.eflags
):
1223 *val
= get_flags(child
);
1226 case offsetof(struct user32
, u_debugreg
[0]) ...
1227 offsetof(struct user32
, u_debugreg
[7]):
1228 regno
-= offsetof(struct user32
, u_debugreg
[0]);
1229 *val
= ptrace_get_debugreg(child
, regno
/ 4);
1233 if (regno
> sizeof(struct user32
) || (regno
& 3))
1237 * Other dummy fields in the virtual user structure
1249 static int genregs32_get(struct task_struct
*target
,
1250 const struct user_regset
*regset
,
1251 unsigned int pos
, unsigned int count
,
1252 void *kbuf
, void __user
*ubuf
)
1255 compat_ulong_t
*k
= kbuf
;
1257 getreg32(target
, pos
, k
++);
1258 count
-= sizeof(*k
);
1262 compat_ulong_t __user
*u
= ubuf
;
1264 compat_ulong_t word
;
1265 getreg32(target
, pos
, &word
);
1266 if (__put_user(word
, u
++))
1268 count
-= sizeof(*u
);
1276 static int genregs32_set(struct task_struct
*target
,
1277 const struct user_regset
*regset
,
1278 unsigned int pos
, unsigned int count
,
1279 const void *kbuf
, const void __user
*ubuf
)
1283 const compat_ulong_t
*k
= kbuf
;
1284 while (count
> 0 && !ret
) {
1285 ret
= putreg32(target
, pos
, *k
++);
1286 count
-= sizeof(*k
);
1290 const compat_ulong_t __user
*u
= ubuf
;
1291 while (count
> 0 && !ret
) {
1292 compat_ulong_t word
;
1293 ret
= __get_user(word
, u
++);
1296 ret
= putreg32(target
, pos
, word
);
1297 count
-= sizeof(*u
);
1304 long compat_arch_ptrace(struct task_struct
*child
, compat_long_t request
,
1305 compat_ulong_t caddr
, compat_ulong_t cdata
)
1307 unsigned long addr
= caddr
;
1308 unsigned long data
= cdata
;
1309 void __user
*datap
= compat_ptr(data
);
1314 case PTRACE_PEEKUSR
:
1315 ret
= getreg32(child
, addr
, &val
);
1317 ret
= put_user(val
, (__u32 __user
*)datap
);
1320 case PTRACE_POKEUSR
:
1321 ret
= putreg32(child
, addr
, data
);
1324 case PTRACE_GETREGS
: /* Get all gp regs from the child. */
1325 return copy_regset_to_user(child
, &user_x86_32_view
,
1327 0, sizeof(struct user_regs_struct32
),
1330 case PTRACE_SETREGS
: /* Set all gp regs in the child. */
1331 return copy_regset_from_user(child
, &user_x86_32_view
,
1333 sizeof(struct user_regs_struct32
),
1336 case PTRACE_GETFPREGS
: /* Get the child FPU state. */
1337 return copy_regset_to_user(child
, &user_x86_32_view
,
1339 sizeof(struct user_i387_ia32_struct
),
1342 case PTRACE_SETFPREGS
: /* Set the child FPU state. */
1343 return copy_regset_from_user(
1344 child
, &user_x86_32_view
, REGSET_FP
,
1345 0, sizeof(struct user_i387_ia32_struct
), datap
);
1347 case PTRACE_GETFPXREGS
: /* Get the child extended FPU state. */
1348 return copy_regset_to_user(child
, &user_x86_32_view
,
1350 sizeof(struct user32_fxsr_struct
),
1353 case PTRACE_SETFPXREGS
: /* Set the child extended FPU state. */
1354 return copy_regset_from_user(child
, &user_x86_32_view
,
1356 sizeof(struct user32_fxsr_struct
),
1359 case PTRACE_GET_THREAD_AREA
:
1360 case PTRACE_SET_THREAD_AREA
:
1361 return arch_ptrace(child
, request
, addr
, data
);
1364 return compat_ptrace_request(child
, request
, addr
, data
);
1370 #endif /* CONFIG_IA32_EMULATION */
1372 #ifdef CONFIG_X86_64
1374 static const struct user_regset x86_64_regsets
[] = {
1375 [REGSET_GENERAL
] = {
1376 .core_note_type
= NT_PRSTATUS
,
1377 .n
= sizeof(struct user_regs_struct
) / sizeof(long),
1378 .size
= sizeof(long), .align
= sizeof(long),
1379 .get
= genregs_get
, .set
= genregs_set
1382 .core_note_type
= NT_PRFPREG
,
1383 .n
= sizeof(struct user_i387_struct
) / sizeof(long),
1384 .size
= sizeof(long), .align
= sizeof(long),
1385 .active
= xfpregs_active
, .get
= xfpregs_get
, .set
= xfpregs_set
1389 static const struct user_regset_view user_x86_64_view
= {
1390 .name
= "x86_64", .e_machine
= EM_X86_64
,
1391 .regsets
= x86_64_regsets
, .n
= ARRAY_SIZE(x86_64_regsets
)
1394 #else /* CONFIG_X86_32 */
1396 #define user_regs_struct32 user_regs_struct
1397 #define genregs32_get genregs_get
1398 #define genregs32_set genregs_set
1400 #define user_i387_ia32_struct user_i387_struct
1401 #define user32_fxsr_struct user_fxsr_struct
1403 #endif /* CONFIG_X86_64 */
1405 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1406 static const struct user_regset x86_32_regsets
[] = {
1407 [REGSET_GENERAL
] = {
1408 .core_note_type
= NT_PRSTATUS
,
1409 .n
= sizeof(struct user_regs_struct32
) / sizeof(u32
),
1410 .size
= sizeof(u32
), .align
= sizeof(u32
),
1411 .get
= genregs32_get
, .set
= genregs32_set
1414 .core_note_type
= NT_PRFPREG
,
1415 .n
= sizeof(struct user_i387_ia32_struct
) / sizeof(u32
),
1416 .size
= sizeof(u32
), .align
= sizeof(u32
),
1417 .active
= fpregs_active
, .get
= fpregs_get
, .set
= fpregs_set
1420 .core_note_type
= NT_PRXFPREG
,
1421 .n
= sizeof(struct user32_fxsr_struct
) / sizeof(u32
),
1422 .size
= sizeof(u32
), .align
= sizeof(u32
),
1423 .active
= xfpregs_active
, .get
= xfpregs_get
, .set
= xfpregs_set
1426 .core_note_type
= NT_386_TLS
,
1427 .n
= GDT_ENTRY_TLS_ENTRIES
, .bias
= GDT_ENTRY_TLS_MIN
,
1428 .size
= sizeof(struct user_desc
),
1429 .align
= sizeof(struct user_desc
),
1430 .active
= regset_tls_active
,
1431 .get
= regset_tls_get
, .set
= regset_tls_set
1435 static const struct user_regset_view user_x86_32_view
= {
1436 .name
= "i386", .e_machine
= EM_386
,
1437 .regsets
= x86_32_regsets
, .n
= ARRAY_SIZE(x86_32_regsets
)
/*
 * Select the regset view that describes @task.
 *
 * NOTE(review): the #endif lines were lost from this copy and have been
 * placed so that each #if/#ifdef guards exactly the statement that
 * follows it -- confirm against the pristine file.  With that placement:
 *  - CONFIG_IA32_EMULATION: tasks with TIF_IA32 get user_x86_32_view,
 *    all others get user_x86_64_view;
 *  - CONFIG_X86_32: user_x86_32_view is returned unconditionally;
 *  - CONFIG_X86_64 without emulation: user_x86_64_view is returned.
 */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(task, TIF_IA32))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
		return &user_x86_32_view;
#endif
#ifdef CONFIG_X86_64
	return &user_x86_64_view;
#endif
}
1454 void send_sigtrap(struct task_struct
*tsk
, struct pt_regs
*regs
, int error_code
)
1456 struct siginfo info
;
1458 tsk
->thread
.trap_no
= 1;
1459 tsk
->thread
.error_code
= error_code
;
1461 memset(&info
, 0, sizeof(info
));
1462 info
.si_signo
= SIGTRAP
;
1463 info
.si_code
= TRAP_BRKPT
;
1466 info
.si_addr
= user_mode_vm(regs
) ? (void __user
*) regs
->ip
: NULL
;
1468 /* Send us the fake SIGTRAP */
1469 force_sig_info(SIGTRAP
, &info
, tsk
);
1472 static void syscall_trace(struct pt_regs
*regs
)
1474 if (!(current
->ptrace
& PT_PTRACED
))
1478 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
1480 regs
->ip
, regs
->sp
, regs
->ax
, regs
->orig_ax
, __builtin_return_address(0),
1481 current_thread_info()->flags
, current
->ptrace
);
1484 ptrace_notify(SIGTRAP
| ((current
->ptrace
& PT_TRACESYSGOOD
)
1487 * this isn't the same as continuing with a signal, but it will do
1488 * for normal use. strace only continues with a signal if the
1489 * stopping signal is not SIGTRAP. -brl
1491 if (current
->exit_code
) {
1492 send_sig(current
->exit_code
, current
, 1);
1493 current
->exit_code
= 0;
1497 #ifdef CONFIG_X86_32
1499 #elif defined CONFIG_IA32_EMULATION
1500 # define IS_IA32 test_thread_flag(TIF_IA32)
1506 * We must return the syscall number to actually look up in the table.
1507 * This can be -1L to skip running any syscall at all.
1509 asmregparm
long syscall_trace_enter(struct pt_regs
*regs
)
1514 * If we stepped into a sysenter/syscall insn, it trapped in
1515 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
1516 * If user-mode had set TF itself, then it's still clear from
1517 * do_debug() and we need to set it again to restore the user
1518 * state. If we entered on the slow path, TF was already set.
1520 if (test_thread_flag(TIF_SINGLESTEP
))
1521 regs
->flags
|= X86_EFLAGS_TF
;
1523 /* do the secure computing check first */
1524 secure_computing(regs
->orig_ax
);
1526 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU
)))
1529 if (ret
|| test_thread_flag(TIF_SYSCALL_TRACE
))
1530 syscall_trace(regs
);
1532 if (unlikely(current
->audit_context
)) {
1534 audit_syscall_entry(AUDIT_ARCH_I386
,
1537 regs
->dx
, regs
->si
);
1538 #ifdef CONFIG_X86_64
1540 audit_syscall_entry(AUDIT_ARCH_X86_64
,
1543 regs
->dx
, regs
->r10
);
1547 return ret
?: regs
->orig_ax
;
1550 asmregparm
void syscall_trace_leave(struct pt_regs
*regs
)
1552 if (unlikely(current
->audit_context
))
1553 audit_syscall_exit(AUDITSC_RESULT(regs
->ax
), regs
->ax
);
1555 if (test_thread_flag(TIF_SYSCALL_TRACE
))
1556 syscall_trace(regs
);
1559 * If TIF_SYSCALL_EMU is set, we only get here because of
1560 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
1561 * We already reported this syscall instruction in
1562 * syscall_trace_enter(), so don't do any more now.
1564 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU
)))
1568 * If we are single-stepping, synthesize a trap to follow the
1569 * system call instruction.
1571 if (test_thread_flag(TIF_SINGLESTEP
) &&
1572 (current
->ptrace
& PT_PTRACED
))
1573 send_sigtrap(current
, regs
, 0);