arch/x86/xen/xen-asm_32.S

   1 /*
   2  * Asm versions of Xen pv-ops, suitable for either direct use or
   3  * inlining.  The inline versions are the same as the direct-use
   4  * versions, with the pre- and post-amble chopped off.
   5  *
   6  * This code is encoded for size rather than absolute efficiency, with
   7  * a view to being able to inline as much as possible.
   8  *
   9  * We only bother with direct forms (ie, vcpu in pda) of the
  10  * operations here; the indirect forms are better handled in C, since
  11  * they're generally too large to inline anyway.
  12  */
  13
  14 #include <asm/thread_info.h>
  15 #include <asm/processor-flags.h>
  16 #include <asm/segment.h>
  17 #include <asm/asm.h>
  18
  19 #include <xen/interface/xen.h>
  20
  21 #include "xen-asm.h"
  22
  23 /*
  24  * check_events: force an event check by making a hypercall (via
  25  * xen_force_evtchn_callback) while handing %eax, %ecx and %edx back
  26  * to the caller unchanged.
  27 check_events:
  28         pushl %eax                      /* save the three registers this thunk preserves */
  29         pushl %ecx
  30         pushl %edx
  31         call xen_force_evtchn_callback
  32         popl %edx                       /* restore in reverse push order */
  33         popl %ecx
  34         popl %eax
  35         ret
  36
  37 /*
  38  * This is run where a normal iret would be run, with the same stack setup:
  39  *      8: eflags
  40  *      4: cs
  41  *      esp-> 0: eip
  42  *
  43  * This attempts to make sure that any pending events are dealt with
  44  * on return to usermode, but there is a small window in which an
  45  * event can happen just before entering usermode.  If the nested
  46  * interrupt ends up setting one of the TIF_WORK_MASK pending work
  47  * flags, they will not be tested again before returning to
  48  * usermode. This means that a process can end up with pending work,
  49  * which will be unprocessed until the process enters and leaves the
  50  * kernel again, which could be an unbounded amount of time.  This
  51  * means that a pending signal or reschedule event could be
  52  * indefinitely delayed.
  53  *
  54  * The fix is to notice a nested interrupt in the critical window, and
  55  * if one occurs, then fold the nested interrupt into the current
  56  * interrupt stack frame, and re-process it iteratively rather than
  57  * recursively.  This means that it will exit via the normal path, and
  58  * all pending work will be dealt with appropriately.
  59  *
  60  * Because the nested interrupt handler needs to deal with the current
  61  * stack state in whatever form it's in, we keep things simple by only
  62  * using a single register which is pushed/popped on the stack.
  63  */
  64
  65 .macro POP_FS
     /*
      * Pop a 16-bit selector from the stack into %fs.  The pop at label 1
      * is covered by an exception-table entry whose fixup (label 2)
      * overwrites the word at the top of the stack with 0 and retries
      * the pop, so a selector that cannot be loaded is replaced by the
      * null selector.  Uses numeric local labels 1 and 2.
      */
  66 1:
  67         popw %fs
  68 .pushsection .fixup, "ax"
  69 2:      movw $0, (%esp)                 /* replace the offending selector with 0 */
  70         jmp 1b                          /* ...and retry the pop */
  71 .popsection
  72         _ASM_EXTABLE(1b,2b)             /* fault at 1 -> resume at 2 */
  73 .endm
  74
  75 ENTRY(xen_iret)
             /*
              * Entered with an iret-style frame on the stack: eip at 0(%esp),
              * cs at 4(%esp), eflags at 8(%esp).  %eax is the only scratch
              * register and is saved/restored on the stack; with CONFIG_SMP
              * %fs is also switched temporarily and then restored.
              */
  76         /* test eflags for special cases */
  77         testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
  78         jnz hyper_iret                  /* either flag set: take the out-of-line path */
  79
  80         push %eax
  81         ESP_OFFSET=4    # bytes pushed onto stack
  82
  83         /* Store vcpu_info pointer for easy access */
  84 #ifdef CONFIG_SMP
  85         pushw %fs                       /* 2 bytes, popped again by POP_FS below */
  86         movl $(__KERNEL_PERCPU), %eax
  87         movl %eax, %fs
  88         movl %fs:xen_vcpu, %eax         /* eax = xen_vcpu, read via the __KERNEL_PERCPU segment */
  89         POP_FS
  90 #else
  91         movl %ss:xen_vcpu, %eax
  92 #endif
  93
  94         /* check IF state we're restoring */
             /*
              * Byte 1 of the saved eflags is tested: frame offset 8, plus
              * ESP_OFFSET for the eax pushed above, plus 1.  ZF ends up set
              * when IF is clear in the flags being restored.
              */
  95         testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
  96
  97         /*
  98          * Maybe enable events.  Once this happens we could get a
  99          * recursive event, so the critical region starts immediately
 100          * afterwards.  However, if that happens we don't end up
 101          * resuming the code, so we don't have to be worried about
 102          * being preempted to another CPU.
 103          */
 104         setz %ss:XEN_vcpu_info_mask(%eax)       /* mask = 1 if IF was clear, 0 if IF was set */
 105 xen_iret_start_crit:
 106
 107         /* check for unmasked and pending */
             /*
              * NOTE(review): the 16-bit compare presumably covers the pending
              * byte and the mask byte that follows it, so 0x0001 would mean
              * pending=1, mask=0 -- confirm against the vcpu_info layout.
              */
 108         cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
 109
 110         /*
 111          * If there's something pending, mask events again so we can
 112          * jump back into xen_hypervisor_callback. Otherwise do not
 113          * touch XEN_vcpu_info_mask.
 114          */
 115         jne 1f
 116         movb $1, %ss:XEN_vcpu_info_mask(%eax)
 117
             /*
              * Neither the movb above nor the popl below modifies EFLAGS, so
              * the ZF result of the cmpw is still valid at the "je" further
              * down.
              */
 118 1:      popl %eax
 119
 120         /*
 121          * From this point on the registers are restored and the stack
 122          * updated, so we don't need to worry about it if we're
 123          * preempted
 124          */
 125 iret_restore_end:
 126
 127         /*
 128          * Jump to hypervisor_callback after fixing up the stack.
 129          * Events are masked, so jumping out of the critical region is
 130          * OK.
 131          */
 132         je xen_hypervisor_callback      /* ZF still comes from the cmpw above */
 133
 134 1:      iret
 135 xen_iret_end_crit:
 136         _ASM_EXTABLE(1b, iret_exc)      /* a fault on the iret at 1 is redirected to iret_exc */
 137
 138 hyper_iret:
 139         /* put this out of line since it's very rarely used */
 140         jmp hypercall_page + __HYPERVISOR_iret * 32     /* entry __HYPERVISOR_iret, 32-byte stride */
 141
 142         .globl xen_iret_start_crit, xen_iret_end_crit
 143
 144 /*
 145  * This is called by xen_hypervisor_callback in entry.S when it sees
 146  * that the EIP at the time of interrupt was between
 147  * xen_iret_start_crit and xen_iret_end_crit.  We're passed the EIP in
 148  * %eax so we can do a more refined determination of what to do.
 149  *
 150  * The stack format at this point is:
 151  *      ----------------
 152  *       ss             : (ss/esp may be present if we came from usermode)
 153  *       esp            :
 154  *       eflags         }  outer exception info
 155  *       cs             }
 156  *       eip            }
 157  *      ---------------- <- edi (copy dest)
 158  *       eax            :  outer eax if it hasn't been restored
 159  *      ----------------
 160  *       eflags         }  nested exception info
 161  *       cs             }   (no ss/esp because we're nested
 162  *       eip            }    from the same ring)
 163  *       orig_eax       }<- esi (copy src)
 164  *       - - - - - - - -
 165  *       fs             }
 166  *       es             }
 167  *       ds             }  SAVE_ALL state
 168  *       eax            }
 169  *        :             :
 170  *       ebx            }<- esp
 171  *      ----------------
 172  *
 173  * In order to deliver the nested exception properly, we need to shift
 174  * everything from the return addr up to the error code so it sits
 175  * just under the outer exception info.  This means that when we
 176  * handle the exception, we do it in the context of the outer
 177  * exception rather than starting a new one.
 178  *
 179  * The only caveat is that if the outer eax hasn't been restored yet
 180  * (ie, it's still on stack), we need to insert its value into the
 181  * SAVE_ALL state before going on, since it's usermode state which we
 182  * eventually need to restore.
 183  */
 184 ENTRY(xen_iret_crit_fixup)
             /*
              * In:   %eax = EIP at which xen_iret was interrupted
              *       %esp = base of the SAVE_ALL frame (PT_* offsets apply)
              * Uses: %eax, %ecx, %esi, %edi as scratch; the direction flag is
              *       set for the copy and cleared again afterwards.
              * Exit: jumps to xen_do_upcall, with %esp moved to the relocated
              *       frame unless the RPL check below bails out early.
              */
 185         /*
 186          * Paranoia: Make sure we're really coming from kernel space.
 187          * One could imagine a case where userspace jumps into the
 188          * critical range address, but just before the CPU delivers a
 189          * GP, it decides to deliver an interrupt instead.  Unlikely?
 190          * Definitely.  Easy to avoid?  Yes.  The Intel documents
 191          * explicitly say that the reported EIP for a bad jump is the
 192          * jump instruction itself, not the destination, but some
 193          * virtual environments get this wrong.
 194          */
 195         movl PT_CS(%esp), %ecx
 196         andl $SEGMENT_RPL_MASK, %ecx    /* keep only the RPL bits of the saved cs */
 197         cmpl $USER_RPL, %ecx
 198         je 2f                           /* user RPL: leave the frame untouched */
 199
 200         lea PT_ORIG_EAX(%esp), %esi     /* copy source: top word of the saved state */
 201         lea PT_EFLAGS(%esp), %edi       /* copy dest: the nested eflags slot */
 202
 203         /*
 204          * If eip is before iret_restore_end then stack
 205          * hasn't been restored yet.
 206          */
 207         cmp $iret_restore_end, %eax
 208         jae 1f                          /* unsigned: at/after the label, eax was already popped */
 209
 210         movl 0+4(%edi), %eax            /* copy EAX (just above top of frame) */
 211         movl %eax, PT_EAX(%esp)
 212
             /* ESP_OFFSET is the assembler symbol assigned in xen_iret (4). */
 213         lea ESP_OFFSET(%edi), %edi      /* move dest up over saved regs */
 214
 215         /* set up the copy */
 216 1:      std                             /* DF=1: movsl steps %esi/%edi downwards */
 217         mov $PT_EIP / 4, %ecx           /* saved regs up to orig_eax */
 218         rep movsl                       /* copy %ecx dwords, highest address first */
 219         cld                             /* back to DF=0 */
 220
             /* After the copy %edi is one dword below the last word written. */
 221         lea 4(%edi), %esp               /* point esp to new frame */
 222 2:      jmp xen_do_upcall
 223