+#------------------------------------------------------------------------------\r
+#\r
+# Copyright (c) 2006 - 2009, Intel Corporation. All rights reserved.<BR>\r
+# This program and the accompanying materials\r
+# are licensed and made available under the terms and conditions of the BSD License\r
+# which accompanies this distribution. The full text of the license may be found at\r
+# http://opensource.org/licenses/bsd-license.php\r
+#\r
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
+# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+#\r
+# Module Name:\r
+#\r
+# Thunk16.S\r
+#\r
+# Abstract:\r
+#\r
+# Real mode thunk\r
+#\r
+#------------------------------------------------------------------------------\r
+\r
+#include <Library/BaseLib.h>\r
+\r
+#\r
+# Symbols exported from this file: the start label of the thunk code that is\r
+# shadowed below 1MB (m16Start), the 16-bit offset variables defined in the\r
+# .data section, and the InternalAsmThunk16 entry point.\r
+#\r
+ASM_GLOBAL ASM_PFX(m16Start)\r
+ASM_GLOBAL ASM_PFX(m16Size)\r
+ASM_GLOBAL ASM_PFX(mThunk16Attr)\r
+ASM_GLOBAL ASM_PFX(m16Gdt)\r
+ASM_GLOBAL ASM_PFX(m16GdtrBase)\r
+ASM_GLOBAL ASM_PFX(mTransition)\r
+ASM_GLOBAL ASM_PFX(InternalAsmThunk16)\r
+\r
+#\r
+# Byte offsets of the fields of the IA32_REGS structure.\r
+#\r
+# The order matches the frame that BackFromUserCode builds on the real-mode\r
+# stack (lowest address first): the eight pushad registers, ds/es/fs/gs, the\r
+# 8-byte EFlags slot (32-bit flags plus a zeroed high dword), then eip, cs, ss.\r
+# NOTE(review): presumably mirrors the C IA32_REGISTER_SET layout - confirm\r
+# against the BaseLib headers before changing any offset.\r
+#\r
+.set _EDI, 0 #size 4\r
+.set _ESI, 4 #size 4\r
+.set _EBP, 8 #size 4\r
+.set _ESP, 12 #size 4\r
+.set _EBX, 16 #size 4\r
+.set _EDX, 20 #size 4\r
+.set _ECX, 24 #size 4\r
+.set _EAX, 28 #size 4\r
+.set _DS, 32 #size 2\r
+.set _ES, 34 #size 2\r
+.set _FS, 36 #size 2\r
+.set _GS, 38 #size 2\r
+.set _EFLAGS, 40 #size 8\r
+.set _EIP, 48 #size 4\r
+.set _CS, 52 #size 2\r
+.set _SS, 54 #size 2\r
+.set IA32_REGS_SIZE, 56\r
+\r
+ .data\r
+\r
+#\r
+# 16-bit values, each the distance in bytes from m16Start to a location inside\r
+# the thunk code:\r
+# m16Size - size of the region m16Start .. InternalAsmThunk16\r
+# mThunk16Attr - offset of the _ThunkAttr immediate in BackFromUserCode\r
+# m16Gdt - offset of the GDT (NullSeg)\r
+# m16GdtrBase - offset of the base field of _16Gdtr\r
+# mTransition - offset of the _EntryPoint far pointer\r
+# NOTE(review): presumably read by the C code that copies the thunk below 1MB\r
+# and patches these locations - verify against the caller.\r
+#\r
+ASM_PFX(m16Size): .word ASM_PFX(InternalAsmThunk16) - ASM_PFX(m16Start)\r
+ASM_PFX(mThunk16Attr): .word _ThunkAttr - ASM_PFX(m16Start)\r
+ASM_PFX(m16Gdt): .word ASM_PFX(NullSeg) - ASM_PFX(m16Start)\r
+ASM_PFX(m16GdtrBase): .word _16GdtrBase - ASM_PFX(m16Start)\r
+ASM_PFX(mTransition): .word _EntryPoint - ASM_PFX(m16Start)\r
+\r
+ .text\r
+\r
+#\r
+# m16Start marks the beginning of the code and data that are addressed relative\r
+# to this label by the offsets in the .data section.\r
+#\r
+ASM_PFX(m16Start):\r
+\r
+#\r
+# 10-byte GDTR image (2-byte limit followed by 8-byte base) describing the\r
+# shadowed copy of the caller's GDT. Filled in by InternalAsmThunk16 and loaded\r
+# with lgdt in BackFromUserCode.\r
+#\r
+SavedGdt: .space 10\r
+\r
+#------------------------------------------------------------------------------\r
+# _BackFromUserCode() takes control in real mode after 'retf' has been executed\r
+# by user code. It will be shadowed to somewhere in memory below 1MB.\r
+#\r
+# It saves the register state left by the user code as an IA32_REGS frame on\r
+# the real-mode stack, optionally enables the A20 line, patches the far-jump\r
+# operand at L_64Eip with the linear address of L_64BitCode, reloads CR4, GDTR\r
+# and CR0 from the values stored by InternalAsmThunk16 (setting EFER.LME in\r
+# between), and finally returns to 64-bit code through the return address that\r
+# InternalAsmThunk16 left on the saved 64-bit stack.\r
+#\r
+# On exit ebp holds the linear address of the end of the IA32_REGS frame.\r
+#\r
+# This file is assembled as 64-bit code but this routine executes in 16-bit\r
+# mode, so several instructions decode differently at run time; the trailing\r
+# comments give the instruction that is actually executed.\r
+#------------------------------------------------------------------------------\r
+ASM_GLOBAL ASM_PFX(BackFromUserCode)\r
+ASM_PFX(BackFromUserCode):\r
+ #\r
+ # The order of saved registers on the stack matches the order they appear\r
+ # in IA32_REGS structure. This facilitates wrapper function to extract them\r
+ # into that structure.\r
+ #\r
+ # Some instructions for manipulation of segment registers have to be written\r
+ # as raw opcodes because their encodings are not accepted when assembling\r
+ # 64-bit code.\r
+ #\r
+ .byte 0x16 # push ss\r
+ .byte 0xe # push cs\r
+ .byte 0x66\r
+ call L_Base # push eip\r
+L_Base: \r
+ .byte 0x66\r
+ pushq $0 # reserved high order 32 bits of EFlags\r
+ .byte 0x66, 0x9c # pushfd actually\r
+ cli # disable interrupts\r
+ push %gs\r
+ push %fs\r
+ .byte 6 # push es\r
+ .byte 0x1e # push ds\r
+ .byte 0x66,0x60 # pushad\r
+ .byte 0x66,0xba # mov edx, imm32\r
+ #\r
+ # The 4 bytes at _ThunkAttr are the immediate of the mov above; their offset is\r
+ # published through mThunk16Attr.\r
+ #\r
+_ThunkAttr: .space 4\r
+ testb $THUNK_ATTRIBUTE_DISABLE_A20_MASK_INT_15, %dl\r
+ jz L_1\r
+ movl $0x15cd2401,%eax # mov ax, 2401h & int 15h\r
+ cli # disable interrupts\r
+ jnc L_2\r
+L_1: \r
+ testb $THUNK_ATTRIBUTE_DISABLE_A20_MASK_KBD_CTRL, %dl\r
+ jz L_2\r
+ inb $0x92,%al\r
+ orb $2,%al\r
+ outb %al, $0x92 # deactivate A20M#\r
+L_2: \r
+ #\r
+ # Clear all of eax before loading ss. The move below executes as the 16-bit\r
+ # "mov ax, ss" and leaves bits 31:16 of eax untouched, while the following\r
+ # shift and add operate on the full 32-bit register. Stale high bits left by\r
+ # the user code would otherwise corrupt the linear address computed in ebp.\r
+ #\r
+ xorw %ax, %ax # xor eax, eax\r
+ movl %ss,%eax # mov ax, ss\r
+ lea IA32_REGS_SIZE(%esp), %bp\r
+ #\r
+ # rsi in the following 2 instructions is indeed bp in 16-bit code\r
+ #\r
+ movw %bp, (_ESP - IA32_REGS_SIZE)(%rsi)\r
+ .byte 0x66\r
+ movl (_EIP - IA32_REGS_SIZE)(%rsi), %ebx\r
+ shlw $4,%ax # shl eax, 4\r
+ addw %ax,%bp # add ebp, eax\r
+ movw %cs,%ax\r
+ shlw $4,%ax\r
+ #\r
+ # ebx is the saved eip (offset of L_Base within cs) and eax is cs * 16, so the\r
+ # lea below yields the linear address of L_64BitCode, which is then stored into\r
+ # the operand of the far jump at L_64Eip.\r
+ #\r
+ lea (L_64BitCode - L_Base)(%ebx, %eax), %ax\r
+ .byte 0x66,0x2e,0x89,0x87 # mov cs:[bx + (L_64Eip - L_Base)], eax\r
+ .word L_64Eip - L_Base\r
+ .byte 0x66,0xb8 # mov eax, imm32\r
+SavedCr4: .space 4\r
+ movq %rax, %cr4\r
+ #\r
+ # rdi in the instruction below is indeed bx in 16-bit code\r
+ #\r
+ .byte 0x66,0x2e # 2eh is "cs:" segment override\r
+ lgdt (SavedGdt - L_Base)(%rdi)\r
+ .byte 0x66\r
+ movl $0xc0000080,%ecx\r
+ rdmsr\r
+ orb $1,%ah\r
+ wrmsr\r
+ .byte 0x66,0xb8 # mov eax, imm32\r
+SavedCr0: .space 4\r
+ movq %rax, %cr0\r
+ .byte 0x66,0xea # jmp far cs:L_64Bit\r
+L_64Eip: .space 4\r
+SavedCs: .space 2\r
+L_64BitCode: \r
+ .byte 0x90\r
+ .byte 0x67,0xbc # mov esp, imm32\r
+SavedSp: .space 4 # restore stack\r
+ nop\r
+ ret\r
+\r
+#\r
+# _EntryPoint is a far pointer (32-bit offset followed by a 16-bit selector)\r
+# used by the indirect far jump at the end of InternalAsmThunk16. As assembled,\r
+# the offset is ToUserCode relative to m16Start and the selector is CODE16.\r
+# NOTE(review): the offset is presumably rebased by the caller once the thunk\r
+# has been copied (its location is published through mTransition) - confirm.\r
+#\r
+_EntryPoint: .long ASM_PFX(ToUserCode) - ASM_PFX(m16Start)\r
+ .word CODE16\r
+#\r
+# GDTR image for the thunk GDT: 2-byte limit followed by an 8-byte base. The\r
+# offset of the base field is published through m16GdtrBase.\r
+#\r
+_16Gdtr: .word GDT_SIZE - 1\r
+_16GdtrBase: .quad ASM_PFX(NullSeg)\r
+#\r
+# IDTR image loaded in real mode by ToUserCode: limit 0x3ff, base 0.\r
+#\r
+_16Idtr: .word 0x3ff\r
+ .long 0\r
+\r
+#------------------------------------------------------------------------------\r
+# _ToUserCode() takes control in real mode before passing control to user code.\r
+# It will be shadowed to somewhere in memory below 1MB.\r
+#\r
+# Reached through the far jump at the end of InternalAsmThunk16, which sets up:\r
+# eax - CR0 value with the PE and PG bits cleared\r
+# ebp - CR4 value masked with 0x300\r
+# edx - DATA16 selector\r
+# esi - 16-bit stack segment\r
+# ebx - stack offset at which the IA32_REGS copy starts\r
+#\r
+# This file is assembled as 64-bit code but the routine executes with 16-bit\r
+# default operand and address sizes, so several instructions decode differently\r
+# at run time (for example rsi in a memory operand is bp).\r
+#------------------------------------------------------------------------------\r
+ASM_GLOBAL ASM_PFX(ToUserCode)\r
+ASM_PFX(ToUserCode):\r
+ movl %edx,%ss # set new segment selectors\r
+ movl %edx,%ds\r
+ movl %edx,%es\r
+ movl %edx,%fs\r
+ movl %edx,%gs\r
+ .byte 0x66\r
+ movl $0xc0000080,%ecx\r
+ #\r
+ # Write the CR0 value passed in eax, then clear bit 8 of MSR 0xc0000080 (bit 0\r
+ # of ah), then write the CR4 value passed in ebp.\r
+ #\r
+ movq %rax, %cr0\r
+ rdmsr\r
+ andb $0xfe, %ah # $0b11111110\r
+ wrmsr\r
+ movq %rbp, %cr4\r
+ movl %esi,%ss # set up 16-bit stack segment\r
+ movw %bx,%sp # set up 16-bit stack pointer\r
+ .byte 0x66 # make the following call 32-bit\r
+ call L_Base1 # push eip\r
+L_Base1: \r
+ #\r
+ # Build a far return frame to L_RealMode: the pushed word at\r
+ # IA32_REGS_SIZE + 2 is the segment half of the return address that\r
+ # InternalAsmThunk16 stored after the register copy, and 0x0c is the byte\r
+ # distance from L_Base1 to L_RealMode. The 0x0c constant is hard-coded, so it\r
+ # must be updated if the instructions in between ever change size.\r
+ #\r
+ popw %bp # ebp <- address of L_Base1\r
+ pushq (IA32_REGS_SIZE + 2)(%esp)\r
+ lea 0x0c(%rsi), %eax\r
+ pushq %rax\r
+ lret # execution begins at next instruction\r
+L_RealMode: \r
+ .byte 0x66,0x2e # CS and operand size override\r
+ lidt (_16Idtr - L_Base1)(%rsi)\r
+ #\r
+ # Pop the IA32_REGS copy in structure order; the final far return consumes the\r
+ # eip, cs and ss fields as a 32-bit offset and a 32-bit segment slot.\r
+ #\r
+ .byte 0x66,0x61 # popad\r
+ .byte 0x1f # pop ds\r
+ .byte 0x7 # pop es\r
+ .byte 0x0f, 0xa1 # pop fs\r
+ .byte 0x0f, 0xa9 # pop gs\r
+ .byte 0x66, 0x9d # popfd\r
+ leaw 4(%esp),%sp # skip high order 32 bits of EFlags\r
+ .byte 0x66 # make the following retf 32-bit\r
+ lret # transfer control to user code\r
+\r
+#\r
+# Selector values for the descriptors below. No bytes are emitted between these\r
+# .set lines and NullSeg, so '.' equals NullSeg here and each value is the byte\r
+# offset of the descriptor from the start of the GDT.\r
+#\r
+.set CODE16, ASM_PFX(_16Code) - .\r
+.set DATA16, ASM_PFX(_16Data) - .\r
+.set DATA32, ASM_PFX(_32Data) - .\r
+\r
+#\r
+# The thunk GDT. Each 8-byte descriptor is laid out as: limit 15:0, base 15:0,\r
+# base 23:16, access byte, flags with limit 19:16, base 31:24. All bases are 0.\r
+#\r
+ASM_PFX(NullSeg): .quad 0\r
+ASM_PFX(_16Code):\r
+ .word -1\r
+ .word 0\r
+ .byte 0\r
+ .byte 0x9b\r
+ .byte 0x8f # 16-bit segment, 4GB limit\r
+ .byte 0\r
+ASM_PFX(_16Data):\r
+ .word -1\r
+ .word 0\r
+ .byte 0\r
+ .byte 0x93\r
+ .byte 0x8f # 16-bit segment, 4GB limit\r
+ .byte 0\r
+ASM_PFX(_32Data):\r
+ .word -1\r
+ .word 0\r
+ .byte 0\r
+ .byte 0x93\r
+ .byte 0xcf # 32-bit segment, 4GB limit\r
+ .byte 0\r
+\r
+.set GDT_SIZE, . - ASM_PFX(NullSeg)\r
+\r
+#------------------------------------------------------------------------------\r
+# IA32_REGISTER_SET *\r
+# EFIAPI\r
+# InternalAsmThunk16 (\r
+# IN IA32_REGISTER_SET *RegisterSet,\r
+# IN OUT VOID *Transition\r
+# );\r
+#\r
+# Copies RegisterSet onto the 16-bit stack it describes (SS:ESP fields), saves\r
+# the current GDT, GDTR, IDTR, CR0, CR4, CS and stack pointer, then far-jumps\r
+# through _EntryPoint to ToUserCode. Control comes back at L_RetFromRealMode\r
+# via the 'ret' at the end of BackFromUserCode.\r
+#\r
+# In: rcx = RegisterSet\r
+# rdx = Transition; used as the address at which the m16Start region has\r
+# been copied (NOTE(review): the copy is made by the caller - confirm)\r
+# Out: eax = ebp - IA32_REGS_SIZE, where ebp is the linear address of the end\r
+# of the register frame saved by BackFromUserCode\r
+#\r
+# After the nine 8-byte pushes below the return address is at 0x48(%rsp), so\r
+# the 0x50(%rsp) and 0x60(%rsp) slots used for IDTR and GDTR lie in the\r
+# caller's argument area. NOTE(review): this relies on the caller reserving at\r
+# least 32 bytes there, as the Microsoft x64 convention does - confirm for\r
+# EFIAPI.\r
+#------------------------------------------------------------------------------\r
+\r
+ASM_GLOBAL ASM_PFX(InternalAsmThunk16)\r
+ASM_PFX(InternalAsmThunk16):\r
+ pushq %rbp\r
+ pushq %rbx\r
+ pushq %rsi\r
+ pushq %rdi\r
+ \r
+ movl %ds, %ebx\r
+ pushq %rbx # Save ds segment register on the stack\r
+ movl %es, %ebx\r
+ pushq %rbx # Save es segment register on the stack\r
+ movl %ss, %ebx\r
+ pushq %rbx # Save ss segment register on the stack\r
+\r
+ .byte 0x0f, 0xa0 #push fs\r
+ .byte 0x0f, 0xa8 #push gs\r
+ #\r
+ # rsi <- RegisterSet, r8d <- its SS field, edi <- its ESP field lowered by\r
+ # IA32_REGS_SIZE + 4 to make room for the register copy and a 4-byte return\r
+ # address.\r
+ #\r
+ movq %rcx, %rsi\r
+ movzwl _SS(%rsi), %r8d\r
+ movl _ESP(%rsi), %edi\r
+ lea -(IA32_REGS_SIZE + 4)(%edi), %rdi\r
+ imul $16, %r8d, %eax \r
+ movl %edi,%ebx # ebx <- stack for 16-bit code\r
+ pushq $(IA32_REGS_SIZE / 4)\r
+ addl %eax,%edi # edi <- linear address of 16-bit stack\r
+ popq %rcx\r
+ rep\r
+ movsl # copy RegSet\r
+ #\r
+ # rcx <- address of SavedCr4 inside the thunk copy; all later accesses to the\r
+ # copy are made relative to it. eax is built as segment:offset of\r
+ # BackFromUserCode in the copy (segment = Transition >> 4 in the high word).\r
+ #\r
+ lea (SavedCr4 - ASM_PFX(m16Start))(%rdx), %ecx\r
+ movl %edx,%eax # eax <- transition code address\r
+ andl $0xf,%edx\r
+ shll $12,%eax # segment address in high order 16 bits\r
+ lea (ASM_PFX(BackFromUserCode) - ASM_PFX(m16Start))(%rdx), %ax\r
+ stosl # [edi] <- return address of user code\r
+ sgdt 0x60(%rsp) # save GDTR in the argument space above the return address\r
+ movzwq 0x60(%rsp), %r10 # r10 <- GDT limit \r
+ lea ((ASM_PFX(InternalAsmThunk16) - SavedCr4) + 0xf)(%rcx), %r11 \r
+ andq $0xfffffffffffffff0, %r11 # r11 <- 16-byte aligned shadowed GDT table in real mode buffer \r
+ \r
+ movw %r10w, (SavedGdt - SavedCr4)(%rcx) # save the limit of shadowed GDT table\r
+ movq %r11, (SavedGdt - SavedCr4 + 0x2)(%rcx) # save the base address of shadowed GDT table\r
+ \r
+ movq 0x62(%rsp) ,%rsi # rsi <- the original GDT base address\r
+ xchg %r10, %rcx # save rcx to r10 and initialize rcx to be the limit of GDT table \r
+ incq %rcx # rcx <- the size of memory to copy\r
+ xchg %r11, %rdi # save rdi to r11 and initialize rdi to the base address of shadowed GDT table\r
+ rep\r
+ movsb # perform memory copy to shadow GDT table\r
+ movq %r10, %rcx # restore the original rcx before memory copy\r
+ movq %r11, %rdi # restore the original rdi before memory copy\r
+ \r
+ sidt 0x50(%rsp)\r
+ movq %cr0, %rax\r
+ movl %eax, (SavedCr0 - SavedCr4)(%rcx)\r
+ andl $0x7ffffffe,%eax # clear PE, PG bits\r
+ movq %cr4, %rbp\r
+ movl %ebp, (%rcx) # save CR4 in SavedCr4\r
+ andl $0x300,%ebp # clear all but PCE and OSFXSR bits\r
+ movl %r8d, %esi # esi <- 16-bit stack segment\r
+ #\r
+ # The two bytes below encode "push $DATA32" (imm8); rdx <- DATA32.\r
+ #\r
+ .byte 0x6a, DATA32\r
+ popq %rdx\r
+ lgdt (_16Gdtr - SavedCr4)(%rcx)\r
+ movl %edx,%ss\r
+ pushfq\r
+ #\r
+ # edx <- DATA32 - 8, i.e. the DATA16 selector (the descriptor just before\r
+ # _32Data). The address of L_RetFromRealMode is pushed so that the 'ret' at\r
+ # the end of BackFromUserCode resumes there once SavedSp has been reloaded.\r
+ #\r
+ lea -8(%rdx), %edx\r
+ lea L_RetFromRealMode(%rip), %r8\r
+ pushq %r8\r
+ movl %cs, %r8d\r
+ movw %r8w, (SavedCs - SavedCr4)(%rcx)\r
+ movl %esp, (SavedSp - SavedCr4)(%rcx)\r
+ .byte 0xff, 0x69 # jmp (_EntryPoint - SavedCr4)(%rcx)\r
+ .byte _EntryPoint - SavedCr4\r
+L_RetFromRealMode: \r
+ popfq\r
+ lgdt 0x60(%rsp) # restore protected mode GDTR\r
+ lidt 0x50(%rsp) # restore protected mode IDTR\r
+ #\r
+ # Return value: start of the register frame that ends at ebp.\r
+ #\r
+ lea -IA32_REGS_SIZE(%rbp), %eax\r
+ .byte 0x0f, 0xa9 # pop gs\r
+ .byte 0x0f, 0xa1 # pop fs\r
+ \r
+ popq %rbx\r
+ movl %ebx, %ss\r
+ popq %rbx\r
+ movl %ebx, %es\r
+ popq %rbx\r
+ movl %ebx, %ds\r
+ \r
+ popq %rdi\r
+ popq %rsi\r
+ popq %rbx\r
+ popq %rbp\r
+\r
+ ret\r