movl 8(%esp), %ecx
movl 12(%esp), %edx
pushfl
- pop %edi
+ pop %edi # save EFLAGS to edi
cli
movl %cr0, %eax
btrl $31, %eax
movl 16(%esp), %esp
movl %eax, %cr0
push %edi
- popfl
+ popfl # restore EFLAGS from edi
push %edx
push %ecx
call *%ebx
- jmp .
+ jmp . # EntryPoint() should not return
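#
# This is the tail of the 32-bit disable-paging thunk: with interrupts
# masked it clears CR0.PG, switches to the caller-supplied stack, and
# transfers to EntryPoint (Context1, Context2), which must never return.
#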
movl 12(%esp), %ecx
xorl %edx, %edx
divl %ecx
- push %eax
+ push %eax # save quotient on stack
movl 8(%esp), %eax
divl %ecx
- pop %edx
+ pop %edx # restore high-order dword of the quotient
ret
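#
# The two divl instructions above perform a 64-by-32 divide one dword at a
# time.  Rough C equivalent (illustration only, not part of the source):
#
#   UINT32 Hi = (UINT32)(Dividend >> 32);
#   UINT32 Lo = (UINT32)Dividend;
#   UINT32 Q1 = Hi / Divisor;                    // first divl
#   UINT32 R1 = Hi % Divisor;                    // remainder left in edx
#   UINT32 Q0 = (UINT32)((((UINT64)R1 << 32) | Lo) / Divisor);  // second divl
#   return ((UINT64)Q1 << 32) | Q0;              // returned in edx:eax
#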
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalMathDivRemU64x32):
- movl 12(%esp), %ecx
- movl 8(%esp), %eax
+ movl 12(%esp), %ecx # ecx <- divisor
+ movl 8(%esp), %eax # eax <- dividend[32..63]
xorl %edx, %edx
- divl %ecx
+ divl %ecx # eax <- quotient[32..63], edx <- remainder
push %eax
- movl 8(%esp), %eax
- divl %ecx
- movl 20(%esp), %ecx
- jecxz L1
+ movl 8(%esp), %eax # eax <- dividend[0..31]
+ divl %ecx # eax <- quotient[0..31]
+ movl 20(%esp), %ecx # ecx <- Remainder
+ jecxz L1 # abandon remainder if Remainder == NULL
movl %edx, (%ecx)
L1:
- pop %edx
+ pop %edx # edx <- quotient[32..63]
ret
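#
# Same two-step divide, but the 32-bit remainder left in edx by the second
# divl is stored through the optional Remainder pointer.  Roughly
# (illustration only):
#
#   if (Remainder != NULL) {
#     *Remainder = (UINT32)(Dividend % Divisor);
#   }
#   return Dividend / Divisor;
#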
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalMathDivRemU64x64):
- movl 16(%esp), %ecx
+ movl 16(%esp), %ecx # ecx <- divisor[32..63]
testl %ecx, %ecx
- jnz Hard
+ jnz Hard # take the Hard path if Divisor >= 2^32
movl 20(%esp), %ecx
jecxz L1
- and $0, 4(%ecx)
- movl %ecx, 16(%esp)
+ and $0, 4(%ecx) # zero high dword of remainder
+ movl %ecx, 16(%esp) # set up stack frame to match DivRemU64x32
L1:
jmp ASM_PFX(InternalMathDivRemU64x32)
Hard:
push %ebx # callee-saved; holds the quotient later
push %esi
push %edi
mov 20(%esp), %edx
- mov 16(%esp), %eax
+ mov 16(%esp), %eax # edx:eax <- dividend
movl %edx, %edi
- movl %eax, %esi
- mov 24(%esp), %ebx
+ movl %eax, %esi # edi:esi <- dividend
+ mov 24(%esp), %ebx # ecx:ebx <- divisor
L2:
shrl %edx
rcrl $1, %eax
shrdl $1, %ecx, %ebx # shift divisor right along with the dividend
shrl %ecx
jnz L2
divl %ebx
- movl %eax, %ebx
- movl 28(%esp), %ecx
- mull 24(%esp)
- imull %ebx, %ecx
- addl %ecx, %edx
- mov 32(%esp), %ecx
- jc TooLarge
- cmpl %edx, %edi
+ movl %eax, %ebx # ebx <- quotient
+ movl 28(%esp), %ecx # ecx <- high dword of divisor
+ mull 24(%esp) # edx:eax <- quotient * divisor[0..31]
+ imull %ebx, %ecx # ecx <- quotient * divisor[32..63]
+ addl %ecx, %edx # edx <- (quotient * divisor)[32..63]
+ mov 32(%esp), %ecx # ecx <- addr for Remainder
+ jc TooLarge # product >= 2^64
+ cmpl %edx, %edi # compare high 32 bits
ja Correct
- jb TooLarge
+ jb TooLarge # product > dividend
cmpl %eax, %esi
- jae Correct
+ jae Correct # product <= dividend
TooLarge:
- decl %ebx
- jecxz Return
- sub 24(%esp), %eax
- sbb 28(%esp), %edx
+ decl %ebx # adjust quotient by -1
+ jecxz Return # return if Remainder == NULL
+ sub 24(%esp), %eax
+ sbb 28(%esp), %edx # edx:eax <- (quotient - 1) * divisor
Correct:
jecxz Return
subl %eax, %esi
- sbbl %edx, %edi
+ sbbl %edx, %edi # edi:esi <- remainder
movl %esi, (%ecx)
movl %edi, 4(%ecx)
Return:
- movl %ebx, %eax
- xorl %edx, %edx
+ movl %ebx, %eax # eax <- quotient
+ xorl %edx, %edx # quotient is 32 bits long
pop %edi
pop %esi
pop %ebx
ret
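#
# The Hard path handles divisors of 2^32 and up: the L2 loop shifts
# dividend and divisor right in lock step until the divisor fits in 32
# bits, one divl then yields a quotient estimate that is at most one too
# large, and the product comparison corrects it.  Rough C equivalent
# (illustration only, not part of the source):
#
#   UINT64 N = Dividend, D = Divisor;
#   while (D > 0xFFFFFFFFULL) {            // the L2 loop
#     N >>= 1;
#     D >>= 1;
#   }
#   UINT64 Q = N / (UINT32)D;              // estimate: quotient or quotient+1
#   if (Q > Dividend / Divisor) {          // done above by comparing
#     Q--;                                 //   Q * Divisor against Dividend
#   }
#   if (Remainder != NULL) {
#     *Remainder = Dividend - Q * Divisor;
#   }
#   return Q;                              // always fits in 32 bits here
#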
movl 8(%esp), %ecx
movl 12(%esp), %edx
pushfl
- pop %edi
+ pop %edi # save flags in edi
cli
movl %cr0, %eax
btsl $31, %eax
movl 16(%esp), %esp
movl %eax, %cr0
push %edi
- popfl
+ popfl # restore flags
push %edx
push %ecx
call *%ebx
jmp . # EntryPoint() should not return
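#
# Mirror image of the disable-paging tail earlier in this file: CR0.PG is
# set (btsl $31) instead of cleared, the stack is switched, and control
# transfers to EntryPoint (Context1, Context2), which must not return.
#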
#------------------------------------------------------------------------------
ASM_PFX(InternalX86EnablePaging64):
cli
- movl $LongStart, (%esp)
+ movl $LongStart, (%esp) # offset for far retf, seg is the 1st arg
movl %cr4, %eax
orb $0x20, %al
movl %eax, %cr4 # enable PAE
movl $0xc0000080, %ecx # EFER MSR number
rdmsr
orb $1, %ah # set LME
wrmsr
movl %cr0, %eax
- btsl $31, %eax
+ btsl $31, %eax # set PG
movl %eax, %cr0 # enable paging
- lret
+ lret # topmost 2 dwords hold the address
LongStart: # long mode starts here
- .byte 0x67, 0x48
+ .byte 0x67, 0x48 # 32-bit address size, 64-bit operand size
movl (%esp), %ebx # mov rbx, [esp]
.byte 0x67, 0x48
movl 8(%esp), %ecx # mov rcx, [esp + 8]
.byte 0x67, 0x48
movl 0x10(%esp), %edx # mov rdx, [esp + 10h]
.byte 0x67, 0x48
movl 0x18(%esp), %esp # mov rsp, [esp + 18h]
.byte 0x48
addl $-0x20, %esp # add rsp, -20h
call *%ebx # call rbx
- jmp .
+ jmp . # no one should get here
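#
# The .byte 0x67, 0x48 pairs are hand-assembled address-size and REX.W
# prefixes that turn each following 32-bit movl into its 64-bit form,
# since a 32-bit assembler cannot emit long-mode instructions.  The final
# add rsp, -20h reserves the 32-byte shadow space required by the x64
# calling convention before call rbx.
#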
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalX86FxRestore):
- movl 4(%esp), %eax
+ movl 4(%esp), %eax # Buffer must be 16-byte aligned
fxrstor (%eax)
ret
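#
# fxrstor reloads the x87/MMX/SSE state saved by fxsave below; Buffer must
# be the same 512-byte, 16-byte-aligned area or the instruction faults.
#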
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalX86FxSave):
- movl 4(%esp), %eax
+ movl 4(%esp), %eax # Buffer must be 16-byte aligned
fxsave (%eax)
ret
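#
# Usage sketch for the save/restore pair (illustration only; assumes the
# 512-byte, 16-byte-aligned buffer type from BaseLib):
#
#   IA32_FX_BUFFER Buffer;
#   InternalX86FxSave (&Buffer);
#   // ... code that clobbers floating-point/SSE state ...
#   InternalX86FxRestore (&Buffer);
#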
shldl %cl, %eax, %edx
rorl %cl, %ebx
shldl %cl, %ebx, %eax
- testb $32, %cl
- cmovnz %eax, %ecx
+ testb $32, %cl # Count >= 32?
+ cmovnz %eax, %ecx # switch eax & edx if Count >= 32
cmovnz %edx, %eax
cmovnz %ecx, %edx
pop %ebx
ret
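#
# Tail of the 64-bit rotate-left.  Rough C equivalent for 0 < Count < 64
# (illustration only):
#
#   return (Operand << Count) | (Operand >> (64 - Count));
#
# The three cmovnz instructions swap eax and edx through ecx when
# Count >= 32, putting each half of the result in the right register.
#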
movb 12(%esp), %cl
xorl %eax, %eax
movl 4(%esp), %edx
- testb $32, %cl
+ testb $32, %cl # Count >= 32?
cmovz %edx, %eax
cmovz 0x8(%esp), %edx
shld %cl, %eax, %edx
shl %cl, %eax # eax is still 0 when Count >= 32
ret
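#
# 64-bit shift-left.  Rough C equivalent (illustration only):
# return Operand << Count; for Count >= 32 the cmovz pair leaves eax
# zeroed and moves the low dword into edx before the final shifts.
#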
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalLongJump):
- pop %eax
- pop %edx
- pop %eax
+ pop %eax # skip return address
+ pop %edx # edx <- JumpBuffer
+ pop %eax # eax <- Value
movl (%edx), %ebx
movl 4(%edx), %esi
movl 8(%edx), %edi
movl 12(%edx), %ebp
movl 16(%edx), %esp
- jmp *20(%edx)
+ jmp *20(%edx) # restore "eip"
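#
# InternalLongJump unwinds to the context captured by SetJump below: the
# callee-saved registers, esp, and the saved return address all come from
# JumpBuffer, and Value (in eax) becomes SetJump's apparent return value.
#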
movl 4(%esp), %eax
movl 8(%esp), %ecx
movl 12(%esp), %edx
- monitor %eax, %ecx, %edx
+ monitor %eax, %ecx, %edx # eax = address, ecx = extensions, edx = hints
ret
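#
# The three stack arguments land in the fixed registers the monitor
# instruction reads: eax = address to watch, ecx = extensions, edx = hints.
#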
ASM_PFX(InternalMathMultU64x32):
movl 12(%esp), %ecx
movl %ecx, %eax
- imull 8(%esp), %ecx
+ imull 8(%esp), %ecx # overflow not detectable
mull 0x4(%esp)
addl %ecx, %edx
ret
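#
# Rough C equivalent of the 64 x 32 multiply above (illustration only):
#
#   return ((Multiplicand & 0xFFFFFFFFULL) * Multiplier)
#        + (((Multiplicand >> 32) * Multiplier) << 32);  // overflow lost
#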
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalMathMultU64x64):
push %ebx
- movl 8(%esp), %ebx
- movl 16(%esp), %edx
+ movl 8(%esp), %ebx # ebx <- M1[0..31]
+ movl 16(%esp), %edx # edx <- M2[0..31]
movl %ebx, %ecx
movl %edx, %eax
- imull 20(%esp), %ebx
- imull 12(%esp), %edx
- addl %edx, %ebx
- mull %ecx
- addl %ebx, %edx
+ imull 20(%esp), %ebx # ebx <- M1[0..31] * M2[32..63]
+ imull 12(%esp), %edx # edx <- M1[32..63] * M2[0..31]
+ addl %edx, %ebx # carries are abandoned
+ mull %ecx # edx:eax <- M1[0..31] * M2[0..31]
+ addl %ebx, %edx # carries are abandoned
pop %ebx
ret
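#
# Rough C equivalent of the 64 x 64 multiply above (illustration only);
# the cross products contribute to the high dword only, as in the asm:
#
#   UINT32 A0 = (UINT32)M1, A1 = (UINT32)(M1 >> 32);
#   UINT32 B0 = (UINT32)M2, B1 = (UINT32)(M2 >> 32);
#   UINT32 Hi = A0 * B1 + A1 * B0;               // carries abandoned
#   return (UINT64)A0 * B0 + ((UINT64)Hi << 32);
#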
ASM_PFX(AsmMwait):
movl 4(%esp), %eax
movl 8(%esp), %ecx
- mwait %eax, %ecx
+ mwait %eax, %ecx # eax = hints, ecx = extensions
ret
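#
# mwait parks the processor until a write hits the address range armed by
# the monitor instruction above; eax carries the hints (e.g. the target
# C-state) and ecx the extensions.
#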
shrdl %cl, %edx, %eax
roll %cl, %ebx
shrdl %cl, %ebx, %edx
- testb $32, %cl
- cmovnz %eax, %ecx
+ testb $32, %cl # Count >= 32?
+ cmovnz %eax, %ecx # switch eax & edx if Count >= 32
cmovnz %edx, %eax
cmovnz %ecx, %edx
pop %ebx
ret
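#
# Tail of the 64-bit rotate-right.  Rough C equivalent for 0 < Count < 64
# (illustration only):
#
#   return (Operand >> Count) | (Operand << (64 - Count));
#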
# );
#------------------------------------------------------------------------------
ASM_PFX(InternalMathRShiftU64):
- movb 12(%esp), %cl
+ movb 12(%esp), %cl # cl <- Count
xorl %edx, %edx
movl 8(%esp), %eax
- testb $32, %cl
+ testb $32, %cl # Count >= 32?
cmovz %eax, %edx
cmovz 0x4(%esp), %eax
shrdl %cl, %edx, %eax
shrl %cl, %edx # edx is still 0 when Count >= 32
ret
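#
# 64-bit logical shift-right.  Rough C equivalent (illustration only):
# return Operand >> Count; for Count >= 32 the cmovz pair leaves edx
# zeroed and moves the high dword into eax before the final shifts.
#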
#------------------------------------------------------------------------------
ASM_PFX(SetJump):
pushl 0x4(%esp)
- call ASM_PFX(InternalAssertJumpBuffer)
+ call ASM_PFX(InternalAssertJumpBuffer) # To validate JumpBuffer
pop %ecx
- pop %ecx
+ pop %ecx # ecx <- return address
movl (%esp), %edx
movl %ebx, (%edx)
movl %esi, 4(%edx)
movl %edi, 8(%edx)
movl %ebp, 12(%edx)
movl %esp, 16(%edx)
- movl %ecx, 20(%edx)
+ movl %ecx, 20(%edx) # eip value to restore in LongJump
xorl %eax, %eax
jmp *%ecx
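#
# SetJump stores the callee-saved registers, esp, and its own return
# address into JumpBuffer and returns 0; a later LongJump resumes at that
# return address with a non-zero value.  Usage sketch (illustration only):
#
#   BASE_LIBRARY_JUMP_BUFFER JumpBuffer;
#   if (SetJump (&JumpBuffer) == 0) {
#     // direct return: registers just captured
#   } else {
#     // arrived here via LongJump (&JumpBuffer, Value)
#   }
#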
#------------------------------------------------------------------------------
.globl ASM_PFX(InternalMathSwapBytes64)
ASM_PFX(InternalMathSwapBytes64):
- movl 8(%esp), %eax
- movl 4(%esp), %edx
+ movl 8(%esp), %eax # eax <- upper 32 bits
+ movl 4(%esp), %edx # edx <- lower 32 bits
bswapl %eax
bswapl %edx
ret
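#
# The two bswapl instructions reverse the bytes within each half while the
# deliberately crossed loads (upper dword into eax, lower into edx) swap
# the halves, so edx:eax holds the fully byte-reversed 64-bit value.
#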