| 1 | ;------------------------------------------------------------------------------ ;\r |
| 2 | ; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>\r |
| 3 | ; This program and the accompanying materials\r |
| 4 | ; are licensed and made available under the terms and conditions of the BSD License\r |
| 5 | ; which accompanies this distribution. The full text of the license may be found at\r |
| 6 | ; http://opensource.org/licenses/bsd-license.php.\r |
| 7 | ;\r |
| 8 | ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r |
| 9 | ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r |
| 10 | ;\r |
| 11 | ; Module Name:\r |
| 12 | ;\r |
| 13 | ; MpFuncs.nasm\r |
| 14 | ;\r |
| 15 | ; Abstract:\r |
| 16 | ;\r |
| 17 | ; This is the assembly code for MP support\r |
| 18 | ;\r |
| 19 | ;-------------------------------------------------------------------------------\r |
| 20 | \r |
| 21 | %include "MpEqu.inc"\r |
| 22 | extern ASM_PFX(InitializeFloatingPointUnits)\r |
| 23 | \r |
| 24 | DEFAULT REL\r |
| 25 | \r |
| 26 | SECTION .text\r |
| 27 | \r |
;-------------------------------------------------------------------------------------
; RendezvousFunnelProc
;
; Start-up code executed by every AP. APs arrive here very raw: real mode, no
; stack, CS = 0x(vv00), IP = 0 and the BIST result in EAX.
;
; All parameters are read from a data area addressed through the *Location
; constants (presumably field offsets defined in MpEqu.inc -- confirm there):
; CS-relative while in real mode, and relative to the BufferStart value
; (kept in EBX, then ESI) once in long mode.
;
; Sequence:
;   1. Save BIST in EBP; DS = ES = SS = CS; FS = GS = 0.
;   2. Patch the operand of the far jump that sits immediately before
;      LongModeStart: selector <- CodeSegment, offset <- BufferStart + ModeOffset.
;   3. Load GDTR/IDTR; optionally set EFER bit 11 (execute disable).
;   4. Set CR0.PE/MP, CR4 bit 5 (PAE), CR3, EFER.LME, CR0.PG, then far jump.
;   5. In long mode pick a stack:
;        InitFlag == 1 : take the lock, increment NumApsExecuting and use
;                        StackStartAddress + StackSize * (NumApsExecuting + 1);
;        otherwise     : look the APIC ID up in the CpuInfo table and use the
;                        stack pointer stored in the matching entry.
;   6. Push BIST, call InitializeFloatingPointUnits, then call the function
;      pointer stored at ApProcedureLocation with
;        rcx = BufferStart + LockLocation, edx = this AP's number.
;
; Register roles in the 64-bit part:
;   ebp = BIST, esi = BufferStart, ebx = AP number, edx = APIC ID during lookup.
;
; The region [RendezvousFunnelProcStart, RendezvousFunnelProcEnd) is reported
; by AsmGetAddressMap by address and size, and the far jump is patched relative
; to BufferStart, so the code is evidently executed from a copy -- only
; relative branches and an absolute external call are used inside it.
;
; ALSO THIS PROCEDURE IS ENTERED BY APs IN 16 BIT MODE ONLY.
;-------------------------------------------------------------------------------------
global ASM_PFX(RendezvousFunnelProc)
ASM_PFX(RendezvousFunnelProc):
RendezvousFunnelProcStart:
; At this point CS = 0x(vv00) and ip= 0x0.
; Save BIST information to ebp firstly

BITS 16
    mov        ebp, eax                        ; Save BIST information

    mov        ax, cs
    mov        ds, ax
    mov        es, ax
    mov        ss, ax
    xor        ax, ax
    mov        fs, ax
    mov        gs, ax

    mov        si,  BufferStartLocation
    mov        ebx, [si]                       ; ebx = BufferStart

    ;
    ; Patch the far jump in front of LongModeStart. The jump is encoded as
    ; <opcode> <dword offset> <word selector>, so the selector lives at
    ; (target - 2) and the offset at (target - 6).
    ; NOTE(review): assumes [ModeOffsetLocation] holds the
    ; LongModeStart - RendezvousFunnelProcStart value reported by
    ; AsmGetAddressMap -- confirm in the C caller.
    ;
    mov        di,  ModeOffsetLocation
    mov        eax, [di]                       ; eax = ModeOffset
    mov        di,  CodeSegmentLocation
    mov        edx, [di]                       ; edx = long mode CS
    mov        di,  ax
    sub        di,  02h
    mov        [di], dx                        ; Patch long mode CS
    sub        di,  04h
    add        eax, ebx                        ; eax = BufferStart + ModeOffset
    mov        [di], eax                       ; Patch address

    mov        si, GdtrLocation
o32 lgdt       [cs:si]

    mov        si, IdtrLocation
o32 lidt       [cs:si]

    mov        si, EnableExecuteDisableLocation
    cmp        byte [si], 0
    jz         SkipEnableExecuteDisableBit

    ;
    ; Enable execute disable bit
    ;
    mov        ecx, 0c0000080h                 ; EFER MSR number
    rdmsr                                      ; Read EFER
    bts        eax, 11                         ; Enable Execute Disable Bit
    wrmsr                                      ; Write EFER

SkipEnableExecuteDisableBit:

    ; Fetch everything still needed from the DS-relative data area before
    ; DS is cleared below.
    mov        di,  DataSegmentLocation
    mov        edi, [di]                       ; Save long mode DS in edi

    mov        si,  Cr3Location                ; Save CR3 in ecx
    mov        ecx, [si]

    xor        ax, ax
    mov        ds, ax                          ; Clear data segment

    mov        eax, cr0                        ; Get control register 0
    or         eax, 000000003h                 ; Set PE bit (bit #0) & MP
    mov        cr0, eax

    mov        eax, cr4
    bts        eax, 5                          ; Set CR4 bit 5 (PAE)
    mov        cr4, eax

    mov        cr3, ecx                        ; Load CR3

    mov        ecx, 0c0000080h                 ; EFER MSR number
    rdmsr                                      ; Read EFER
    bts        eax, 8                          ; Set LME=1
    wrmsr                                      ; Write EFER

    mov        eax, cr0                        ; Read CR0
    bts        eax, 31                         ; Set PG=1
    mov        cr0, eax                        ; Write CR0

    jmp        0:strict dword 0                ; far jump to long mode (operand patched above)
BITS 64
LongModeStart:
    mov        eax, edi                        ; eax = long mode DS saved earlier
    mov        ds,  ax
    mov        es,  ax
    mov        ss,  ax

    mov        esi, ebx                        ; esi = BufferStart for the rest of the routine
    lea        edi, [esi + InitFlagLocation]
    cmp        qword [edi], 1                  ; ApInitConfig
    jnz        GetApicId

    ; AP init
    mov        edi, esi
    add        edi, LockLocation
    mov        rax, NotVacantFlag

TestLock:
    ; Spin until the value swapped out of the lock is not NotVacantFlag,
    ; i.e. until this AP is the one that changed it to NotVacantFlag.
    xchg       qword [edi], rax
    cmp        rax, NotVacantFlag
    jz         TestLock

    ; Lock held: take the next AP number.
    lea        ecx, [esi + NumApsExecutingLocation]
    inc        dword [ecx]
    mov        ebx, [ecx]                      ; ebx = NumApsExecuting after increment

Releaselock:
    mov        rax, VacantFlag
    xchg       qword [edi], rax
    ; program stack
    mov        edi, esi
    add        edi, StackSizeLocation
    mov        eax, dword [edi]
    mov        ecx, ebx
    inc        ecx
    mul        ecx                             ; EAX = StackSize * (CpuNumber + 1); EDX (high half) is ignored
    mov        edi, esi
    add        edi, StackStartAddressLocation
    add        rax, qword [edi]
    mov        rsp, rax
    jmp        CProcedureInvoke

GetApicId:
    xor        eax, eax
    cpuid                                      ; eax = highest basic CPUID leaf
    cmp        eax, 0bh
    jb         NoX2Apic                        ; leaf 0Bh not implemented

    mov        eax, 0bh
    xor        ecx, ecx
    cpuid
    ; A maximum leaf >= 0Bh alone does not guarantee that leaf 0Bh is
    ; populated: EBX[15:0] must be non-zero, otherwise EDX does not hold
    ; a valid x2APIC ID and the CPUID.1 APIC ID has to be used instead.
    test       ebx, 0ffffh
    jz         NoX2Apic
    ; Processor is x2APIC capable; edx holds the 32-bit x2APIC ID
    jmp        GetProcessorNumber

NoX2Apic:
    ; Processor is not x2APIC capable, so get 8-bit APIC ID
    mov        eax, 1
    cpuid
    shr        ebx, 24                         ; CPUID.1:EBX[31:24] = initial APIC ID
    mov        edx, ebx

GetProcessorNumber:
    ;
    ; Get processor number for this AP
    ; Note that BSP may become an AP due to SwitchBsp()
    ;
    ; Table walk: 20-byte entries, APIC ID dword at +0, stack pointer qword
    ; at +12. The loop has no upper bound; it relies on the APIC ID being
    ; present in the table.
    ; NOTE(review): only the low 32 bits of the CpuInfo pointer are loaded --
    ; confirm the table is always located below 4 GB.
    ;
    xor        ebx, ebx                        ; ebx = index of the entry being examined
    lea        eax, [esi + CpuInfoLocation]
    mov        edi, [eax]                      ; edi -> first CpuInfo entry

GetNextProcNumber:
    cmp        dword [edi], edx                ; APIC ID match?
    jz         ProgramStack
    add        edi, 20
    inc        ebx
    jmp        GetNextProcNumber

ProgramStack:
    mov        rsp, qword [edi + 12]           ; stack pointer recorded in the matching entry

CProcedureInvoke:
    push       rbp                             ; Push BIST data at top of AP stack
    xor        rbp, rbp                        ; Clear ebp for call stack trace
    push       rbp
    mov        rbp, rsp

    ; Absolute address on purpose: this code runs from a relocated copy, so a
    ; RIP-relative reference to a symbol outside the copied region would
    ; resolve to the wrong location.
    mov        rax, ASM_PFX(InitializeFloatingPointUnits)
    sub        rsp, 20h                        ; 32-byte shadow space for the callee
    call       rax                             ; Call assembly function to initialize FPU per UEFI spec
    add        rsp, 20h

    mov        edx, ebx                        ; 2nd argument: AP number (NumApsExecuting or CpuInfo index)
    mov        ecx, esi
    add        ecx, LockLocation               ; rcx is address of exchange info data buffer

    mov        edi, esi
    add        edi, ApProcedureLocation
    mov        rax, qword [edi]                ; rax = C entry point stored by the caller

    sub        rsp, 20h
    call       rax                             ; Invoke C function
    add        rsp, 20h
    jmp        $                               ; Should never reach here

RendezvousFunnelProcEnd:
| 220 | \r |
;-------------------------------------------------------------------------------------
;  AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment);
;
;  Drops the calling processor from long mode to 32-bit protected mode with
;  paging off and parks it forever in an MWAIT or HLT loop. Never returns.
;
;  In (Microsoft x64 register order):
;    rcx = MwaitSupport   - the low byte is compared with 1 to select the MWAIT loop
;    rdx = ApTargetCState - C-state placed in MWAIT EAX[7:4]
;    r8  = PmCodeSegment  - code segment selector used for the far return
;                           (presumably a 32-bit code segment, since the code
;                           after the far return is assembled with BITS 32)
;
;  The current stack is reused after the mode switch, so RSP must be
;  addressable through ESP.
;  The span [AsmRelocateApLoopStart, AsmRelocateApLoopEnd) is reported by
;  AsmGetAddressMap; PmEntry is reached through a RIP-relative lea, so the
;  routine also works when executed from a copy.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmRelocateApLoop)
ASM_PFX(AsmRelocateApLoop):
AsmRelocateApLoopStart:
    push       rcx                             ; MwaitSupport, popped again in 32-bit mode
    push       rdx                             ; ApTargetCState, popped again in 32-bit mode

    lea        rsi, [PmEntry]                  ; rsi <- The start address of transition code

    ; Build a far-return frame (CS above RIP) and "return" into PmEntry.
    push       r8
    push       rsi
    DB         0x48                            ; REX.W prefix: 64-bit operand size for retf
    retf
BITS 32
PmEntry:
    mov        eax, cr0
    btr        eax, 31                         ; Clear CR0.PG
    mov        cr0, eax                        ; Disable paging

    mov        ecx, 0xc0000080                 ; EFER MSR number
    rdmsr
    and        ah, ~ 1                         ; Clear LME
    wrmsr
    mov        eax, cr4
    and        al, ~ (1 << 5)                  ; Clear PAE
    mov        cr4, eax

    ; The arguments were pushed as 8-byte values; a 32-bit pop consumes only
    ; the low dword, so skip the high dword after each one.
    pop        edx                             ; edx = ApTargetCState
    add        esp, 4
    pop        ecx                             ; ecx = MwaitSupport
    add        esp, 4
    cmp        cl, 1                           ; Check mwait-monitor support
    jnz        HltLoop
    mov        ebx, edx                        ; Save C-State to ebx
MwaitLoop:
    mov        eax, esp                        ; Set Monitor Address
    xor        ecx, ecx                        ; ecx = 0
    xor        edx, edx                        ; edx = 0
    monitor
    ; Build the hint in eax from the untouched copy in ebx on every pass.
    ; Shifting ebx itself would move the C-state further left after each
    ; wake-up and corrupt the hint from the second iteration on.
    mov        eax, ebx
    shl        eax, 4                          ; Mwait Cx, Target C-State per eax[7:4]
    mwait
    jmp        MwaitLoop
HltLoop:
    cli
    hlt
    jmp        HltLoop
BITS 64
AsmRelocateApLoopEnd:
| 274 | \r |
;-------------------------------------------------------------------------------------
;  AsmGetAddressMap (&AddressMap);
;
;  Fills the caller-supplied address map with the location and size of the
;  relocatable code in this file.
;
;  In:    rcx = pointer to the address map (five qwords are written)
;  Out:   [rcx + 00h] = address of RendezvousFunnelProc
;         [rcx + 08h] = LongModeStart - RendezvousFunnelProcStart
;         [rcx + 10h] = RendezvousFunnelProcEnd - RendezvousFunnelProcStart
;         [rcx + 18h] = address of AsmRelocateApLoop
;         [rcx + 20h] = AsmRelocateApLoopEnd - AsmRelocateApLoopStart
;  Clobb: rax
;
;  The two addresses are formed RIP-relative so that no 64-bit absolute
;  relocation is required for them.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmGetAddressMap)
ASM_PFX(AsmGetAddressMap):
    lea        rax, [rel ASM_PFX(RendezvousFunnelProc)]
    mov        qword [rcx], rax
    mov        qword [rcx +  8h], LongModeStart - RendezvousFunnelProcStart
    mov        qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    lea        rax, [rel ASM_PFX(AsmRelocateApLoop)]
    mov        qword [rcx + 18h], rax
    mov        qword [rcx + 20h], AsmRelocateApLoopEnd - AsmRelocateApLoopStart
    ret
| 288 | \r |
;-------------------------------------------------------------------------------------
;  AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
;
;  Executed by the current BSP, that is about to become an AP, and by the AP it
;  trades places with. Each processor saves its context on its own stack,
;  publishes its stack pointer, GDTR and IDTR in MyInfo, then adopts the stack
;  pointer, GDTR and IDTR published in OthersInfo and restores the context
;  found on that stack.
;
;  In:   rcx = MyInfo, rdx = OthersInfo
;  Fields touched in each info block:
;        +0  byte   switch state (CPU_SWITCH_STATE_STORED / _LOADED)
;        +8  qword  saved stack pointer
;        +16        GDTR image (sgdt/lgdt operand)
;        +26        IDTR image (sidt/lidt operand)
;
;  DO NOT call other functions from here: the two processors may be using the
;  same stack at the same time, and a call would corrupt it.
;-------------------------------------------------------------------------------------
EXCHG_INFO_STATE     equ 0                     ; offset of the switch-state byte
EXCHG_INFO_STACK     equ 8                     ; offset of the saved stack pointer
EXCHG_INFO_GDTR      equ 16                    ; offset of the GDTR image
EXCHG_INFO_IDTR      equ 26                    ; offset of the IDTR image

global ASM_PFX(AsmExchangeRole)
ASM_PFX(AsmExchangeRole):
    ; ---- save general purpose registers (restored in reverse order below) ----
    push       rax
    push       rbx
    push       rcx
    push       rdx
    push       rsi
    push       rdi
    push       rbp
    push       r8
    push       r9
    push       r10
    push       r11
    push       r12
    push       r13
    push       r14
    push       r15

    ; rsi/rdi are already saved, so they can now carry the two info pointers
    mov        rsi, rcx                        ; rsi -> MyInfo
    mov        rdi, rdx                        ; rdi -> OthersInfo

    ; ---- save CR0, CR4 and RFLAGS on the stack ----
    mov        rax, cr0
    push       rax
    mov        rax, cr4
    push       rax
    pushfq

    ; ---- publish descriptor table registers and stack pointer in MyInfo ----
    sgdt       [rsi + EXCHG_INFO_GDTR]
    sidt       [rsi + EXCHG_INFO_IDTR]
    mov        [rsi + EXCHG_INFO_STACK], rsp

    ; mark this processor's state as STORED
    mov        byte [rsi + EXCHG_INFO_STATE], CPU_SWITCH_STATE_STORED

    ; ---- wait until the other processor has stored its state as well ----
    jmp        .check_other_stored
.spin_other_stored:
    pause
.check_other_stored:
    cmp        byte [rdi + EXCHG_INFO_STATE], CPU_SWITCH_STATE_STORED
    jnz        .spin_other_stored

    ; ---- adopt the other processor's GDTR, IDTR and stack ----
    lgdt       [rdi + EXCHG_INFO_GDTR]
    lidt       [rdi + EXCHG_INFO_IDTR]
    mov        rsp, [rdi + EXCHG_INFO_STACK]

    ; tell the other processor that its state has been LOADED
    mov        byte [rdi + EXCHG_INFO_STATE], CPU_SWITCH_STATE_LOADED

    ; ---- wait until the other processor has loaded our state, otherwise ----
    ; ---- the data still on the stack could be corrupted                 ----
    jmp        .check_other_loaded
.spin_other_loaded:
    pause
.check_other_loaded:
    cmp        byte [rsi + EXCHG_INFO_STATE], CPU_SWITCH_STATE_LOADED
    jnz        .spin_other_loaded

    ; ---- restore the context found on the adopted stack ----
    popfq

    pop        rax
    mov        cr4, rax

    pop        rax
    mov        cr0, rax

    pop        r15
    pop        r14
    pop        r13
    pop        r12
    pop        r11
    pop        r10
    pop        r9
    pop        r8
    pop        rbp
    pop        rdi
    pop        rsi
    pop        rdx
    pop        rcx
    pop        rbx
    pop        rax

    ret