]> git.proxmox.com Git - mirror_edk2.git/blame - UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
UefiCpuPkg/MpInitLib: Enhance waiting for AP initialization logic.
[mirror_edk2.git] / UefiCpuPkg / Library / MpInitLib / X64 / MpFuncs.nasm
CommitLineData
;------------------------------------------------------------------------------ ;
; Copyright (c) 2015 - 2017, Intel Corporation. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution.  The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php.
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   MpFuncs.nasm
;
; Abstract:
;
;   This is the assembly code for MP support
;
;-------------------------------------------------------------------------------
20\r
21%include "MpEqu.inc"\r
22extern ASM_PFX(InitializeFloatingPointUnits)\r
23\r
24DEFAULT REL\r
25\r
26SECTION .text\r
27\r
;-------------------------------------------------------------------------------------
; RendezvousFunnelProc
;
; Entry code executed by every AP. APs arrive here very raw: real mode, no
; stack, CS = 0x(vv00) and IP = 0. The code between RendezvousFunnelProcStart
; and RendezvousFunnelProcEnd is addressed through offsets from the buffer base
; (the *Location constants from MpEqu.inc), so it must stay position independent.
;
; Flow:
;   1. 16-bit part: save BIST, patch the far jump operands, load GDTR/IDTR,
;      optionally set EFER bit 11 (execute disable), enable PE/PAE/LME/PG and
;      far-jump to LongModeStart.
;   2. 64-bit part: pick a stack (either by taking the next ApIndex under the
;      spin lock, or by looking the APIC ID up in the CPU info table), call
;      InitializeFloatingPointUnits, then call the C procedure stored at
;      ApProcedureLocation with rcx = exchange-info address, edx = ApIndex.
;
; Register roles:
;   ebp - BIST value received in eax at entry (pushed on the AP stack later)
;   ebx - buffer start address in the 16-bit part; AP index / processor number
;         in the 64-bit part
;   esi - buffer start address in the 64-bit part
;-------------------------------------------------------------------------------------
global ASM_PFX(RendezvousFunnelProc)
ASM_PFX(RendezvousFunnelProc):
RendezvousFunnelProcStart:
; At this point CS = 0x(vv00) and ip= 0x0.
; Save BIST information to ebp firstly

BITS 16
    mov        ebp, eax                        ; Save BIST information

    ; Make every data access go through the code segment base; fs/gs are cleared.
    mov        ax, cs
    mov        ds, ax
    mov        es, ax
    mov        ss, ax
    xor        ax, ax
    mov        fs, ax
    mov        gs, ax

    mov        si,  BufferStartLocation
    mov        ebx, [si]                       ; ebx = buffer start address

    ;
    ; Patch the operands of the far jump below. The word at (ModeOffset - 2)
    ; receives the long mode code selector and the dword at (ModeOffset - 6)
    ; receives BufferStart + ModeOffset.
    ; NOTE(review): this relies on ModeOffset being the offset of LongModeStart,
    ; which is the value AsmGetAddressMap reports - confirm against the C caller.
    ;
    mov        di,  ModeOffsetLocation
    mov        eax, [di]
    mov        di,  CodeSegmentLocation
    mov        edx, [di]
    mov        di,  ax
    sub        di,  02h
    mov        [di],dx                         ; Patch long mode CS
    sub        di,  04h
    add        eax, ebx
    mov        [di],eax                        ; Patch address

    mov        si, GdtrLocation
o32 lgdt       [cs:si]

    mov        si, IdtrLocation
o32 lidt       [cs:si]

    mov        si, EnableExecuteDisableLocation
    cmp        byte [si], 0
    jz         SkipEnableExecuteDisableBit

    ;
    ; Enable execute disable bit
    ;
    mov        ecx, 0c0000080h             ; EFER MSR number
    rdmsr                                  ; Read EFER
    bts        eax, 11                     ; Enable Execute Disable Bit
    wrmsr                                  ; Write EFER

SkipEnableExecuteDisableBit:

    mov        di,  DataSegmentLocation
    mov        edi, [di]                   ; Save long mode DS in edi

    mov        si, Cr3Location             ; Save CR3 in ecx
    mov        ecx, [si]

    xor        ax,  ax
    mov        ds,  ax                     ; Clear data segment

    mov        eax, cr0                    ; Get control register 0
    or         eax, 000000003h             ; Set PE bit (bit #0) & MP
    mov        cr0, eax

    mov        eax, cr4
    bts        eax, 5                      ; Set CR4 bit 5 (PAE)
    mov        cr4, eax

    mov        cr3, ecx                    ; Load CR3

    mov        ecx, 0c0000080h             ; EFER MSR number
    rdmsr                                  ; Read EFER
    bts        eax, 8                      ; Set LME=1
    wrmsr                                  ; Write EFER

    mov        eax, cr0                    ; Read CR0
    bts        eax, 31                     ; Set PG=1
    mov        cr0, eax                    ; Write CR0

    jmp        0:strict dword 0            ; far jump to long mode (operands patched above)
BITS 64
LongModeStart:
    mov        eax, edi                    ; edi = long mode data selector saved earlier
    mov        ds,  ax
    mov        es,  ax
    mov        ss,  ax

    mov        esi, ebx                    ; esi = buffer start address from here on
    lea        edi, [esi + InitFlagLocation]
    cmp        qword [edi], 1              ; ApInitConfig
    jnz        GetApicId

    ; Increment the number of APs executing here as early as possible
    ; This is decremented in C code when AP is finished executing
    mov        edi, esi
    add        edi, NumApsExecutingLocation
    lock inc   dword [edi]

    ; AP init: take the spin lock, claim the next AP index, release the lock
    mov        edi, esi
    add        edi, LockLocation
    mov        rax, NotVacantFlag

TestLock:
    xchg       qword [edi], rax
    cmp        rax, NotVacantFlag
    jz         TestLock                    ; previous value was "taken": retry

    lea        ecx, [esi + ApIndexLocation]
    inc        dword [ecx]
    mov        ebx, [ecx]                  ; ebx = this AP's index

Releaselock:
    mov        rax, VacantFlag
    xchg       qword [edi], rax

    ; program stack
    mov        edi, esi
    add        edi, StackSizeLocation
    mov        eax, dword [edi]
    mov        ecx, ebx
    inc        ecx
    mul        ecx                         ; EAX = StackSize * (CpuNumber + 1)
    mov        edi, esi
    add        edi, StackStartAddressLocation
    add        rax, qword [edi]
    mov        rsp, rax
    jmp        CProcedureInvoke

GetApicId:
    mov        eax, 0
    cpuid
    cmp        eax, 0bh
    jb         NoX2Apic                    ; CPUID level below CPUID_EXTENDED_TOPOLOGY

    mov        eax, 0bh
    xor        ecx, ecx
    cpuid
    test       ebx, 0ffffh
    jz         NoX2Apic                    ; CPUID.0BH:EBX[15:0] is zero

    ; Processor is x2APIC capable; 32-bit x2APIC ID is already in EDX
    jmp        GetProcessorNumber

NoX2Apic:
    ; Processor is not x2APIC capable, so get 8-bit APIC ID
    mov        eax, 1
    cpuid
    shr        ebx, 24
    mov        edx, ebx

GetProcessorNumber:
    ;
    ; Get processor number for this AP
    ; Note that BSP may become an AP due to SwitchBsp()
    ;
    ; The table pointer is loaded and walked as a full 64-bit value so that a
    ; table located above 4 GiB is not truncated.
    ;
    xor        ebx, ebx                    ; ebx = candidate processor number
    lea        eax, [esi + CpuInfoLocation]
    mov        rdi, [eax]                  ; rdi = address of first table entry

GetNextProcNumber:
    ; NOTE(review): the search has no upper bound; it relies on the APIC ID in
    ; edx being present in the table.
    cmp        dword [rdi], edx            ; APIC ID match?
    jz         ProgramStack
    add        rdi, 20                     ; advance to next entry (20 bytes each)
    inc        ebx
    jmp        GetNextProcNumber

ProgramStack:
    mov        rsp, qword [rdi + 12]       ; stack top stored at offset 12 of the entry

CProcedureInvoke:
    push       rbp                         ; Push BIST data at top of AP stack
    xor        rbp, rbp                    ; Clear ebp for call stack trace
    push       rbp
    mov        rbp, rsp

    mov        rax, qword [esi + InitializeFloatingPointUnitsAddress]
    sub        rsp, 20h                    ; shadow space for the callee
    call       rax                         ; Call assembly function to initialize FPU per UEFI spec
    add        rsp, 20h

    mov        edx, ebx                    ; edx is ApIndex
    mov        ecx, esi
    add        ecx, LockLocation           ; rcx is address of exchange info data buffer

    mov        edi, esi
    add        edi, ApProcedureLocation
    mov        rax, qword [edi]

    sub        rsp, 20h                    ; shadow space for the callee
    call       rax                         ; Invoke C function
    add        rsp, 20h
    jmp        $                           ; Should never reach here

RendezvousFunnelProcEnd:
229\r
;-------------------------------------------------------------------------------------
; AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment, TopOfApStack, CountTofinish);
;
; Parks the calling AP. The function never returns: it ends in either the
; MONITOR/MWAIT loop or the CLI/HLT loop.
;
; In:   rcx        = MwaitSupport   (the MWAIT loop is used only when cl == 1)
;       rdx        = ApTargetCState (shifted into eax[7:4] for MWAIT)
;       r8         = PmCodeSegment  (code selector used by the far return)
;       r9         = TopOfApStack   (becomes the new stack pointer)
;       [rsp + 40] = CountTofinish  (pointer; the dword it points to is
;                    atomically decremented before the stack is switched)
;
; The code between AsmRelocateApLoopStart and AsmRelocateApLoopEnd is reported
; by AsmGetAddressMap as one relocatable unit.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmRelocateApLoop)
ASM_PFX(AsmRelocateApLoop):
AsmRelocateApLoopStart:
    mov        rax, [rsp + 40]   ; CountTofinish
    lock dec   dword [rax]       ; (*CountTofinish)--
    mov        rsp, r9           ; switch to TopOfApStack
    push       rcx               ; MwaitSupport, popped again in PmEntry
    push       rdx               ; ApTargetCState, popped again in PmEntry

    lea        rsi, [PmEntry]    ; rsi <- The start address of transition code

    ; Build a far-return frame (selector, then offset) and leave 64-bit code.
    push       r8
    push       rsi
    DB         0x48              ; REX.W prefix: 64-bit operand size for retf
    retf
BITS 32
PmEntry:
    mov        eax, cr0
    btr        eax, 31           ; Clear CR0.PG
    mov        cr0, eax          ; Paging is now disabled

    mov        ecx, 0xc0000080   ; EFER MSR number
    rdmsr
    and        ah, ~ 1           ; Clear LME
    wrmsr
    mov        eax, cr4
    and        al, ~ (1 << 5)    ; Clear PAE
    mov        cr4, eax

    ; Each value was pushed as 8 bytes; take the low dword and skip the high one.
    pop        edx               ; edx = ApTargetCState
    add        esp, 4
    pop        ecx               ; ecx = MwaitSupport
    add        esp, 4
    cmp        cl,  1            ; Check mwait-monitor support
    jnz        HltLoop
    mov        ebx, edx          ; Save C-State to ebx
MwaitLoop:
    mov        eax, esp          ; Set Monitor Address
    xor        ecx, ecx          ; ecx = 0
    xor        edx, edx          ; edx = 0
    monitor
    mov        eax, ebx          ; Mwait Cx, Target C-State per eax[7:4]
    shl        eax, 4
    mwait
    jmp        MwaitLoop
HltLoop:
    cli
    hlt
    jmp        HltLoop
BITS 64
AsmRelocateApLoopEnd:
285\r
d94e5f67
JF
;-------------------------------------------------------------------------------------
; AsmGetAddressMap (&AddressMap);
;
; Fills the five qword slots of the caller-supplied map addressed by rcx:
;   [rcx + 00h] address of RendezvousFunnelProc
;   [rcx + 08h] offset of LongModeStart from RendezvousFunnelProcStart
;   [rcx + 10h] size of the code between RendezvousFunnelProcStart and
;               RendezvousFunnelProcEnd
;   [rcx + 18h] address of AsmRelocateApLoop
;   [rcx + 20h] size of the code between AsmRelocateApLoopStart and
;               AsmRelocateApLoopEnd
; Uses rax as scratch.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmGetAddressMap)
ASM_PFX(AsmGetAddressMap):
    ; Assemble-time constants: offsets and sizes computed from label differences
    mov        qword [rcx + 8],  LongModeStart - RendezvousFunnelProcStart
    mov        qword [rcx + 16], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    mov        qword [rcx + 32], AsmRelocateApLoopEnd - AsmRelocateApLoopStart

    ; Run-time addresses of the two code regions
    lea        rax, [ASM_PFX(RendezvousFunnelProc)]
    mov        qword [rcx], rax
    lea        rax, [ASM_PFX(AsmRelocateApLoop)]
    mov        qword [rcx + 24], rax
    ret
299\r
;-------------------------------------------------------------------------------------
; AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
;
; Executed by the current BSP that is about to become an AP (and by its
; counterpart). Each CPU saves its context on its own stack, publishes its
; stack pointer, GDTR and IDTR through MyInfo, then adopts the other CPU's
; stack pointer, GDTR and IDTR from OthersInfo and restores the context found
; on that stack.
;
; In:   rcx = MyInfo, rdx = OthersInfo
; Offsets used in both info blocks:
;   +0  byte   switch state
;   +8  qword  saved stack pointer
;   +16        GDTR image
;   +26        IDTR image
;
; DO NOT call other functions from here: two CPUs may be using one stack at
; the same time, and a call made by either of them would corrupt it.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmExchangeRole)
ASM_PFX(AsmExchangeRole):
    ; Save all general purpose registers
    push       rax
    push       rbx
    push       rcx
    push       rdx
    push       rsi
    push       rdi
    push       rbp
    push       r8
    push       r9
    push       r10
    push       r11
    push       r12
    push       r13
    push       r14
    push       r15

    ; Original rsi/rdi are on the stack now, so they can carry the two pointers
    mov        rsi, rcx                    ; rsi = MyInfo
    mov        rdi, rdx                    ; rdi = OthersInfo

    ; Save CR0, CR4 and RFLAGS (restored in the reverse order below)
    mov        rax, cr0
    push       rax
    mov        rax, cr4
    push       rax
    pushfq

    ; Publish this CPU's descriptor table registers and stack pointer
    sidt       [rsi + 26]
    sgdt       [rsi + 16]
    mov        [rsi + 8], rsp

    ; Announce that this CPU's state is stored
    mov        byte [rsi], CPU_SWITCH_STATE_STORED

    ; Spin until the other CPU has stored its state as well
    jmp        .CheckOtherStored
.SpinOtherStored:
    pause
.CheckOtherStored:
    cmp        byte [rdi], CPU_SWITCH_STATE_STORED
    jne        .SpinOtherStored

    ; Adopt the other CPU's GDTR, IDTR and stack pointer
    lgdt       [rdi + 16]
    lidt       [rdi + 26]
    mov        rsp, [rdi + 8]

    ; Tell the other CPU that its state has been loaded
    mov        byte [rdi], CPU_SWITCH_STATE_LOADED

    ; Spin until the other CPU has loaded this CPU's state, otherwise the
    ; data still on the stack could be corrupted
    jmp        .CheckOtherLoaded
.SpinOtherLoaded:
    pause
.CheckOtherLoaded:
    cmp        byte [rsi], CPU_SWITCH_STATE_LOADED
    jne        .SpinOtherLoaded

    ; Both sides are done: restore the context found on the adopted stack
    popfq

    pop        rax
    mov        cr4, rax

    pop        rax
    mov        cr0, rax

    pop        r15
    pop        r14
    pop        r13
    pop        r12
    pop        r11
    pop        r10
    pop        r9
    pop        r8
    pop        rbp
    pop        rdi
    pop        rsi
    pop        rdx
    pop        rcx
    pop        rbx
    pop        rax

    ret