]> git.proxmox.com Git - mirror_edk2.git/blame - UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
UefiCpuPkg/DxeMpLib: Allocate new safe stack < 4GB
[mirror_edk2.git] / UefiCpuPkg / Library / MpInitLib / X64 / MpFuncs.nasm
CommitLineData
d94e5f67
JF
1;------------------------------------------------------------------------------ ;\r
2; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>\r
3; This program and the accompanying materials\r
4; are licensed and made available under the terms and conditions of the BSD License\r
5; which accompanies this distribution. The full text of the license may be found at\r
6; http://opensource.org/licenses/bsd-license.php.\r
7;\r
8; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
9; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
10;\r
11; Module Name:\r
12;\r
13; MpFuncs.nasm\r
14;\r
15; Abstract:\r
16;\r
17; This is the assembly code for MP support\r
18;\r
19;-------------------------------------------------------------------------------\r
20\r
21%include "MpEqu.inc"\r
22extern ASM_PFX(InitializeFloatingPointUnits)\r
23\r
24DEFAULT REL\r
25\r
26SECTION .text\r
27\r
;-------------------------------------------------------------------------------------
; RendezvousFunnelProc
;
; Start-up path executed by every AP. The AP arrives in 16-bit real mode with no
; stack (CS = 0x(vv00), IP = 0) and with its BIST result in EAX. The code
;   1. patches the far jump below with the long mode code segment and target,
;   2. loads GDTR/IDTR, optionally sets EFER bit 11 (execute disable),
;   3. enables protection, PAE, long mode and paging, and far-jumps to 64-bit code,
;   4. selects a stack for this AP and calls the C entry point stored in the
;      exchange data.
;
; All *Location symbols come from MpEqu.inc; in the 16-bit part they are used as
; offsets from CS, in the 64-bit part as offsets from the base address in esi.
; This block is referenced by start address and length (RendezvousFunnelProcStart
; / RendezvousFunnelProcEnd), so it presumably runs from a copy; that is why no
; RIP-relative reference to code outside the block is used here.
;
; Register roles across the mode switch:
;   ebp = BIST value received in eax
;   ebx = dword read from BufferStartLocation (base address; copied to esi later)
;   edi = dword read from DataSegmentLocation (long mode data selector)
;   ecx = dword read from Cr3Location (page table base)
;-------------------------------------------------------------------------------------
global ASM_PFX(RendezvousFunnelProc)
ASM_PFX(RendezvousFunnelProc):
RendezvousFunnelProcStart:
; At this point CS = 0x(vv00) and ip = 0x0.

BITS 16
    mov        ebp, eax                        ; Keep the BIST value for later

    ; Flat real-mode view: DS = ES = SS = CS, FS = GS = 0
    mov        ax, cs
    mov        ds, ax
    mov        es, ax
    mov        ss, ax
    xor        ax, ax
    mov        fs, ax
    mov        gs, ax

    mov        si, BufferStartLocation
    mov        ebx, [si]                       ; ebx = base address of this buffer

    ;
    ; Patch the 6 bytes that end at offset [ModeOffsetLocation]: a 32-bit
    ; address followed by a 16-bit selector. Presumably these are the operand
    ; bytes of the far jump in front of LongModeStart.
    ;
    mov        di, ModeOffsetLocation
    mov        eax, [di]                       ; eax = mode offset
    mov        di, CodeSegmentLocation
    mov        edx, [di]                       ; dx  = long mode code selector
    mov        di, ax
    sub        di, 02h
    mov        [di], dx                        ; Patch long mode CS
    sub        di, 04h
    add        eax, ebx                        ; eax = base + mode offset
    mov        [di], eax                       ; Patch jump target address

    mov        si, GdtrLocation
    o32 lgdt   [cs:si]

    mov        si, IdtrLocation
    o32 lidt   [cs:si]

    mov        si, EnableExecuteDisableLocation
    cmp        byte [si], 0
    jz         SkipEnableExecuteDisableBit

    ;
    ; Enable execute disable bit
    ;
    mov        ecx, 0c0000080h                 ; EFER MSR number
    rdmsr                                      ; Read EFER
    bts        eax, 11                         ; Set bit 11 (execute disable enable)
    wrmsr                                      ; Write EFER

SkipEnableExecuteDisableBit:

    mov        di, DataSegmentLocation
    mov        edi, [di]                       ; edi = long mode DS selector

    mov        si, Cr3Location
    mov        ecx, [si]                       ; ecx = CR3 value

    xor        ax, ax
    mov        ds, ax                          ; Clear data segment

    mov        eax, cr0                        ; Get control register 0
    or         eax, 000000003h                 ; Set PE (bit 0) and MP (bit 1)
    mov        cr0, eax

    mov        eax, cr4
    bts        eax, 5                          ; Set CR4 bit 5 (PAE)
    mov        cr4, eax

    mov        cr3, ecx                        ; Load CR3

    mov        ecx, 0c0000080h                 ; EFER MSR number
    rdmsr                                      ; Read EFER
    bts        eax, 8                          ; Set LME=1
    wrmsr                                      ; Write EFER

    mov        eax, cr0                        ; Read CR0
    bts        eax, 31                         ; Set PG=1
    mov        cr0, eax                        ; Write CR0

    jmp        0:strict dword 0                ; Far jump to long mode; operands patched above
BITS 64
LongModeStart:
    mov        eax, edi                        ; Load the long mode data selector
    mov        ds, ax
    mov        es, ax
    mov        ss, ax

    mov        esi, ebx                        ; esi = buffer base for the rest of the block
    lea        edi, [esi + InitFlagLocation]
    cmp        qword [edi], 1                  ; ApInitConfig
    jnz        GetApicId

    ;
    ; AP init: take the spin lock, bump the AP counter, release the lock.
    ;
    lea        edi, [esi + LockLocation]
    mov        rax, NotVacantFlag

TestLock:
    xchg       qword [edi], rax                ; Try to claim the lock
    cmp        rax, NotVacantFlag
    jz         TestLock                        ; Previous value was "taken": retry

    lea        ecx, [esi + NumApsExecutingLocation]
    inc        dword [ecx]
    mov        ebx, [ecx]                      ; ebx = counter value after the increment

Releaselock:
    mov        rax, VacantFlag
    xchg       qword [edi], rax

    ;
    ; Program stack: rsp = StackStart + StackSize * (ebx + 1)
    ;
    lea        edi, [esi + StackSizeLocation]
    mov        eax, dword [edi]
    mov        ecx, ebx
    inc        ecx
    mul        ecx                             ; EDX:EAX = StackSize * (ebx + 1)
    lea        edi, [esi + StackStartAddressLocation]
    add        rax, qword [edi]
    mov        rsp, rax
    jmp        CProcedureInvoke

GetApicId:
    xor        eax, eax                        ; CPUID leaf 0: eax = highest basic leaf
    cpuid
    cmp        eax, 0bh
    jb         NoX2Apic                        ; CPUID level below CPUID_EXTENDED_TOPOLOGY

    mov        eax, 0bh
    xor        ecx, ecx
    cpuid
    test       ebx, 0ffffh
    jz         NoX2Apic                        ; CPUID.0BH:EBX[15:0] is zero

    ; Processor is x2APIC capable; 32-bit x2APIC ID is already in EDX
    jmp        GetProcessorNumber

NoX2Apic:
    ; Processor is not x2APIC capable, so get 8-bit APIC ID
    mov        eax, 1
    cpuid
    shr        ebx, 24                         ; CPUID.1:EBX[31:24]
    mov        edx, ebx

GetProcessorNumber:
    ;
    ; Get processor number for this AP
    ; Note that BSP may become an AP due to SwitchBsp()
    ;
    ; Walks an array of 20-byte records: dword at +0 is compared with the APIC
    ; ID in edx, qword at +12 is loaded as the stack pointer on a match.
    ; ebx counts the records skipped. The search has no upper bound.
    ;
    xor        ebx, ebx
    lea        eax, [esi + CpuInfoLocation]
    mov        edi, [eax]                      ; edi = address of the first record

GetNextProcNumber:
    cmp        dword [edi], edx                ; APIC ID match?
    jz         ProgramStack
    add        edi, 20                         ; Next record
    inc        ebx
    jmp        GetNextProcNumber

ProgramStack:
    mov        rsp, qword [edi + 12]

CProcedureInvoke:
    push       rbp                             ; Push BIST data at top of AP stack
    xor        rbp, rbp                        ; Clear ebp for call stack trace
    push       rbp
    mov        rbp, rsp

    ; Absolute address on purpose: a RIP-relative reference would be wrong if
    ; this block executes from a relocated copy.
    mov        rax, ASM_PFX(InitializeFloatingPointUnits)
    sub        rsp, 20h                        ; 32-byte shadow space for the callee
    call       rax                             ; Initialize FPU per UEFI spec
    add        rsp, 20h

    ; Arguments for the C function:
    ;   rcx = esi + LockLocation (address of exchange info data buffer)
    ;   edx = ebx (AP counter value on the init path, record index otherwise)
    mov        edx, ebx
    lea        ecx, [esi + LockLocation]

    lea        edi, [esi + ApProcedureLocation]
    mov        rax, qword [edi]

    sub        rsp, 20h
    call       rax                             ; Invoke C function
    add        rsp, 20h
    jmp        $                               ; Should never reach here

RendezvousFunnelProcEnd:
223\r
;-------------------------------------------------------------------------------------
; AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment, TopOfApStack);
;
; Parks the calling AP forever: switches to the given stack, far-returns into the
; 32-bit code below, turns off paging, long mode and PAE, then idles in either an
; MONITOR/MWAIT loop or a CLI/HLT loop. Never returns.
;
; In:  rcx = MwaitSupport   (cl == 1 selects the MWAIT loop, anything else HLT)
;      rdx = ApTargetCState (C-state number placed in MWAIT hint bits EAX[7:4])
;      r8  = PmCodeSegment  (selector loaded into CS by the far return)
;      r9  = TopOfApStack   (new stack pointer)
;
; PmEntry is reached with lea/RIP-relative addressing and the block is exported
; by start address and length (AsmRelocateApLoopStart / AsmRelocateApLoopEnd), so
; keep it free of absolute references.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmRelocateApLoop)
ASM_PFX(AsmRelocateApLoop):
AsmRelocateApLoopStart:
    mov        rsp, r9                         ; Switch to the AP's own stack
    push       rcx                             ; Saved for PmEntry: MwaitSupport
    push       rdx                             ; Saved for PmEntry: ApTargetCState

    lea        rsi, [PmEntry]                  ; rsi <- The start address of transition code

    push       r8                              ; Far return frame: CS
    push       rsi                             ; Far return frame: RIP
    DB         0x48                            ; REX.W prefix: 64-bit far return (pops 2 x 8 bytes)
    retf
BITS 32
PmEntry:
    mov        eax, cr0
    btr        eax, 31                         ; Clear CR0.PG
    mov        cr0, eax                        ; Disable paging

    mov        ecx, 0xc0000080                 ; EFER MSR number
    rdmsr
    and        ah, ~ 1                         ; Clear LME (EFER bit 8)
    wrmsr
    mov        eax, cr4
    and        al, ~ (1 << 5)                  ; Clear PAE
    mov        cr4, eax

    ; The two arguments were pushed as 64-bit values; read the low dword of
    ; each and skip the high dword.
    pop        edx                             ; edx = ApTargetCState
    add        esp, 4
    pop        ecx                             ; ecx = MwaitSupport
    add        esp, 4
    cmp        cl, 1                           ; Check mwait-monitor support
    jnz        HltLoop
    mov        ebx, edx                        ; Save C-State to ebx
MwaitLoop:
    mov        eax, esp                        ; Set Monitor Address
    xor        ecx, ecx                        ; ecx = 0
    xor        edx, edx                        ; edx = 0
    monitor
    ; Build the hint from the saved C-state on every pass. ebx itself must not
    ; be shifted, otherwise the hint is wrong after the first wake-up.
    mov        eax, ebx
    shl        eax, 4                          ; Mwait Cx, Target C-State per eax[7:4]
    mwait
    jmp        MwaitLoop
HltLoop:
    cli
    hlt
    jmp        HltLoop
    ret
BITS 64
AsmRelocateApLoopEnd:
278\r
d94e5f67
JF
;-------------------------------------------------------------------------------------
; AsmGetAddressMap (&AddressMap);
;
; Fills the caller's address map with the location and size of the relocatable
; code blocks in this file.
;
; In:  rcx = address of the map to fill
; Out: [rcx + 00h] = address of RendezvousFunnelProc
;      [rcx + 08h] = LongModeStart - RendezvousFunnelProcStart
;      [rcx + 10h] = RendezvousFunnelProcEnd - RendezvousFunnelProcStart
;      [rcx + 18h] = address of AsmRelocateApLoop
;      [rcx + 20h] = AsmRelocateApLoopEnd - AsmRelocateApLoopStart
; Clobbers: rax
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmGetAddressMap)
ASM_PFX(AsmGetAddressMap):
    ; lea is RIP-relative here (DEFAULT REL), so no absolute relocation is needed
    lea        rax, [ASM_PFX(RendezvousFunnelProc)]
    mov        qword [rcx], rax
    mov        qword [rcx + 8h], LongModeStart - RendezvousFunnelProcStart
    mov        qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    lea        rax, [ASM_PFX(AsmRelocateApLoop)]
    mov        qword [rcx + 18h], rax
    mov        qword [rcx + 20h], AsmRelocateApLoopEnd - AsmRelocateApLoopStart
    ret
292\r
;-------------------------------------------------------------------------------------
; AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
;
; Executed by the current BSP that is about to become an AP, together with the AP
; that takes over. Each CPU saves its own context, publishes it through MyInfo,
; then adopts the stack pointer, GDTR and IDTR published by the other CPU through
; OthersInfo, and finally returns on the other CPU's stack.
;
; In:  rcx = MyInfo, rdx = OthersInfo
;
; Offsets used inside both info blocks:
;   +0   byte   switch state (CPU_SWITCH_STATE_STORED / CPU_SWITCH_STATE_LOADED)
;   +8   qword  stack pointer
;   +16         GDTR image written by sgdt
;   +26         IDTR image written by sidt
;
; DO NOT call other functions in this function, since 2 CPU may use 1 stack
; at the same time. If 1 CPU try to call a function, stack will be corrupted.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmExchangeRole)
ASM_PFX(AsmExchangeRole):
    ; Save every general purpose register; restored in reverse order below
    push       rax
    push       rbx
    push       rcx
    push       rdx
    push       rsi
    push       rdi
    push       rbp
    push       r8
    push       r9
    push       r10
    push       r11
    push       r12
    push       r13
    push       r14
    push       r15

    mov        rax, cr0
    push       rax

    mov        rax, cr4
    push       rax

    mov        rsi, rcx                        ; rsi = MyInfo
    mov        rdi, rdx                        ; rdi = OthersInfo

    ; EFLAGS goes on the stack; GDTR and IDTR go into MyInfo
    pushfq
    sgdt       [rsi + 16]
    sidt       [rsi + 26]

    ; Publish this CPU's stack pointer, then announce that the state is stored
    mov        [rsi + 8], rsp
    mov        byte [rsi], CPU_SWITCH_STATE_STORED

    ; Spin until the other CPU has stored its state as well
    jmp        .CheckOtherStored
.SpinOtherStored:
    pause
.CheckOtherStored:
    cmp        byte [rdi], CPU_SWITCH_STATE_STORED
    jnz        .SpinOtherStored

    ; Adopt the other CPU's descriptor tables and stack
    lgdt       [rdi + 16]
    lidt       [rdi + 26]
    mov        rsp, [rdi + 8]

    ; Tell the other CPU that its stored state has been picked up
    mov        byte [rdi], CPU_SWITCH_STATE_LOADED

    ; Spin until the other CPU has picked up this CPU's state; leaving earlier
    ; could corrupt the data still held on the stack
    jmp        .CheckOtherLoaded
.SpinOtherLoaded:
    pause
.CheckOtherLoaded:
    cmp        byte [rsi], CPU_SWITCH_STATE_LOADED
    jnz        .SpinOtherLoaded

    ; Unwind the context found on the adopted stack
    popfq

    pop        rax
    mov        cr4, rax

    pop        rax
    mov        cr0, rax

    pop        r15
    pop        r14
    pop        r13
    pop        r12
    pop        r11
    pop        r10
    pop        r9
    pop        r8
    pop        rbp
    pop        rdi
    pop        rsi
    pop        rdx
    pop        rcx
    pop        rbx
    pop        rax

    ret