]> git.proxmox.com Git - mirror_edk2.git/blame - UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
UefiCpuPkg/MpInitLib: support 64-bit AP stack addresses
[mirror_edk2.git] / UefiCpuPkg / Library / MpInitLib / X64 / MpFuncs.nasm
CommitLineData
d94e5f67
JF
1;------------------------------------------------------------------------------ ;\r
2; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>\r
3; This program and the accompanying materials\r
4; are licensed and made available under the terms and conditions of the BSD License\r
5; which accompanies this distribution. The full text of the license may be found at\r
6; http://opensource.org/licenses/bsd-license.php.\r
7;\r
8; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
9; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
10;\r
11; Module Name:\r
12;\r
13; MpFuncs.nasm\r
14;\r
15; Abstract:\r
16;\r
17; This is the assembly code for MP support\r
18;\r
19;-------------------------------------------------------------------------------\r
20\r
21%include "MpEqu.inc"\r
22extern ASM_PFX(InitializeFloatingPointUnits)\r
23\r
24DEFAULT REL\r
25\r
26SECTION .text\r
27\r
;-------------------------------------------------------------------------------------
; RendezvousFunnelProc
;
; Start-up path executed by every AP. APs arrive here very raw: 16-bit real mode,
; no stack, CS = 0x(vv00), IP = 0, BIST result in EAX. The routine
;   1. loads the segment registers, GDTR and IDTR from the data area addressed
;      through CS (the *Location constants come from MpEqu.inc),
;   2. optionally sets EFER.NXE, then enables PE, PAE, LME and PG and far-jumps
;      to LongModeStart,
;   3. selects a stack for this AP and calls the C procedure whose address is
;      stored at ApProcedureLocation.
;
; Register roles once in long mode:
;   esi = value read from BufferStartLocation
;   ebx = processor number / NumApsExecuting handed to the C procedure in edx
;   ebp = BIST value captured on entry (pushed on the new stack)
;
; This procedure never returns.
;-------------------------------------------------------------------------------------
global ASM_PFX(RendezvousFunnelProc)
ASM_PFX(RendezvousFunnelProc):
RendezvousFunnelProcStart:
; At this point CS = 0x(vv00) and ip = 0x0.

BITS 16
    mov        ebp, eax                    ; Save BIST information first; eax is reused below

    mov        ax, cs                      ; Data/stack segments alias the code segment
    mov        ds, ax
    mov        es, ax
    mov        ss, ax
    xor        ax, ax
    mov        fs, ax
    mov        gs, ax

    mov        si,  BufferStartLocation
    mov        ebx, [si]                   ; ebx = buffer start address

    ;
    ; Patch the two operands that end at offset [ModeOffsetLocation]:
    ; the word at (offset - 2) receives the long mode CS and the dword at
    ; (offset - 6) receives buffer start + offset.
    ; NOTE(review): presumably ModeOffsetLocation holds the offset of
    ; LongModeStart, so these are the selector and target of the far jump
    ; below -- confirm against the code that fills the exchange data.
    ;
    mov        di,  ModeOffsetLocation
    mov        eax, [di]
    mov        di,  CodeSegmentLocation
    mov        edx, [di]
    mov        di,  ax
    sub        di,  02h
    mov        [di], dx                    ; Patch long mode CS
    sub        di,  04h
    add        eax, ebx
    mov        [di], eax                   ; Patch address

    mov        si, GdtrLocation
o32 lgdt       [cs:si]

    mov        si, IdtrLocation
o32 lidt       [cs:si]

    mov        si, EnableExecuteDisableLocation
    cmp        byte [si], 0
    jz         SkipEnableExecuteDisableBit

    ;
    ; Enable execute disable bit
    ;
    mov        ecx, 0c0000080h             ; EFER MSR number
    rdmsr                                  ; Read EFER
    bts        eax, 11                     ; Set EFER bit 11 (Execute Disable enable)
    wrmsr                                  ; Write EFER

SkipEnableExecuteDisableBit:

    mov        di,  DataSegmentLocation
    mov        edi, [di]                   ; Save long mode DS in edi

    mov        si, Cr3Location             ; Save CR3 in ecx
    mov        ecx, [si]

    xor        ax,  ax
    mov        ds,  ax                     ; Clear data segment

    mov        eax, cr0                    ; Get control register 0
    or         eax, 000000003h             ; Set PE bit (bit #0) & MP
    mov        cr0, eax

    mov        eax, cr4
    bts        eax, 5                      ; Set CR4 bit 5 (PAE)
    mov        cr4, eax

    mov        cr3, ecx                    ; Load CR3

    mov        ecx, 0c0000080h             ; EFER MSR number
    rdmsr                                  ; Read EFER
    bts        eax, 8                      ; Set LME=1
    wrmsr                                  ; Write EFER

    mov        eax, cr0                    ; Read CR0
    bts        eax, 31                     ; Set PG=1
    mov        cr0, eax                    ; Write CR0

    jmp        0:strict dword 0            ; far jump to long mode (operands patched above)
BITS 64
LongModeStart:
    mov        eax, edi                    ; edi = long mode DS loaded before the mode switch
    mov        ds,  ax
    mov        es,  ax
    mov        ss,  ax

    mov        esi, ebx                    ; esi = buffer start for the rest of the routine
    lea        edi, [esi + InitFlagLocation]
    cmp        qword [edi], 1              ; ApInitConfig
    jnz        GetApicId

    ;
    ; AP init: take the lock, claim the next NumApsExecuting value, release.
    ;
    mov        edi, esi
    add        edi, LockLocation
    mov        rax, NotVacantFlag

TestLock:
    xchg       qword [edi], rax            ; Atomically swap NotVacantFlag into the lock
    cmp        rax, NotVacantFlag
    jz         TestLock                    ; Previous value was NotVacantFlag: lock is held, retry

    lea        ecx, [esi + NumApsExecutingLocation]
    inc        dword [ecx]
    mov        ebx, [ecx]                  ; ebx = this AP's number

Releaselock:
    mov        rax, VacantFlag
    xchg       qword [edi], rax

    ;
    ; Program stack: rsp = StackStartAddress + StackSize * (CpuNumber + 1)
    ;
    mov        edi, esi
    add        edi, StackSizeLocation
    mov        eax, dword [edi]
    mov        ecx, ebx
    inc        ecx
    mul        ecx                         ; EAX = StackSize * (CpuNumber + 1); high half in EDX is not used
    mov        edi, esi
    add        edi, StackStartAddressLocation
    add        rax, qword [edi]            ; 64-bit add: stack start address may be above 4 GB
    mov        rsp, rax
    jmp        CProcedureInvoke

GetApicId:
    ;
    ; Leaf 0Bh is only meaningful when it is within the basic leaf range AND
    ; CPUID.0Bh:EBX[15:0] is non-zero; otherwise fall back to the 8-bit
    ; initial APIC ID from leaf 1.
    ;
    xor        eax, eax
    cpuid                                  ; eax = highest basic CPUID leaf
    cmp        eax, 0bh
    jb         NoX2Apic                    ; Leaf 0Bh not implemented

    mov        eax, 0bh
    xor        ecx, ecx
    cpuid
    test       ebx, 0ffffh
    jz         NoX2Apic                    ; CPUID.0Bh:EBX[15:0] is zero: leaf not valid

    ; Leaf 0Bh is valid; edx holds the 32-bit x2APIC ID
    jmp        GetProcessorNumber

NoX2Apic:
    ; Leaf 0Bh unusable, so get the 8-bit APIC ID
    mov        eax, 1
    cpuid
    shr        ebx, 24
    mov        edx, ebx

GetProcessorNumber:
    ;
    ; Get processor number for this AP
    ; Note that BSP may become an AP due to SwitchBsp()
    ;
    xor        ebx, ebx                    ; ebx = index of the table entry being examined
    lea        eax, [esi + CpuInfoLocation]
    mov        edi, [eax]                  ; edi = low 32 bits of the CPU info table address

GetNextProcNumber:
    ; Entries are 20 bytes: APIC ID at +0, stack pointer (qword) at +12.
    ; The search has no upper bound; it relies on this AP's ID being present.
    cmp        dword [edi], edx            ; APIC ID match?
    jz         ProgramStack
    add        edi, 20
    inc        ebx
    jmp        GetNextProcNumber

ProgramStack:
    mov        rsp, qword [edi + 12]       ; Stack pointer recorded for this processor

CProcedureInvoke:
    push       rbp                         ; Push BIST data at top of AP stack
    xor        rbp, rbp                    ; Clear rbp for call stack trace
    push       rbp
    mov        rbp, rsp

    ; Kept as an absolute address on purpose.
    ; NOTE(review): presumably this code executes from a relocated copy, where a
    ; RIP-relative reference to a symbol outside the copy would be wrong -- confirm.
    mov        rax, ASM_PFX(InitializeFloatingPointUnits)
    sub        rsp, 20h                    ; 32-byte shadow space for the callee
    call       rax                         ; Call assembly function to initialize FPU per UEFI spec
    add        rsp, 20h

    mov        edx, ebx                    ; edx is NumApsExecuting
    mov        ecx, esi
    add        ecx, LockLocation           ; rcx is address of exchange info data buffer

    mov        edi, esi
    add        edi, ApProcedureLocation
    mov        rax, qword [edi]

    sub        rsp, 20h                    ; 32-byte shadow space for the callee
    call       rax                         ; Invoke C function
    add        rsp, 20h
    jmp        $                           ; Should never reach here

RendezvousFunnelProcEnd:
220\r
76157021
JF
;-------------------------------------------------------------------------------------
; AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment);
;
; In:  rcx = MwaitSupport   (only cl is tested; 1 selects the MONITOR/MWAIT loop)
;      rdx = ApTargetCState (placed in MWAIT hint bits eax[7:4])
;      r8  = PmCodeSegment  (code selector used for the far return to PmEntry)
;
; Far-returns into the 32-bit code at PmEntry, clears CR0.PG, EFER.LME and
; CR4.PAE, then parks the processor forever in either a MONITOR/MWAIT loop or
; a CLI/HLT loop. This procedure never returns.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmRelocateApLoop)
ASM_PFX(AsmRelocateApLoop):
AsmRelocateApLoopStart:
    push       rcx                         ; MwaitSupport, popped again in 32-bit code
    push       rdx                         ; ApTargetCState, popped again in 32-bit code

    lea        rsi, [PmEntry]              ; rsi <- The start address of transition code

    push       r8                          ; Far return frame: CS = PmCodeSegment
    push       rsi                         ;                   RIP = PmEntry
    DB         0x48                        ; REX.W prefix: 64-bit operand size for retf
    retf
BITS 32
PmEntry:
    mov        eax, cr0
    btr        eax, 31                     ; Clear CR0.PG
    mov        cr0, eax                    ; Paging is now disabled

    mov        ecx, 0xc0000080             ; EFER MSR number
    rdmsr                                  ; Overwrites eax and edx
    and        ah, ~ 1                     ; Clear LME (EFER bit 8)
    wrmsr
    mov        eax, cr4
    and        al, ~ (1 << 5)              ; Clear PAE
    mov        cr4, eax

    ; The arguments were pushed as 8-byte values; pop the low dword of each
    ; and step over the high dword.
    pop        edx                         ; edx = ApTargetCState
    add        esp, 4
    pop        ecx                         ; ecx = MwaitSupport
    add        esp, 4
    cmp        cl, 1                       ; Check mwait-monitor support
    jnz        HltLoop
    mov        ebx, edx                    ; Save C-State to ebx
    shl        ebx, 4                      ; Build the MWAIT hint once: C-State in bits [7:4]
MwaitLoop:
    mov        eax, esp                    ; Set Monitor Address
    xor        ecx, ecx                    ; ecx = 0
    xor        edx, edx                    ; edx = 0
    monitor
    mov        eax, ebx                    ; Mwait Cx, Target C-State per eax[7:4]
    mwait
    jmp        MwaitLoop
HltLoop:
    cli
    hlt
    jmp        HltLoop
BITS 64
AsmRelocateApLoopEnd:
274\r
d94e5f67
JF
;-------------------------------------------------------------------------------------
; AsmGetAddressMap (&AddressMap);
;
; In:  rcx = address of the map to fill
;
; Fields written (all qwords):
;   +00h  address of RendezvousFunnelProc
;   +08h  offset of LongModeStart from the start of RendezvousFunnelProc
;   +10h  size in bytes of RendezvousFunnelProc
;   +18h  address of AsmRelocateApLoop
;   +20h  size in bytes of AsmRelocateApLoop
;
; Clobbers: rax
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmGetAddressMap)
ASM_PFX(AsmGetAddressMap):
    ; RIP-relative lea yields the run-time address without needing an
    ; absolute 64-bit relocation.
    lea        rax, [rel ASM_PFX(RendezvousFunnelProc)]
    mov        qword [rcx], rax
    mov        qword [rcx + 8h], LongModeStart - RendezvousFunnelProcStart
    mov        qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    lea        rax, [rel ASM_PFX(AsmRelocateApLoop)]
    mov        qword [rcx + 18h], rax
    mov        qword [rcx + 20h], AsmRelocateApLoopEnd - AsmRelocateApLoopStart
    ret
288\r
;-------------------------------------------------------------------------------------
; AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
;
; Executed by the current BSP that is about to become an AP; it switches its
; stack with the current AP.
;
; In:  rcx = MyInfo, rdx = OthersInfo
;
; Offsets used inside each info structure:
;   +0   byte   switch state (CPU_SWITCH_STATE_STORED / CPU_SWITCH_STATE_LOADED)
;   +8   qword  saved stack pointer
;   +16         GDTR image written by sgdt
;   +26         IDTR image written by sidt
;
; Every register pushed on entry is popped again before returning, but after
; the stack switch the values come from the stack the other CPU saved.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmExchangeRole)
ASM_PFX(AsmExchangeRole):
    ; This routine must stay call-free: two CPUs may be on one stack at the
    ; same moment, and a call made by either of them would corrupt it.

    ; --- Save general purpose registers ---
    push       rax
    push       rbx
    push       rcx
    push       rdx
    push       rsi
    push       rdi
    push       rbp
    push       r8
    push       r9
    push       r10
    push       r11
    push       r12
    push       r13
    push       r14
    push       r15

    mov        rsi, rcx                    ; rsi = MyInfo
    mov        rdi, rdx                    ; rdi = OthersInfo

    ; --- Save CR0, CR4 and RFLAGS on the stack ---
    mov        rax, cr0
    push       rax

    mov        rax, cr4
    push       rax

    pushfq

    ; --- Save the descriptor table registers and stack pointer in MyInfo ---
    sgdt       [rsi + 16]
    sidt       [rsi + 26]
    mov        [rsi + 8], rsp

    ; Publish: this CPU's state is now stored
    mov        byte [rsi], CPU_SWITCH_STATE_STORED

    ; --- Spin until the other CPU has stored its state too ---
    cmp        byte [rdi], CPU_SWITCH_STATE_STORED
    je         .PeerStored
.SpinUntilStored:
    pause
    cmp        byte [rdi], CPU_SWITCH_STATE_STORED
    jne        .SpinUntilStored

.PeerStored:
    ; --- Adopt the other CPU's GDTR, IDTR and stack pointer ---
    lgdt       [rdi + 16]
    lidt       [rdi + 26]
    mov        rsp, [rdi + 8]

    ; Tell the other CPU its saved state has been loaded
    mov        byte [rdi], CPU_SWITCH_STATE_LOADED

    ; --- Spin until the other CPU has loaded this CPU's old state; ---
    ; --- leaving earlier could corrupt data still on the stack     ---
    cmp        byte [rsi], CPU_SWITCH_STATE_LOADED
    je         .PeerLoaded
.SpinUntilLoaded:
    pause
    cmp        byte [rsi], CPU_SWITCH_STATE_LOADED
    jne        .SpinUntilLoaded

.PeerLoaded:
    ; --- Unwind in reverse push order, now from the adopted stack ---
    popfq

    pop        rax
    mov        cr4, rax

    pop        rax
    mov        cr0, rax

    pop        r15
    pop        r14
    pop        r13
    pop        r12
    pop        r11
    pop        r10
    pop        r9
    pop        r8
    pop        rbp
    pop        rdi
    pop        rsi
    pop        rdx
    pop        rcx
    pop        rbx
    pop        rax

    ret