]> git.proxmox.com Git - mirror_edk2.git/blame - UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
UefiCpuPkg/MpInitLib: Program AP stack in fixed address
[mirror_edk2.git] / UefiCpuPkg / Library / MpInitLib / X64 / MpFuncs.nasm
CommitLineData
d94e5f67
JF
1;------------------------------------------------------------------------------ ;\r
2; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>\r
3; This program and the accompanying materials\r
4; are licensed and made available under the terms and conditions of the BSD License\r
5; which accompanies this distribution. The full text of the license may be found at\r
6; http://opensource.org/licenses/bsd-license.php.\r
7;\r
8; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
9; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
10;\r
11; Module Name:\r
12;\r
13; MpFuncs.nasm\r
14;\r
15; Abstract:\r
16;\r
17; This is the assembly code for MP support\r
18;\r
19;-------------------------------------------------------------------------------\r
20\r
21%include "MpEqu.inc"\r
22extern ASM_PFX(InitializeFloatingPointUnits)\r
23\r
24DEFAULT REL\r
25\r
26SECTION .text\r
27\r
;-------------------------------------------------------------------------------------
; RendezvousFunnelProc
;
; Entry code executed by every AP.  APs arrive here very raw (real mode, no
; stack), so the first part is assembled as 16-bit code and reaches its data
; through CS-relative offsets.
;
; Flow, as implemented below:
;   1. Save EAX (BIST) in EBP, point DS/ES/SS at CS and clear FS/GS.
;   2. Patch a 16-bit selector and a 32-bit address into the bytes just below
;      the offset read from ModeOffsetLocation.
;   3. Load GDTR/IDTR, optionally set EFER bit 11, then set CR0.PE/MP,
;      CR4 bit 5, CR3, EFER.LME and CR0.PG, and far-jump.
;   4. In 64-bit code choose a stack:
;        - if the qword at InitFlagLocation equals 1: take the lock at
;          LockLocation, bump a counter, and use
;          [StackStartAddressLocation] + [StackSizeLocation] * (counter + 1);
;        - otherwise: read the APIC ID with CPUID, search the table whose
;          address is stored at CpuInfoLocation, and load ESP from the
;          matching 16-byte entry (dword at offset 12).
;   5. Call InitializeFloatingPointUnits, then the routine whose address is
;      stored at ApProcedureLocation, with RCX = buffer + LockLocation and
;      EDX = EBX.  20h bytes of stack are reserved around each call.
;
; The *Location, VacantFlag and NotVacantFlag symbols are not defined in this
; file (presumably they come from MpEqu.inc).
;-------------------------------------------------------------------------------------
global ASM_PFX(RendezvousFunnelProc)
ASM_PFX(RendezvousFunnelProc):
RendezvousFunnelProcStart:
; At this point CS = 0x(vv00) and ip = 0x0.

BITS 16
    mov     ebp, eax                    ; BIST value; pushed on the AP stack at CProcedureInvoke

    mov     ax, cs
    mov     ds, ax
    mov     es, ax
    mov     ss, ax
    xor     ax, ax
    mov     fs, ax
    mov     gs, ax

    mov     si, BufferStartLocation
    mov     ebx, [si]                   ; ebx = dword at BufferStartLocation

    mov     di, ModeOffsetLocation
    mov     eax, [di]                   ; eax = dword at ModeOffsetLocation
    mov     di, CodeSegmentLocation
    mov     edx, [di]                   ; edx = dword at CodeSegmentLocation
    ;
    ; Patch the 6 bytes that end at offset [ModeOffsetLocation].
    ; NOTE(review): presumably these are the operands of the far jmp just
    ; before LongModeStart - confirm that ModeOffsetLocation holds
    ; LongModeStart - RendezvousFunnelProcStart.
    ;
    mov     di, ax
    sub     di, 02h
    mov     [di], dx                    ; Patch long mode CS
    sub     di, 04h
    add     eax, ebx                    ; eax = mode offset + buffer start
    mov     [di], eax                   ; Patch address

    mov     si, GdtrLocation
    o32 lgdt [cs:si]

    mov     si, IdtrLocation
    o32 lidt [cs:si]

    mov     si, EnableExecuteDisableLocation
    cmp     byte [si], 0
    jz      SkipEnableExecuteDisableBit ; flag byte is zero: leave EFER bit 11 alone

    ;
    ; Enable execute disable bit
    ;
    mov     ecx, 0c0000080h             ; EFER MSR number
    rdmsr                               ; Read EFER
    bts     eax, 11                     ; Enable Execute Disable Bit
    wrmsr                               ; Write EFER

SkipEnableExecuteDisableBit:

    mov     di, DataSegmentLocation
    mov     edi, [di]                   ; Save long mode DS in edi

    mov     si, Cr3Location             ; Save CR3 in ecx
    mov     ecx, [si]

    xor     ax, ax
    mov     ds, ax                      ; Clear data segment

    mov     eax, cr0                    ; Get control register 0
    or      eax, 000000003h             ; Set PE bit (bit #0) & MP
    mov     cr0, eax

    mov     eax, cr4
    bts     eax, 5                      ; Set CR4 bit 5 (PAE)
    mov     cr4, eax

    mov     cr3, ecx                    ; Load CR3

    mov     ecx, 0c0000080h             ; EFER MSR number
    rdmsr                               ; Read EFER
    bts     eax, 8                      ; Set LME=1
    wrmsr                               ; Write EFER

    mov     eax, cr0                    ; Read CR0
    bts     eax, 31                     ; Set PG=1
    mov     cr0, eax                    ; Write CR0

    jmp     0:strict dword 0            ; far jump to long mode (operands patched above)
BITS 64
LongModeStart:
    mov     eax, edi                    ; edi still holds the long mode DS selector
    mov     ds, ax
    mov     es, ax
    mov     ss, ax

    mov     esi, ebx                    ; esi = buffer start, kept for the rest of the routine
    lea     edi, [esi + InitFlagLocation]
    cmp     qword [edi], 1              ; ApInitConfig
    jnz     GetApicId

    ; AP init
    mov     esi, ebx
    mov     edi, esi
    add     edi, LockLocation
    mov     rax, NotVacantFlag

TestLock:
    xchg    qword [edi], rax            ; swap NotVacantFlag into the lock
    cmp     rax, NotVacantFlag
    jz      TestLock                    ; old value was NotVacantFlag: someone else holds it

    ;
    ; NOTE(review): the dword incremented here is the one tested against 1
    ; above, and the result is later passed on as "NumApsExecuting" - confirm
    ; that InitFlagLocation is the intended counter.
    ;
    lea     ecx, [esi + InitFlagLocation]
    inc     dword [ecx]
    mov     ebx, [ecx]                  ; ebx = counter value after the increment

Releaselock:
    mov     rax, VacantFlag
    xchg    qword [edi], rax
    ; program stack
    mov     edi, esi
    add     edi, StackSizeLocation
    mov     eax, dword [edi]
    mov     ecx, ebx
    inc     ecx
    mul     ecx                         ; EAX = StackSize * (CpuNumber + 1); EDX is overwritten
    mov     edi, esi
    add     edi, StackStartAddressLocation
    add     rax, qword [edi]
    mov     rsp, rax
    jmp     CProcedureInvoke

GetApicId:
    mov     eax, 0
    cpuid                               ; eax = highest basic CPUID leaf
    cmp     eax, 0bh
    jnb     X2Apic
    ; Processor is not x2APIC capable, so get 8-bit APIC ID
    mov     eax, 1
    cpuid
    shr     ebx, 24                     ; APIC ID is in CPUID.1:EBX[31:24]
    mov     edx, ebx
    jmp     GetProcessorNumber

X2Apic:
    ; Processor is x2APIC capable, so get 32-bit x2APIC ID
    mov     eax, 0bh
    xor     ecx, ecx
    cpuid
    ; edx save x2APIC ID

GetProcessorNumber:
    ;
    ; Get processor number for this AP
    ; Note that BSP may become an AP due to SwitchBsp()
    ;
    xor     ebx, ebx                    ; ebx = index of the entry being examined
    lea     eax, [esi + CpuInfoLocation]
    mov     edi, [eax]                  ; edi = table address stored at CpuInfoLocation

GetNextProcNumber:
    ; NOTE(review): the search has no upper bound; it relies on the APIC ID
    ; being present in the table.
    cmp     dword [edi], edx            ; APIC ID match?
    jz      ProgramStack
    add     edi, 16                     ; entries are 16 bytes apart
    inc     ebx
    jmp     GetNextProcNumber

ProgramStack:
    xor     rsp, rsp
    mov     esp, dword [edi + 12]       ; stack pointer taken from the matching entry

CProcedureInvoke:
    push    rbp                         ; Push BIST data at top of AP stack
    xor     rbp, rbp                    ; Clear ebp for call stack trace
    push    rbp
    mov     rbp, rsp

    mov     rax, ASM_PFX(InitializeFloatingPointUnits)
    sub     rsp, 20h
    call    rax                         ; Call assembly function to initialize FPU per UEFI spec
    add     rsp, 20h

    mov     edx, ebx                    ; edx is NumApsExecuting
    mov     ecx, esi
    add     ecx, LockLocation           ; rcx is address of exchange info data buffer

    mov     edi, esi
    add     edi, ApProcedureLocation
    mov     rax, qword [edi]            ; rax = routine address stored at ApProcedureLocation

    sub     rsp, 20h
    call    rax                         ; Invoke C function
    add     rsp, 20h
    jmp     $                           ; Should never reach here

RendezvousFunnelProcEnd:
222\r
76157021
JF
;-------------------------------------------------------------------------------------
; AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment);
;
; In:   rcx = MwaitSupport    (cl == 1 selects the MONITOR/MWAIT loop,
;                              anything else selects the CLI/HLT loop)
;       rdx = ApTargetCState  (placed in eax[7:4] as the MWAIT hint)
;       r8  = PmCodeSegment   (selector loaded into CS by the far return)
; Out:  never returns.
;
; The routine far-returns to PmEntry using PmCodeSegment, clears CR0.PG,
; EFER.LME and CR4.PAE, and then parks the processor in one of the two loops.
; The two arguments saved on the stack are recovered with 32-bit pops once
; PmEntry is running.
; NOTE(review): this assumes the stack is addressable through ESP (below
; 4 GB) - confirm against the caller.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmRelocateApLoop)
ASM_PFX(AsmRelocateApLoop):
AsmRelocateApLoopStart:
    push    rcx                         ; MwaitSupport, popped as ecx in PmEntry
    push    rdx                         ; ApTargetCState, popped as edx in PmEntry

    lea     rsi, [PmEntry]              ; rsi <- The start address of transition code

    push    r8                          ; CS for the far return
    push    rsi                         ; RIP for the far return
    DB      0x48                        ; REX.W prefix: far return with 64-bit operands
    retf
BITS 32
PmEntry:
    mov     eax, cr0
    btr     eax, 31                     ; Clear CR0.PG
    mov     cr0, eax                    ; Disable paging

    mov     ecx, 0xc0000080             ; EFER MSR number
    rdmsr
    and     ah, ~ 1                     ; Clear LME (EFER bit 8 = bit 0 of ah)
    wrmsr
    mov     eax, cr4
    and     al, ~ (1 << 5)              ; Clear PAE
    mov     cr4, eax

    pop     edx                         ; edx = low dword of saved rdx (ApTargetCState)
    add     esp, 4                      ; skip the high dword of the 64-bit push
    pop     ecx                         ; ecx = low dword of saved rcx (MwaitSupport)
    add     esp, 4                      ; skip the high dword of the 64-bit push
    cmp     cl, 1                       ; Check mwait-monitor support
    jnz     HltLoop
    mov     ebx, edx                    ; Save C-State to ebx (kept unshifted across iterations)
MwaitLoop:
    mov     eax, esp                    ; Set Monitor Address
    xor     ecx, ecx                    ; ecx = 0
    xor     edx, edx                    ; edx = 0
    monitor
    mov     eax, ebx                    ; Mwait Cx, Target C-State per eax[7:4]
    shl     eax, 4                      ; shift a fresh copy so every iteration uses the same hint
    mwait
    jmp     MwaitLoop
HltLoop:
    cli
    hlt
    jmp     HltLoop
BITS 64
AsmRelocateApLoopEnd:
276\r
d94e5f67
JF
;-------------------------------------------------------------------------------------
; AsmGetAddressMap (&AddressMap);
;
; In:   rcx = address of the structure to fill
; Out:  [rcx + 00h] = address of RendezvousFunnelProc
;       [rcx + 08h] = LongModeStart - RendezvousFunnelProcStart
;       [rcx + 10h] = RendezvousFunnelProcEnd - RendezvousFunnelProcStart
;       [rcx + 18h] = address of AsmRelocateApLoop
;       [rcx + 20h] = AsmRelocateApLoopEnd - AsmRelocateApLoopStart
; Clobbers: rax
;
; Both addresses are formed with RIP-relative lea so that no absolute
; 64-bit address is embedded in the code.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmGetAddressMap)
ASM_PFX(AsmGetAddressMap):
    lea     rax, [rel ASM_PFX(RendezvousFunnelProc)]
    mov     qword [rcx], rax
    mov     qword [rcx + 8h], LongModeStart - RendezvousFunnelProcStart
    mov     qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    lea     rax, [rel ASM_PFX(AsmRelocateApLoop)]
    mov     qword [rcx + 18h], rax
    mov     qword [rcx + 20h], AsmRelocateApLoopEnd - AsmRelocateApLoopStart
    ret
290\r
;-------------------------------------------------------------------------------------
; AsmExchangeRole procedure follows. This procedure is executed by the current BSP
; that is about to become an AP. It switches its stack with the current AP.
; AsmExchangeRole (IN CPU_EXCHANGE_INFO *MyInfo, IN CPU_EXCHANGE_INFO *OthersInfo);
;
; In:   rcx = MyInfo, rdx = OthersInfo
;
; Fields touched through either pointer, by byte offset:
;       +0   switch state byte (CPU_SWITCH_STATE_STORED / CPU_SWITCH_STATE_LOADED)
;       +8   saved stack pointer
;       +16  saved GDTR
;       +26  saved IDTR
;
; The general-purpose registers, CR0, CR4 and RFLAGS are pushed on the
; caller's stack; after the stack pointers have been exchanged the matching
; pops read the values the other CPU pushed.
; The CPU_SWITCH_STATE_* constants are not defined in this file.
;-------------------------------------------------------------------------------------
global ASM_PFX(AsmExchangeRole)
ASM_PFX(AsmExchangeRole):
    ; DO NOT call other functions in this function, since 2 CPU may use 1 stack
    ; at the same time. If 1 CPU try to call a function, stack will be corrupted.

    ; Save the general-purpose registers.
    push    rax
    push    rbx
    push    rcx
    push    rdx
    push    rsi
    push    rdi
    push    rbp
    push    r8
    push    r9
    push    r10
    push    r11
    push    r12
    push    r13
    push    r14
    push    r15

    ; Save CR0 and CR4.
    mov     rax, cr0
    push    rax

    mov     rax, cr4
    push    rax

    ; rsi contains MyInfo pointer
    mov     rsi, rcx

    ; rdi contains OthersInfo pointer
    mov     rdi, rdx

    ; Store RFLAGS on the stack, and GDTR and IDTR in MyInfo
    pushfq
    sgdt    [rsi + 16]
    sidt    [rsi + 26]

    ; Store this CPU's stack pointer in MyInfo
    mov     [rsi + 8], rsp

    ; Update this CPU's switch state to STORED
    mov     byte [rsi], CPU_SWITCH_STATE_STORED

WaitForOtherStored:
    ; Wait until the other CPU has finished storing its state
    cmp     byte [rdi], CPU_SWITCH_STATE_STORED
    jz      OtherStored
    pause
    jmp     WaitForOtherStored

OtherStored:
    ; The other CPU has stored its state, so load it:
    ; GDTR value
    lgdt    [rdi + 16]

    ; IDTR value
    lidt    [rdi + 26]

    ; this CPU's future stack pointer
    mov     rsp, [rdi + 8]

    ; Update the other CPU's switch state to LOADED
    mov     byte [rdi], CPU_SWITCH_STATE_LOADED

WaitForOtherLoaded:
    ; Wait until the other CPU has finished loading its new state,
    ; otherwise the data on the stack may be corrupted
    cmp     byte [rsi], CPU_SWITCH_STATE_LOADED
    jz      OtherLoaded
    pause
    jmp     WaitForOtherLoaded

OtherLoaded:
    ; The other CPU already has the data it wants, so leave this procedure.
    ; Everything below is popped from the stack that was just loaded.
    popfq

    pop     rax
    mov     cr4, rax

    pop     rax
    mov     cr0, rax

    pop     r15
    pop     r14
    pop     r13
    pop     r12
    pop     r11
    pop     r10
    pop     r9
    pop     r8
    pop     rbp
    pop     rdi
    pop     rsi
    pop     rdx
    pop     rcx
    pop     rbx
    pop     rax

    ret