]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/DebugSupportDxe/Ipf/AsmFuncs.s
OptionRomPkg: Removing ipf which is no longer supported from edk2.
[mirror_edk2.git] / MdeModulePkg / Universal / DebugSupportDxe / Ipf / AsmFuncs.s
CommitLineData
7b414b4e 1/// @file\r
2/// Low level IPF routines used by the debug support driver\r
3///\r
e5eed7d3
HT
4/// Copyright (c) 2006 - 2008, Intel Corporation. All rights reserved.<BR>\r
5/// This program and the accompanying materials\r
7b414b4e 6/// are licensed and made available under the terms and conditions of the BSD License\r
7/// which accompanies this distribution. The full text of the license may be found at\r
8/// http://opensource.org/licenses/bsd-license.php\r
9///\r
10/// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
11/// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
12///\r
7b414b4e 13///\r
c1f23d63 14\r
15\r
e9f9d09a 16#include "Common.i"\r
c1f23d63 17#include "Ds64Macros.i"\r
18\r
132f41f0 19ASM_GLOBAL PatchSaveBuffer\r
20ASM_GLOBAL IpfContextBuf\r
21ASM_GLOBAL CommonHandler\r
22ASM_GLOBAL ExternalInterruptCount\r
c1f23d63 23\r
24\r
25/////////////////////////////////////////////\r
26//\r
27// Name:\r
28// InstructionCacheFlush\r
29//\r
30// Description:\r
31// Flushes instruction cache for specified number of bytes\r
32//\r
132f41f0 33 ASM_GLOBAL InstructionCacheFlush\r
c1f23d63 34 .proc InstructionCacheFlush\r
35 .align 32\r
36InstructionCacheFlush::\r
// Register usage (the alloc below declares 2 inputs, no locals, no outputs):\r
// r32 - address passed to fc; advanced by 32 on every loop iteration\r
// r33 - byte count, handled as an unsigned 32-bit value (cmp4 / zxt4)\r
// r31 - end address = r32 + byte count rounded up to a multiple of 32\r
// The loop body always runs at least once because the test is at the bottom.\r
37 { .mii\r
38 alloc r3=2, 0, 0, 0\r
// p6 is set when r33 < 32 (complement of 32 <= r33); the count is then raised to 32\r
39 cmp4.leu p0,p6=32, r33;;\r
40 (p6) mov r33=32;;\r
41 }\r
42 { .mii\r
43 nop.m 0\r
// r29 = zero-extended count, r30 = count mod 32 (low 5 bits)\r
44 zxt4 r29=r33;;\r
45 dep.z r30=r29, 0, 5;;\r
46 }\r
47 { .mii\r
// p7 is set when the remainder is non-zero; r28 = count / 32, plus one if p7\r
48 cmp4.eq p0,p7=r0, r30\r
49 shr.u r28=r29, 5;;\r
50 (p7) adds r28=1, r28;;\r
51 }\r
52 { .mii\r
53 nop.m 0\r
// r27 = number of 32-byte steps * 32, r26 = that value truncated to 32 bits\r
54 shl r27=r28, 5;;\r
55 zxt4 r26=r27;;\r
56 }\r
57 { .mfb\r
// r31 = first address past the range to flush\r
58 add r31=r26, r32\r
59 nop.f 0\r
60 nop.b 0\r
61 }\r
62LoopBack: // $L143:\r
63 { .mii\r
// flush the line containing r32, step 32 bytes, p14 = more to do (unsigned r32 < r31)\r
64 fc r32\r
65 adds r32=32, r32;;\r
66 cmp.ltu p14,p15=r32, r31\r
67 }\r
68 { .mfb\r
69 nop.m 0\r
70 nop.f 0\r
71 //(p14) br.cond.dptk.few $L143#;;\r
72 (p14) br.cond.dptk.few LoopBack;;\r
73 }\r
74 { .mmi\r
// sync.i followed by srlz.i after all fc instructions have been issued\r
75 sync.i;;\r
76 srlz.i\r
77 nop.i 0;;\r
78 }\r
79 { .mfb\r
80 nop.m 0\r
81 nop.f 0\r
82 br.ret.sptk.few b0;;\r
83 }\r
84 .endp InstructionCacheFlush\r
85\r
86\r
87/////////////////////////////////////////////\r
88//\r
89// Name:\r
90// ChainHandler\r
91//\r
92// Description:\r
93// Chains an interrupt handler\r
94//\r
95// The purpose of this function is to enable chaining of the external interrupt.\r
96// Since there's no clean SAL abstraction for doing this, we must do it\r
97// surreptitiously.\r
98//\r
99// The reserved IVT entry at offset 0x3400 is coopted for use by this handler.\r
100// According to Itanium architecture, it is reserved. Strictly speaking, this is\r
101// not safe, as we're cheating and violating the Itanium architecture. However,\r
102// as long as we're the only ones cheating, we should be OK. Without hooks in\r
103// the SAL to enable IVT management, there aren't many good options.\r
104//\r
105// The strategy is to replace the first bundle of the external interrupt handler\r
106// with our own that will branch into a piece of code we've supplied and located\r
107// in the reserved IVT entry. Only the first bundle of the external interrupt\r
108// IVT entry is modified.\r
109//\r
110// The original bundle is moved and relocated to space\r
111// allocated within the reserved IVT entry. The bundle that follows it\r
112// is generated as a hard-coded branch back to the second bundle of the\r
113// external interrupt IVT entry, just in case the first bundle had no branch.\r
114//\r
115// Our new code will execute our handler, and then fall through to the\r
116// original bundle after restoring all context appropriately.\r
117//\r
118// The following is a representation of what the IVT memory map looks like with\r
119// our chained handler installed:\r
120//\r
121//\r
7b414b4e 122//\r
123//\r
124//\r
125// This IVT entry is Failsafe bundle\r
126// reserved by the\r
127// Itanium architecture Original bundle 0\r
128// and is used for\r
129// for locating our\r
130// handler and the\r
131// original bundle Patch code...\r
132// zero of the ext\r
133// interrupt handler\r
134//\r
135// RSVD (3400) Unused\r
136//\r
137//\r
138//\r
139//\r
140//\r
141//\r
142//\r
143//\r
144//\r
145//\r
146//\r
147//\r
c1f23d63 148// EXT_INT (3000) Bundle 0 Bundle zero - This one is\r
149// modified, all other bundles\r
150// in the EXT_INT entry are\r
151// untouched.\r
152//\r
153//\r
154// Arguments:\r
155//\r
156// Returns:\r
157//\r
158// Notes:\r
159//\r
160//\r
132f41f0 161 ASM_GLOBAL ChainHandler\r
c1f23d63 162 .proc ChainHandler\r
163ChainHandler:\r
164\r
// Frame: 0 inputs, 2+3 locals, 3 outputs (out0-out2 carry the arguments of every call below).\r
// r2 holds cr.iva for the whole routine and r8 holds the return value; neither is\r
// written again after the two moves below.\r
// NOTE(review): r8 is set to 1 up front and the status RelocateBundle leaves in out0\r
// is never examined, so a failed relocation is still reported as success - confirm intended.\r
165 NESTED_SETUP( 0,2+3,3,0 )\r
166\r
167 mov r8=1 // r8 = success\r
168 mov r2=cr.iva;;\r
169//\r
170// NOTE: There's a potential hazard here in that we're simply stealing a bunch of\r
171// bundles (memory) from the IVT and assuming there's no catastrophic side effect.\r
172//\r
173// First, save IVT area we're taking over with the patch so we can restore it later\r
174//\r
175 addl out0=PATCH_ENTRY_OFFSET, r2 // out0 = source buffer\r
176 movl out1=PatchSaveBuffer // out1 = destination buffer\r
177 mov out2=0x40;; // out2 = number of bundles to copy... save entire IVT entry\r
178 br.call.sptk.few b0 = CopyBundles\r
179\r
180// Next, copy the patch code into the IVT\r
181 movl out0=PatchCode // out0 = source buffer of patch code\r
182 addl out1=PATCH_OFFSET, r2 // out1 = destination buffer - in IVT\r
f8c9de87 183 mov out2=PATCH_CODE_SIZE;; \r
184 shr out2=out2, 4;; // out2 = size in bytes / 16 = number of bundles to copy\r
c1f23d63 185 br.call.sptk.few b0 = CopyBundles\r
186\r
187\r
188// copy original bundle 0 from the external interrupt handler to the\r
189// appropriate place in the reserved IVT interrupt slot\r
190 addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = source buffer\r
191 addl out1=RELOCATED_EXT_INT, r2 // out1 = destination buffer - in reserved IVT\r
192 mov out2=1;; // out2 = copy 1 bundle\r
193 br.call.sptk.few b0 = CopyBundles\r
194\r
195// Now relocate it there because it very likely had a branch instruction\r
196// that must now be fixed up.\r
197 addl out0=RELOCATED_EXT_INT, r2 // out0 = new runtime address of bundle - in reserved IVT\r
198 addl out1=EXT_INT_ENTRY_OFFSET, r2;;// out1 = IP address of previous location\r
199 mov out2=out0;; // out2 = IP address of new location\r
200 br.call.sptk.few b0 = RelocateBundle\r
201\r
202// Now copy the failsafe branch into the next bundle just in case\r
203// the original ext int bundle 0 did not contain a branch instruction\r
204 movl out0=FailsafeBranch // out0 = source buffer\r
205 addl out1=FAILSAFE_BRANCH_OFFSET, r2 // out1 = destination buffer - in reserved IVT\r
206 mov out2=1;; // out2 = copy 1 bundle\r
207 br.call.sptk.few b0 = CopyBundles\r
208\r
209// Last, copy in our replacement for the external interrupt IVT entry bundle 0\r
210 movl out0=PatchCodeNewBun0 // out0 = source buffer - our replacement bundle 0\r
211 addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - bundle 0 of External interrupt entry\r
212 mov out2=1;; // out2 = copy 1 bundle\r
213 br.call.sptk.few b0 = CopyBundles\r
214\r
215ChainHandlerDone:\r
216 NESTED_RETURN\r
217\r
218 .endp ChainHandler\r
219\r
220\r
221/////////////////////////////////////////////\r
222//\r
223// Name:\r
224// UnchainHandler\r
225//\r
226// Description:\r
227// Unchains an interrupt handler\r
228//\r
229// Arguments:\r
230//\r
231// Returns:\r
232//\r
233// Notes:\r
234//\r
235//\r
132f41f0 236 ASM_GLOBAL UnchainHandler\r
c1f23d63 237 .proc UnchainHandler\r
238\r
239UnchainHandler:\r
240\r
// Undoes ChainHandler in reverse: moves the saved bundle 0 back to the external\r
// interrupt entry, re-relocates it for that address, then restores the saved patch area.\r
// r2 holds cr.iva throughout; r8 is returned as 1 unconditionally.\r
// NOTE(review): the status RelocateBundle leaves in out0 is not checked here.\r
241 NESTED_SETUP( 0,2+3,3,0 )\r
242\r
243 mov r8=1 // r8 = success\r
244 mov r2=cr.iva;; // r2 = interrupt vector address\r
245\r
246// First copy original Ext Int bundle 0 back to its proper home...\r
247 addl out0=RELOCATED_EXT_INT, r2 // out0 = source - in reserved IVT\r
248 addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - first bundle of Ext Int entry\r
249 mov out2=1;; // out2 = copy 1 bundle\r
250 br.call.sptk.few b0 = CopyBundles\r
251\r
252// Now, relocate it again...\r
253 addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = New runtime address\r
254 addl out1=RELOCATED_EXT_INT, r2;; // out1 = IP address of previous location\r
255 mov out2=out0;; // out2 = IP address of new location\r
256 br.call.sptk.few b0 = RelocateBundle\r
257\r
258// Last, restore the patch area\r
259 movl out0=PatchSaveBuffer // out0 = source buffer\r
260 addl out1=PATCH_ENTRY_OFFSET, r2 // out1 = destination buffer\r
261 mov out2=0x40;; // out2 = number of bundles to copy... restore entire IVT entry\r
262 br.call.sptk.few b0 = CopyBundles\r
263\r
264UnchainHandlerDone:\r
265 NESTED_RETURN\r
266\r
267 .endp UnchainHandler\r
268\r
269\r
270/////////////////////////////////////////////\r
271//\r
272// Name:\r
273// CopyBundles\r
274//\r
275// Description:\r
276// Copies instruction bundles - flushes icache as necessary\r
277//\r
278// Arguments:\r
279// in0 - Bundle source\r
280// in1 - Bundle destination\r
281// in2 - Bundle count\r
282//\r
283// Returns:\r
284//\r
285// Notes:\r
286// This procedure is a leaf routine\r
287//\r
288 .proc CopyBundles\r
289\r
290CopyBundles:\r
291\r
// Frame: 3 inputs, 2+1 locals (loc2 is the 8-byte transfer register), no outputs.\r
// The copy is done 8 bytes at a time, so the bundle count is doubled first.\r
// A count of 0 copies nothing because the test sits at the top of the loop.\r
292 NESTED_SETUP(3,2+1,0,0)\r
293\r
294 shl in2=in2, 1;; // in2 = count of 8 byte blocks to copy\r
295\r
296CopyBundlesLoop:\r
297\r
298 cmp.eq p14, p15 = 0, in2;; // Check if done\r
299(p14) br.sptk.few CopyBundlesDone;;\r
300\r
301 ld8 loc2=[in0], 0x8;; // loc2 = source bytes; in0 is post-incremented to the next source\r
302 st8 [in1]=loc2;; // [in1] = destination bytes\r
303 fc in1;; // Flush instruction cache\r
304 sync.i;; // Ensure local and remote data/inst caches in sync\r
305 srlz.i;; // Ensure sync has been observed\r
306 add in1=0x8, in1;; // in1 = next destination\r
307 add in2=-1, in2;; // in2 = decrement 8 bytes blocks to copy\r
308 br.sptk.few CopyBundlesLoop;;\r
309\r
310CopyBundlesDone:\r
311 NESTED_RETURN\r
312\r
313 .endp CopyBundles\r
314\r
315\r
316/////////////////////////////////////////////\r
317//\r
318// Name:\r
319// RelocateBundle\r
320//\r
321// Description:\r
322// Relocates an instruction bundle by updating any ip-relative branch instructions.\r
323//\r
324// Arguments:\r
325// in0 - Runtime address of bundle\r
326// in1 - IP address of previous location of bundle\r
327// in2 - IP address of new location of bundle\r
328//\r
329// Returns:\r
330// in0 - 1 if successful or 0 if unsuccessful\r
331//\r
332// Notes:\r
333// This routine examines all slots in the given bundle that are destined for the\r
334// branch execution unit. If any of these slots contain an IP-relative branch\r
335// namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative\r
336// address. Errors can occur if a branch cannot be reached.\r
337//\r
338 .proc RelocateBundle\r
339\r
340RelocateBundle:\r
341\r
// Frame: 3 inputs, 2+4 locals, 3 outputs.\r
// loc2 - current slot index (starts at SLOT0, loop ends once it exceeds SLOT2)\r
// loc3 - bundle template returned by GetTemplate\r
// loc4 - slot encoding returned by GetSlot\r
// loc5 - runtime address of the bundle (copied from in0 before in0 becomes the status)\r
// The helpers called here hand results back by overwriting their own in0/in1, which\r
// this routine reads as out0/out1 after each call.\r
342 NESTED_SETUP(3,2+4,3,0)\r
343\r
344 mov loc2=SLOT0 // loc2 = slot index\r
345 mov loc5=in0;; // loc5 = runtime address of bundle\r
346 mov in0=1;; // in0 = success\r
347\r
348RelocateBundleNextSlot:\r
349\r
350 cmp.ge p14, p15 = SLOT2, loc2;; // Check if maximum slot\r
351(p15) br.sptk.few RelocateBundleDone\r
352\r
353 mov out0=loc5;; // out0 = runtime address of bundle\r
354 br.call.sptk.few b0 = GetTemplate\r
355 mov loc3=out0;; // loc3 = instruction template\r
356 mov out0=loc5 // out0 = runtime address of bundle\r
357 mov out1=loc2;; // out1 = instruction slot number\r
358 br.call.sptk.few b0 = GetSlot\r
359 mov loc4=out0;; // loc4 = instruction encoding\r
360 mov out0=loc4 // out0 = instruction encoding\r
361 mov out1=loc2 // out1 = instruction slot number\r
362 mov out2=loc3;; // out2 = instruction template\r
363 br.call.sptk.few b0 = IsSlotBranch\r
364 cmp.eq p14, p15 = 1, out0;; // Check if branch slot\r
365(p15) add loc2=1,loc2 // Increment slot\r
366(p15) br.sptk.few RelocateBundleNextSlot\r
367 mov out0=loc4 // out0 = instruction encoding\r
368 mov out1=in1 // out1 = IP address of previous location\r
369 mov out2=in2;; // out2 = IP address of new location\r
370 br.call.sptk.few b0 = RelocateSlot\r
371 cmp.eq p14, p15 = 1, out1;; // Check if relocated slot\r
372(p15) mov in0=0 // in0 = failure\r
373(p15) br.sptk.few RelocateBundleDone\r
374 mov out2=out0;; // out2 = instruction encoding\r
375 mov out0=loc5 // out0 = runtime address of bundle\r
376 mov out1=loc2;; // out1 = instruction slot number\r
377 br.call.sptk.few b0 = SetSlot\r
378 add loc2=1,loc2;; // Increment slot\r
379 br.sptk.few RelocateBundleNextSlot\r
380\r
381RelocateBundleDone:\r
382 NESTED_RETURN\r
383\r
384 .endp RelocateBundle\r
385\r
386\r
387/////////////////////////////////////////////\r
388//\r
389// Name:\r
390// RelocateSlot\r
391//\r
392// Description:\r
393// Relocates a single instruction slot by re-encoding the displacement of an\r
// ip-relative branch so that it still reaches the same target from the new bundle address.\r
394//\r
395// Arguments:\r
396// in0 - Instruction encoding (41-bits, right justified)\r
397// in1 - IP address of previous location of bundle\r
398// in2 - IP address of new location of bundle\r
399//\r
400// Returns:\r
401// in0 - Instruction encoding (41-bits, right justified)\r
402// in1 - 1 if successful otherwise 0\r
403//\r
404// Notes:\r
405// This procedure is a leaf routine\r
//\r
// Only major opcodes 4, 5 and 7 (bits 40-37) are re-encoded; any other encoding is\r
// returned unchanged with in1 = 1. The displacement is the 20-bit field at bits 32-13\r
// plus the sign bit at bit 36, counted in 16-byte bundles. in1 = 0 is returned when the\r
// magnitude of the new displacement exceeds 0x0FFFFFF bytes.\r
406//\r
407 .proc RelocateSlot\r
408\r
409RelocateSlot:\r
410 NESTED_SETUP(3,2+5,0,0)\r
411 extr.u loc2=in0, 37, 4;; // loc2 = instruction opcode\r
412 cmp.eq p14, p15 = 4, loc2;; // IP-relative branch (B1) or\r
413 // IP-relative counted branch (B2)\r
414(p15) cmp.eq p14, p15 = 5, loc2;; // IP-relative call (B3)\r
415(p15) cmp.eq p14, p15 = 7, loc2;; // IP-relative predict (B6)\r
416(p15) mov in1=1 // Instruction did not need to be reencoded\r
417(p15) br.sptk.few RelocateSlotDone\r
418 tbit.nz p14, p15 = in0, 36;; // put relative offset sign bit in p14\r
419 extr.u loc2=in0, 13, 20;; // loc2 = relative offset in instruction\r
420(p14) movl loc3=0xfffffffffff00000;; // extend sign\r
421(p14) or loc2=loc2, loc3;;\r
422 shl loc2=loc2,4;; // convert to byte offset instead of bundle offset\r
423 add loc3=loc2, in1;; // loc3 = physical address of branch target\r
425 sub loc4=loc3,in2;; // loc4 = relative offset from new ip to branch target\r
7b414b4e 426 cmp.lt p15, p14 = 0, loc4;; // p15 = offset > 0, p14 = offset <= 0 (only used for the absolute value)\r
c1f23d63 427(p14) sub loc5=r0,loc4 // get absolute value of offset\r
428(p15) mov loc5=loc4;;\r
429 movl loc6=0x0FFFFFF;; // maximum offset in bytes for ip-rel branch\r
430 cmp.gt p14, p15 = loc5, loc6;; // check to see we're not out of range for an ip-relative branch\r
431(p14) br.sptk.few RelocateSlotError\r
// The sign bit must be set only for a strictly negative offset: a zero offset with the\r
// sign bit set would encode the most negative displacement instead of 0.\r
432 cmp.lt p14, p15 = loc4, r0;; // p14 = new offset is negative, p15 = zero or positive\r
f8c9de87 433(p14) dep in0=-1,in0,36,1 // store sign bit in instruction\r
c1f23d63 434(p15) dep in0=0,in0,36,1\r
435 shr loc4=loc4, 4;; // convert back to bundle offset\r
436 dep in0=loc4,in0,13,16;; // put first 16 bits of new offset into instruction\r
437 shr loc4=loc4,16;;\r
438 dep in0=loc4,in0,13+16,4 // put last 4 bits of new offset into instruction\r
439 mov in1=1;; // in1 = success\r
440 br.sptk.few RelocateSlotDone;;\r
441\r
442RelocateSlotError:\r
443 mov in1=0;; // in1 = failure\r
444\r
445RelocateSlotDone:\r
446 NESTED_RETURN\r
447\r
448 .endp RelocateSlot\r
449\r
450\r
451/////////////////////////////////////////////\r
452//\r
453// Name:\r
454// IsSlotBranch\r
455//\r
456// Description:\r
457// Determines if the given instruction is a branch instruction.\r
458//\r
459// Arguments:\r
460// in0 - Instruction encoding (41-bits, right justified)\r
461// in1 - Instruction slot number\r
462// in2 - Bundle template\r
463//\r
464// Returns:\r
465// in0 - 1 if branch or 0 if not branch\r
466//\r
467// Notes:\r
468// This procedure is a leaf routine\r
469//\r
470// IsSlotBranch recognizes all branch instructions by looking at the provided template.\r
471// The instruction encoding is only passed to this routine for future expansion.\r
472//\r
473 .proc IsSlotBranch\r
474\r
475IsSlotBranch:\r
476\r
// Frame: 3 inputs, 2+0 locals, no outputs. The result replaces in0.\r
// Bit 0 of the template is cleared first so each odd/even template pair needs one compare.\r
// Decision order: BBB is a branch in every slot; slot 0 is a branch only for BBB;\r
// MBB is a branch in slots 1 and 2; slot 2 is also a branch for MIB, MMB and MFB.\r
477 NESTED_SETUP (3,2+0,0,0)\r
478\r
479 mov in0=1;; // in0 = 1 which destroys the instruction\r
480 andcm in2=in2,in0;; // in2 = even template to reduce compares\r
481 mov in0=0;; // in0 = not a branch\r
482 cmp.eq p14, p15 = 0x16, in2;; // Template 0x16 is BBB\r
483(p14) br.sptk.few IsSlotBranchTrue\r
484 cmp.eq p14, p15 = SLOT0, in1;; // Slot 0 has no other possibilities\r
485(p14) br.sptk.few IsSlotBranchDone\r
486 cmp.eq p14, p15 = 0x12, in2;; // Template 0x12 is MBB\r
487(p14) br.sptk.few IsSlotBranchTrue\r
488 cmp.eq p14, p15 = SLOT1, in1;; // Slot 1 has no other possibilities\r
489(p14) br.sptk.few IsSlotBranchDone\r
490 cmp.eq p14, p15 = 0x10, in2;; // Template 0x10 is MIB\r
491(p14) br.sptk.few IsSlotBranchTrue\r
492 cmp.eq p14, p15 = 0x18, in2;; // Template 0x18 is MMB\r
493(p14) br.sptk.few IsSlotBranchTrue\r
494 cmp.eq p14, p15 = 0x1C, in2;; // Template 0x1C is MFB\r
495(p14) br.sptk.few IsSlotBranchTrue\r
496 br.sptk.few IsSlotBranchDone\r
497\r
498IsSlotBranchTrue:\r
499 mov in0=1;; // in0 = branch\r
500\r
501IsSlotBranchDone:\r
502 NESTED_RETURN\r
503\r
504 .endp IsSlotBranch\r
505\r
506\r
507/////////////////////////////////////////////\r
508//\r
509// Name:\r
510// GetTemplate\r
511//\r
512// Description:\r
513// Retrieves the instruction template for an instruction bundle\r
514//\r
515// Arguments:\r
516// in0 - Runtime address of bundle\r
517//\r
518// Returns:\r
519// in0 - Instruction template (5-bits, right-justified)\r
520//\r
521// Notes:\r
522// This procedure is a leaf routine\r
523//\r
524 .proc GetTemplate\r
525\r
526GetTemplate:\r
527\r
// Frame: 1 input, 2+2 locals, no outputs. The result replaces in0.\r
// The load post-increments in0 by 8, but in0 is overwritten with the result afterwards.\r
528 NESTED_SETUP (1,2+2,0,0)\r
529\r
530 ld8 loc2=[in0], 0x8 // loc2 = first 8 bytes of branch bundle\r
531 movl loc3=MASK_0_4;; // loc3 = template mask\r
532 and loc2=loc2,loc3;; // loc2 = template, right justified\r
533 mov in0=loc2;; // in0 = template, right justified\r
534\r
535 NESTED_RETURN\r
536\r
537 .endp GetTemplate\r
538\r
539\r
540/////////////////////////////////////////////\r
541//\r
542// Name:\r
543// GetSlot\r
544//\r
545// Description:\r
546// Gets the instruction encoding for an instruction slot and bundle\r
547//\r
548// Arguments:\r
549// in0 - Runtime address of bundle\r
550// in1 - Instruction slot (either 0, 1, or 2)\r
551//\r
552// Returns:\r
553// in0 - Instruction encoding (41-bits, right justified)\r
554//\r
555// Notes:\r
556// This procedure is a leaf routine\r
557//\r
// Bundle layout as read by this routine (in0 = address passed by the caller):\r
558// Slot0 - [in0] Bits 45-5\r
559// Slot1 - [in0] Bits 63-46 (low 18 bits) and [in0 + 0x8] Bits 22-0 (high 23 bits)\r
560// Slot2 - [in0 + 0x8] Bits 63-23\r
// Any slot number other than 1 or 2 is treated as slot 0.\r
561//\r
562 .proc GetSlot\r
563\r
564GetSlot:\r
565 NESTED_SETUP (2,2+3,0,0)\r
566\r
567 ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of branch bundle; in0 now points at second 8 bytes\r
568 ld8 loc3=[in0];; // loc3 = second 8 bytes of branch bundle\r
569 cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r
570 (p14) br.cond.sptk.few GetSlot2;; // get slot 2\r
571 cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r
572 (p14) br.cond.sptk.few GetSlot1;; // get slot 1\r
573\r
574GetSlot0:\r
575 extr.u in0=loc2, 5, 41 // in0 = extracted slot 0 (41 bits starting at bit 5)\r
576 br.sptk.few GetSlotDone;;\r
577\r
578GetSlot1:\r
579 extr.u in0=loc2, 46, 18 // in0 = bits 63-46 of loc2 right-justified\r
580 extr.u loc4=loc3, 0, 23;; // loc4 = bits 22-0 of loc3 right-justified\r
// the 23 high bits are merged in two steps (15 bits, then 8 bits)\r
581 dep in0=loc4, in0, 18, 15;;\r
582 shr.u loc4=loc4,15;;\r
583 dep in0=loc4, in0, 33, 8;; // in0 = extracted slot 1\r
584 br.sptk.few GetSlotDone;;\r
585\r
586GetSlot2:\r
587 extr.u in0=loc3, 23, 41;; // in0 = extracted slot 2\r
588\r
589GetSlotDone:\r
590 NESTED_RETURN\r
591\r
592 .endp GetSlot\r
593\r
594\r
595/////////////////////////////////////////////\r
596//\r
597// Name:\r
598// SetSlot\r
599//\r
600// Description:\r
601// Sets the instruction encoding for an instruction slot and bundle\r
602//\r
603// Arguments:\r
604// in0 - Runtime address of bundle\r
605// in1 - Instruction slot (either 0, 1, or 2)\r
606// in2 - Instruction encoding (41-bits, right justified)\r
607//\r
608// Returns:\r
609//\r
610// Notes:\r
611// This procedure is a leaf routine\r
//\r
// The first load post-increments in0, so for the rest of the routine in0 addresses the\r
// SECOND 8 bytes of the bundle and the first 8 bytes live at in0 - 8.\r
// Any slot number other than 1 or 2 is treated as slot 0.\r
// NOTE(review): the stores here are not followed by fc/sync.i/srlz.i; confirm whether\r
// callers need the instruction cache flushed after a slot is patched.\r
612//\r
613 .proc SetSlot\r
614\r
615SetSlot:\r
616 NESTED_SETUP (3,2+3,0,0)\r
617\r
618 ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of bundle; in0 now points at second 8 bytes\r
619 ld8 loc3=[in0];; // loc3 = second 8 bytes of bundle\r
620 cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r
621 (p14) br.cond.sptk.few SetSlot2;; // set slot 2\r
622 cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r
623 (p14) br.cond.sptk.few SetSlot1;; // set slot 1\r
624\r
625SetSlot0:\r
626 dep loc2=0, loc2, 5, 41;; // remove old instruction from slot 0\r
627 shl loc4=in2, 5;; // loc4 = new instruction ready to be inserted\r
628 or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r
629 add loc4=-0x8,in0;; // loc4 = address of first 8 bytes of bundle (in0 was post-incremented)\r
630 st8 [loc4]=loc2 // [loc4] = updated bundle\r
631 br.sptk.few SetSlotDone;;\r
632 ;;\r
633\r
634SetSlot1:\r
635 dep loc2=0, loc2, 46, 18 // remove old instruction from slot 1\r
636 dep loc3=0, loc3, 0, 23;;\r
637 shl loc4=in2, 46;; // loc4 = low 18 bits of instruction positioned at bits 63-46\r
638 or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r
639 add loc4=-0x8,in0;; // loc4 = address of first 8 bytes of bundle (in0 was post-incremented)\r
640 st8 [loc4]=loc2;; // [loc4] = updated bundle\r
641 shr.u loc4=in2, 18;; // loc4 = high 23 bits of instruction positioned at bits 22-0\r
642 or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r
643 st8 [in0]=loc3;; // [in0] = updated bundle\r
644 br.sptk.few SetSlotDone;;\r
645\r
646SetSlot2:\r
647 dep loc3=0, loc3, 23, 41;; // remove old instruction from slot 2\r
648 shl loc4=in2, 23;; // loc4 = instruction ready to be inserted\r
649 or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r
650 st8 [in0]=loc3;; // [in0] = updated bundle\r
651\r
652SetSlotDone:\r
653\r
654 NESTED_RETURN\r
655 .endp SetSlot\r
656\r
657\r
658/////////////////////////////////////////////\r
659//\r
660// Name:\r
661// GetIva\r
662//\r
663// Description:\r
664// C callable function to obtain the current value of IVA\r
665//\r
666// Returns:\r
667// Current value if IVA\r
668\r
132f41f0 669 ASM_GLOBAL GetIva\r
c1f23d63 670 .proc GetIva\r
671GetIva:\r
// Takes no arguments and allocates no frame; the value read from cr2 is returned in r8.\r
// cr2 is the numeric name of the register the rest of this file calls cr.iva.\r
672 mov r8=cr2;;\r
673 br.ret.sptk.many b0\r
674\r
675 .endp GetIva\r
676\r
677\r
678/////////////////////////////////////////////\r
679//\r
680// Name:\r
681// ProgramInterruptFlags\r
682//\r
683// Description:\r
684// C callable function to enable/disable interrupts\r
685//\r
// Arguments:\r
// in0 - New values for the PSR bits selected by mask 0x6000 (psr.ic and psr.i);\r
// all other bits of in0 are ignored\r
//\r
686// Returns:\r
687// r8 - Previous state of psr.ic and psr.i (PSR masked with 0x6000)\r
688//\r
132f41f0 689 ASM_GLOBAL ProgramInterruptFlags\r
c1f23d63 690 .proc ProgramInterruptFlags\r
691ProgramInterruptFlags:\r
// Frame: 1 input, 2 locals. loc0 is immediately reused to hold the PSR image and\r
// loc1 holds the 0x6000 bit mask.\r
692 alloc loc0=1,2,0,0;;\r
693 mov loc0=psr\r
694 mov loc1=0x6000;;\r
695 and r8=loc0, loc1 // obtain current psr.ic and psr.i state\r
696 and in0=in0, loc1 // ensure no extra bits set in input\r
697 andcm loc0=loc0,loc1;; // clear original psr.i and psr.ic\r
698 or loc0=loc0,in0;; // OR in new psr.i and psr.ic values\r
699 mov psr.l=loc0;; // write new psr\r
700 srlz.d\r
701 br.ret.sptk.many b0 // return\r
702\r
703 .endp ProgramInterruptFlags\r
704\r
705\r
706/////////////////////////////////////////////\r
707//\r
708// Name:\r
709// SpillContext\r
710//\r
711// Description:\r
712// Saves system context to context record.\r
713//\r
714// Arguments:\r
715// in0 = 512 byte aligned context record address\r
716// in1 = original B0\r
717// in2 = original ar.bsp\r
718// in3 = original ar.bspstore\r
719// in4 = original ar.rnat\r
720// in5 = original ar.pfs\r
721//\r
722// Notes:\r
723// loc0 - scratch\r
724// loc1 - scratch\r
725// loc2 - temporary application unat storage\r
726// loc3 - temporary exception handler unat storage\r
727\r
728 .proc SpillContext\r
729\r
730SpillContext:\r
// Writes the context record sequentially through in0, which is post-incremented by\r
// every store except the last one. Record order as written below:\r
// r0-r31 (8 bytes each), f2-f31 (16 bytes each), pr, b0-b7, ar.rsc, ar.bsp,\r
// ar.bspstore, ar.rnat, ar.fcr, ar.eflag, ar.csd, ar.ssd, ar.cflg, ar.fsr, ar.fir,\r
// ar.fdr, ar.ccv, ar.unat, ar.fpsr, ar.pfs, ar.lc, ar.ec, cr.dcr, cr.itm, cr.iva,\r
// cr.pta, cr.ipsr, cr.isr, cr.iip, cr.ifa, cr.itir, cr.iipa, cr.ifs, cr.iim, cr.iha,\r
// dbr0-dbr7, ibr0-ibr7, and finally the UNAT value captured after the r0-r31 spills.\r
// b0, ar.bsp, ar.bspstore, ar.rnat and ar.pfs come from in1-in5, not from the live registers.\r
// NOTE(review): ar.unat is zeroed here and not written back before returning, and loc0\r
// is reused as scratch right after alloc - confirm the caller accounts for both.\r
731 alloc loc0=6,4,0,0;; // alloc 6 input, 4 locals, 0 outs\r
732 mov loc2=ar.unat;; // save application context unat (spilled later)\r
733 mov ar.unat=r0;; // set UNAT=0\r
734 st8.spill [in0]=r0,8;;\r
735 st8.spill [in0]=r1,8;; // save R1 - R31\r
736 st8.spill [in0]=r2,8;;\r
737 st8.spill [in0]=r3,8;;\r
738 st8.spill [in0]=r4,8;;\r
739 st8.spill [in0]=r5,8;;\r
740 st8.spill [in0]=r6,8;;\r
741 st8.spill [in0]=r7,8;;\r
742 st8.spill [in0]=r8,8;;\r
743 st8.spill [in0]=r9,8;;\r
744 st8.spill [in0]=r10,8;;\r
745 st8.spill [in0]=r11,8;;\r
746 st8.spill [in0]=r12,8;;\r
747 st8.spill [in0]=r13,8;;\r
748 st8.spill [in0]=r14,8;;\r
749 st8.spill [in0]=r15,8;;\r
750 st8.spill [in0]=r16,8;;\r
751 st8.spill [in0]=r17,8;;\r
752 st8.spill [in0]=r18,8;;\r
753 st8.spill [in0]=r19,8;;\r
754 st8.spill [in0]=r20,8;;\r
755 st8.spill [in0]=r21,8;;\r
756 st8.spill [in0]=r22,8;;\r
757 st8.spill [in0]=r23,8;;\r
758 st8.spill [in0]=r24,8;;\r
759 st8.spill [in0]=r25,8;;\r
760 st8.spill [in0]=r26,8;;\r
761 st8.spill [in0]=r27,8;;\r
762 st8.spill [in0]=r28,8;;\r
763 st8.spill [in0]=r29,8;;\r
764 st8.spill [in0]=r30,8;;\r
765 st8.spill [in0]=r31,8;;\r
// UNAT is captured here, directly after the general register spills; the later\r
// st8.spill stores are made after this capture.\r
766 mov loc3=ar.unat;; // save debugger context unat (spilled later)\r
767 stf.spill [in0]=f2,16;; // save f2 - f31\r
768 stf.spill [in0]=f3,16;;\r
769 stf.spill [in0]=f4,16;;\r
770 stf.spill [in0]=f5,16;;\r
771 stf.spill [in0]=f6,16;;\r
772 stf.spill [in0]=f7,16;;\r
773 stf.spill [in0]=f8,16;;\r
774 stf.spill [in0]=f9,16;;\r
775 stf.spill [in0]=f10,16;;\r
776 stf.spill [in0]=f11,16;;\r
777 stf.spill [in0]=f12,16;;\r
778 stf.spill [in0]=f13,16;;\r
779 stf.spill [in0]=f14,16;;\r
780 stf.spill [in0]=f15,16;;\r
781 stf.spill [in0]=f16,16;;\r
782 stf.spill [in0]=f17,16;;\r
783 stf.spill [in0]=f18,16;;\r
784 stf.spill [in0]=f19,16;;\r
785 stf.spill [in0]=f20,16;;\r
786 stf.spill [in0]=f21,16;;\r
787 stf.spill [in0]=f22,16;;\r
788 stf.spill [in0]=f23,16;;\r
789 stf.spill [in0]=f24,16;;\r
790 stf.spill [in0]=f25,16;;\r
791 stf.spill [in0]=f26,16;;\r
792 stf.spill [in0]=f27,16;;\r
793 stf.spill [in0]=f28,16;;\r
794 stf.spill [in0]=f29,16;;\r
795 stf.spill [in0]=f30,16;;\r
796 stf.spill [in0]=f31,16;;\r
797 mov loc0=pr;; // save predicates\r
798 st8.spill [in0]=loc0,8;;\r
799 st8.spill [in0]=in1,8;; // save b0 - b7... in1 already equals saved b0\r
800 mov loc0=b1;;\r
801 st8.spill [in0]=loc0,8;;\r
802 mov loc0=b2;;\r
803 st8.spill [in0]=loc0,8;;\r
804 mov loc0=b3;;\r
805 st8.spill [in0]=loc0,8;;\r
806 mov loc0=b4;;\r
807 st8.spill [in0]=loc0,8;;\r
808 mov loc0=b5;;\r
809 st8.spill [in0]=loc0,8;;\r
810 mov loc0=b6;;\r
811 st8.spill [in0]=loc0,8;;\r
812 mov loc0=b7;;\r
813 st8.spill [in0]=loc0,8;;\r
814 mov loc0=ar.rsc;; // save ar.rsc\r
815 st8.spill [in0]=loc0,8;;\r
816 st8.spill [in0]=in2,8;; // save ar.bsp (in2)\r
817 st8.spill [in0]=in3,8;; // save ar.bspstore (in3)\r
818 st8.spill [in0]=in4,8;; // save ar.rnat (in4)\r
819 mov loc0=ar.fcr;; // save ar.fcr (ar21 - IA32 floating-point control register)\r
820 st8.spill [in0]=loc0,8;;\r
821 mov loc0=ar.eflag;; // save ar.eflag (ar24)\r
822 st8.spill [in0]=loc0,8;;\r
823 mov loc0=ar.csd;; // save ar.csd (ar25 - ia32 CS descriptor)\r
824 st8.spill [in0]=loc0,8;;\r
825 mov loc0=ar.ssd;; // save ar.ssd (ar26 - ia32 ss descriptor)\r
826 st8.spill [in0]=loc0,8;;\r
827 mov loc0=ar.cflg;; // save ar.cflg (ar27 - ia32 cr0 and cr4)\r
828 st8.spill [in0]=loc0,8;;\r
829 mov loc0=ar.fsr;; // save ar.fsr (ar28 - ia32 floating-point status register)\r
830 st8.spill [in0]=loc0,8;;\r
831 mov loc0=ar.fir;; // save ar.fir (ar29 - ia32 floating-point instruction register)\r
832 st8.spill [in0]=loc0,8;;\r
833 mov loc0=ar.fdr;; // save ar.fdr (ar30 - ia32 floating-point data register)\r
834 st8.spill [in0]=loc0,8;;\r
835 mov loc0=ar.ccv;; // save ar.ccv\r
836 st8.spill [in0]=loc0,8;;\r
837 st8.spill [in0]=loc2,8;; // save ar.unat (saved to loc2 earlier)\r
838 mov loc0=ar.fpsr;; // save floating point status register\r
839 st8.spill [in0]=loc0,8;;\r
840 st8.spill [in0]=in5,8;; // save ar.pfs\r
841 mov loc0=ar.lc;; // save ar.lc\r
842 st8.spill [in0]=loc0,8;;\r
843 mov loc0=ar.ec;; // save ar.ec\r
844 st8.spill [in0]=loc0,8;;\r
845\r
846 // save control registers\r
847 mov loc0=cr.dcr;; // save dcr\r
848 st8.spill [in0]=loc0,8;;\r
849 mov loc0=cr.itm;; // save itm\r
850 st8.spill [in0]=loc0,8;;\r
851 mov loc0=cr.iva;; // save iva\r
852 st8.spill [in0]=loc0,8;;\r
853 mov loc0=cr.pta;; // save pta\r
854 st8.spill [in0]=loc0,8;;\r
855 mov loc0=cr.ipsr;; // save ipsr\r
856 st8.spill [in0]=loc0,8;;\r
857 mov loc0=cr.isr;; // save isr\r
858 st8.spill [in0]=loc0,8;;\r
859 mov loc0=cr.iip;; // save iip\r
860 st8.spill [in0]=loc0,8;;\r
861 mov loc0=cr.ifa;; // save ifa\r
862 st8.spill [in0]=loc0,8;;\r
863 mov loc0=cr.itir;; // save itir\r
864 st8.spill [in0]=loc0,8;;\r
865 mov loc0=cr.iipa;; // save iipa\r
866 st8.spill [in0]=loc0,8;;\r
867 mov loc0=cr.ifs;; // save ifs\r
868 st8.spill [in0]=loc0,8;;\r
869 mov loc0=cr.iim;; // save iim\r
870 st8.spill [in0]=loc0,8;;\r
871 mov loc0=cr.iha;; // save iha\r
872 st8.spill [in0]=loc0,8;;\r
873\r
874 // save debug registers\r
// dbr0/ibr0 are indexed with r0; the others use loc1 as the index register\r
875 mov loc0=dbr[r0];; // save dbr0 - dbr7\r
876 st8.spill [in0]=loc0,8;;\r
877 movl loc1=1;;\r
878 mov loc0=dbr[loc1];;\r
879 st8.spill [in0]=loc0,8;;\r
880 movl loc1=2;;\r
881 mov loc0=dbr[loc1];;\r
882 st8.spill [in0]=loc0,8;;\r
883 movl loc1=3;;\r
884 mov loc0=dbr[loc1];;\r
885 st8.spill [in0]=loc0,8;;\r
886 movl loc1=4;;\r
887 mov loc0=dbr[loc1];;\r
888 st8.spill [in0]=loc0,8;;\r
889 movl loc1=5;;\r
890 mov loc0=dbr[loc1];;\r
891 st8.spill [in0]=loc0,8;;\r
892 movl loc1=6;;\r
893 mov loc0=dbr[loc1];;\r
894 st8.spill [in0]=loc0,8;;\r
895 movl loc1=7;;\r
896 mov loc0=dbr[loc1];;\r
897 st8.spill [in0]=loc0,8;;\r
898 mov loc0=ibr[r0];; // save ibr0 - ibr7\r
899 st8.spill [in0]=loc0,8;;\r
900 movl loc1=1;;\r
901 mov loc0=ibr[loc1];;\r
902 st8.spill [in0]=loc0,8;;\r
903 movl loc1=2;;\r
904 mov loc0=ibr[loc1];;\r
905 st8.spill [in0]=loc0,8;;\r
906 movl loc1=3;;\r
907 mov loc0=ibr[loc1];;\r
908 st8.spill [in0]=loc0,8;;\r
909 movl loc1=4;;\r
910 mov loc0=ibr[loc1];;\r
911 st8.spill [in0]=loc0,8;;\r
912 movl loc1=5;;\r
913 mov loc0=ibr[loc1];;\r
914 st8.spill [in0]=loc0,8;;\r
915 movl loc1=6;;\r
916 mov loc0=ibr[loc1];;\r
917 st8.spill [in0]=loc0,8;;\r
918 movl loc1=7;;\r
919 mov loc0=ibr[loc1];;\r
920 st8.spill [in0]=loc0,8;;\r
// last record element: UNAT captured after the r0-r31 spills; no post-increment, so\r
// in0 is left pointing at this element\r
921 st8.spill [in0]=loc3;;\r
922\r
923 br.ret.sptk.few b0\r
924\r
925 .endp SpillContext\r
926\r
927\r
928/////////////////////////////////////////////\r
929//\r
930// Name:\r
931// FillContext\r
932//\r
933// Description:\r
934// Restores register context from the context record, reading it from the last element (in0) down to r1.\r
935//\r
936// Arguments:\r
937// in0 = address of the last element of the 512 byte aligned context record\r
938// in1 = receives B0 loaded from the context record (b0 itself is not written here)\r
939// in2 = receives ar.bsp loaded from the context record (ar.bsp is not written here)\r
940// in3 = receives ar.bspstore loaded from the context record (ar.bspstore is not written here)\r
941// in4 = receives ar.rnat loaded from the context record (ar.rnat is not written here)\r
942// in5 = receives ar.pfs loaded from the context record (ar.pfs is not written here)\r
943//\r
944// Notes:\r
945// loc0 - scratch\r
946// loc1 - scratch\r
947// loc2 - application ar.unat loaded from the context record; written to ar.unat last\r
948// loc3 - int_nat loaded from the context record (NaT bits for r1-r31); written to ar.unat before the general register fills\r
949\r
950 .proc FillContext\r
951FillContext:\r
952 alloc loc0=6,4,0,0;; // alloc 6 inputs, 4 locals, 0 outs\r
953 ld8.fill loc3=[in0],-8;; // int_nat (nat bits for R1-31)\r
954 movl loc1=7;; // ibr7\r
955 ld8.fill loc0=[in0],-8;;\r
956 mov ibr[loc1]=loc0;;\r
957 movl loc1=6;; // ibr6\r
958 ld8.fill loc0=[in0],-8;;\r
959 mov ibr[loc1]=loc0;;\r
960 movl loc1=5;; // ibr5\r
961 ld8.fill loc0=[in0],-8;;\r
962 mov ibr[loc1]=loc0;;\r
963 movl loc1=4;; // ibr4\r
964 ld8.fill loc0=[in0],-8;;\r
965 mov ibr[loc1]=loc0;;\r
966 movl loc1=3;; // ibr3\r
967 ld8.fill loc0=[in0],-8;;\r
968 mov ibr[loc1]=loc0;;\r
969 movl loc1=2;; // ibr2\r
970 ld8.fill loc0=[in0],-8;;\r
971 mov ibr[loc1]=loc0;;\r
972 movl loc1=1;; // ibr1\r
973 ld8.fill loc0=[in0],-8;;\r
974 mov ibr[loc1]=loc0;;\r
975 ld8.fill loc0=[in0],-8;; // ibr0\r
976 mov ibr[r0]=loc0;;\r
977 movl loc1=7;; // dbr7\r
978 ld8.fill loc0=[in0],-8;;\r
979 mov dbr[loc1]=loc0;;\r
980 movl loc1=6;; // dbr6\r
981 ld8.fill loc0=[in0],-8;;\r
982 mov dbr[loc1]=loc0;;\r
983 movl loc1=5;; // dbr5\r
984 ld8.fill loc0=[in0],-8;;\r
985 mov dbr[loc1]=loc0;;\r
986 movl loc1=4;; // dbr4\r
987 ld8.fill loc0=[in0],-8;;\r
988 mov dbr[loc1]=loc0;;\r
989 movl loc1=3;; // dbr3\r
990 ld8.fill loc0=[in0],-8;;\r
991 mov dbr[loc1]=loc0;;\r
992 movl loc1=2;; // dbr2\r
993 ld8.fill loc0=[in0],-8;;\r
994 mov dbr[loc1]=loc0;;\r
995 movl loc1=1;; // dbr1\r
996 ld8.fill loc0=[in0],-8;;\r
997 mov dbr[loc1]=loc0;;\r
998 ld8.fill loc0=[in0],-8;; // dbr0\r
999 mov dbr[r0]=loc0;;\r
1000 ld8.fill loc0=[in0],-8;; // iha\r
1001 mov cr.iha=loc0;;\r
1002 ld8.fill loc0=[in0],-8;; // iim\r
1003 mov cr.iim=loc0;;\r
1004 ld8.fill loc0=[in0],-8;; // ifs\r
1005 mov cr.ifs=loc0;;\r
1006 ld8.fill loc0=[in0],-8;; // iipa\r
1007 mov cr.iipa=loc0;;\r
1008 ld8.fill loc0=[in0],-8;; // itir\r
1009 mov cr.itir=loc0;;\r
1010 ld8.fill loc0=[in0],-8;; // ifa\r
1011 mov cr.ifa=loc0;;\r
1012 ld8.fill loc0=[in0],-8;; // iip\r
1013 mov cr.iip=loc0;;\r
1014 ld8.fill loc0=[in0],-8;; // isr\r
1015 mov cr.isr=loc0;;\r
1016 ld8.fill loc0=[in0],-8;; // ipsr\r
1017 mov cr.ipsr=loc0;;\r
1018 ld8.fill loc0=[in0],-8;; // pta\r
1019 mov cr.pta=loc0;;\r
1020 ld8.fill loc0=[in0],-8;; // iva\r
1021 mov cr.iva=loc0;;\r
1022 ld8.fill loc0=[in0],-8;; // itm\r
1023 mov cr.itm=loc0;;\r
1024 ld8.fill loc0=[in0],-8;; // dcr\r
1025 mov cr.dcr=loc0;;\r
1026 ld8.fill loc0=[in0],-8;; // ec\r
1027 mov ar.ec=loc0;;\r
1028 ld8.fill loc0=[in0],-8;; // lc\r
1029 mov ar.lc=loc0;;\r
1030 ld8.fill in5=[in0],-8;; // ar.pfs - left in in5, not written to ar.pfs here\r
1031 ld8.fill loc0=[in0],-8;; // ar.fpsr\r
1032 mov ar.fpsr=loc0;;\r
1033 ld8.fill loc2=[in0],-8;; // ar.unat - restored later...\r
1034 ld8.fill loc0=[in0],-8;; // ar.ccv\r
1035 mov ar.ccv=loc0;;\r
1036 ld8.fill loc0=[in0],-8;; // ar.fdr\r
1037 mov ar.fdr=loc0;;\r
1038 ld8.fill loc0=[in0],-8;; // ar.fir\r
1039 mov ar.fir=loc0;;\r
1040 ld8.fill loc0=[in0],-8;; // ar.fsr\r
1041 mov ar.fsr=loc0;;\r
1042 ld8.fill loc0=[in0],-8;; // ar.cflg\r
1043 mov ar.cflg=loc0;;\r
1044 ld8.fill loc0=[in0],-8;; // ar.ssd\r
1045 mov ar.ssd=loc0;;\r
1046 ld8.fill loc0=[in0],-8;; // ar.csd\r
1047 mov ar.csd=loc0;;\r
1048 ld8.fill loc0=[in0],-8;; // ar.eflag\r
1049 mov ar.eflag=loc0;;\r
1050 ld8.fill loc0=[in0],-8;; // ar.fcr\r
1051 mov ar.fcr=loc0;;\r
1052 ld8.fill in4=[in0],-8;; // ar.rnat - left in in4, not written to ar.rnat here\r
1053 ld8.fill in3=[in0],-8;; // bspstore - left in in3, not written to ar.bspstore here\r
1054 ld8.fill in2=[in0],-8;; // bsp - left in in2, not written to ar.bsp here\r
1055 ld8.fill loc0=[in0],-8;; // ar.rsc\r
1056 mov ar.rsc=loc0;;\r
1057 ld8.fill loc0=[in0],-8;; // b7, then b6..b1 in descending order; the b0 slot is loaded into in1 below\r
1058 mov b7=loc0;;\r
1059 ld8.fill loc0=[in0],-8;;\r
1060 mov b6=loc0;;\r
1061 ld8.fill loc0=[in0],-8;;\r
1062 mov b5=loc0;;\r
1063 ld8.fill loc0=[in0],-8;;\r
1064 mov b4=loc0;;\r
1065 ld8.fill loc0=[in0],-8;;\r
1066 mov b3=loc0;;\r
1067 ld8.fill loc0=[in0],-8;;\r
1068 mov b2=loc0;;\r
1069 ld8.fill loc0=[in0],-8;;\r
1070 mov b1=loc0;;\r
1071 ld8.fill in1=[in0],-8;; // b0 is temporarily stored in in1\r
1072 ld8.fill loc0=[in0],-16;; // predicates; post-decrement switches to 16 for the ldf.fill entries that follow\r
1073 mov pr=loc0;;\r
1074 ldf.fill f31=[in0],-16;;\r
1075 ldf.fill f30=[in0],-16;;\r
1076 ldf.fill f29=[in0],-16;;\r
1077 ldf.fill f28=[in0],-16;;\r
1078 ldf.fill f27=[in0],-16;;\r
1079 ldf.fill f26=[in0],-16;;\r
1080 ldf.fill f25=[in0],-16;;\r
1081 ldf.fill f24=[in0],-16;;\r
1082 ldf.fill f23=[in0],-16;;\r
1083 ldf.fill f22=[in0],-16;;\r
1084 ldf.fill f21=[in0],-16;;\r
1085 ldf.fill f20=[in0],-16;;\r
1086 ldf.fill f19=[in0],-16;;\r
1087 ldf.fill f18=[in0],-16;;\r
1088 ldf.fill f17=[in0],-16;;\r
1089 ldf.fill f16=[in0],-16;;\r
1090 ldf.fill f15=[in0],-16;;\r
1091 ldf.fill f14=[in0],-16;;\r
1092 ldf.fill f13=[in0],-16;;\r
1093 ldf.fill f12=[in0],-16;;\r
1094 ldf.fill f11=[in0],-16;;\r
1095 ldf.fill f10=[in0],-16;;\r
1096 ldf.fill f9=[in0],-16;;\r
1097 ldf.fill f8=[in0],-16;;\r
1098 ldf.fill f7=[in0],-16;;\r
1099 ldf.fill f6=[in0],-16;;\r
1100 ldf.fill f5=[in0],-16;;\r
1101 ldf.fill f4=[in0],-16;;\r
1102 ldf.fill f3=[in0],-16;;\r
1103 ldf.fill f2=[in0],-8;;\r
1104 mov ar.unat=loc3;; // restore unat (int_nat) before fill of general registers\r
1105 ld8.fill r31=[in0],-8;;\r
1106 ld8.fill r30=[in0],-8;;\r
1107 ld8.fill r29=[in0],-8;;\r
1108 ld8.fill r28=[in0],-8;;\r
1109 ld8.fill r27=[in0],-8;;\r
1110 ld8.fill r26=[in0],-8;;\r
1111 ld8.fill r25=[in0],-8;;\r
1112 ld8.fill r24=[in0],-8;;\r
1113 ld8.fill r23=[in0],-8;;\r
1114 ld8.fill r22=[in0],-8;;\r
1115 ld8.fill r21=[in0],-8;;\r
1116 ld8.fill r20=[in0],-8;;\r
1117 ld8.fill r19=[in0],-8;;\r
1118 ld8.fill r18=[in0],-8;;\r
1119 ld8.fill r17=[in0],-8;;\r
1120 ld8.fill r16=[in0],-8;;\r
1121 ld8.fill r15=[in0],-8;;\r
1122 ld8.fill r14=[in0],-8;;\r
1123 ld8.fill r13=[in0],-8;;\r
1124 ld8.fill r12=[in0],-8;;\r
1125 ld8.fill r11=[in0],-8;;\r
1126 ld8.fill r10=[in0],-8;;\r
1127 ld8.fill r9=[in0],-8;;\r
1128 ld8.fill r8=[in0],-8;;\r
1129 ld8.fill r7=[in0],-8;;\r
1130 ld8.fill r6=[in0],-8;;\r
1131 ld8.fill r5=[in0],-8;;\r
1132 ld8.fill r4=[in0],-8;;\r
1133 ld8.fill r3=[in0],-8;;\r
1134 ld8.fill r2=[in0],-8;;\r
1135 ld8.fill r1=[in0],-8;;\r
1136 mov ar.unat=loc2;; // restore application context unat\r
1137\r
1138 br.ret.sptk.many b0\r
1139\r
1140 .endp FillContext\r
1141\r
1142\r
1143/////////////////////////////////////////////\r
1144//\r
1145// Name:\r
1146// HookHandler\r
1147//\r
1148// Description:\r
1149// Common branch target from hooked IVT entries. Runs in interrupt context.\r
1150// Responsible for saving and restoring context and calling common C\r
1151// handler. Banked registers running on bank 0 at entry.\r
1152//\r
1153// Arguments:\r
1154// All arguments are passed in banked registers:\r
1155// B0_REG = Original B0\r
1156// SCRATCH_REG1 = IVT entry index\r
1157//\r
1158// Returns:\r
1159// Returns via rfi, or branches to cr.iva + PATCH_RETURN_OFFSET when the vector is EXCPT_EXTERNAL_INTERRUPT\r
1160//\r
1161// Notes:\r
1162// loc0 - scratch\r
1163// loc1 - scratch\r
1164// loc2 - vector number / mask\r
1165// loc3 - 512 byte aligned context record address\r
1166// loc4 - temporary storage of last address in context record\r
1167\r
1168HookHandler:\r
1169 flushrs;; // Synch RSE with backing store\r
1170 mov SCRATCH_REG2=ar.bsp // save interrupted context bsp\r
1171 mov SCRATCH_REG3=ar.bspstore // save interrupted context bspstore\r
1172 mov SCRATCH_REG4=ar.rnat // save interrupted context rnat\r
1173 mov SCRATCH_REG6=cr.ifs;; // save IFS in case we need to chain...\r
1174 cover;; // creates new frame, moves old\r
1175 // CFM to IFS.\r
1176 alloc SCRATCH_REG5=0,5,6,0 // alloc 5 locals, 6 outs\r
1177 ;;\r
1178 // copy banked registers to outs (and the vector to loc2) before switching to bank 1\r
1179 mov out1=B0_REG // out1 = Original B0\r
1180 mov out2=SCRATCH_REG2 // out2 = original ar.bsp\r
1181 mov out3=SCRATCH_REG3 // out3 = original ar.bspstore\r
1182 mov out4=SCRATCH_REG4 // out4 = original ar.rnat\r
1183 mov out5=SCRATCH_REG5 // out5 = original ar.pfs\r
1184 mov loc2=SCRATCH_REG1;; // loc2 = vector number + chain flag\r
1185 bsw.1;; // switch banked registers to bank 1\r
1186 srlz.d // explicit serialize required\r
1187 // now fill in context record structure\r
1188 movl loc3=IpfContextBuf // Ensure context record is aligned\r
1189 add loc0=-0x200,r0;; // mask the lower 9 bits (align on 512 byte boundary)\r
1190 and loc3=loc3,loc0;;\r
1191 add loc3=0x200,loc3;; // move to next 512 byte boundary\r
1192 // loc3 now contains the 512 byte aligned context record\r
1193 // spill register context into context record\r
1194 mov out0=loc3;; // Context record base in out0\r
1195 // original B0 in out1 already\r
1196 // original ar.bsp in out2 already\r
1197 // original ar.bspstore in out3 already\r
1198 br.call.sptk.few b0=SpillContext;; // spill context\r
1199 mov loc4=out0 // save out0 as left by SpillContext (address of last element in context record)\r
1200\r
1201 // At this point, the context has been saved to the context record and we're\r
1202 // ready to call the C part of the handler...\r
1203\r
1204 movl loc0=CommonHandler;; // obtain address of plabel\r
1205 ld8 loc1=[loc0];; // get entry point of CommonHandler\r
1206 mov b6=loc1;; // put it in a branch register\r
1207 adds loc1= 8, loc0;; // index to GP in plabel\r
1208 ld8 r1=[loc1];; // set up gp for C call\r
1209 mov loc1=0xfffff;; // mask off so only vector bits are present\r
1210 and out0=loc2,loc1;; // pass vector number (exception type)\r
1211 mov out1=loc3;; // pass context record address\r
1212 br.call.sptk.few b0=b6;; // call C handler\r
1213\r
1214 // We've returned from the C call, so restore the context and either rfi\r
1215 // back to interrupted thread, or chain into the SAL if this was an external interrupt\r
1216 mov out0=loc4;; // pass address of last element in context record\r
1217 br.call.sptk.few b0=FillContext;; // Fill context\r
1218 mov b0=out1 // fill in b0\r
1219 mov ar.rnat=out4\r
1220 mov ar.pfs=out5\r
1221\r
1222 // Loadrs is necessary because the debugger may have changed some values in\r
1223 // the backing store. The processor, however may not be aware that the\r
1224 // stacked registers need to be reloaded from the backing store. Therefore,\r
1225 // we explicitly cause the RSE to refresh the stacked register's contents\r
7b414b4e 1226 // from the backing store.\r
c1f23d63 1227 mov loc0=ar.rsc // get RSC value\r
1228 mov loc1=ar.rsc // save it so we can restore it\r
1229 movl loc3=0xffffffffc000ffff;; // create mask for clearing RSC.loadrs\r
1230 and loc0=loc0,loc3;; // create value for RSC with RSC.loadrs==0\r
1231 mov ar.rsc=loc0;; // modify RSC\r
1232 loadrs;; // invalidate register stack\r
1233 mov ar.rsc=loc1;; // restore original RSC\r
1234\r
1235 bsw.0;; // switch banked registers back to bank 0\r
1236 srlz.d;; // explicit serialize required\r
1237 mov PR_REG=pr // save predicates - to be restored after chaining decision\r
1238 mov B0_REG=b0 // save b0 - required by chain code\r
1239 mov loc2=EXCPT_EXTERNAL_INTERRUPT;;\r
1240 cmp.eq p7,p0=SCRATCH_REG1,loc2;; // check to see if this is the external interrupt (timer tick) vector\r
1241 (p7) br.cond.dpnt.few DO_CHAIN;;\r
1242\r
1243NO_CHAIN:\r
1244 mov pr=PR_REG;;\r
1245 rfi;; // return to the interrupted context\r
1246\r
1247DO_CHAIN:\r
1248 mov pr=PR_REG\r
1249 mov SCRATCH_REG1=cr.iva\r
1250 mov SCRATCH_REG2=PATCH_RETURN_OFFSET;;\r
1251 add SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;;\r
1252 mov b0=SCRATCH_REG1;;\r
1253 br.cond.sptk.few b0;;\r
1254\r
1255EndHookHandler:\r
1256\r
1257\r
1258/////////////////////////////////////////////\r
1259//\r
1260// Name:\r
1261// HookStub\r
1262//\r
1263// Description:\r
1264// HookStub will be copied from its loaded location into the IVT when\r
1265// an IVT entry is hooked. The IVT entry does an indirect jump via B0 to\r
1266// HookHandler, which in turn calls into the default C handler, which calls\r
1267// the user-installed C handler. The calls return and HookHandler executes\r
1268// an rfi.\r
1269//\r
1270// Notes:\r
1271// Saves B0 to B0_REG\r
1272// Saves IVT index to SCRATCH_REG1 (the immediate value is fixed up when the code is copied\r
1273// to the IVT entry).\r
1274\r
132f41f0 1275 ASM_GLOBAL HookStub\r
c1f23d63 1276 .proc HookStub\r
1277HookStub:\r
1278\r
1279 mov B0_REG=b0\r
1280 movl SCRATCH_REG1=HookHandler;;\r
1281 mov b0=SCRATCH_REG1;;\r
1282 mov SCRATCH_REG1=0;;// placeholder 0: the immediate value is fixed up during install of handler to be the vector number\r
1283 br.cond.sptk.few b0\r
1284\r
1285 .endp HookStub\r
1286\r
1287\r
1288/////////////////////////////////////////////\r
1289// The following code is moved into IVT entry 14 (offset 0x3400) which is reserved\r
1290// in the Itanium architecture. The patch code is located at the end of the\r
1291// IVT entry.\r
1292\r
1293PatchCode:\r
1294 mov SCRATCH_REG0=psr\r
1295 mov SCRATCH_REG6=cr.ipsr\r
1296 mov PR_REG=pr\r
1297 mov B0_REG=b0;;\r
1298\r
1299 // turn off data and register stack translations (clears PSR_DT and PSR_RT)\r
1300 movl SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;\r
1301 and SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;\r
1302 mov psr.l = SCRATCH_REG1;;\r
7b414b4e 1303 srlz.d\r
c1f23d63 1304 tbit.z p14, p15 = SCRATCH_REG6, PSR_IS;; // Check to see if we were\r
1305 // interrupted from IA32\r
1306 // context. If so, bail out\r
1307 // and chain to SAL immediately\r
1308 (p15) br.cond.sptk.few Stub_IVT_Passthru;;\r
1309 // we only want to take about 1 out of every 32 external interrupts to minimize\r
1310 // the impact to system performance. Check our interrupt count and bail\r
1311 // out if bit 5 is not yet set (32 interrupts are passed through, then one is hooked)\r
1312 movl SCRATCH_REG1=ExternalInterruptCount;;\r
1313 ld8 SCRATCH_REG2=[SCRATCH_REG1];; // ExternalInterruptCount\r
1314 tbit.z p14, p15 = SCRATCH_REG2, 5;; // bit 5 set?\r
1315 (p14) add SCRATCH_REG2=1, SCRATCH_REG2;; // No? Then increment\r
1316 // ExternalInterruptCount\r
1317 // and Chain to SAL\r
1318 // immediately\r
1319 (p14) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
1320 (p14) br.cond.sptk.few Stub_IVT_Passthru;;\r
1321 (p15) mov SCRATCH_REG2=0;; // Yes? Then reset\r
1322 // ExternalInterruptCount\r
1323 // and branch to\r
1324 // HookHandler\r
1325 (p15) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
1326 mov pr=PR_REG\r
1327 movl SCRATCH_REG1=HookHandler;; // SCRATCH_REG1 = entrypoint of HookHandler\r
1328 mov b0=SCRATCH_REG1;; // b0 = entrypoint of HookHandler\r
1329 mov SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;;\r
1330 br.sptk.few b0;; // branch to HookHandler\r
1331\r
1332PatchCodeRet:\r
1333 // fake-up an rfi to get RSE back to being coherent and ensure psr has\r
1334 // original contents when interrupt occurred, then exit to SAL\r
1335 // at this point:\r
1336 // cr.ifs has been modified by previous "cover"\r
1337 // SCRATCH_REG6 has original cr.ifs\r
1338\r
1339 mov SCRATCH_REG5=cr.ipsr\r
1340 mov SCRATCH_REG4=cr.iip;;\r
1341 mov cr.ipsr=SCRATCH_REG0\r
1342 mov SCRATCH_REG1=ip;;\r
1343 add SCRATCH_REG1=0x30, SCRATCH_REG1;;\r
1344 mov cr.iip=SCRATCH_REG1;;\r
1345 rfi;; // rfi to ip + 0x30 (presumably Stub_RfiTarget; depends on bundle layout - TODO confirm)\r
1346\r
1347Stub_RfiTarget:\r
1348 mov cr.ifs=SCRATCH_REG6\r
1349 mov cr.ipsr=SCRATCH_REG5\r
1350 mov cr.iip=SCRATCH_REG4;;\r
1351\r
1352Stub_IVT_Passthru:\r
1353 mov pr=PR_REG // pr = saved predicate registers\r
1354 mov b0=B0_REG;; // b0 = saved b0\r
1355EndPatchCode:\r
1356\r
1357\r
1358/////////////////////////////////////////////\r
1359// The following bundle is moved into IVT entry 14 (offset 0x3400) which is reserved\r
1360// in the Itanium architecture. This bundle will be the last bundle and will\r
1361// be located at offset 0x37F0 in the IVT. Its branch displacement is -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10).\r
1362\r
1363FailsafeBranch:\r
1364{\r
1365 .mib\r
1366 nop.m 0\r
1367 nop.i 0\r
1368 br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10)\r
1369}\r
1370\r
1371\r
1372/////////////////////////////////////////////\r
1373// The following bundle is moved into IVT entry 13 (offset 0x3000) which is the\r
1374// external interrupt vector. It branches to PATCH_BRANCH (the patch code).\r
1375\r
1376PatchCodeNewBun0:\r
1377{\r
1378 .mib\r
1379 nop.m 0\r
1380 nop.i 0\r
1381 br.cond.sptk.few PATCH_BRANCH\r
1382}\r