]> git.proxmox.com Git - mirror_edk2.git/blame - EdkModulePkg/Universal/DebugSupport/Dxe/ipf/AsmFuncs.s
Initial import.
[mirror_edk2.git] / EdkModulePkg / Universal / DebugSupport / Dxe / ipf / AsmFuncs.s
CommitLineData
878ddf1f 1//++\r
2// Copyright (c) 2006, Intel Corporation \r
3// All rights reserved. This program and the accompanying materials \r
4// are licensed and made available under the terms and conditions of the BSD License \r
5// which accompanies this distribution. The full text of the license may be found at \r
6// http://opensource.org/licenses/bsd-license.php \r
7// \r
8// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, \r
9// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. \r
10// \r
11// Module Name:\r
12//\r
13// AsmFuncs.s\r
14//\r
15// Abstract:\r
16//\r
17// Low level IPF routines used by the debug support driver\r
18//\r
19// Revision History:\r
20//\r
21//--\r
22\r
23\r
24#include "common.i"\r
25#include "Ds64Macros.i"\r
26\r
27.global PatchSaveBuffer\r
28.global IpfContextBuf\r
29.global CommonHandler\r
30.global ExternalInterruptCount\r
31\r
32\r
/////////////////////////////////////////////
//
//  Name:
//      InstructionCacheFlush
//
//  Description:
//      Flushes instruction cache for specified number of bytes
//
//  Arguments:
//      r32 - start address of the range to flush (any alignment)
//      r33 - number of bytes to flush, treated as an unsigned 32-bit value
//
//  Notes:
//      The length is raised to a minimum of 32 and rounded up to a multiple
//      of 32; the range is then flushed with one fc per 32 bytes.
//      The start address is aligned down to a 32-byte boundary before the
//      loop so that the final, partially covered 32-byte block of an
//      unaligned range is flushed as well.
//
        .global InstructionCacheFlush
        .proc   InstructionCacheFlush
        .align  32
InstructionCacheFlush::
        alloc   r3=2, 0, 0, 0
        cmp4.leu p0, p6=32, r33;;           // p6 = (length < 32), unsigned 32-bit compare
(p6)    mov     r33=32;;                    // always flush at least 32 bytes

        zxt4    r29=r33;;                   // r29 = length, zero extended from 32 bits
        dep.z   r30=r29, 0, 5;;             // r30 = length modulo 32

        cmp4.eq p0, p7=r0, r30              // p7 = length is not a multiple of 32
        shr.u   r28=r29, 5;;                // r28 = length / 32
(p7)    adds    r28=1, r28;;                // round the block count up

        shl     r27=r28, 5;;                // r27 = rounded length in bytes
        zxt4    r26=r27;;

        add     r31=r26, r32;;              // r31 = end address (original start + rounded length)
        and     r32=-32, r32;;              // align start down so the last block is not skipped

LoopBack:
        fc      r32                         // flush the block containing r32
        adds    r32=32, r32;;               // r32 = next 32-byte block
        cmp.ltu p14, p15=r32, r31;;         // p14 = more blocks remain
(p14)   br.cond.dptk.few LoopBack;;

        sync.i;;                            // make the flushes visible
        srlz.i;;                            // serialize instruction fetch
        br.ret.sptk.few b0;;
        .endp   InstructionCacheFlush
93\r
94\r
95/////////////////////////////////////////////\r
96//\r
97// Name:\r
98// ChainHandler\r
99//\r
100// Description:\r
101// Chains an interrupt handler\r
102//\r
103// The purpose of this function is to enable chaining of the external interrupt.\r
104// Since there's no clean SAL abstraction for doing this, we must do it\r
105// surreptitiously.\r
106//\r
107// The reserved IVT entry at offset 0x3400 is coopted for use by this handler.\r
108// According to Itanium architecture, it is reserved. Strictly speaking, this is\r
109// not safe, as we're cheating and violating the Itanium architecture. However,\r
110// as long as we're the only ones cheating, we should be OK. Without hooks in\r
111// the SAL to enable IVT management, there aren't many good options.\r
112//\r
113// The strategy is to replace the first bundle of the external interrupt handler\r
114// with our own that will branch into a piece of code we've supplied and located\r
115// in the reserved IVT entry. Only the first bundle of the external interrupt\r
116// IVT entry is modified.\r
117//\r
//  The original bundle is moved and relocated to space
//  allocated within the reserved IVT entry.  The bundle following it
//  is generated as a hard coded branch back to the second bundle of the
//  external interrupt IVT entry just in case the first bundle had no branch.
122//\r
123// Our new code will execute our handler, and then fall through to the\r
124// original bundle after restoring all context appropriately.\r
125//\r
126// The following is a representation of what the IVT memory map looks like with\r
127// our chained handler installed:\r
128//\r
129//\r
130// \r
131// \r
132// \r
133// This IVT entry is Failsafe bundle \r
134// reserved by the \r
135// Itanium architecture Original bundle 0 \r
136// and is used for \r
137// for locating our \r
138// handler and the \r
139// original bundle Patch code... \r
140// zero of the ext \r
141// interrupt handler \r
142// \r
143// RSVD (3400) Unused \r
144// \r
145// \r
146// \r
147// \r
148// \r
149// \r
150// \r
151// \r
152// \r
153// \r
154// \r
155// \r
156// EXT_INT (3000) Bundle 0 Bundle zero - This one is\r
157// modified, all other bundles\r
158// in the EXT_INT entry are\r
159// untouched.\r
160//\r
161//\r
162// Arguments:\r
163//\r
164// Returns:\r
165//\r
166// Notes:\r
167//\r
168//\r
// ChainHandler: installs the patch code in the reserved IVT entry and redirects
// bundle 0 of the external interrupt entry to it.  r2 holds the IVT base for
// the whole routine, so it relies on the called helpers leaving r2 untouched.
        .global ChainHandler
        .proc   ChainHandler
ChainHandler:

        NESTED_SETUP( 0,2+3,3,0 )

        mov     r2=cr.iva                   // r2 = interrupt vector address
        mov     r8=1;;                      // r8 = success (never changed below)

//
// NOTE: There's a potential hazard here in that we're simply stealing a bunch of
//       bundles (memory) from the IVT and assuming there's no catastrophic side effect.
//
// Step 1: save the IVT area taken over by the patch so it can be restored later
//
        mov     out2=0x40                           // out2 = bundle count... entire IVT entry
        movl    out1=PatchSaveBuffer                // out1 = destination buffer
        addl    out0=PATCH_ENTRY_OFFSET, r2;;       // out0 = source buffer - in IVT
        br.call.sptk.few    b0 = CopyBundles

// Step 2: copy the patch code into the IVT
        mov     out2=NUM_PATCH_BUNDLES              // out2 = number of bundles to copy
        addl    out1=PATCH_OFFSET, r2               // out1 = destination buffer - in IVT
        movl    out0=PatchCode;;                    // out0 = source buffer of patch code
        br.call.sptk.few    b0 = CopyBundles

// Step 3: copy original bundle 0 of the external interrupt handler to its
//         place in the reserved IVT interrupt slot
        mov     out2=1                              // out2 = copy 1 bundle
        addl    out1=RELOCATED_EXT_INT, r2          // out1 = destination - in reserved IVT
        addl    out0=EXT_INT_ENTRY_OFFSET, r2;;     // out0 = source - ext int bundle 0
        br.call.sptk.few    b0 = CopyBundles

// Step 4: relocate the copy, because it very likely had a branch instruction
//         that must now be fixed up.  The result of RelocateBundle is not checked.
        addl    out0=RELOCATED_EXT_INT, r2          // out0 = runtime address of bundle - in reserved IVT
        addl    out1=EXT_INT_ENTRY_OFFSET, r2       // out1 = IP address of previous location
        addl    out2=RELOCATED_EXT_INT, r2;;        // out2 = IP address of new location (same as out0)
        br.call.sptk.few    b0 = RelocateBundle

// Step 5: place the failsafe branch in the bundle after the relocated one, in
//         case the original ext int bundle 0 did not contain a branch instruction
        mov     out2=1                              // out2 = copy 1 bundle
        addl    out1=FAILSAFE_BRANCH_OFFSET, r2     // out1 = destination - in reserved IVT
        movl    out0=FailsafeBranch;;               // out0 = source buffer
        br.call.sptk.few    b0 = CopyBundles

// Step 6: finally, overwrite bundle 0 of the external interrupt IVT entry
        mov     out2=1                              // out2 = copy 1 bundle
        addl    out1=EXT_INT_ENTRY_OFFSET, r2       // out1 = destination - bundle 0 of ext int entry
        movl    out0=PatchCodeNewBun0;;             // out0 = source - our replacement bundle 0
        br.call.sptk.few    b0 = CopyBundles

ChainHandlerDone:
        NESTED_RETURN

        .endp ChainHandler
226\r
227\r
228/////////////////////////////////////////////\r
229//\r
230// Name:\r
231// UnchainHandler\r
232//\r
233// Description:\r
234// Unchains an interrupt handler\r
235//\r
236// Arguments:\r
237//\r
238// Returns:\r
239//\r
240// Notes:\r
241//\r
242//\r
// UnchainHandler: puts the original external interrupt bundle 0 back in place
// and restores the reserved IVT entry from PatchSaveBuffer.  r2 holds the IVT
// base throughout, so it relies on the called helpers leaving r2 untouched.
        .global UnchainHandler
        .proc   UnchainHandler

UnchainHandler:

        NESTED_SETUP( 0,2+3,3,0 )

        mov     r2=cr.iva                   // r2 = interrupt vector address
        mov     r8=1;;                      // r8 = success (never changed below)

// Step 1: copy original Ext Int bundle 0 back to its proper home...
        mov     out2=1                              // out2 = copy 1 bundle
        addl    out1=EXT_INT_ENTRY_OFFSET, r2       // out1 = destination - first bundle of Ext Int entry
        addl    out0=RELOCATED_EXT_INT, r2;;        // out0 = source - in reserved IVT
        br.call.sptk.few    b0 = CopyBundles

// Step 2: relocate it again, this time from the reserved entry back to the Ext Int entry
        addl    out0=EXT_INT_ENTRY_OFFSET, r2       // out0 = new runtime address of bundle
        addl    out1=RELOCATED_EXT_INT, r2          // out1 = IP address of previous location
        addl    out2=EXT_INT_ENTRY_OFFSET, r2;;     // out2 = IP address of new location (same as out0)
        br.call.sptk.few    b0 = RelocateBundle

// Step 3: restore the patch area
        mov     out2=0x40                           // out2 = bundle count... entire IVT entry
        addl    out1=PATCH_ENTRY_OFFSET, r2         // out1 = destination buffer - in IVT
        movl    out0=PatchSaveBuffer;;              // out0 = source buffer
        br.call.sptk.few    b0 = CopyBundles

UnchainHandlerDone:
        NESTED_RETURN

        .endp UnchainHandler
275\r
276\r
277/////////////////////////////////////////////\r
278//\r
279// Name:\r
280// CopyBundles\r
281//\r
282// Description:\r
283// Copies instruction bundles - flushes icache as necessary\r
284//\r
285// Arguments:\r
286// in0 - Bundle source\r
287// in1 - Bundle destination\r
288// in2 - Bundle count\r
289//\r
290// Returns:\r
291//\r
292// Notes:\r
293// This procedure is a leaf routine\r
294//\r
// CopyBundles: copies in2 bundles from [in0] to [in1] eight bytes at a time,
// flushing and serializing after every store.  A count of zero copies nothing.
        .proc   CopyBundles

CopyBundles:

        NESTED_SETUP(3,2+1,0,0)

        shl     in2=in2, 1;;                // two 8-byte words per bundle

CopyBundlesLoop:

        cmp.eq  p14, p15 = r0, in2;;        // p14 = no words left
(p14)   br.sptk.few CopyBundlesDone;;

        ld8     loc2=[in0], 0x8;;           // fetch word, advance source
        st8     [in1]=loc2;;                // write word to destination
        fc      in1;;                       // flush the line just written
        sync.i;;                            // ensure local and remote data/inst caches in sync
        srlz.i;;                            // ensure sync has been observed
        add     in2=-1, in2                 // one fewer word to copy
        add     in1=0x8, in1;;              // advance destination
        br.sptk.few CopyBundlesLoop;;

CopyBundlesDone:
        NESTED_RETURN

        .endp   CopyBundles
321\r
322\r
323/////////////////////////////////////////////\r
324//\r
325// Name:\r
326// RelocateBundle\r
327//\r
328// Description:\r
329// Relocates an instruction bundle by updating any ip-relative branch instructions.\r
330//\r
331// Arguments:\r
332// in0 - Runtime address of bundle\r
333// in1 - IP address of previous location of bundle\r
334// in2 - IP address of new location of bundle\r
335//\r
336// Returns:\r
337// in0 - 1 if successful or 0 if unsuccessful\r
338//\r
339// Notes:\r
340// This routine examines all slots in the given bundle that are destined for the\r
341// branch execution unit. If any of these slots contain an IP-relative branch\r
342// namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative\r
343// address. Errors can occur if a branch cannot be reached.\r
344//\r
// RelocateBundle: walks slots SLOT0..SLOT2 of the bundle at in0; every slot that
// IsSlotBranch reports as a branch is passed through RelocateSlot and written
// back with SetSlot.  in0 returns 1, or 0 as soon as RelocateSlot reports failure.
        .proc   RelocateBundle

RelocateBundle:

        NESTED_SETUP(3,2+4,3,0)

        mov     loc5=in0                    // loc5 = runtime address of bundle
        mov     loc2=SLOT0;;                // loc2 = slot index
        mov     in0=1;;                     // in0 = success until proven otherwise

RelocateBundleNextSlot:

        cmp.ge  p14, p15 = SLOT2, loc2;;    // p15 = slot index is past SLOT2
(p15)   br.sptk.few RelocateBundleDone

        // loc3 = template of the bundle
        mov     out0=loc5;;                 // out0 = runtime address of bundle
        br.call.sptk.few    b0 = GetTemplate
        mov     loc3=out0;;                 // loc3 = instruction template

        // loc4 = encoding held in the current slot
        mov     out1=loc2                   // out1 = instruction slot number
        mov     out0=loc5;;                 // out0 = runtime address of bundle
        br.call.sptk.few    b0 = GetSlot
        mov     loc4=out0;;                 // loc4 = instruction encoding

        // slots that are not branches are skipped
        mov     out2=loc3                   // out2 = instruction template
        mov     out1=loc2                   // out1 = instruction slot number
        mov     out0=loc4;;                 // out0 = instruction encoding
        br.call.sptk.few    b0 = IsSlotBranch
        cmp.eq  p14, p15 = 1, out0;;        // p15 = not a branch slot
(p15)   br.sptk.few RelocateBundleAdvanceSlot

        // re-encode the branch for the new location
        mov     out2=in2                    // out2 = IP address of new location
        mov     out1=in1                    // out1 = IP address of previous location
        mov     out0=loc4;;                 // out0 = instruction encoding
        br.call.sptk.few    b0 = RelocateSlot
        cmp.eq  p14, p15 = 1, out1;;        // p15 = slot could not be relocated
(p15)   mov     in0=0                       // in0 = failure
(p15)   br.sptk.few RelocateBundleDone

        // write the re-encoded instruction back into the bundle
        mov     out2=out0;;                 // out2 = new instruction encoding
        mov     out0=loc5                   // out0 = runtime address of bundle
        mov     out1=loc2;;                 // out1 = instruction slot number
        br.call.sptk.few    b0 = SetSlot

RelocateBundleAdvanceSlot:
        add     loc2=1,loc2;;               // move on to the next slot
        br.sptk.few RelocateBundleNextSlot

RelocateBundleDone:
        NESTED_RETURN

        .endp   RelocateBundle
392\r
393\r
/////////////////////////////////////////////
//
//  Name:
//      RelocateSlot
//
//  Description:
//      Relocates a single instruction slot by re-encoding its ip-relative
//      branch displacement for the new location of the enclosing bundle.
//
//  Arguments:
//      in0 - Instruction encoding (41-bits, right justified)
//      in1 - IP address of previous location of bundle
//      in2 - IP address of new location of bundle
//
//  Returns:
//      in0 - Instruction encoding (41-bits, right justified)
//      in1 - 1 if successful otherwise 0
//
//  Notes:
//      This procedure is a leaf routine
//
//      Only encodings whose opcode field (bits 40-37) is 4, 5 or 7 are
//      rewritten; anything else is returned unchanged with in1 = 1.
//      The displacement is held as a bundle count in bits 32-13 with its
//      sign in bit 36.  The routine fails (in1 = 0) when the magnitude of
//      the new byte offset exceeds 0x0FFFFFF.
//      The sign bit is taken from bit 63 of the new offset, so an offset of
//      exactly zero is encoded with a clear sign bit.
//
        .proc   RelocateSlot

RelocateSlot:
        NESTED_SETUP(3,2+5,0,0)
        extr.u  loc2=in0, 37, 4;;           // loc2 = instruction opcode
        cmp.eq  p14, p15 = 4, loc2;;        // IP-relative branch (B1) or
                                            // IP-relative counted branch (B2)
(p15)   cmp.eq  p14, p15 = 5, loc2;;        // IP-relative call (B3)
(p15)   cmp.eq  p14, p15 = 7, loc2;;        // IP-relative predict (B6)
(p15)   mov     in1=1                       // Instruction did not need to be reencoded
(p15)   br.sptk.few     RelocateSlotDone

        // Decode the old displacement and compute the absolute branch target
        tbit.nz p14, p15 = in0, 36;;        // p14 = old displacement is negative
        extr.u  loc2=in0, 13, 20;;          // loc2 = relative offset in instruction
(p14)   movl    loc3=0xfffffffffff00000;;   // extend sign
(p14)   or      loc2=loc2, loc3;;
        shl     loc2=loc2,4;;               // convert to byte offset instead of bundle offset
        add     loc3=loc2, in1;;            // loc3 = address of branch target

        // Compute the displacement from the new location and range check it
        sub     loc4=loc3,in2;;             // loc4 = relative offset from new ip to branch target
        tbit.nz p14, p15 = loc4, 63;;       // p14 = new offset is negative
(p14)   sub     loc5=r0,loc4                // loc5 = absolute value of offset
(p15)   mov     loc5=loc4;;
        movl    loc6=0x0FFFFFF;;            // maximum offset in bytes for ip-rel branch
        cmp.gt  p14, p15 = loc5, loc6;;     // check to see we're not out of range for an ip-relative branch
(p14)   br.sptk.few     RelocateSlotError

        // Encode the new displacement
        tbit.nz p14, p15 = loc4, 63;;       // p14 = new offset is negative (zero counts as positive)
(p14)   dep     in0=1,in0,36,1              // store sign bit in instruction
(p15)   dep     in0=0,in0,36,1
        shr     loc4=loc4, 4;;              // convert back to bundle offset
        dep     in0=loc4,in0,13,16;;        // put first 16 bits of new offset into instruction
        shr     loc4=loc4,16;;
        dep     in0=loc4,in0,13+16,4        // put last 4 bits of new offset into instruction
        mov     in1=1;;                     // in1 = success
        br.sptk.few     RelocateSlotDone;;

RelocateSlotError:
        mov     in1=0;;                     // in1 = failure

RelocateSlotDone:
        NESTED_RETURN

        .endp   RelocateSlot
456\r
457\r
458/////////////////////////////////////////////\r
459//\r
460// Name:\r
461// IsSlotBranch\r
462//\r
463// Description:\r
464// Determines if the given instruction is a branch instruction.\r
465//\r
466// Arguments:\r
467// in0 - Instruction encoding (41-bits, right justified)\r
468// in1 - Instruction slot number\r
469// in2 - Bundle template\r
470//\r
471// Returns:\r
472// in0 - 1 if branch or 0 if not branch\r
473//\r
474// Notes:\r
475// This procedure is a leaf routine\r
476//\r
477// IsSlotBranch recognizes all branch instructions by looking at the provided template.\r
478// The instruction encoding is only passed to this routine for future expansion.\r
479//\r
// IsSlotBranch: decides from the bundle template (in2) and slot number (in1)
// whether the slot is a branch slot.  The instruction encoding passed in in0 is
// not examined; in0 is overwritten with the 1/0 result.
        .proc   IsSlotBranch

IsSlotBranch:

        NESTED_SETUP (3,2+0,0,0)

        dep     in2=0, in2, 0, 1            // in2 = even template to reduce compares
        mov     in0=0;;                     // in0 = not a branch (default answer)

        // BBB: every slot is a branch
        cmp.eq  p14, p15 = 0x16, in2;;      // Template 0x16 is BBB
(p14)   br.sptk.few IsSlotBranchTrue

        // no other template has a branch in slot 0
        cmp.eq  p14, p15 = SLOT0, in1;;
(p14)   br.sptk.few IsSlotBranchDone

        // MBB: slots 1 and 2 are branches
        cmp.eq  p14, p15 = 0x12, in2;;      // Template 0x12 is MBB
(p14)   br.sptk.few IsSlotBranchTrue

        // no other template has a branch in slot 1
        cmp.eq  p14, p15 = SLOT1, in1;;
(p14)   br.sptk.few IsSlotBranchDone

        // slot 2: MIB, MMB and MFB end in a branch
        cmp.eq  p14, p15 = 0x10, in2;;      // Template 0x10 is MIB
(p14)   br.sptk.few IsSlotBranchTrue
        cmp.eq  p14, p15 = 0x18, in2;;      // Template 0x18 is MMB
(p14)   br.sptk.few IsSlotBranchTrue
        cmp.eq  p14, p15 = 0x1C, in2;;      // Template 0x1C is MFB
(p14)   br.sptk.few IsSlotBranchTrue
        br.sptk.few IsSlotBranchDone

IsSlotBranchTrue:
        mov     in0=1;;                     // in0 = branch

IsSlotBranchDone:
        NESTED_RETURN

        .endp   IsSlotBranch
512\r
513\r
514/////////////////////////////////////////////\r
515//\r
516// Name:\r
517// GetTemplate\r
518//\r
519// Description:\r
520// Retrieves the instruction template for an instruction bundle\r
521//\r
522// Arguments:\r
523// in0 - Runtime address of bundle\r
524//\r
525// Returns:\r
526// in0 - Instruction template (5-bits, right-justified)\r
527//\r
528// Notes:\r
529// This procedure is a leaf routine\r
530//\r
// GetTemplate: loads the first 8 bytes of the bundle at in0 and returns them
// masked with MASK_0_4 in in0.
        .proc   GetTemplate

GetTemplate:

        NESTED_SETUP (1,2+2,0,0)

        movl    loc3=MASK_0_4               // loc3 = template mask
        ld8     loc2=[in0];;                // loc2 = first 8 bytes of bundle
        and     in0=loc2,loc3;;             // in0 = template, right justified

        NESTED_RETURN

        .endp   GetTemplate
545\r
546\r
/////////////////////////////////////////////
//
//  Name:
//      GetSlot
//
//  Description:
//      Gets the instruction encoding for an instruction slot and bundle
//
//  Arguments:
//      in0 - Runtime address of bundle
//      in1 - Instruction slot (either 0, 1, or 2)
//
//  Returns:
//      in0 - Instruction encoding (41-bits, right justified)
//
//  Notes:
//      This procedure is a leaf routine
//
//      Any slot number other than 1 or 2 is treated as slot 0.
//
//      Slot0 - [in0]       Bits 45-5
//      Slot1 - [in0]       Bits 63-46 and [in0 + 0x8] Bits 22-0
//      Slot2 - [in0 + 0x8] Bits 63-23
//
        .proc   GetSlot

GetSlot:
        NESTED_SETUP (2,2+3,0,0)

        ld8     loc2=[in0], 0x8;;           // loc2 = first 8 bytes of branch bundle
        ld8     loc3=[in0];;                // loc3 = second 8 bytes of branch bundle
        cmp.eq  p14, p15 = 2, in1;;         // check if slot 2 specified
(p14)   br.cond.sptk.few    GetSlot2;;      // get slot 2
        cmp.eq  p14, p15 = 1, in1;;         // check if slot 1 specified
(p14)   br.cond.sptk.few    GetSlot1;;      // get slot 1

GetSlot0:
        extr.u  in0=loc2, 5, 41             // in0 = extracted slot 0 (exactly 41 bits, none of slot 1)
        br.sptk.few     GetSlotDone;;

GetSlot1:
        // dep from a register deposits at most 16 bits, so the upper 23 bits
        // of the slot are inserted in two pieces (15 + 8).
        extr.u  in0=loc2, 46, 18            // in0 = bits 63-46 of loc2 right-justified
        extr.u  loc4=loc3, 0, 23;;          // loc4 = bits 22-0 of loc3 right-justified
        dep     in0=loc4, in0, 18, 15;;     // slot bits 32-18
        shr.u   loc4=loc4,15;;
        dep     in0=loc4, in0, 33, 8;;      // slot bits 40-33; in0 = extracted slot 1
        br.sptk.few     GetSlotDone;;

GetSlot2:
        extr.u  in0=loc3, 23, 41;;          // in0 = extracted slot 2

GetSlotDone:
        NESTED_RETURN

        .endp   GetSlot
600\r
601\r
/////////////////////////////////////////////
//
//  Name:
//      SetSlot
//
//  Description:
//      Sets the instruction encoding for an instruction slot and bundle
//
//  Arguments:
//      in0 - Runtime address of bundle
//      in1 - Instruction slot (either 0, 1, or 2)
//      in2 - Instruction encoding (41-bits, right justified)
//
//  Returns:
//
//  Notes:
//      This procedure is a leaf routine
//
//      Any slot number other than 1 or 2 is treated as slot 0.
//      Only the low 41 bits of in2 are written into the bundle.
//
//      The two loads below leave in0 pointing at the second 8 bytes of the
//      bundle, so the first 8 bytes are stored back at in0 - 8.
//
//      After the bundle has been patched the line containing it is flushed
//      and instruction fetch is serialized, in the same way CopyBundles does
//      after each store.
//
        .proc   SetSlot

SetSlot:
        NESTED_SETUP (3,2+3,0,0)

        ld8     loc2=[in0], 0x8;;           // loc2 = first 8 bytes of bundle; in0 -> second 8 bytes
        ld8     loc3=[in0];;                // loc3 = second 8 bytes of bundle
        cmp.eq  p14, p15 = 2, in1;;         // check if slot 2 specified
(p14)   br.cond.sptk.few    SetSlot2;;      // set slot 2
        cmp.eq  p14, p15 = 1, in1;;         // check if slot 1 specified
(p14)   br.cond.sptk.few    SetSlot1;;      // set slot 1

SetSlot0:
        dep     loc2=0, loc2, 5, 41;;       // remove old instruction from slot 0
        dep.z   loc4=in2, 5, 41;;           // loc4 = low 41 bits of new instruction, positioned at bit 5
        or      loc2=loc2, loc4;;           // loc2 = updated first 8 bytes of bundle
        add     loc4=-8, in0;;              // loc4 = address of first 8 bytes of bundle
        st8     [loc4]=loc2;;               // [loc4] = updated bundle
        br.sptk.few     SetSlotDone;;

SetSlot1:
        dep     loc2=0, loc2, 46, 18        // remove old instruction from slot 1
        dep     loc3=0, loc3, 0, 23;;
        shl     loc4=in2, 46;;              // loc4 = low 18 bits of instruction, positioned at bit 46
        or      loc2=loc2, loc4;;           // loc2 = updated first 8 bytes of bundle
        add     loc4=-8, in0;;              // loc4 = address of first 8 bytes of bundle
        st8     [loc4]=loc2;;               // [loc4] = updated bundle
        extr.u  loc4=in2, 18, 23;;          // loc4 = instruction bits 40-18, right justified
        or      loc3=loc3, loc4;;           // loc3 = updated second 8 bytes of bundle
        st8     [in0]=loc3;;                // [in0] = updated bundle
        br.sptk.few     SetSlotDone;;

SetSlot2:
        dep     loc3=0, loc3, 23, 41;;      // remove old instruction from slot 2
        shl     loc4=in2, 23;;              // loc4 = instruction ready to be inserted
        or      loc3=loc3, loc4;;           // loc3 = updated second 8 bytes of bundle
        st8     [in0]=loc3;;                // [in0] = updated bundle

SetSlotDone:
        // in0 still addresses the second half of the patched bundle; both
        // halves of a 16-byte aligned bundle share one 32-byte block.
        fc      in0;;                       // flush the patched bundle
        sync.i;;                            // ensure local and remote data/inst caches in sync
        srlz.i;;                            // ensure sync has been observed

        NESTED_RETURN
        .endp   SetSlot
663\r
664\r
665/////////////////////////////////////////////\r
666//\r
667// Name:\r
668// GetIva\r
669//\r
670// Description:\r
671// C callable function to obtain the current value of IVA\r
672//\r
673// Returns:\r
//      Current value of IVA
675\r
// GetIva: returns the interruption vector address control register in r8.
        .global GetIva
        .proc   GetIva
GetIva:
        mov     r8=cr.iva;;                 // r8 = IVA (control register 2)
        br.ret.sptk.many    b0

        .endp   GetIva
683\r
684\r
685/////////////////////////////////////////////\r
686//\r
687// Name:\r
688// ProgramInterruptFlags\r
689//\r
690// Description:\r
691// C callable function to enable/disable interrupts\r
692//\r
693// Returns:\r
//      Previous state of psr.i and psr.ic (PSR bits selected by mask 0x6000)
695//\r
// ProgramInterruptFlags: replaces the PSR bits selected by mask 0x6000 with the
// corresponding bits of in0 and returns their previous values in r8.
        .global ProgramInterruptFlags
        .proc   ProgramInterruptFlags
ProgramInterruptFlags:
        alloc   loc0=1,2,0,0;;
        mov     loc1=0x6000                 // loc1 = mask of the psr.i / psr.ic bits
        mov     loc0=psr;;                  // loc0 = current psr
        and     in0=in0, loc1               // keep only the masked bits of the request
        and     r8=loc0, loc1               // r8 = previous psr.ic and psr.i state
        andcm   loc0=loc0,loc1;;            // clear original psr.i and psr.ic
        or      loc0=loc0,in0;;             // merge in the requested bits
        mov     psr.l=loc0;;                // write new psr
        srlz.d
        br.ret.sptk.many    b0              // return

        .endp   ProgramInterruptFlags
711\r
712\r
713/////////////////////////////////////////////\r
714//\r
715// Name:\r
716// SpillContext\r
717//\r
718// Description:\r
719// Saves system context to context record.\r
720//\r
721// Arguments:\r
722// in0 = 512 byte aligned context record address\r
723// in1 = original B0\r
724// in2 = original ar.bsp\r
725// in3 = original ar.bspstore\r
726// in4 = original ar.rnat\r
727// in5 = original ar.pfs\r
728//\r
729// Notes:\r
730// loc0 - scratch\r
731// loc1 - scratch\r
732// loc2 - temporary application unat storage\r
733// loc3 - temporary exception handler unat storage\r
734\r
// SpillContext: writes the register context to consecutive locations starting
// at in0, advancing in0 after every store.  The layout is, in order:
//   r0-r31 (8 bytes each), f2-f31 (16 bytes each), pr, b0-b7, application
//   registers, control registers, dbr0-7, ibr0-7 and finally the UNAT value
//   collected while spilling r0-r31.
// Every 8-byte item is written with st8.spill, so ar.unat is modified by this
// routine and is not restored here.
        .proc   SpillContext

SpillContext:
        alloc   loc0=6,4,0,0;;              // alloc 6 input, 4 locals, 0 outs
        mov     loc2=ar.unat;;              // loc2 = application unat (stored further down)
        mov     ar.unat=r0;;                // set UNAT=0

        // general registers r0 - r31
        st8.spill   [in0]=r0,8;;
        st8.spill   [in0]=r1,8;;
        st8.spill   [in0]=r2,8;;
        st8.spill   [in0]=r3,8;;
        st8.spill   [in0]=r4,8;;
        st8.spill   [in0]=r5,8;;
        st8.spill   [in0]=r6,8;;
        st8.spill   [in0]=r7,8;;
        st8.spill   [in0]=r8,8;;
        st8.spill   [in0]=r9,8;;
        st8.spill   [in0]=r10,8;;
        st8.spill   [in0]=r11,8;;
        st8.spill   [in0]=r12,8;;
        st8.spill   [in0]=r13,8;;
        st8.spill   [in0]=r14,8;;
        st8.spill   [in0]=r15,8;;
        st8.spill   [in0]=r16,8;;
        st8.spill   [in0]=r17,8;;
        st8.spill   [in0]=r18,8;;
        st8.spill   [in0]=r19,8;;
        st8.spill   [in0]=r20,8;;
        st8.spill   [in0]=r21,8;;
        st8.spill   [in0]=r22,8;;
        st8.spill   [in0]=r23,8;;
        st8.spill   [in0]=r24,8;;
        st8.spill   [in0]=r25,8;;
        st8.spill   [in0]=r26,8;;
        st8.spill   [in0]=r27,8;;
        st8.spill   [in0]=r28,8;;
        st8.spill   [in0]=r29,8;;
        st8.spill   [in0]=r30,8;;
        st8.spill   [in0]=r31,8;;
        mov     loc3=ar.unat;;              // loc3 = unat produced by the spills above (stored last)

        // floating point registers f2 - f31
        stf.spill   [in0]=f2,16;;
        stf.spill   [in0]=f3,16;;
        stf.spill   [in0]=f4,16;;
        stf.spill   [in0]=f5,16;;
        stf.spill   [in0]=f6,16;;
        stf.spill   [in0]=f7,16;;
        stf.spill   [in0]=f8,16;;
        stf.spill   [in0]=f9,16;;
        stf.spill   [in0]=f10,16;;
        stf.spill   [in0]=f11,16;;
        stf.spill   [in0]=f12,16;;
        stf.spill   [in0]=f13,16;;
        stf.spill   [in0]=f14,16;;
        stf.spill   [in0]=f15,16;;
        stf.spill   [in0]=f16,16;;
        stf.spill   [in0]=f17,16;;
        stf.spill   [in0]=f18,16;;
        stf.spill   [in0]=f19,16;;
        stf.spill   [in0]=f20,16;;
        stf.spill   [in0]=f21,16;;
        stf.spill   [in0]=f22,16;;
        stf.spill   [in0]=f23,16;;
        stf.spill   [in0]=f24,16;;
        stf.spill   [in0]=f25,16;;
        stf.spill   [in0]=f26,16;;
        stf.spill   [in0]=f27,16;;
        stf.spill   [in0]=f28,16;;
        stf.spill   [in0]=f29,16;;
        stf.spill   [in0]=f30,16;;
        stf.spill   [in0]=f31,16;;

        // predicates
        mov     loc0=pr;;
        st8.spill   [in0]=loc0,8;;

        // branch registers b0 - b7; b0 comes from the caller in in1
        st8.spill   [in0]=in1,8;;
        mov     loc0=b1;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b2;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b3;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b4;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b5;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b6;;
        st8.spill   [in0]=loc0,8;;
        mov     loc0=b7;;
        st8.spill   [in0]=loc0,8;;

        // application registers; bsp, bspstore, rnat and pfs come from the caller
        mov     loc0=ar.rsc;;               // ar.rsc
        st8.spill   [in0]=loc0,8;;
        st8.spill   [in0]=in2,8;;           // ar.bsp (in2)
        st8.spill   [in0]=in3,8;;           // ar.bspstore (in3)
        st8.spill   [in0]=in4,8;;           // ar.rnat (in4)
        mov     loc0=ar.fcr;;               // ar.fcr (ar21 - IA32 floating-point control register)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.eflag;;             // ar.eflag (ar24)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.csd;;               // ar.csd (ar25 - ia32 CS descriptor)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.ssd;;               // ar.ssd (ar26 - ia32 ss descriptor)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.cflg;;              // ar.cflg (ar27 - ia32 cr0 and cr4)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.fsr;;               // ar.fsr (ar28 - ia32 floating-point status register)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.fir;;               // ar.fir (ar29 - ia32 floating-point instruction register)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.fdr;;               // ar.fdr (ar30 - ia32 floating-point data register)
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.ccv;;               // ar.ccv
        st8.spill   [in0]=loc0,8;;
        st8.spill   [in0]=loc2,8;;          // ar.unat (captured in loc2 on entry)
        mov     loc0=ar.fpsr;;              // floating point status register
        st8.spill   [in0]=loc0,8;;
        st8.spill   [in0]=in5,8;;           // ar.pfs (in5)
        mov     loc0=ar.lc;;                // ar.lc
        st8.spill   [in0]=loc0,8;;
        mov     loc0=ar.ec;;                // ar.ec
        st8.spill   [in0]=loc0,8;;

        // control registers
        mov     loc0=cr.dcr;;               // dcr
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.itm;;               // itm
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.iva;;               // iva
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.pta;;               // pta
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.ipsr;;              // ipsr
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.isr;;               // isr
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.iip;;               // iip
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.ifa;;               // ifa
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.itir;;              // itir
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.iipa;;              // iipa
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.ifs;;               // ifs
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.iim;;               // iim
        st8.spill   [in0]=loc0,8;;
        mov     loc0=cr.iha;;               // iha
        st8.spill   [in0]=loc0,8;;

        // data breakpoint registers dbr0 - dbr7 (loc1 = register index)
        mov     loc0=dbr[r0];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=1;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=2;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=3;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=4;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=5;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=6;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=7;;
        mov     loc0=dbr[loc1];;
        st8.spill   [in0]=loc0,8;;

        // instruction breakpoint registers ibr0 - ibr7 (loc1 = register index)
        mov     loc0=ibr[r0];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=1;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=2;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=3;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=4;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=5;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=6;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;
        movl    loc1=7;;
        mov     loc0=ibr[loc1];;
        st8.spill   [in0]=loc0,8;;

        // last element: unat captured after spilling r0-r31; in0 is left
        // pointing at this element (no post-increment)
        st8.spill   [in0]=loc3;;

        br.ret.sptk.few     b0

        .endp   SpillContext
933\r
934\r
935/////////////////////////////////////////////\r
936//\r
937// Name:\r
938// FillContext\r
939//\r
940// Description:\r
941// Restores register context from context record.\r
942//\r
943// Arguments:\r
//      in0 = address of the last element of the 512 byte aligned context record
945// in1 = modified B0\r
946// in2 = modified ar.bsp\r
947// in3 = modified ar.bspstore\r
948// in4 = modified ar.rnat\r
949// in5 = modified ar.pfs\r
950//\r
951// Notes:\r
952// loc0 - scratch\r
953// loc1 - scratch\r
954// loc2 - temporary application unat storage\r
955// loc3 - temporary exception handler unat storage\r
956\r
957 .proc FillContext\r
958FillContext:\r
959 alloc loc0=6,4,0,0;; // alloc 6 inputs, 4 locals, 0 outs\r
960 ld8.fill loc3=[in0],-8;; // int_nat (nat bits for R1-31)\r
961 movl loc1=7;; // ibr7\r
962 ld8.fill loc0=[in0],-8;;\r
963 mov ibr[loc1]=loc0;;\r
964 movl loc1=6;; // ibr6\r
965 ld8.fill loc0=[in0],-8;;\r
966 mov ibr[loc1]=loc0;;\r
967 movl loc1=5;; // ibr5\r
968 ld8.fill loc0=[in0],-8;;\r
969 mov ibr[loc1]=loc0;;\r
970 movl loc1=4;; // ibr4\r
971 ld8.fill loc0=[in0],-8;;\r
972 mov ibr[loc1]=loc0;;\r
973 movl loc1=3;; // ibr3\r
974 ld8.fill loc0=[in0],-8;;\r
975 mov ibr[loc1]=loc0;;\r
976 movl loc1=2;; // ibr2\r
977 ld8.fill loc0=[in0],-8;;\r
978 mov ibr[loc1]=loc0;;\r
979 movl loc1=1;; // ibr1\r
980 ld8.fill loc0=[in0],-8;;\r
981 mov ibr[loc1]=loc0;;\r
982 ld8.fill loc0=[in0],-8;; // ibr0\r
983 mov ibr[r0]=loc0;;\r
984 movl loc1=7;; // dbr7\r
985 ld8.fill loc0=[in0],-8;;\r
986 mov dbr[loc1]=loc0;;\r
987 movl loc1=6;; // dbr6\r
988 ld8.fill loc0=[in0],-8;;\r
989 mov dbr[loc1]=loc0;;\r
990 movl loc1=5;; // dbr5\r
991 ld8.fill loc0=[in0],-8;;\r
992 mov dbr[loc1]=loc0;;\r
993 movl loc1=4;; // dbr4\r
994 ld8.fill loc0=[in0],-8;;\r
995 mov dbr[loc1]=loc0;;\r
996 movl loc1=3;; // dbr3\r
997 ld8.fill loc0=[in0],-8;;\r
998 mov dbr[loc1]=loc0;;\r
999 movl loc1=2;; // dbr2\r
1000 ld8.fill loc0=[in0],-8;;\r
1001 mov dbr[loc1]=loc0;;\r
1002 movl loc1=1;; // dbr1\r
1003 ld8.fill loc0=[in0],-8;;\r
1004 mov dbr[loc1]=loc0;;\r
1005 ld8.fill loc0=[in0],-8;; // dbr0\r
1006 mov dbr[r0]=loc0;;\r
1007 ld8.fill loc0=[in0],-8;; // iha\r
1008 mov cr.iha=loc0;;\r
1009 ld8.fill loc0=[in0],-8;; // iim\r
1010 mov cr.iim=loc0;;\r
1011 ld8.fill loc0=[in0],-8;; // ifs\r
1012 mov cr.ifs=loc0;;\r
1013 ld8.fill loc0=[in0],-8;; // iipa\r
1014 mov cr.iipa=loc0;;\r
1015 ld8.fill loc0=[in0],-8;; // itir\r
1016 mov cr.itir=loc0;;\r
1017 ld8.fill loc0=[in0],-8;; // ifa\r
1018 mov cr.ifa=loc0;;\r
1019 ld8.fill loc0=[in0],-8;; // iip\r
1020 mov cr.iip=loc0;;\r
1021 ld8.fill loc0=[in0],-8;; // isr\r
1022 mov cr.isr=loc0;;\r
1023 ld8.fill loc0=[in0],-8;; // ipsr\r
1024 mov cr.ipsr=loc0;;\r
1025 ld8.fill loc0=[in0],-8;; // pta\r
1026 mov cr.pta=loc0;;\r
1027 ld8.fill loc0=[in0],-8;; // iva\r
1028 mov cr.iva=loc0;;\r
1029 ld8.fill loc0=[in0],-8;; // itm\r
1030 mov cr.itm=loc0;;\r
1031 ld8.fill loc0=[in0],-8;; // dcr\r
1032 mov cr.dcr=loc0;;\r
1033 ld8.fill loc0=[in0],-8;; // ec\r
1034 mov ar.ec=loc0;;\r
1035 ld8.fill loc0=[in0],-8;; // lc\r
1036 mov ar.lc=loc0;;\r
1037 ld8.fill in5=[in0],-8;; // ar.pfs\r
1038 ld8.fill loc0=[in0],-8;; // ar.fpsr\r
1039 mov ar.fpsr=loc0;;\r
1040 ld8.fill loc2=[in0],-8;; // ar.unat - restored later...\r
1041 ld8.fill loc0=[in0],-8;; // ar.ccv\r
1042 mov ar.ccv=loc0;;\r
1043 ld8.fill loc0=[in0],-8;; // ar.fdr\r
1044 mov ar.fdr=loc0;;\r
1045 ld8.fill loc0=[in0],-8;; // ar.fir\r
1046 mov ar.fir=loc0;;\r
1047 ld8.fill loc0=[in0],-8;; // ar.fsr\r
1048 mov ar.fsr=loc0;;\r
1049 ld8.fill loc0=[in0],-8;; // ar.cflg\r
1050 mov ar.cflg=loc0;;\r
1051 ld8.fill loc0=[in0],-8;; // ar.ssd\r
1052 mov ar.ssd=loc0;;\r
1053 ld8.fill loc0=[in0],-8;; // ar.csd\r
1054 mov ar.csd=loc0;;\r
1055 ld8.fill loc0=[in0],-8;; // ar.eflag\r
1056 mov ar.eflag=loc0;;\r
1057 ld8.fill loc0=[in0],-8;; // ar.fcr\r
1058 mov ar.fcr=loc0;;\r
1059 ld8.fill in4=[in0],-8;; // ar.rnat\r
1060 ld8.fill in3=[in0],-8;; // bspstore\r
1061 ld8.fill in2=[in0],-8;; // bsp\r
1062 ld8.fill loc0=[in0],-8;; // ar.rsc\r
1063 mov ar.rsc=loc0;;\r
1064 ld8.fill loc0=[in0],-8;; // B7 - B0\r
1065 mov b7=loc0;;\r
1066 ld8.fill loc0=[in0],-8;;\r
1067 mov b6=loc0;;\r
1068 ld8.fill loc0=[in0],-8;;\r
1069 mov b5=loc0;;\r
1070 ld8.fill loc0=[in0],-8;;\r
1071 mov b4=loc0;;\r
1072 ld8.fill loc0=[in0],-8;;\r
1073 mov b3=loc0;;\r
1074 ld8.fill loc0=[in0],-8;;\r
1075 mov b2=loc0;;\r
1076 ld8.fill loc0=[in0],-8;;\r
1077 mov b1=loc0;;\r
1078 ld8.fill in1=[in0],-8;; // b0 is temporarily stored in in1\r
1079 ld8.fill loc0=[in0],-16;; // predicates\r
1080 mov pr=loc0;;\r
1081 ldf.fill f31=[in0],-16;;\r
1082 ldf.fill f30=[in0],-16;;\r
1083 ldf.fill f29=[in0],-16;;\r
1084 ldf.fill f28=[in0],-16;;\r
1085 ldf.fill f27=[in0],-16;;\r
1086 ldf.fill f26=[in0],-16;;\r
1087 ldf.fill f25=[in0],-16;;\r
1088 ldf.fill f24=[in0],-16;;\r
1089 ldf.fill f23=[in0],-16;;\r
1090 ldf.fill f22=[in0],-16;;\r
1091 ldf.fill f21=[in0],-16;;\r
1092 ldf.fill f20=[in0],-16;;\r
1093 ldf.fill f19=[in0],-16;;\r
1094 ldf.fill f18=[in0],-16;;\r
1095 ldf.fill f17=[in0],-16;;\r
1096 ldf.fill f16=[in0],-16;;\r
1097 ldf.fill f15=[in0],-16;;\r
1098 ldf.fill f14=[in0],-16;;\r
1099 ldf.fill f13=[in0],-16;;\r
1100 ldf.fill f12=[in0],-16;;\r
1101 ldf.fill f11=[in0],-16;;\r
1102 ldf.fill f10=[in0],-16;;\r
1103 ldf.fill f9=[in0],-16;;\r
1104 ldf.fill f8=[in0],-16;;\r
1105 ldf.fill f7=[in0],-16;;\r
1106 ldf.fill f6=[in0],-16;;\r
1107 ldf.fill f5=[in0],-16;;\r
1108 ldf.fill f4=[in0],-16;;\r
1109 ldf.fill f3=[in0],-16;;\r
1110 ldf.fill f2=[in0],-8;;\r
1111 mov ar.unat=loc3;; // restore unat (int_nat) before fill of general registers\r
1112 ld8.fill r31=[in0],-8;;\r
1113 ld8.fill r30=[in0],-8;;\r
1114 ld8.fill r29=[in0],-8;;\r
1115 ld8.fill r28=[in0],-8;;\r
1116 ld8.fill r27=[in0],-8;;\r
1117 ld8.fill r26=[in0],-8;;\r
1118 ld8.fill r25=[in0],-8;;\r
1119 ld8.fill r24=[in0],-8;;\r
1120 ld8.fill r23=[in0],-8;;\r
1121 ld8.fill r22=[in0],-8;;\r
1122 ld8.fill r21=[in0],-8;;\r
1123 ld8.fill r20=[in0],-8;;\r
1124 ld8.fill r19=[in0],-8;;\r
1125 ld8.fill r18=[in0],-8;;\r
1126 ld8.fill r17=[in0],-8;;\r
1127 ld8.fill r16=[in0],-8;;\r
1128 ld8.fill r15=[in0],-8;;\r
1129 ld8.fill r14=[in0],-8;;\r
1130 ld8.fill r13=[in0],-8;;\r
1131 ld8.fill r12=[in0],-8;;\r
1132 ld8.fill r11=[in0],-8;;\r
1133 ld8.fill r10=[in0],-8;;\r
1134 ld8.fill r9=[in0],-8;;\r
1135 ld8.fill r8=[in0],-8;;\r
1136 ld8.fill r7=[in0],-8;;\r
1137 ld8.fill r6=[in0],-8;;\r
1138 ld8.fill r5=[in0],-8;;\r
1139 ld8.fill r4=[in0],-8;;\r
1140 ld8.fill r3=[in0],-8;;\r
1141 ld8.fill r2=[in0],-8;;\r
1142 ld8.fill r1=[in0],-8;;\r
1143 mov ar.unat=loc2;; // restore application context unat\r
1144\r
1145 br.ret.sptk.many b0\r
1146\r
1147 .endp FillContext\r
1148\r
1149\r
1150/////////////////////////////////////////////\r
1151//\r
1152// Name:\r
1153// HookHandler\r
1154//\r
1155// Description:\r
1156// Common branch target from hooked IVT entries. Runs in interrupt context.\r
1157// Responsible for saving and restoring context and calling the common C\r
1158// handler. Bank 0 of the banked registers is active at entry.\r
1159//\r
1160// Arguments:\r
1161// All arguments are passed in banked registers:\r
1162// B0_REG = Original B0\r
1163// SCRATCH_REG1 = IVT entry index\r
1164//\r
1165// Returns:\r
1166// Returns via rfi, or branches to cr.iva + PATCH_RETURN_OFFSET when the vector is EXCPT_EXTERNAL_INTERRUPT\r
1167//\r
1168// Notes:\r
1169// loc0 - scratch\r
1170// loc1 - scratch\r
1171// loc2 - vector number + chain flag (reused for the EXCPT_EXTERNAL_INTERRUPT compare value at the end)\r
1172// loc3 - 512 byte aligned context record address (reused as the RSC.loadrs mask after FillContext)\r
1173// loc4 - temporary storage of last address in context record\r
1174\r
1175HookHandler:\r
1176 flushrs;; // Synch RSE with backing store\r
1177 mov SCRATCH_REG2=ar.bsp // save interrupted context bsp\r
1178 mov SCRATCH_REG3=ar.bspstore // save interrupted context bspstore\r
1179 mov SCRATCH_REG4=ar.rnat // save interrupted context rnat\r
1180 mov SCRATCH_REG6=cr.ifs;; // save IFS in case we need to chain...\r
1181 cover;; // creates new frame, moves old\r
1182 // CFM to IFS.\r
1183 alloc SCRATCH_REG5=0,5,6,0 // alloc 0 inputs, 5 locals, 6 outs; SCRATCH_REG5 receives ar.pfs\r
1184 ;;\r
1185 // copy bank 0 values into stacked registers (outs and loc2) before switching to bank 1\r
1186 mov out1=B0_REG // out1 = Original B0\r
1187 mov out2=SCRATCH_REG2 // out2 = original ar.bsp\r
1188 mov out3=SCRATCH_REG3 // out3 = original ar.bspstore\r
1189 mov out4=SCRATCH_REG4 // out4 = original ar.rnat\r
1190 mov out5=SCRATCH_REG5 // out5 = original ar.pfs\r
1191 mov loc2=SCRATCH_REG1;; // loc2 = vector number + chain flag\r
1192 bsw.1;; // switch banked registers to bank 1\r
1193 srlz.d // explicit serialize required\r
1194 // now fill in context record structure\r
1195 movl loc3=IpfContextBuf // Ensure context record is aligned\r
1196 add loc0=-0x200,r0;; // mask that clears the lower 9 bits (align on 512 byte boundary)\r
1197 and loc3=loc3,loc0;;\r
1198 add loc3=0x200,loc3;; // move to next 512 byte boundary\r
1199 // loc3 now contains the 512 byte aligned context record\r
1200 // spill register context into context record\r
1201 mov out0=loc3;; // Context record base in out0\r
1202 // original B0 in out1 already\r
1203 // original ar.bsp in out2 already\r
1204 // original ar.bspstore in out3 already\r
1205 br.call.sptk.few b0=SpillContext;; // spill context\r
1206 mov loc4=out0 // save out0 as left by SpillContext; passed to FillContext below as the last-element address\r
1207\r
1208 // At this point, the context has been saved to the context record and we're\r
1209 // ready to call the C part of the handler...\r
1210\r
1211 movl loc0=CommonHandler;; // obtain address of plabel\r
1212 ld8 loc1=[loc0];; // get entry point of CommonHandler\r
1213 mov b6=loc1;; // put it in a branch register\r
1214 adds loc1= 8, loc0;; // index to GP in plabel\r
1215 ld8 r1=[loc1];; // set up gp for C call\r
1216 mov loc1=0xfffff;; // mask off so only vector bits are present\r
1217 and out0=loc2,loc1;; // pass vector number (exception type)\r
1218 mov out1=loc3;; // pass context record address\r
1219 br.call.sptk.few b0=b6;; // call C handler\r
1220\r
1221 // We've returned from the C call, so restore the context and either rfi\r
1222 // back to interrupted thread, or chain into the SAL if this was an external interrupt\r
1223 mov out0=loc4;; // pass address of last element in context record\r
1224 br.call.sptk.few b0=FillContext;; // Fill context\r
1225 mov b0=out1 // fill in b0 from the value FillContext left in its in1 (our out1)\r
1226 mov ar.rnat=out4\r
1227 mov ar.pfs=out5\r
1228\r
1229 // Loadrs is necessary because the debugger may have changed some values in\r
1230 // the backing store. The processor, however, may not be aware that the\r
1231 // stacked registers need to be reloaded from the backing store. Therefore,\r
1232 // we explicitly cause the RSE to refresh the stacked registers' contents\r
1233 // from the backing store.\r
1234 mov loc0=ar.rsc // get RSC value\r
1235 mov loc1=ar.rsc // save it so we can restore it\r
1236 movl loc3=0xffffffffc000ffff;; // create mask for clearing RSC.loadrs (bits 16-29)\r
1237 and loc0=loc0,loc3;; // create value for RSC with RSC.loadrs==0\r
1238 mov ar.rsc=loc0;; // modify RSC\r
1239 loadrs;; // invalidate register stack\r
1240 mov ar.rsc=loc1;; // restore original RSC\r
1241\r
1242 bsw.0;; // switch banked registers back to bank 0\r
1243 srlz.d;; // explicit serialize required\r
1244 mov PR_REG=pr // save predicates - to be restored after chaining decision\r
1245 mov B0_REG=b0 // save b0 - required by chain code\r
1246 mov loc2=EXCPT_EXTERNAL_INTERRUPT;;\r
1247 cmp.eq p7,p0=SCRATCH_REG1,loc2;; // was this vector the external interrupt? If so, chain\r
1248 (p7) br.cond.dpnt.few DO_CHAIN;;\r
1249\r
1250NO_CHAIN:\r
1251 mov pr=PR_REG;;\r
1252 rfi;; // return to the interrupted context\r
1253\r
1254DO_CHAIN:\r
1255 mov pr=PR_REG\r
1256 mov SCRATCH_REG1=cr.iva\r
1257 mov SCRATCH_REG2=PATCH_RETURN_OFFSET;;\r
1258 add SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;;\r
1259 mov b0=SCRATCH_REG1;;\r
1260 br.cond.sptk.few b0;;\r
1261\r
1262EndHookHandler:\r
1263\r
1264\r
1265/////////////////////////////////////////////\r
1266//\r
1267// Name:\r
1268// HookStub\r
1269//\r
1270// Description:\r
1271// HookStub will be copied from its loaded location into the IVT when\r
1272// an IVT entry is hooked. The IVT entry does an indirect jump via B0 to\r
1273// HookHandler, which in turn calls into the default C handler, which calls\r
1274// the user-installed C handler. The calls return and HookHandler executes\r
1275// an rfi.\r
1276//\r
1277// Notes:\r
1278// Saves B0 to B0_REG\r
1279// Saves IVT index to SCRATCH_REG1 (the immediate value is fixed up when the code is copied\r
1280// to the IVT entry).\r
1281\r
1282 .global HookStub\r
1283 .proc HookStub\r
1284HookStub:\r
1285\r
1286 mov B0_REG=b0\r
1287 movl SCRATCH_REG1=HookHandler;;\r
1288 mov b0=SCRATCH_REG1;;\r
1289 mov SCRATCH_REG1=0;;// placeholder immediate; fixed up during install of the handler to be the vector number\r
1290 br.cond.sptk.few b0\r
1291\r
1292 .endp HookStub\r
1293\r
1294\r
1295/////////////////////////////////////////////\r
1296// The following code is moved into IVT entry 14 (offset 0x3400), which is reserved\r
1297// in the Itanium architecture. The patch code is located at the end of the\r
1298// IVT entry.\r
1299\r
1300PatchCode:\r
1301 mov SCRATCH_REG0=psr\r
1302 mov SCRATCH_REG6=cr.ipsr\r
1303 mov PR_REG=pr\r
1304 mov B0_REG=b0;;\r
1305\r
1306 // turn off data and RSE address translation (clear PSR.dt and PSR.rt)\r
1307 movl SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;\r
1308 and SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;\r
1309 mov psr.l = SCRATCH_REG1;;\r
1310 srlz.d \r
1311 tbit.z p14, p15 = SCRATCH_REG6, PSR_IS;; // Check to see if we were\r
1312 // interrupted from IA32\r
1313 // context. If so, bail out\r
1314 // and chain to SAL immediately\r
1315 (p15) br.cond.sptk.few Stub_IVT_Passthru;;\r
1316 // we only want to take 1 out of 32 external interrupts to minimize the\r
1317 // impact on system performance. Check our interrupt count and bail\r
1318 // out if bit 5 of the count is not yet set (count has not reached 32)\r
1319 movl SCRATCH_REG1=ExternalInterruptCount;;\r
1320 ld8 SCRATCH_REG2=[SCRATCH_REG1];; // ExternalInterruptCount\r
1321 tbit.z p14, p15 = SCRATCH_REG2, 5;; // bit 5 set?\r
1322 (p14) add SCRATCH_REG2=1, SCRATCH_REG2;; // No? Then increment\r
1323 // ExternalInterruptCount\r
1324 // and Chain to SAL\r
1325 // immediately\r
1326 (p14) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
1327 (p14) br.cond.sptk.few Stub_IVT_Passthru;;\r
1328 (p15) mov SCRATCH_REG2=0;; // Yes? Then reset\r
1329 // ExternalInterruptCount\r
1330 // and branch to\r
1331 // HookHandler\r
1332 (p15) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
1333 mov pr=PR_REG\r
1334 movl SCRATCH_REG1=HookHandler;; // SCRATCH_REG1 = entrypoint of HookHandler\r
1335 mov b0=SCRATCH_REG1;; // b0 = entrypoint of HookHandler\r
1336 mov SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;;\r
1337 br.sptk.few b0;; // branch to HookHandler\r
1338\r
1339PatchCodeRet:\r
1340 // fake-up an rfi to get RSE back to being coherent and ensure psr has\r
1341 // original contents when interrupt occurred, then exit to SAL\r
1342 // at this point:\r
1343 // cr.ifs has been modified by previous "cover"\r
1344 // SCRATCH_REG6 has original cr.ifs\r
1345\r
1346 mov SCRATCH_REG5=cr.ipsr\r
1347 mov SCRATCH_REG4=cr.iip;;\r
1348 mov cr.ipsr=SCRATCH_REG0\r
1349 mov SCRATCH_REG1=ip;;\r
1350 add SCRATCH_REG1=0x30, SCRATCH_REG1;;\r
1351 mov cr.iip=SCRATCH_REG1;;\r
1352 rfi;; // rfi to cr.iip = ip + 0x30 set above; presumably Stub_RfiTarget - depends on bundle layout, TODO confirm\r
1353\r
1354Stub_RfiTarget:\r
1355 mov cr.ifs=SCRATCH_REG6\r
1356 mov cr.ipsr=SCRATCH_REG5\r
1357 mov cr.iip=SCRATCH_REG4;;\r
1358\r
1359Stub_IVT_Passthru:\r
1360 mov pr=PR_REG // pr = saved predicate registers\r
1361 mov b0=B0_REG;; // b0 = saved b0\r
1362EndPatchCode:\r
1363\r
1364\r
1365/////////////////////////////////////////////\r
1366// The following bundle is moved into IVT entry 14 (offset 0x3400), which is reserved\r
1367// in the Itanium architecture. This bundle will be the last bundle of that entry, located\r
1368// at offset 0x37F0 in the IVT. NOTE(review): the IP-relative target presumably lands 0x10 past EXT_INT_ENTRY_OFFSET - confirm against the offset definitions.\r
1369\r
1370FailsafeBranch:\r
1371{\r
1372 .mib\r
1373 nop.m 0\r
1374 nop.i 0\r
1375 br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10)\r
1376}\r
1377\r
1378\r
1379/////////////////////////////////////////////\r
1380// The following bundle is moved into IVT entry 13 (offset 0x3000), which is the\r
1381// external interrupt entry. It branches to the patch code via PATCH_BRANCH (defined outside this section).\r
1382\r
1383PatchCodeNewBun0:\r
1384{\r
1385 .mib\r
1386 nop.m 0\r
1387 nop.i 0\r
1388 br.cond.sptk.few PATCH_BRANCH\r
1389}\r