7b414b4e |
1 | /// @file\r |
2 | /// Low level IPF routines used by the debug support driver\r |
3 | ///\r |
4 | /// Copyright (c) 2006, Intel Corporation\r |
5 | /// All rights reserved. This program and the accompanying materials\r |
6 | /// are licensed and made available under the terms and conditions of the BSD License\r |
7 | /// which accompanies this distribution. The full text of the license may be found at\r |
8 | /// http://opensource.org/licenses/bsd-license.php\r |
9 | ///\r |
10 | /// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r |
11 | /// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r |
12 | ///\r |
13 | /// Module Name: AsmFuncs.s\r |
14 | ///\r |
15 | ///\r |
c1f23d63 |
16 | \r |
17 | \r |
e9f9d09a |
18 | #include "Common.i"\r |
c1f23d63 |
19 | #include "Ds64Macros.i"\r |
20 | \r |
21 | .globl PatchSaveBuffer\r |
22 | .globl IpfContextBuf\r |
23 | .globl CommonHandler\r |
24 | .globl ExternalInterruptCount\r |
25 | \r |
26 | \r |
27 | /////////////////////////////////////////////\r |
28 | //\r |
29 | // Name:\r |
30 | // InstructionCacheFlush\r |
31 | //\r |
32 | // Description:\r |
33 | // Flushes instruction cache for specified number of bytes\r |
34 | //\r |
35 | .globl InstructionCacheFlush // r32 = start address, r33 = byte count (32-bit value)\r |
36 | .proc InstructionCacheFlush\r |
37 | .align 32\r |
38 | InstructionCacheFlush::\r |
39 | { .mii\r |
40 | alloc r3=2, 0, 0, 0 // two inputs (r32, r33), no locals or outputs\r |
41 | cmp4.leu p0,p6=32, r33;; // p6 = low 32 bits of r33 are below 32 (unsigned)\r |
42 | (p6) mov r33=32;; // raise the byte count to at least 32\r |
43 | }\r |
44 | { .mii\r |
45 | nop.m 0\r |
46 | zxt4 r29=r33;; // r29 = byte count zero-extended from 32 bits\r |
47 | dep.z r30=r29, 0, 5;; // r30 = byte count modulo 32\r |
48 | }\r |
49 | { .mii\r |
50 | cmp4.eq p0,p7=r0, r30 // p7 = count is not a multiple of 32\r |
51 | shr.u r28=r29, 5;; // r28 = count / 32\r |
52 | (p7) adds r28=1, r28;; // round up to a whole number of 32-byte units\r |
53 | }\r |
54 | { .mii\r |
55 | nop.m 0\r |
56 | shl r27=r28, 5;; // r27 = rounded count in bytes\r |
57 | zxt4 r26=r27;; // keep only the low 32 bits of the rounded count\r |
58 | }\r |
59 | { .mfb\r |
60 | add r31=r26, r32 // r31 = end address (start + rounded count)\r |
61 | nop.f 0\r |
62 | nop.b 0\r |
63 | }\r |
64 | LoopBack: // $L143: one fc per pass; the test follows the flush, so at least one fc always executes\r |
65 | { .mii\r |
66 | fc r32 // flush the cache line addressed by r32\r |
67 | adds r32=32, r32;; // step 32 bytes; NOTE(review): r32 is not aligned down first - confirm callers pass 32-byte aligned addresses\r |
68 | cmp.ltu p14,p15=r32, r31 // p14 = current address still below the end address\r |
69 | }\r |
70 | { .mfb\r |
71 | nop.m 0\r |
72 | nop.f 0\r |
73 | //(p14) br.cond.dptk.few $L143#;;\r |
74 | (p14) br.cond.dptk.few LoopBack;;\r |
75 | }\r |
76 | { .mmi\r |
77 | sync.i;; // same sync.i / srlz.i sequence that CopyBundles issues after fc\r |
78 | srlz.i\r |
79 | nop.i 0;;\r |
80 | }\r |
81 | { .mfb\r |
82 | nop.m 0\r |
83 | nop.f 0\r |
84 | br.ret.sptk.few b0;;\r |
85 | }\r |
86 | .endp InstructionCacheFlush\r |
87 | \r |
88 | \r |
89 | /////////////////////////////////////////////\r |
90 | //\r |
91 | // Name:\r |
92 | // ChainHandler\r |
93 | //\r |
94 | // Description:\r |
95 | // Chains an interrupt handler\r |
96 | //\r |
97 | // The purpose of this function is to enable chaining of the external interrupt.\r |
98 | // Since there's no clean SAL abstraction for doing this, we must do it\r |
99 | // surreptitiously.\r |
100 | //\r |
101 | // The reserved IVT entry at offset 0x3400 is coopted for use by this handler.\r |
102 | // According to Itanium architecture, it is reserved. Strictly speaking, this is\r |
103 | // not safe, as we're cheating and violating the Itanium architecture. However,\r |
104 | // as long as we're the only ones cheating, we should be OK. Without hooks in\r |
105 | // the SAL to enable IVT management, there aren't many good options.\r |
106 | //\r |
107 | // The strategy is to replace the first bundle of the external interrupt handler\r |
108 | // with our own that will branch into a piece of code we've supplied and located\r |
109 | // in the reserved IVT entry. Only the first bundle of the external interrupt\r |
110 | // IVT entry is modified.\r |
111 | //\r |
112 | // The original bundle is moved and relocated to space\r |
113 | // allocated within the reserved IVT entry. The bundle that follows it is\r |
114 | // generated as a hard coded branch back to the second bundle of the\r |
115 | // external interrupt IVT entry just in case the first bundle had no branch.\r |
116 | //\r |
117 | // Our new code will execute our handler, and then fall through to the\r |
118 | // original bundle after restoring all context appropriately.\r |
119 | //\r |
120 | // The following is a representation of what the IVT memory map looks like with\r |
121 | // our chained handler installed:\r |
122 | //\r |
123 | //\r |
7b414b4e |
124 | //\r |
125 | //\r |
126 | //\r |
127 | // This IVT entry is Failsafe bundle\r |
128 | // reserved by the\r |
129 | // Itanium architecture Original bundle 0\r |
130 | // and is used for\r |
131 | // for locating our\r |
132 | // handler and the\r |
133 | // original bundle Patch code...\r |
134 | // zero of the ext\r |
135 | // interrupt handler\r |
136 | //\r |
137 | // RSVD (3400) Unused\r |
138 | //\r |
139 | //\r |
140 | //\r |
141 | //\r |
142 | //\r |
143 | //\r |
144 | //\r |
145 | //\r |
146 | //\r |
147 | //\r |
148 | //\r |
149 | //\r |
c1f23d63 |
150 | // EXT_INT (3000) Bundle 0 Bundle zero - This one is\r |
151 | // modified, all other bundles\r |
152 | // in the EXT_INT entry are\r |
153 | // untouched.\r |
154 | //\r |
155 | //\r |
156 | // Arguments:\r |
157 | //\r |
158 | // Returns:\r |
159 | //\r |
160 | // Notes:\r |
161 | //\r |
162 | //\r |
163 | .globl ChainHandler\r |
164 | .proc ChainHandler\r |
165 | ChainHandler:\r |
166 | // NESTED_SETUP / NESTED_RETURN come from an include file that is not shown here.\r |
167 | NESTED_SETUP( 0,2+3,3,0 )\r |
168 | // r2 holds the IVT base across every br.call below; assumes no callee or macro writes r2 - TODO confirm.\r |
169 | mov r8=1 // r8 = success (never changed afterwards; callee status values are not examined)\r |
170 | mov r2=cr.iva;; // r2 = interrupt vector address\r |
171 | //\r |
172 | // NOTE: There's a potential hazard here in that we're simply stealing a bunch of\r |
173 | // bundles (memory) from the IVT and assuming there's no catastrophic side effect.\r |
174 | //\r |
175 | // First, save IVT area we're taking over with the patch so we can restore it later\r |
176 | //\r |
177 | addl out0=PATCH_ENTRY_OFFSET, r2 // out0 = source buffer\r |
178 | movl out1=PatchSaveBuffer // out1 = destination buffer\r |
179 | mov out2=0x40;; // out2 = number of bundles to copy... save entire IVT entry\r |
180 | br.call.sptk.few b0 = CopyBundles\r |
181 | \r |
182 | // Next, copy the patch code into the IVT\r |
183 | movl out0=PatchCode // out0 = source buffer of patch code\r |
184 | addl out1=PATCH_OFFSET, r2 // out1 = destination buffer - in IVT\r |
185 | mov out2=NUM_PATCH_BUNDLES;; // out2 = number of bundles to copy\r |
186 | br.call.sptk.few b0 = CopyBundles\r |
187 | \r |
188 | \r |
189 | // copy original bundle 0 from the external interrupt handler to the\r |
190 | // appropriate place in the reserved IVT interrupt slot\r |
191 | addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = source buffer\r |
192 | addl out1=RELOCATED_EXT_INT, r2 // out1 = destination buffer - in reserved IVT\r |
193 | mov out2=1;; // out2 = copy 1 bundle\r |
194 | br.call.sptk.few b0 = CopyBundles\r |
195 | \r |
196 | // Now relocate it there because it very likely had a branch instruction\r |
197 | // that must now be fixed up.\r |
198 | addl out0=RELOCATED_EXT_INT, r2 // out0 = new runtime address of bundle - in reserved IVT\r |
199 | addl out1=EXT_INT_ENTRY_OFFSET, r2;;// out1 = IP address of previous location\r |
200 | mov out2=out0;; // out2 = IP address of new location\r |
201 | br.call.sptk.few b0 = RelocateBundle\r |
202 | // NOTE(review): the success/failure value RelocateBundle leaves in out0 is not checked here.\r |
203 | // Now copy into the failsafe branch into the next bundle just in case\r |
204 | // the original ext int bundle 0 bundle did not contain a branch instruction\r |
205 | movl out0=FailsafeBranch // out0 = source buffer\r |
206 | addl out1=FAILSAFE_BRANCH_OFFSET, r2 // out1 = destination buffer - in reserved IVT\r |
207 | mov out2=1;; // out2 = copy 1 bundle\r |
208 | br.call.sptk.few b0 = CopyBundles\r |
209 | \r |
210 | // Last, copy in our replacement for the external interrupt IVT entry bundle 0\r |
211 | movl out0=PatchCodeNewBun0 // out0 = source buffer - our replacement bundle 0\r |
212 | addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - bundle 0 of External interrupt entry\r |
213 | mov out2=1;; // out2 = copy 1 bundle\r |
214 | br.call.sptk.few b0 = CopyBundles\r |
215 | \r |
216 | ChainHandlerDone:\r |
217 | NESTED_RETURN\r |
218 | \r |
219 | .endp ChainHandler\r |
220 | \r |
221 | \r |
222 | /////////////////////////////////////////////\r |
223 | //\r |
224 | // Name:\r |
225 | // UnchainHandler\r |
226 | //\r |
227 | // Description:\r |
228 | // Unchains an interrupt handler\r |
229 | //\r |
230 | // Arguments:\r |
231 | //\r |
232 | // Returns:\r |
233 | //\r |
234 | // Notes:\r |
235 | //\r |
236 | //\r |
237 | .globl UnchainHandler\r |
238 | .proc UnchainHandler\r |
239 | // Reverses ChainHandler: moves bundle 0 home, re-relocates it, then restores the saved patch area.\r |
240 | UnchainHandler:\r |
241 | \r |
242 | NESTED_SETUP( 0,2+3,3,0 )\r |
243 | \r |
244 | mov r8=1 // r8 = success (never changed afterwards)\r |
245 | mov r2=cr.iva;; // r2 = interrupt vector address\r |
246 | \r |
247 | // First copy original Ext Int bundle 0 back to its proper home...\r |
248 | addl out0=RELOCATED_EXT_INT, r2 // out0 = source - in reserved IVT\r |
249 | addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - first bundle of Ext Int entry\r |
250 | mov out2=1;; // out2 = copy 1 bundle\r |
251 | br.call.sptk.few b0 = CopyBundles\r |
252 | \r |
253 | // Now, relocate it again...\r |
254 | addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = New runtime address\r |
255 | addl out1=RELOCATED_EXT_INT, r2;; // out1 = IP address of previous location\r |
256 | mov out2=out0;; // out2 = IP address of new location\r |
257 | br.call.sptk.few b0 = RelocateBundle\r |
258 | \r |
259 | // Last, restore the patch area\r |
260 | movl out0=PatchSaveBuffer // out0 = source buffer\r |
261 | addl out1=PATCH_ENTRY_OFFSET, r2 // out1 = destination buffer\r |
262 | mov out2=0x40;; // out2 = number of bundles to copy... restore entire IVT entry\r |
263 | br.call.sptk.few b0 = CopyBundles\r |
264 | \r |
265 | UnchainHandlerDone:\r |
266 | NESTED_RETURN\r |
267 | \r |
268 | .endp UnchainHandler\r |
269 | \r |
270 | \r |
271 | /////////////////////////////////////////////\r |
272 | //\r |
273 | // Name:\r |
274 | // CopyBundles\r |
275 | //\r |
276 | // Description:\r |
277 | // Copies instruction bundles - flushes icache as necessary\r |
278 | //\r |
279 | // Arguments:\r |
280 | // in0 - Bundle source\r |
281 | // in1 - Bundle destination\r |
282 | // in2 - Bundle count\r |
283 | //\r |
284 | // Returns:\r |
285 | //\r |
286 | // Notes:\r |
287 | // This procedure is a leaf routine\r |
288 | //\r |
289 | .proc CopyBundles\r |
290 | // Copies in2 bundles from [in0] to [in1], 8 bytes per pass; in0, in1 and in2 are all modified.\r |
291 | CopyBundles:\r |
292 | \r |
293 | NESTED_SETUP(3,2+1,0,0)\r |
294 | \r |
295 | shl in2=in2, 1;; // in2 = count of 8 byte blocks to copy (two per bundle)\r |
296 | \r |
297 | CopyBundlesLoop:\r |
298 | \r |
299 | cmp.eq p14, p15 = 0, in2;; // Check if done\r |
300 | (p14) br.sptk.few CopyBundlesDone;;\r |
301 | \r |
302 | ld8 loc2=[in0], 0x8;; // loc2 = source bytes; in0 advances to the next source block\r |
303 | st8 [in1]=loc2;; // [in1] = destination bytes\r |
304 | fc in1;; // Flush the line just written (done once per 8 byte block)\r |
305 | sync.i;; // Ensure local and remote data/inst caches in sync\r |
306 | srlz.i;; // Ensure sync has been observed\r |
307 | add in1=0x8, in1;; // in1 = next destination\r |
308 | add in2=-1, in2;; // in2 = remaining 8 byte blocks to copy\r |
309 | br.sptk.few CopyBundlesLoop;;\r |
310 | \r |
311 | CopyBundlesDone:\r |
312 | NESTED_RETURN\r |
313 | \r |
314 | .endp CopyBundles\r |
315 | \r |
316 | \r |
317 | /////////////////////////////////////////////\r |
318 | //\r |
319 | // Name:\r |
320 | // RelocateBundle\r |
321 | //\r |
322 | // Description:\r |
323 | // Relocates an instruction bundle by updating any ip-relative branch instructions.\r |
324 | //\r |
325 | // Arguments:\r |
326 | // in0 - Runtime address of bundle\r |
327 | // in1 - IP address of previous location of bundle\r |
328 | // in2 - IP address of new location of bundle\r |
329 | //\r |
330 | // Returns:\r |
331 | // in0 - 1 if successful or 0 if unsuccessful\r |
332 | //\r |
333 | // Notes:\r |
334 | // This routine examines all slots in the given bundle that are destined for the\r |
335 | // branch execution unit. If any of these slots contain an IP-relative branch\r |
336 | // namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative\r |
337 | // address. Errors can occur if a branch cannot be reached.\r |
338 | //\r |
339 | .proc RelocateBundle\r |
340 | // Each helper called below leaves its result in its own in0/in1, which this routine reads back as out0/out1.\r |
341 | RelocateBundle:\r |
342 | \r |
343 | NESTED_SETUP(3,2+4,3,0)\r |
344 | \r |
345 | mov loc2=SLOT0 // loc2 = slot index\r |
346 | mov loc5=in0;; // loc5 = runtime address of bundle\r |
347 | mov in0=1;; // in0 = success\r |
348 | \r |
349 | RelocateBundleNextSlot:\r |
350 | \r |
351 | cmp.ge p14, p15 = SLOT2, loc2;; // Check if maximum slot; p15 = slot index is past SLOT2\r |
352 | (p15) br.sptk.few RelocateBundleDone\r |
353 | \r |
354 | mov out0=loc5;; // out0 = runtime address of bundle\r |
355 | br.call.sptk.few b0 = GetTemplate\r |
356 | mov loc3=out0;; // loc3 = instruction template\r |
357 | mov out0=loc5 // out0 = runtime address of bundle\r |
358 | mov out1=loc2;; // out1 = instruction slot number\r |
359 | br.call.sptk.few b0 = GetSlot\r |
360 | mov loc4=out0;; // loc4 = instruction encoding\r |
361 | mov out0=loc4 // out0 = instruction encoding\r |
362 | mov out1=loc2 // out1 = instruction slot number\r |
363 | mov out2=loc3;; // out2 = instruction template\r |
364 | br.call.sptk.few b0 = IsSlotBranch\r |
365 | cmp.eq p14, p15 = 1, out0;; // Check if branch slot\r |
366 | (p15) add loc2=1,loc2 // Not a branch slot: increment slot\r |
367 | (p15) br.sptk.few RelocateBundleNextSlot\r |
368 | mov out0=loc4 // out0 = instruction encoding\r |
369 | mov out1=in1 // out1 = IP address of previous location\r |
370 | mov out2=in2;; // out2 = IP address of new location\r |
371 | br.call.sptk.few b0 = RelocateSlot\r |
372 | cmp.eq p14, p15 = 1, out1;; // Check if relocated slot\r |
373 | (p15) mov in0=0 // in0 = failure\r |
374 | (p15) br.sptk.few RelocateBundleDone\r |
375 | mov out2=out0;; // out2 = instruction encoding returned by RelocateSlot\r |
376 | mov out0=loc5 // out0 = runtime address of bundle\r |
377 | mov out1=loc2;; // out1 = instruction slot number\r |
378 | br.call.sptk.few b0 = SetSlot\r |
379 | add loc2=1,loc2;; // Increment slot\r |
380 | br.sptk.few RelocateBundleNextSlot\r |
381 | \r |
382 | RelocateBundleDone:\r |
383 | NESTED_RETURN\r |
384 | \r |
385 | .endp RelocateBundle\r |
386 | \r |
387 | \r |
388 | /////////////////////////////////////////////\r |
389 | //\r |
390 | // Name:\r |
391 | // RelocateSlot\r |
392 | //\r |
393 | // Description:\r |
394 | // Relocates a single instruction slot: if it holds an ip-relative branch, its displacement is re-encoded for the new location.\r |
395 | //\r |
396 | // Arguments:\r |
397 | // in0 - Instruction encoding (41-bits, right justified)\r |
398 | // in1 - IP address of previous location of bundle\r |
399 | // in2 - IP address of new location of bundle\r |
400 | //\r |
401 | // Returns:\r |
402 | // in0 - Instruction encoding (41-bits, right justified)\r |
403 | // in1 - 1 if successful otherwise 0\r |
404 | //\r |
405 | // Notes:\r |
406 | // This procedure is a leaf routine\r |
407 | //\r |
408 | .proc RelocateSlot\r |
409 | // A new displacement of exactly zero is encoded with sign bit 0 (the sign is taken from bit 63 of the offset).\r |
410 | RelocateSlot:\r |
411 | NESTED_SETUP(3,2+5,0,0)\r |
412 | extr.u loc2=in0, 37, 4;; // loc2 = instruction opcode\r |
413 | cmp.eq p14, p15 = 4, loc2;; // IP-relative branch (B1) or\r |
414 | // IP-relative counted branch (B2)\r |
415 | (p15) cmp.eq p14, p15 = 5, loc2;; // IP-relative call (B3)\r |
416 | (p15) cmp.eq p14, p15 = 7, loc2;; // IP-relative predict (B6)\r |
417 | (p15) mov in1=1 // Instruction did not need to be reencoded\r |
418 | (p15) br.sptk.few RelocateSlotDone\r |
419 | tbit.nz p14, p15 = in0, 36;; // put relative offset sign bit in p14\r |
420 | extr.u loc2=in0, 13, 20;; // loc2 = relative offset in instruction\r |
421 | (p14) movl loc3=0xfffffffffff00000;; // extend sign\r |
422 | (p14) or loc2=loc2, loc3;;\r |
423 | shl loc2=loc2,4;; // convert to byte offset instead of bundle offset\r |
424 | add loc3=loc2, in1;; // loc3 = physical address of branch target\r |
425 | (p14) sub loc2=r0,loc2;; // flip sign in loc2 if offset is negative (loc2 is not read again before being discarded)\r |
426 | sub loc4=loc3,in2;; // loc4 = relative offset from new ip to branch target\r |
7b414b4e |
427 | cmp.lt p15, p14 = 0, loc4;; // p15 = offset > 0, p14 = offset <= 0 (only used to form the absolute value)\r |
c1f23d63 |
428 | (p14) sub loc5=r0,loc4 // get absolute value of offset\r |
429 | (p15) mov loc5=loc4;;\r |
430 | movl loc6=0x0FFFFFF;; // maximum offset in bytes for ip-rel branch\r |
431 | cmp.gt p14, p15 = loc5, loc6;; // check to see we're not out of range for an ip-relative branch\r |
432 | (p14) br.sptk.few RelocateSlotError\r |
433 | tbit.nz p14, p15 = loc4, 63;; // p14 = new offset is negative; zero must not set the sign bit\r |
434 | (p14) dep in0=1,in0,36,1 // store sign bit in instruction\r |
435 | (p15) dep in0=0,in0,36,1\r |
436 | shr loc4=loc4, 4;; // convert back to bundle offset\r |
437 | dep in0=loc4,in0,13,16;; // put first 16 bits of new offset into instruction\r |
438 | shr loc4=loc4,16;;\r |
439 | dep in0=loc4,in0,13+16,4 // put last 4 bits of new offset into instruction\r |
440 | mov in1=1;; // in1 = success\r |
441 | br.sptk.few RelocateSlotDone;;\r |
442 | \r |
443 | RelocateSlotError:\r |
444 | mov in1=0;; // in1 = failure\r |
445 | \r |
446 | RelocateSlotDone:\r |
447 | NESTED_RETURN\r |
448 | \r |
449 | .endp RelocateSlot\r |
450 | \r |
451 | \r |
452 | /////////////////////////////////////////////\r |
453 | //\r |
454 | // Name:\r |
455 | // IsSlotBranch\r |
456 | //\r |
457 | // Description:\r |
458 | // Determines if the given instruction is a branch instruction.\r |
459 | //\r |
460 | // Arguments:\r |
461 | // in0 - Instruction encoding (41-bits, right justified)\r |
462 | // in1 - Instruction slot number\r |
463 | // in2 - Bundle template\r |
464 | //\r |
465 | // Returns:\r |
466 | // in0 - 1 if branch or 0 if not branch\r |
467 | //\r |
468 | // Notes:\r |
469 | // This procedure is a leaf routine\r |
470 | //\r |
471 | // IsSlotBranch recognizes all branch instructions by looking at the provided template.\r |
472 | // The instruction encoding is only passed to this routine for future expansion.\r |
473 | //\r |
474 | .proc IsSlotBranch\r |
475 | // Decision is made from the template (in2) and slot number (in1) only; in0 is overwritten before it is read.\r |
476 | IsSlotBranch:\r |
477 | \r |
478 | NESTED_SETUP (3,2+0,0,0)\r |
479 | \r |
480 | mov in0=1;; // in0 = 1 which destroys the instruction\r |
481 | andcm in2=in2,in0;; // in2 = even template to reduce compares (bit 0 cleared)\r |
482 | mov in0=0;; // in0 = not a branch\r |
483 | cmp.eq p14, p15 = 0x16, in2;; // Template 0x16 is BBB\r |
484 | (p14) br.sptk.few IsSlotBranchTrue\r |
485 | cmp.eq p14, p15 = SLOT0, in1;; // Slot 0 has no other possibilities\r |
486 | (p14) br.sptk.few IsSlotBranchDone\r |
487 | cmp.eq p14, p15 = 0x12, in2;; // Template 0x12 is MBB\r |
488 | (p14) br.sptk.few IsSlotBranchTrue\r |
489 | cmp.eq p14, p15 = SLOT1, in1;; // Slot 1 has no other possibilities\r |
490 | (p14) br.sptk.few IsSlotBranchDone\r |
491 | cmp.eq p14, p15 = 0x10, in2;; // Template 0x10 is MIB\r |
492 | (p14) br.sptk.few IsSlotBranchTrue\r |
493 | cmp.eq p14, p15 = 0x18, in2;; // Template 0x18 is MMB\r |
494 | (p14) br.sptk.few IsSlotBranchTrue\r |
495 | cmp.eq p14, p15 = 0x1C, in2;; // Template 0x1C is MFB\r |
496 | (p14) br.sptk.few IsSlotBranchTrue\r |
497 | br.sptk.few IsSlotBranchDone\r |
498 | \r |
499 | IsSlotBranchTrue:\r |
500 | mov in0=1;; // in0 = branch\r |
501 | \r |
502 | IsSlotBranchDone:\r |
503 | NESTED_RETURN\r |
504 | \r |
505 | .endp IsSlotBranch\r |
506 | \r |
507 | \r |
508 | /////////////////////////////////////////////\r |
509 | //\r |
510 | // Name:\r |
511 | // GetTemplate\r |
512 | //\r |
513 | // Description:\r |
514 | // Retrieves the instruction template for an instruction bundle\r |
515 | //\r |
516 | // Arguments:\r |
517 | // in0 - Runtime address of bundle\r |
518 | //\r |
519 | // Returns:\r |
520 | // in0 - Instruction template (5-bits, right-justified)\r |
521 | //\r |
522 | // Notes:\r |
523 | // This procedure is a leaf routine\r |
524 | //\r |
525 | .proc GetTemplate\r |
526 | // Returns the first 8 bytes of the bundle ANDed with MASK_0_4 (constant defined outside this file).\r |
527 | GetTemplate:\r |
528 | \r |
529 | NESTED_SETUP (1,2+2,0,0)\r |
530 | \r |
531 | ld8 loc2=[in0], 0x8 // loc2 = first 8 bytes of branch bundle (the post-incremented in0 is not used again)\r |
532 | movl loc3=MASK_0_4;; // loc3 = template mask\r |
533 | and loc2=loc2,loc3;; // loc2 = template, right justified\r |
534 | mov in0=loc2;; // in0 = template, right justified\r |
535 | \r |
536 | NESTED_RETURN\r |
537 | \r |
538 | .endp GetTemplate\r |
539 | \r |
540 | \r |
541 | /////////////////////////////////////////////\r |
542 | //\r |
543 | // Name:\r |
544 | // GetSlot\r |
545 | //\r |
546 | // Description:\r |
547 | // Gets the instruction encoding for an instruction slot and bundle\r |
548 | //\r |
549 | // Arguments:\r |
550 | // in0 - Runtime address of bundle\r |
551 | // in1 - Instruction slot (either 0, 1, or 2)\r |
552 | //\r |
553 | // Returns:\r |
554 | // in0 - Instruction encoding (41-bits, right justified)\r |
555 | //\r |
556 | // Notes:\r |
557 | // This procedure is a leaf routine\r |
558 | //\r |
559 | // Slot0 - [in0] Bits 45-5\r |
560 | // Slot1 - [in0] Bits 63-46 (low 18 bits of slot) and [in0 + 0x8] Bits 22-0 (high 23 bits of slot)\r |
561 | // Slot2 - [in0 + 0x8] Bits 63-23\r |
562 | //\r |
563 | .proc GetSlot\r |
564 | // Every path returns exactly 41 bits in in0, right justified.\r |
565 | GetSlot:\r |
566 | NESTED_SETUP (2,2+3,0,0)\r |
567 | \r |
568 | ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of branch bundle; in0 now addresses the second 8 bytes\r |
569 | ld8 loc3=[in0];; // loc3 = second 8 bytes of branch bundle\r |
570 | cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r |
571 | (p14) br.cond.sptk.few GetSlot2;; // get slot 2\r |
572 | cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r |
573 | (p14) br.cond.sptk.few GetSlot1;; // get slot 1\r |
574 | \r |
575 | GetSlot0:\r |
576 | extr.u in0=loc2, 5, 41 // in0 = extracted slot 0 (41 bits starting at bit 5, i.e. bits 45-5)\r |
577 | br.sptk.few GetSlotDone;;\r |
578 | \r |
579 | GetSlot1:\r |
580 | extr.u in0=loc2, 46, 18 // in0 = bits 63-46 of loc2 right-justified\r |
581 | extr.u loc4=loc3, 0, 23;; // loc4 = bits 22-0 of loc3 right-justified\r |
582 | dep in0=loc4, in0, 18, 15;; // slot bits 32-18 = low 15 bits of loc4\r |
583 | shr.u loc4=loc4,15;; // loc4 = remaining 8 bits\r |
584 | dep in0=loc4, in0, 33, 8;; // in0 = extracted slot 1\r |
585 | br.sptk.few GetSlotDone;;\r |
586 | \r |
587 | GetSlot2:\r |
588 | extr.u in0=loc3, 23, 41;; // in0 = extracted slot 2\r |
589 | \r |
590 | GetSlotDone:\r |
591 | NESTED_RETURN\r |
592 | \r |
593 | .endp GetSlot\r |
594 | \r |
595 | \r |
596 | /////////////////////////////////////////////\r |
597 | //\r |
598 | // Name:\r |
599 | // SetSlot\r |
600 | //\r |
601 | // Description:\r |
602 | // Sets the instruction encoding for an instruction slot and bundle\r |
603 | //\r |
604 | // Arguments:\r |
605 | // in0 - Runtime address of bundle\r |
606 | // in1 - Instruction slot (either 0, 1, or 2)\r |
607 | // in2 - Instruction encoding (41-bits, right justified)\r |
608 | //\r |
609 | // Returns:\r |
610 | //\r |
611 | // Notes:\r |
612 | // This procedure is a leaf routine\r |
613 | //\r |
614 | .proc SetSlot\r |
615 | // After the first ld8 below, in0 addresses the SECOND 8 bytes of the bundle; the first 8 bytes live at in0 - 8.\r |
616 | SetSlot:\r |
617 | NESTED_SETUP (3,2+3,0,0)\r |
618 | \r |
619 | ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of bundle; in0 is post-incremented by 8\r |
620 | ld8 loc3=[in0];; // loc3 = second 8 bytes of bundle\r |
621 | cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r |
622 | (p14) br.cond.sptk.few SetSlot2;; // set slot 2\r |
623 | cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r |
624 | (p14) br.cond.sptk.few SetSlot1;; // set slot 1\r |
625 | \r |
626 | SetSlot0:\r |
627 | dep loc2=0, loc2, 5, 41;; // remove old instruction from slot 0\r |
628 | shl loc4=in2, 5;; // loc4 = new instruction ready to be inserted\r |
629 | or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r |
630 | add loc4=-8,in0;; // loc4 = address of first 8 bytes of bundle (undo the post-increment)\r |
631 | st8 [loc4]=loc2 // [loc4] = updated bundle\r |
632 | br.sptk.few SetSlotDone;;\r |
633 | ;;\r |
634 | \r |
635 | SetSlot1:\r |
636 | dep loc2=0, loc2, 46, 18 // remove old instruction from slot 1\r |
637 | dep loc3=0, loc3, 0, 23;;\r |
638 | shl loc4=in2, 46;; // loc4 = partial instruction ready to be inserted\r |
639 | or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r |
640 | add loc4=-8,in0;; // loc4 = address of first 8 bytes of bundle (undo the post-increment)\r |
641 | st8 [loc4]=loc2;; // [loc4] = updated bundle\r |
642 | shr.u loc4=in2, 18;; // loc4 = partial instruction ready to be inserted\r |
643 | or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r |
644 | st8 [in0]=loc3;; // [in0] = updated bundle\r |
645 | br.sptk.few SetSlotDone;;\r |
646 | \r |
647 | SetSlot2:\r |
648 | dep loc3=0, loc3, 23, 41;; // remove old instruction from slot 2\r |
649 | shl loc4=in2, 23;; // loc4 = instruction ready to be inserted\r |
650 | or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r |
651 | st8 [in0]=loc3;; // [in0] = updated bundle\r |
652 | \r |
653 | SetSlotDone:\r |
654 | // NOTE(review): unlike CopyBundles, no fc / sync.i / srlz.i follows these stores - confirm whether a flush is needed.\r |
655 | NESTED_RETURN\r |
656 | .endp SetSlot\r |
657 | \r |
658 | \r |
659 | /////////////////////////////////////////////\r |
660 | //\r |
661 | // Name:\r |
662 | // GetIva\r |
663 | //\r |
664 | // Description:\r |
665 | // C callable function to obtain the current value of IVA\r |
666 | //\r |
667 | // Returns:\r |
668 | // Current value of IVA\r |
669 | \r |
670 | .globl GetIva\r |
671 | .proc GetIva\r |
672 | GetIva:\r |
673 | mov r8=cr.iva;; // r8 (return value) = interruption vector address; cr.iva is control register 2\r |
674 | br.ret.sptk.many rp // rp is the assembler's alias for b0\r |
675 | \r |
676 | .endp GetIva\r |
677 | \r |
678 | \r |
679 | /////////////////////////////////////////////\r |
680 | //\r |
681 | // Name:\r |
682 | // ProgramInterruptFlags\r |
683 | //\r |
684 | // Description:\r |
685 | // C callable function to enable/disable interrupts\r |
686 | //\r |
687 | // Returns:\r |
688 | // Previous state of psr.i and psr.ic (psr masked with 0x6000)\r |
689 | //\r |
690 | .globl ProgramInterruptFlags // in0 = new psr.i / psr.ic bits (only mask 0x6000 is used); r8 = previous bits\r |
691 | .proc ProgramInterruptFlags\r |
692 | ProgramInterruptFlags:\r |
693 | alloc loc0=1,2,0,0;; // one input, two locals; loc0 is reused as scratch right away\r |
694 | mov loc0=psr // loc0 = current psr\r |
695 | mov loc1=0x6000;; // loc1 = mask selecting psr.i and psr.ic\r |
696 | and r8=loc0, loc1 // obtain current psr.ic and psr.i state\r |
697 | and in0=in0, loc1 // ensure no extra bits set in input\r |
698 | andcm loc0=loc0,loc1;; // clear original psr.i and psr.ic\r |
699 | or loc0=loc0,in0;; // OR in new psr.i and psr.ic values\r |
700 | mov psr.l=loc0;; // write new psr\r |
701 | srlz.d // serialize so the psr write takes effect before returning\r |
702 | br.ret.sptk.many b0 // return\r |
703 | \r |
704 | .endp ProgramInterruptFlags\r |
705 | \r |
706 | \r |
707 | /////////////////////////////////////////////\r |
708 | //\r |
709 | // Name:\r |
710 | // SpillContext\r |
711 | //\r |
712 | // Description:\r |
713 | // Saves system context to context record.\r |
714 | //\r |
715 | // Arguments:\r |
716 | // in0 = 512 byte aligned context record address\r |
717 | // in1 = original B0\r |
718 | // in2 = original ar.bsp\r |
719 | // in3 = original ar.bspstore\r |
720 | // in4 = original ar.rnat\r |
721 | // in5 = original ar.pfs\r |
722 | //\r |
723 | // Notes:\r |
724 | // loc0 - scratch\r |
725 | // loc1 - scratch\r |
726 | // loc2 - temporary application unat storage\r |
727 | // loc3 - temporary exception handler unat storage\r |
728 | \r |
729 | .proc SpillContext\r |
730 | // Writes the context record sequentially through in0, which is post-incremented after every store but the last.\r |
731 | SpillContext:\r |
732 | alloc loc0=6,4,0,0;; // alloc 6 input, 4 locals, 0 outs\r |
733 | mov loc2=ar.unat;; // save application context unat (spilled later)\r |
734 | mov ar.unat=r0;; // set UNAT=0\r |
735 | st8.spill [in0]=r0,8;; // slot for r0\r |
736 | st8.spill [in0]=r1,8;; // save R1 - R31\r |
737 | st8.spill [in0]=r2,8;;\r |
738 | st8.spill [in0]=r3,8;;\r |
739 | st8.spill [in0]=r4,8;;\r |
740 | st8.spill [in0]=r5,8;;\r |
741 | st8.spill [in0]=r6,8;;\r |
742 | st8.spill [in0]=r7,8;;\r |
743 | st8.spill [in0]=r8,8;;\r |
744 | st8.spill [in0]=r9,8;;\r |
745 | st8.spill [in0]=r10,8;;\r |
746 | st8.spill [in0]=r11,8;;\r |
747 | st8.spill [in0]=r12,8;;\r |
748 | st8.spill [in0]=r13,8;;\r |
749 | st8.spill [in0]=r14,8;;\r |
750 | st8.spill [in0]=r15,8;;\r |
751 | st8.spill [in0]=r16,8;;\r |
752 | st8.spill [in0]=r17,8;;\r |
753 | st8.spill [in0]=r18,8;;\r |
754 | st8.spill [in0]=r19,8;;\r |
755 | st8.spill [in0]=r20,8;;\r |
756 | st8.spill [in0]=r21,8;;\r |
757 | st8.spill [in0]=r22,8;;\r |
758 | st8.spill [in0]=r23,8;;\r |
759 | st8.spill [in0]=r24,8;;\r |
760 | st8.spill [in0]=r25,8;;\r |
761 | st8.spill [in0]=r26,8;;\r |
762 | st8.spill [in0]=r27,8;;\r |
763 | st8.spill [in0]=r28,8;;\r |
764 | st8.spill [in0]=r29,8;;\r |
765 | st8.spill [in0]=r30,8;;\r |
766 | st8.spill [in0]=r31,8;;\r |
767 | mov loc3=ar.unat;; // save debugger context unat (spilled later)\r |
768 | stf.spill [in0]=f2,16;; // save f2 - f31\r |
769 | stf.spill [in0]=f3,16;;\r |
770 | stf.spill [in0]=f4,16;;\r |
771 | stf.spill [in0]=f5,16;;\r |
772 | stf.spill [in0]=f6,16;;\r |
773 | stf.spill [in0]=f7,16;;\r |
774 | stf.spill [in0]=f8,16;;\r |
775 | stf.spill [in0]=f9,16;;\r |
776 | stf.spill [in0]=f10,16;;\r |
777 | stf.spill [in0]=f11,16;;\r |
778 | stf.spill [in0]=f12,16;;\r |
779 | stf.spill [in0]=f13,16;;\r |
780 | stf.spill [in0]=f14,16;;\r |
781 | stf.spill [in0]=f15,16;;\r |
782 | stf.spill [in0]=f16,16;;\r |
783 | stf.spill [in0]=f17,16;;\r |
784 | stf.spill [in0]=f18,16;;\r |
785 | stf.spill [in0]=f19,16;;\r |
786 | stf.spill [in0]=f20,16;;\r |
787 | stf.spill [in0]=f21,16;;\r |
788 | stf.spill [in0]=f22,16;;\r |
789 | stf.spill [in0]=f23,16;;\r |
790 | stf.spill [in0]=f24,16;;\r |
791 | stf.spill [in0]=f25,16;;\r |
792 | stf.spill [in0]=f26,16;;\r |
793 | stf.spill [in0]=f27,16;;\r |
794 | stf.spill [in0]=f28,16;;\r |
795 | stf.spill [in0]=f29,16;;\r |
796 | stf.spill [in0]=f30,16;;\r |
797 | stf.spill [in0]=f31,16;;\r |
798 | mov loc0=pr;; // save predicates\r |
799 | st8.spill [in0]=loc0,8;;\r |
800 | st8.spill [in0]=in1,8;; // save b0 - b7... in1 already equals saved b0\r |
801 | mov loc0=b1;;\r |
802 | st8.spill [in0]=loc0,8;;\r |
803 | mov loc0=b2;;\r |
804 | st8.spill [in0]=loc0,8;;\r |
805 | mov loc0=b3;;\r |
806 | st8.spill [in0]=loc0,8;;\r |
807 | mov loc0=b4;;\r |
808 | st8.spill [in0]=loc0,8;;\r |
809 | mov loc0=b5;;\r |
810 | st8.spill [in0]=loc0,8;;\r |
811 | mov loc0=b6;;\r |
812 | st8.spill [in0]=loc0,8;;\r |
813 | mov loc0=b7;;\r |
814 | st8.spill [in0]=loc0,8;;\r |
815 | mov loc0=ar.rsc;; // save ar.rsc\r |
816 | st8.spill [in0]=loc0,8;;\r |
817 | st8.spill [in0]=in2,8;; // save ar.bsp (in2)\r |
818 | st8.spill [in0]=in3,8;; // save ar.bspstore (in3)\r |
819 | st8.spill [in0]=in4,8;; // save ar.rnat (in4)\r |
820 | mov loc0=ar.fcr;; // save ar.fcr (ar21 - IA32 floating-point control register)\r |
821 | st8.spill [in0]=loc0,8;;\r |
822 | mov loc0=ar.eflag;; // save ar.eflag (ar24)\r |
823 | st8.spill [in0]=loc0,8;;\r |
824 | mov loc0=ar.csd;; // save ar.csd (ar25 - ia32 CS descriptor)\r |
825 | st8.spill [in0]=loc0,8;;\r |
826 | mov loc0=ar.ssd;; // save ar.ssd (ar26 - ia32 ss descriptor)\r |
827 | st8.spill [in0]=loc0,8;;\r |
828 | mov loc0=ar.cflg;; // save ar.cflg (ar27 - ia32 cr0 and cr4)\r |
829 | st8.spill [in0]=loc0,8;;\r |
830 | mov loc0=ar.fsr;; // save ar.fsr (ar28 - ia32 floating-point status register)\r |
831 | st8.spill [in0]=loc0,8;;\r |
832 | mov loc0=ar.fir;; // save ar.fir (ar29 - ia32 floating-point instruction register)\r |
833 | st8.spill [in0]=loc0,8;;\r |
834 | mov loc0=ar.fdr;; // save ar.fdr (ar30 - ia32 floating-point data register)\r |
835 | st8.spill [in0]=loc0,8;;\r |
836 | mov loc0=ar.ccv;; // save ar.ccv\r |
837 | st8.spill [in0]=loc0,8;;\r |
838 | st8.spill [in0]=loc2,8;; // save ar.unat (saved to loc2 earlier)\r |
839 | mov loc0=ar.fpsr;; // save floating point status register\r |
840 | st8.spill [in0]=loc0,8;;\r |
841 | st8.spill [in0]=in5,8;; // save ar.pfs\r |
842 | mov loc0=ar.lc;; // save ar.lc\r |
843 | st8.spill [in0]=loc0,8;;\r |
844 | mov loc0=ar.ec;; // save ar.ec\r |
845 | st8.spill [in0]=loc0,8;;\r |
846 | \r |
847 | // save control registers\r |
848 | mov loc0=cr.dcr;; // save dcr\r |
849 | st8.spill [in0]=loc0,8;;\r |
850 | mov loc0=cr.itm;; // save itm\r |
851 | st8.spill [in0]=loc0,8;;\r |
852 | mov loc0=cr.iva;; // save iva\r |
853 | st8.spill [in0]=loc0,8;;\r |
854 | mov loc0=cr.pta;; // save pta\r |
855 | st8.spill [in0]=loc0,8;;\r |
856 | mov loc0=cr.ipsr;; // save ipsr\r |
857 | st8.spill [in0]=loc0,8;;\r |
858 | mov loc0=cr.isr;; // save isr\r |
859 | st8.spill [in0]=loc0,8;;\r |
860 | mov loc0=cr.iip;; // save iip\r |
861 | st8.spill [in0]=loc0,8;;\r |
862 | mov loc0=cr.ifa;; // save ifa\r |
863 | st8.spill [in0]=loc0,8;;\r |
864 | mov loc0=cr.itir;; // save itir\r |
865 | st8.spill [in0]=loc0,8;;\r |
866 | mov loc0=cr.iipa;; // save iipa\r |
867 | st8.spill [in0]=loc0,8;;\r |
868 | mov loc0=cr.ifs;; // save ifs\r |
869 | st8.spill [in0]=loc0,8;;\r |
870 | mov loc0=cr.iim;; // save iim\r |
871 | st8.spill [in0]=loc0,8;;\r |
872 | mov loc0=cr.iha;; // save iha\r |
873 | st8.spill [in0]=loc0,8;;\r |
874 | \r |
875 | // save debug registers\r |
876 | mov loc0=dbr[r0];; // save dbr0 - dbr7\r |
877 | st8.spill [in0]=loc0,8;;\r |
878 | movl loc1=1;; // loc1 = register index for the indirect dbr/ibr reads\r |
879 | mov loc0=dbr[loc1];;\r |
880 | st8.spill [in0]=loc0,8;;\r |
881 | movl loc1=2;;\r |
882 | mov loc0=dbr[loc1];;\r |
883 | st8.spill [in0]=loc0,8;;\r |
884 | movl loc1=3;;\r |
885 | mov loc0=dbr[loc1];;\r |
886 | st8.spill [in0]=loc0,8;;\r |
887 | movl loc1=4;;\r |
888 | mov loc0=dbr[loc1];;\r |
889 | st8.spill [in0]=loc0,8;;\r |
890 | movl loc1=5;;\r |
891 | mov loc0=dbr[loc1];;\r |
892 | st8.spill [in0]=loc0,8;;\r |
893 | movl loc1=6;;\r |
894 | mov loc0=dbr[loc1];;\r |
895 | st8.spill [in0]=loc0,8;;\r |
896 | movl loc1=7;;\r |
897 | mov loc0=dbr[loc1];;\r |
898 | st8.spill [in0]=loc0,8;;\r |
899 | mov loc0=ibr[r0];; // save ibr0 - ibr7\r |
900 | st8.spill [in0]=loc0,8;;\r |
901 | movl loc1=1;;\r |
902 | mov loc0=ibr[loc1];;\r |
903 | st8.spill [in0]=loc0,8;;\r |
904 | movl loc1=2;;\r |
905 | mov loc0=ibr[loc1];;\r |
906 | st8.spill [in0]=loc0,8;;\r |
907 | movl loc1=3;;\r |
908 | mov loc0=ibr[loc1];;\r |
909 | st8.spill [in0]=loc0,8;;\r |
910 | movl loc1=4;;\r |
911 | mov loc0=ibr[loc1];;\r |
912 | st8.spill [in0]=loc0,8;;\r |
913 | movl loc1=5;;\r |
914 | mov loc0=ibr[loc1];;\r |
915 | st8.spill [in0]=loc0,8;;\r |
916 | movl loc1=6;;\r |
917 | mov loc0=ibr[loc1];;\r |
918 | st8.spill [in0]=loc0,8;;\r |
919 | movl loc1=7;;\r |
920 | mov loc0=ibr[loc1];;\r |
921 | st8.spill [in0]=loc0,8;;\r |
922 | st8.spill [in0]=loc3;; // last entry: debugger context unat captured in loc3 (no post-increment)\r |
923 | \r |
924 | br.ret.sptk.few b0\r |
925 | \r |
926 | .endp SpillContext\r |
927 | \r |
928 | \r |
929 | /////////////////////////////////////////////\r |
930 | //\r |
931 | // Name:\r |
932 | // FillContext\r |
933 | //\r |
934 | // Description:\r |
935 | // Restores register context from context record.\r |
936 | //\r |
937 | // Arguments:\r |
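938 | // in0 = address of the last element of the 512 byte aligned context record\r |
939 | // in1 = modified B0 (output: loaded from the context record)\r |
940 | // in2 = modified ar.bsp (output: loaded from the context record)\r |
941 | // in3 = modified ar.bspstore (output: loaded from the context record)\r |
942 | // in4 = modified ar.rnat (output: loaded from the context record)\r |
943 | // in5 = modified ar.pfs (output: loaded from the context record)\r |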
944 | //\r |
945 | // Notes:\r |
946 | // loc0 - scratch\r |
947 | // loc1 - scratch\r |
948 | // loc2 - temporary application unat storage\r |
949 | // loc3 - temporary exception handler unat storage\r |
950 | \r |
951 | .proc FillContext\r |
952 | FillContext:\r |
953 | alloc loc0=6,4,0,0;; // alloc 6 inputs, 4 locals, 0 outs (loc0 is reused as scratch below)\r |
954 | ld8.fill loc3=[in0],-8;; // int_nat (nat bits for R1-31); in0 walks downward through the record from here\r |
955 | movl loc1=7;; // ibr7\r |
956 | ld8.fill loc0=[in0],-8;;\r |
957 | mov ibr[loc1]=loc0;;\r |
958 | movl loc1=6;; // ibr6\r |
959 | ld8.fill loc0=[in0],-8;;\r |
960 | mov ibr[loc1]=loc0;;\r |
961 | movl loc1=5;; // ibr5\r |
962 | ld8.fill loc0=[in0],-8;;\r |
963 | mov ibr[loc1]=loc0;;\r |
964 | movl loc1=4;; // ibr4\r |
965 | ld8.fill loc0=[in0],-8;;\r |
966 | mov ibr[loc1]=loc0;;\r |
967 | movl loc1=3;; // ibr3\r |
968 | ld8.fill loc0=[in0],-8;;\r |
969 | mov ibr[loc1]=loc0;;\r |
970 | movl loc1=2;; // ibr2\r |
971 | ld8.fill loc0=[in0],-8;;\r |
972 | mov ibr[loc1]=loc0;;\r |
973 | movl loc1=1;; // ibr1\r |
974 | ld8.fill loc0=[in0],-8;;\r |
975 | mov ibr[loc1]=loc0;;\r |
976 | ld8.fill loc0=[in0],-8;; // ibr0\r |
977 | mov ibr[r0]=loc0;;\r |
978 | movl loc1=7;; // dbr7\r |
979 | ld8.fill loc0=[in0],-8;;\r |
980 | mov dbr[loc1]=loc0;;\r |
981 | movl loc1=6;; // dbr6\r |
982 | ld8.fill loc0=[in0],-8;;\r |
983 | mov dbr[loc1]=loc0;;\r |
984 | movl loc1=5;; // dbr5\r |
985 | ld8.fill loc0=[in0],-8;;\r |
986 | mov dbr[loc1]=loc0;;\r |
987 | movl loc1=4;; // dbr4\r |
988 | ld8.fill loc0=[in0],-8;;\r |
989 | mov dbr[loc1]=loc0;;\r |
990 | movl loc1=3;; // dbr3\r |
991 | ld8.fill loc0=[in0],-8;;\r |
992 | mov dbr[loc1]=loc0;;\r |
993 | movl loc1=2;; // dbr2\r |
994 | ld8.fill loc0=[in0],-8;;\r |
995 | mov dbr[loc1]=loc0;;\r |
996 | movl loc1=1;; // dbr1\r |
997 | ld8.fill loc0=[in0],-8;;\r |
998 | mov dbr[loc1]=loc0;;\r |
999 | ld8.fill loc0=[in0],-8;; // dbr0\r |
1000 | mov dbr[r0]=loc0;;\r |
1001 | ld8.fill loc0=[in0],-8;; // iha\r |
1002 | mov cr.iha=loc0;;\r |
1003 | ld8.fill loc0=[in0],-8;; // iim\r |
1004 | mov cr.iim=loc0;;\r |
1005 | ld8.fill loc0=[in0],-8;; // ifs\r |
1006 | mov cr.ifs=loc0;;\r |
1007 | ld8.fill loc0=[in0],-8;; // iipa\r |
1008 | mov cr.iipa=loc0;;\r |
1009 | ld8.fill loc0=[in0],-8;; // itir\r |
1010 | mov cr.itir=loc0;;\r |
1011 | ld8.fill loc0=[in0],-8;; // ifa\r |
1012 | mov cr.ifa=loc0;;\r |
1013 | ld8.fill loc0=[in0],-8;; // iip\r |
1014 | mov cr.iip=loc0;;\r |
1015 | ld8.fill loc0=[in0],-8;; // isr\r |
1016 | mov cr.isr=loc0;;\r |
1017 | ld8.fill loc0=[in0],-8;; // ipsr\r |
1018 | mov cr.ipsr=loc0;;\r |
1019 | ld8.fill loc0=[in0],-8;; // pta\r |
1020 | mov cr.pta=loc0;;\r |
1021 | ld8.fill loc0=[in0],-8;; // iva\r |
1022 | mov cr.iva=loc0;;\r |
1023 | ld8.fill loc0=[in0],-8;; // itm\r |
1024 | mov cr.itm=loc0;;\r |
1025 | ld8.fill loc0=[in0],-8;; // dcr\r |
1026 | mov cr.dcr=loc0;;\r |
1027 | ld8.fill loc0=[in0],-8;; // ec\r |
1028 | mov ar.ec=loc0;;\r |
1029 | ld8.fill loc0=[in0],-8;; // lc\r |
1030 | mov ar.lc=loc0;;\r |
1031 | ld8.fill in5=[in0],-8;; // ar.pfs - only loaded into in5; ar.pfs itself is not written in this routine\r |
1032 | ld8.fill loc0=[in0],-8;; // ar.fpsr\r |
1033 | mov ar.fpsr=loc0;;\r |
1034 | ld8.fill loc2=[in0],-8;; // ar.unat - held in loc2, restored after the general register fills\r |
1035 | ld8.fill loc0=[in0],-8;; // ar.ccv\r |
1036 | mov ar.ccv=loc0;;\r |
1037 | ld8.fill loc0=[in0],-8;; // ar.fdr\r |
1038 | mov ar.fdr=loc0;;\r |
1039 | ld8.fill loc0=[in0],-8;; // ar.fir\r |
1040 | mov ar.fir=loc0;;\r |
1041 | ld8.fill loc0=[in0],-8;; // ar.fsr\r |
1042 | mov ar.fsr=loc0;;\r |
1043 | ld8.fill loc0=[in0],-8;; // ar.cflg\r |
1044 | mov ar.cflg=loc0;;\r |
1045 | ld8.fill loc0=[in0],-8;; // ar.ssd\r |
1046 | mov ar.ssd=loc0;;\r |
1047 | ld8.fill loc0=[in0],-8;; // ar.csd\r |
1048 | mov ar.csd=loc0;;\r |
1049 | ld8.fill loc0=[in0],-8;; // ar.eflag\r |
1050 | mov ar.eflag=loc0;;\r |
1051 | ld8.fill loc0=[in0],-8;; // ar.fcr\r |
1052 | mov ar.fcr=loc0;;\r |
1053 | ld8.fill in4=[in0],-8;; // ar.rnat - only loaded into in4; ar.rnat itself is not written in this routine\r |
1054 | ld8.fill in3=[in0],-8;; // bspstore - only loaded into in3; ar.bspstore is not written in this routine\r |
1055 | ld8.fill in2=[in0],-8;; // bsp - only loaded into in2\r |
1056 | ld8.fill loc0=[in0],-8;; // ar.rsc\r |
1057 | mov ar.rsc=loc0;;\r |
1058 | ld8.fill loc0=[in0],-8;; // B7 - B1 are restored here; B0 is only loaded into in1 below\r |
1059 | mov b7=loc0;;\r |
1060 | ld8.fill loc0=[in0],-8;;\r |
1061 | mov b6=loc0;;\r |
1062 | ld8.fill loc0=[in0],-8;;\r |
1063 | mov b5=loc0;;\r |
1064 | ld8.fill loc0=[in0],-8;;\r |
1065 | mov b4=loc0;;\r |
1066 | ld8.fill loc0=[in0],-8;;\r |
1067 | mov b3=loc0;;\r |
1068 | ld8.fill loc0=[in0],-8;;\r |
1069 | mov b2=loc0;;\r |
1070 | ld8.fill loc0=[in0],-8;;\r |
1071 | mov b1=loc0;;\r |
1072 | ld8.fill in1=[in0],-8;; // b0 is temporarily stored in in1 (the live b0 is still needed for br.ret)\r |
1073 | ld8.fill loc0=[in0],-16;; // predicates; step back 16 bytes because the FP slots that follow are 16 bytes each\r |
1074 | mov pr=loc0;;\r |
1075 | ldf.fill f31=[in0],-16;;\r |
1076 | ldf.fill f30=[in0],-16;;\r |
1077 | ldf.fill f29=[in0],-16;;\r |
1078 | ldf.fill f28=[in0],-16;;\r |
1079 | ldf.fill f27=[in0],-16;;\r |
1080 | ldf.fill f26=[in0],-16;;\r |
1081 | ldf.fill f25=[in0],-16;;\r |
1082 | ldf.fill f24=[in0],-16;;\r |
1083 | ldf.fill f23=[in0],-16;;\r |
1084 | ldf.fill f22=[in0],-16;;\r |
1085 | ldf.fill f21=[in0],-16;;\r |
1086 | ldf.fill f20=[in0],-16;;\r |
1087 | ldf.fill f19=[in0],-16;;\r |
1088 | ldf.fill f18=[in0],-16;;\r |
1089 | ldf.fill f17=[in0],-16;;\r |
1090 | ldf.fill f16=[in0],-16;;\r |
1091 | ldf.fill f15=[in0],-16;;\r |
1092 | ldf.fill f14=[in0],-16;;\r |
1093 | ldf.fill f13=[in0],-16;;\r |
1094 | ldf.fill f12=[in0],-16;;\r |
1095 | ldf.fill f11=[in0],-16;;\r |
1096 | ldf.fill f10=[in0],-16;;\r |
1097 | ldf.fill f9=[in0],-16;;\r |
1098 | ldf.fill f8=[in0],-16;;\r |
1099 | ldf.fill f7=[in0],-16;;\r |
1100 | ldf.fill f6=[in0],-16;;\r |
1101 | ldf.fill f5=[in0],-16;;\r |
1102 | ldf.fill f4=[in0],-16;;\r |
1103 | ldf.fill f3=[in0],-16;;\r |
1104 | ldf.fill f2=[in0],-8;;\r |
1105 | mov ar.unat=loc3;; // restore unat (int_nat) before fill of general registers so ld8.fill picks up the saved NaT bits\r |
1106 | ld8.fill r31=[in0],-8;;\r |
1107 | ld8.fill r30=[in0],-8;;\r |
1108 | ld8.fill r29=[in0],-8;;\r |
1109 | ld8.fill r28=[in0],-8;;\r |
1110 | ld8.fill r27=[in0],-8;;\r |
1111 | ld8.fill r26=[in0],-8;;\r |
1112 | ld8.fill r25=[in0],-8;;\r |
1113 | ld8.fill r24=[in0],-8;;\r |
1114 | ld8.fill r23=[in0],-8;;\r |
1115 | ld8.fill r22=[in0],-8;;\r |
1116 | ld8.fill r21=[in0],-8;;\r |
1117 | ld8.fill r20=[in0],-8;;\r |
1118 | ld8.fill r19=[in0],-8;;\r |
1119 | ld8.fill r18=[in0],-8;;\r |
1120 | ld8.fill r17=[in0],-8;;\r |
1121 | ld8.fill r16=[in0],-8;;\r |
1122 | ld8.fill r15=[in0],-8;;\r |
1123 | ld8.fill r14=[in0],-8;;\r |
1124 | ld8.fill r13=[in0],-8;;\r |
1125 | ld8.fill r12=[in0],-8;;\r |
1126 | ld8.fill r11=[in0],-8;;\r |
1127 | ld8.fill r10=[in0],-8;;\r |
1128 | ld8.fill r9=[in0],-8;;\r |
1129 | ld8.fill r8=[in0],-8;;\r |
1130 | ld8.fill r7=[in0],-8;;\r |
1131 | ld8.fill r6=[in0],-8;;\r |
1132 | ld8.fill r5=[in0],-8;;\r |
1133 | ld8.fill r4=[in0],-8;;\r |
1134 | ld8.fill r3=[in0],-8;;\r |
1135 | ld8.fill r2=[in0],-8;;\r |
1136 | ld8.fill r1=[in0],-8;;\r |
1137 | mov ar.unat=loc2;; // restore application context unat (loaded into loc2 above)\r |
1138 | \r |
1139 | br.ret.sptk.many b0\r |
1140 | \r |
1141 | .endp FillContext\r |
1142 | \r |
1143 | \r |
1144 | /////////////////////////////////////////////\r |
1145 | //\r |
1146 | // Name:\r |
1147 | // HookHandler\r |
1148 | //\r |
1149 | // Description:\r |
1150 | // Common branch target from hooked IVT entries. Runs in interrupt context.\r |
1151 | // Responsible for saving and restoring context and calling common C\r |
1152 | // handler. Banked registers running on bank 0 at entry.\r |
1153 | //\r |
1154 | // Arguments:\r |
1155 | // All arguments are passed in banked registers:\r |
1156 | // B0_REG = Original B0\r |
1157 | // SCRATCH_REG1 = IVT entry index\r |
1158 | //\r |
1159 | // Returns:\r |
1160 | // Returns via rfi\r |
1161 | //\r |
1162 | // Notes:\r |
1163 | // loc0 - scratch\r |
1164 | // loc1 - scratch\r |
1165 | // loc2 - vector number / mask\r |
1166 | // loc3 - 512 byte aligned context record address\r |
1167 | // loc4 - temporary storage of last address in context record\r |
1168 | \r |
1169 | HookHandler:\r |
1170 | flushrs;; // Synch RSE with backing store\r |
1171 | mov SCRATCH_REG2=ar.bsp // save interrupted context bsp\r |
1172 | mov SCRATCH_REG3=ar.bspstore // save interrupted context bspstore\r |
1173 | mov SCRATCH_REG4=ar.rnat // save interrupted context rnat\r |
1174 | mov SCRATCH_REG6=cr.ifs;; // save IFS in case we need to chain...\r |
1175 | cover;; // creates new frame, moves old\r |
1176 | // CFM to IFS.\r |
1177 | alloc SCRATCH_REG5=0,5,6,0 // alloc 0 inputs, 5 locals, 6 outs; SCRATCH_REG5 receives ar.pfs\r |
1178 | ;;\r |
1179 | // copy banked registers to the out registers and loc2 before switching banks\r |
1180 | mov out1=B0_REG // out1 = Original B0\r |
1181 | mov out2=SCRATCH_REG2 // out2 = original ar.bsp\r |
1182 | mov out3=SCRATCH_REG3 // out3 = original ar.bspstore\r |
1183 | mov out4=SCRATCH_REG4 // out4 = original ar.rnat\r |
1184 | mov out5=SCRATCH_REG5 // out5 = original ar.pfs\r |
1185 | mov loc2=SCRATCH_REG1;; // loc2 = vector number + chain flag\r |
1186 | bsw.1;; // switch banked registers to bank 1\r |
1187 | srlz.d // explicit serialize required\r |
1188 | // now fill in context record structure\r |
1189 | movl loc3=IpfContextBuf // Ensure context record is aligned\r |
1190 | add loc0=-0x200,r0;; // mask the lower 9 bits (align on 512 byte boundary)\r |
1191 | and loc3=loc3,loc0;;\r |
1192 | add loc3=0x200,loc3;; // move to next 512 byte boundary\r |
1193 | // loc3 now contains the 512 byte aligned context record\r |
1194 | // spill register context into context record\r |
1195 | mov out0=loc3;; // Context record base in out0\r |
1196 | // original B0 in out1 already\r |
1197 | // original ar.bsp in out2 already\r |
1198 | // original ar.bspstore in out3 already (likewise ar.rnat in out4 and ar.pfs in out5)\r |
1199 | br.call.sptk.few b0=SpillContext;; // spill context\r |
1200 | mov loc4=out0 // save modified address (out0 was advanced by SpillContext to the last element)\r |
1201 | \r |
1202 | // At this point, the context has been saved to the context record and we're\r |
1203 | // ready to call the C part of the handler...\r |
1204 | \r |
1205 | movl loc0=CommonHandler;; // obtain address of plabel\r |
1206 | ld8 loc1=[loc0];; // get entry point of CommonHandler\r |
1207 | mov b6=loc1;; // put it in a branch register\r |
1208 | adds loc1= 8, loc0;; // index to GP in plabel\r |
1209 | ld8 r1=[loc1];; // set up gp for C call\r |
1210 | mov loc1=0xfffff;; // mask off so only vector bits are present\r |
1211 | and out0=loc2,loc1;; // pass vector number (exception type)\r |
1212 | mov out1=loc3;; // pass context record address\r |
1213 | br.call.sptk.few b0=b6;; // call C handler\r |
1214 | \r |
1215 | // We've returned from the C call, so restore the context and either rfi\r |
1216 | // back to interrupted thread, or chain into the SAL if this was an external interrupt\r |
1217 | mov out0=loc4;; // pass address of last element in context record\r |
1218 | br.call.sptk.few b0=FillContext;; // Fill context\r |
1219 | mov b0=out1 // fill in b0 (FillContext loaded it into its in1, i.e. our out1)\r |
1220 | mov ar.rnat=out4\r |
1221 | mov ar.pfs=out5\r |
1222 | \r |
1223 | // Loadrs is necessary because the debugger may have changed some values in\r |
1224 | // the backing store. The processor, however may not be aware that the\r |
1225 | // stacked registers need to be reloaded from the backing store. Therefore,\r |
1226 | // we explicitly cause the RSE to refresh the stacked register's contents\r |
7b414b4e |
1227 | // from the backing store.\r |
c1f23d63 |
1228 | mov loc0=ar.rsc // get RSC value\r |
1229 | mov loc1=ar.rsc // save it so we can restore it\r |
1230 | movl loc3=0xffffffffc000ffff;; // create mask for clearing RSC.loadrs (loc3 is reused; the context address is no longer needed)\r |
1231 | and loc0=loc0,loc3;; // create value for RSC with RSC.loadrs==0\r |
1232 | mov ar.rsc=loc0;; // modify RSC\r |
1233 | loadrs;; // invalidate register stack\r |
1234 | mov ar.rsc=loc1;; // restore original RSC\r |
1235 | \r |
1236 | bsw.0;; // switch banked registers back to bank 0\r |
1237 | srlz.d;; // explicit serialize required\r |
1238 | mov PR_REG=pr // save predicates - to be restored after chaining decision\r |
1239 | mov B0_REG=b0 // save b0 - required by chain code\r |
1240 | mov loc2=EXCPT_EXTERNAL_INTERRUPT;;\r |
1241 | cmp.eq p7,p0=SCRATCH_REG1,loc2;; // check to see if this is the external interrupt (the timer tick)\r |
1242 | (p7) br.cond.dpnt.few DO_CHAIN;;\r |
1243 | \r |
1244 | NO_CHAIN:\r |
1245 | mov pr=PR_REG;;\r |
1246 | rfi;; // we're outa here.\r |
1247 | \r |
1248 | DO_CHAIN:\r |
1249 | mov pr=PR_REG\r |
1250 | mov SCRATCH_REG1=cr.iva\r |
1251 | mov SCRATCH_REG2=PATCH_RETURN_OFFSET;;\r |
1252 | add SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;;\r |
1253 | mov b0=SCRATCH_REG1;;\r |
1254 | br.cond.sptk.few b0;;\r |
1255 | \r |
1256 | EndHookHandler:\r |
1257 | \r |
1258 | \r |
1259 | /////////////////////////////////////////////\r |
1260 | //\r |
1261 | // Name:\r |
1262 | // HookStub\r |
1263 | //\r |
1264 | // Description:\r |
1265 | // HookStub will be copied from its loaded location into the IVT when\r |
1266 | // an IVT entry is hooked. The IVT entry does an indirect jump via B0 to\r |
1267 | // HookHandler, which in turn calls into the default C handler, which calls\r |
1268 | // the user-installed C handler. The calls return and HookHandler executes\r |
1269 | // an rfi.\r |
1270 | //\r |
1271 | // Notes:\r |
1272 | // Saves B0 to B0_REG\r |
1273 | // Saves IVT index to SCRATCH_REG1 (immediate value is fixed up when code is copied\r |
1274 | // to the IVT entry).\r |
1275 | \r |
1276 | .globl HookStub\r |
1277 | .proc HookStub\r |
1278 | HookStub:\r |
1279 | \r |
1280 | mov B0_REG=b0\r |
1281 | movl SCRATCH_REG1=HookHandler;;\r |
1282 | mov b0=SCRATCH_REG1;;\r |
1283 | mov SCRATCH_REG1=0;;// placeholder immediate; it is fixed up during install of the handler to be the vector number\r |
1284 | br.cond.sptk.few b0\r |
1285 | \r |
1286 | .endp HookStub\r |
1287 | \r |
1288 | \r |
1289 | /////////////////////////////////////////////\r |
1290 | // The following code is moved into IVT entry 14 (offset 0x3400) which is reserved\r |
1291 | // in the Itanium architecture. The patch code is located at the end of the\r |
1292 | // IVT entry.\r |
1293 | \r |
1294 | PatchCode:\r |
1295 | mov SCRATCH_REG0=psr\r |
1296 | mov SCRATCH_REG6=cr.ipsr\r |
1297 | mov PR_REG=pr\r |
1298 | mov B0_REG=b0;;\r |
1299 | \r |
1300 | // turn off any virtual translations\r |
1301 | movl SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;\r |
1302 | and SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;\r |
1303 | mov psr.l = SCRATCH_REG1;;\r |
7b414b4e |
1304 | srlz.d\r |
c1f23d63 |
1305 | tbit.z p14, p15 = SCRATCH_REG6, PSR_IS;; // Check to see if we were\r |
1306 | // interrupted from IA32\r |
1307 | // context. If so, bail out\r |
1308 | // and chain to SAL immediately\r |
1309 | (p15) br.cond.sptk.few Stub_IVT_Passthru;;\r |
1310 | // we only want to take 1 out of 32 external interrupts to minimize the\r |
1311 | // impact to system performance. Check our interrupt count and bail\r |
1312 | // out if we're not up to 32\r |
1313 | movl SCRATCH_REG1=ExternalInterruptCount;;\r |
1314 | ld8 SCRATCH_REG2=[SCRATCH_REG1];; // ExternalInterruptCount\r |
1315 | tbit.z p14, p15 = SCRATCH_REG2, 5;; // bit 5 set (count has reached 32)? p14 = no, p15 = yes\r |
1316 | (p14) add SCRATCH_REG2=1, SCRATCH_REG2;; // No? Then increment\r |
1317 | // ExternalInterruptCount\r |
1318 | // and Chain to SAL\r |
1319 | // immediately\r |
1320 | (p14) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r |
1321 | (p14) br.cond.sptk.few Stub_IVT_Passthru;;\r |
1322 | (p15) mov SCRATCH_REG2=0;; // Yes? Then reset\r |
1323 | // ExternalInterruptCount\r |
1324 | // and branch to\r |
1325 | // HookHandler\r |
1326 | (p15) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r |
1327 | mov pr=PR_REG\r |
1328 | movl SCRATCH_REG1=HookHandler;; // SCRATCH_REG1 = entrypoint of HookHandler\r |
1329 | mov b0=SCRATCH_REG1;; // b0 = entrypoint of HookHandler\r |
1330 | mov SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;;\r |
1331 | br.sptk.few b0;; // branch to HookHandler\r |
1332 | \r |
1333 | PatchCodeRet:\r |
1334 | // fake-up an rfi to get RSE back to being coherent and ensure psr has\r |
1335 | // original contents when interrupt occurred, then exit to SAL\r |
1336 | // at this point:\r |
1337 | // cr.ifs has been modified by previous "cover"\r |
1338 | // SCRATCH_REG6 has original cr.ifs\r |
1339 | \r |
1340 | mov SCRATCH_REG5=cr.ipsr\r |
1341 | mov SCRATCH_REG4=cr.iip;;\r |
1342 | mov cr.ipsr=SCRATCH_REG0\r |
1343 | mov SCRATCH_REG1=ip;;\r |
1344 | add SCRATCH_REG1=0x30, SCRATCH_REG1;;\r |
1345 | mov cr.iip=SCRATCH_REG1;;\r |
1346 | rfi;; // rfi to the address placed in cr.iip above (ip + 0x30; presumably Stub_RfiTarget - depends on bundle layout, verify)\r |
1347 | \r |
1348 | Stub_RfiTarget:\r |
1349 | mov cr.ifs=SCRATCH_REG6\r |
1350 | mov cr.ipsr=SCRATCH_REG5\r |
1351 | mov cr.iip=SCRATCH_REG4;;\r |
1352 | \r |
1353 | Stub_IVT_Passthru:\r |
1354 | mov pr=PR_REG // pr = saved predicate registers\r |
1355 | mov b0=B0_REG;; // b0 = saved b0\r |
1356 | EndPatchCode:\r |
1357 | \r |
1358 | \r |
1359 | /////////////////////////////////////////////\r |
1360 | // The following bundle is moved into IVT entry 14 (offset 0x3400) which is reserved\r |
1361 | // in the Itanium architecture. This bundle will be the last bundle and will\r |
1362 | // be located at offset 0x37F0 in the IVT. Its only real instruction is a branch whose displacement is -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10); both constants are defined outside this section.\r |
1363 | \r |
1364 | FailsafeBranch:\r |
1365 | {\r |
1366 | .mib\r |
1367 | nop.m 0\r |
1368 | nop.i 0\r |
1369 | br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10)\r |
1370 | }\r |
1371 | \r |
1372 | \r |
1373 | /////////////////////////////////////////////\r |
1374 | // The following bundle is moved into IVT entry 13 (offset 0x3000) which is the\r |
1375 | // external interrupt. It branches to the patch code through the PATCH_BRANCH target, which is defined outside this section.\r |
1376 | \r |
1377 | PatchCodeNewBun0:\r |
1378 | {\r |
1379 | .mib\r |
1380 | nop.m 0\r |
1381 | nop.i 0\r |
1382 | br.cond.sptk.few PATCH_BRANCH\r |
1383 | }\r |