c1f23d63 |
1 | //++\r |
2 | // Copyright (c) 2006, Intel Corporation \r |
3 | // All rights reserved. This program and the accompanying materials \r |
4 | // are licensed and made available under the terms and conditions of the BSD License \r |
5 | // which accompanies this distribution. The full text of the license may be found at \r |
6 | // http://opensource.org/licenses/bsd-license.php \r |
7 | // \r |
8 | // THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, \r |
9 | // WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. \r |
10 | // \r |
11 | // Module Name:\r |
12 | //\r |
13 | // AsmFuncs.s\r |
14 | //\r |
15 | // Abstract:\r |
16 | //\r |
17 | // Low level IPF routines used by the debug support driver\r |
18 | //\r |
19 | // Revision History:\r |
20 | //\r |
21 | //--\r |
22 | \r |
23 | \r |
24 | #include "common.i"\r |
25 | #include "Ds64Macros.i"\r |
26 | \r |
27 | .globl PatchSaveBuffer\r |
28 | .globl IpfContextBuf\r |
29 | .globl CommonHandler\r |
30 | .globl ExternalInterruptCount\r |
31 | \r |
32 | \r |
33 | /////////////////////////////////////////////\r |
34 | //\r |
35 | // Name:\r |
36 | // InstructionCacheFlush\r |
37 | //\r |
38 | // Description:\r |
39 | // Flushes instruction cache for specified number of bytes\r |
40 | //\r |
.globl  InstructionCacheFlush
.proc   InstructionCacheFlush
.align 32
//
// InstructionCacheFlush (r32 = start address, r33 = number of bytes)
//
// Issues fc on every 32-byte step of the range, then sync.i / srlz.i.
// The byte count is treated as an unsigned 32-bit value and is raised to a
// minimum of 32.  The start address is rounded DOWN to a 32-byte boundary
// before the loop so that a range which begins in the middle of a 32-byte
// block still has its final block flushed.
//
// Scratch registers used: r3, r29, r31, p6, p14, p15.
//
InstructionCacheFlush::
        alloc   r3=2, 0, 0, 0                   // two inputs, no locals/outputs
        cmp4.leu p0, p6 = 32, r33;;             // p6 = (low 32 bits of count) < 32
 (p6)   mov     r33=32;;                        // flush at least one 32-byte step
        zxt4    r29=r33;;                       // r29 = count, zero-extended from 32 bits
        adds    r29=31, r29;;
        and     r29=-32, r29;;                  // r29 = count rounded up to a multiple of 32
        add     r31=r29, r32;;                  // r31 = end of range (exclusive), from the caller's start
        and     r32=-32, r32;;                  // r32 = start rounded down to a 32-byte boundary

LoopBack:
        fc      r32                             // flush the block containing r32
        adds    r32=32, r32;;                   // advance to the next block
        cmp.ltu p14, p15 = r32, r31;;           // more blocks below the end address?
 (p14)  br.cond.dptk.few LoopBack;;

        sync.i;;                                // make the flushes visible...
        srlz.i;;                                // ...and observed by this processor's fetch
        br.ret.sptk.few b0;;
.endp   InstructionCacheFlush
93 | \r |
94 | \r |
95 | /////////////////////////////////////////////\r |
96 | //\r |
97 | // Name:\r |
98 | // ChainHandler\r |
99 | //\r |
100 | // Description:\r |
101 | // Chains an interrupt handler\r |
102 | //\r |
103 | // The purpose of this function is to enable chaining of the external interrupt.\r |
104 | // Since there's no clean SAL abstraction for doing this, we must do it\r |
105 | // surreptitiously.\r |
106 | //\r |
107 | // The reserved IVT entry at offset 0x3400 is coopted for use by this handler.\r |
108 | // According to Itanium architecture, it is reserved. Strictly speaking, this is\r |
109 | // not safe, as we're cheating and violating the Itanium architecture. However,\r |
110 | // as long as we're the only ones cheating, we should be OK. Without hooks in\r |
111 | // the SAL to enable IVT management, there aren't many good options.\r |
112 | //\r |
113 | // The strategy is to replace the first bundle of the external interrupt handler\r |
114 | // with our own that will branch into a piece of code we've supplied and located\r |
115 | // in the reserved IVT entry. Only the first bundle of the external interrupt\r |
116 | // IVT entry is modified.\r |
117 | //\r |
// The original bundle is moved and relocated to space
// allocated within the reserved IVT entry. The bundle that follows it
// is generated as a hard-coded branch back to the second bundle of the
// external interrupt IVT entry, just in case the first bundle had no branch.
122 | //\r |
123 | // Our new code will execute our handler, and then fall through to the\r |
124 | // original bundle after restoring all context appropriately.\r |
125 | //\r |
126 | // The following is a representation of what the IVT memory map looks like with\r |
127 | // our chained handler installed:\r |
128 | //\r |
129 | //\r |
//                           +------------------------+
//   This IVT entry is       |    Failsafe bundle     |
//   reserved by the         +------------------------+
//   Itanium architecture    |   Original bundle 0    |
//   and is used for         +------------------------+
//   locating our            |                        |
//   handler and the         |                        |
//   original bundle         |     Patch code...      |
//   zero of the ext         |                        |
//   interrupt handler       |                        |
//                           +------------------------+
//   RSVD    (3400)          |        Unused          |
//                           +------------------------+
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           |                        |
//                           +------------------------+
//   EXT_INT (3000)          |       Bundle 0         |  Bundle zero - This one is
//                           +------------------------+  modified, all other bundles
//                                                       in the EXT_INT entry are
//                                                       untouched.
160 | //\r |
161 | //\r |
162 | // Arguments:\r |
163 | //\r |
164 | // Returns:\r |
165 | //\r |
166 | // Notes:\r |
167 | //\r |
168 | //\r |
.globl  ChainHandler
.proc   ChainHandler
//
// ChainHandler
//
// Installs the patch code in the reserved IVT entry and redirects bundle 0 of
// the external interrupt entry to it.
//
// Returns r8 = 1 on success.  If the relocated copy of the original bundle 0
// cannot be fixed up (RelocateBundle reports failure), r8 = 0 and bundle 0 of
// the live external interrupt entry is left unmodified; the reserved entry
// has already been overwritten at that point and is put back by
// UnchainHandler from PatchSaveBuffer.
//
// Register use:
//   loc2 - IVT base (cr.iva), kept in a stacked local so it survives calls
//   loc3 - status to be returned in r8
//   r2   - copy of loc2, reloaded before each group of addl instructions
//          (addl only accepts r0-r3 as its register operand)
//
ChainHandler:

        NESTED_SETUP( 0,2+3,3,0 )

        mov     loc3=1                          // loc3 = success until proven otherwise
        mov     loc2=cr.iva;;                   // loc2 = interrupt vector table base
//
// NOTE: There's a potential hazard here in that we're simply stealing a bunch of
// bundles (memory) from the IVT and assuming there's no catastrophic side effect.
//
// First, save IVT area we're taking over with the patch so we can restore it later
//
        mov     r2=loc2;;
        addl    out0=PATCH_ENTRY_OFFSET, r2     // out0 = source buffer
        movl    out1=PatchSaveBuffer            // out1 = destination buffer
        mov     out2=0x40;;                     // out2 = number of bundles to copy... save entire IVT entry
        br.call.sptk.few b0 = CopyBundles

        // Next, copy the patch code into the IVT
        mov     r2=loc2;;
        movl    out0=PatchCode                  // out0 = source buffer of patch code
        addl    out1=PATCH_OFFSET, r2           // out1 = destination buffer - in IVT
        mov     out2=NUM_PATCH_BUNDLES;;        // out2 = number of bundles to copy
        br.call.sptk.few b0 = CopyBundles

        // Copy original bundle 0 from the external interrupt handler to the
        // appropriate place in the reserved IVT interrupt slot
        mov     r2=loc2;;
        addl    out0=EXT_INT_ENTRY_OFFSET, r2   // out0 = source buffer
        addl    out1=RELOCATED_EXT_INT, r2      // out1 = destination buffer - in reserved IVT
        mov     out2=1;;                        // out2 = copy 1 bundle
        br.call.sptk.few b0 = CopyBundles

        // Now relocate it there because it very likely had a branch instruction
        // that must now be fixed up.
        mov     r2=loc2;;
        addl    out0=RELOCATED_EXT_INT, r2      // out0 = new runtime address of bundle - in reserved IVT
        addl    out1=EXT_INT_ENTRY_OFFSET, r2;; // out1 = IP address of previous location
        mov     out2=out0;;                     // out2 = IP address of new location
        br.call.sptk.few b0 = RelocateBundle
        cmp.eq  p14, p15 = 1, out0;;            // RelocateBundle hands its status back in out0
 (p15)  mov     loc3=0                          // relocation failed: report it...
 (p15)  br.sptk.few ChainHandlerDone            // ...and do not hook the live EXT_INT entry

        // Now copy the failsafe branch into the next bundle just in case
        // the original ext int bundle 0 did not contain a branch instruction
        mov     r2=loc2;;
        movl    out0=FailsafeBranch             // out0 = source buffer
        addl    out1=FAILSAFE_BRANCH_OFFSET, r2 // out1 = destination buffer - in reserved IVT
        mov     out2=1;;                        // out2 = copy 1 bundle
        br.call.sptk.few b0 = CopyBundles

        // Last, copy in our replacement for the external interrupt IVT entry bundle 0
        mov     r2=loc2;;
        movl    out0=PatchCodeNewBun0           // out0 = source buffer - our replacement bundle 0
        addl    out1=EXT_INT_ENTRY_OFFSET, r2   // out1 = destination buffer - bundle 0 of External interrupt entry
        mov     out2=1;;                        // out2 = copy 1 bundle
        br.call.sptk.few b0 = CopyBundles

ChainHandlerDone:
        mov     r8=loc3;;                       // r8 = 1 on success, 0 on relocation failure
        NESTED_RETURN

.endp   ChainHandler
226 | \r |
227 | \r |
228 | /////////////////////////////////////////////\r |
229 | //\r |
230 | // Name:\r |
231 | // UnchainHandler\r |
232 | //\r |
233 | // Description:\r |
234 | // Unchains an interrupt handler\r |
235 | //\r |
236 | // Arguments:\r |
237 | //\r |
238 | // Returns:\r |
239 | //\r |
240 | // Notes:\r |
241 | //\r |
242 | //\r |
.globl  UnchainHandler
.proc   UnchainHandler
//
// UnchainHandler
//
// Moves the original external interrupt bundle 0 back to its own IVT entry,
// re-fixes its branch for that location, and restores the reserved IVT entry
// from PatchSaveBuffer.
//
// Returns r8 = 1 on success, or 0 if RelocateBundle reported that the bundle
// could not be relocated back.  The reserved entry is restored in both cases.
//
// Register use:
//   loc2 - IVT base (cr.iva), kept in a stacked local so it survives calls
//   loc3 - status to be returned in r8
//   r2   - copy of loc2, reloaded before each group of addl instructions
//          (addl only accepts r0-r3 as its register operand)
//
UnchainHandler:

        NESTED_SETUP( 0,2+3,3,0 )

        mov     loc3=1                          // loc3 = success until proven otherwise
        mov     loc2=cr.iva;;                   // loc2 = interrupt vector address

        // First copy original Ext Int bundle 0 back to its proper home...
        mov     r2=loc2;;
        addl    out0=RELOCATED_EXT_INT, r2      // out0 = source - in reserved IVT
        addl    out1=EXT_INT_ENTRY_OFFSET, r2   // out1 = destination buffer - first bundle of Ext Int entry
        mov     out2=1;;                        // out2 = copy 1 bundle
        br.call.sptk.few b0 = CopyBundles

        // Now, relocate it again...
        mov     r2=loc2;;
        addl    out0=EXT_INT_ENTRY_OFFSET, r2   // out0 = new runtime address of bundle
        addl    out1=RELOCATED_EXT_INT, r2;;    // out1 = IP address of previous location
        mov     out2=out0;;                     // out2 = IP address of new location
        br.call.sptk.few b0 = RelocateBundle
        cmp.eq  p14, p15 = 1, out0;;            // RelocateBundle hands its status back in out0
 (p15)  mov     loc3=0                          // remember the failure, but still restore the patch area

        // Last, restore the patch area
        mov     r2=loc2;;
        movl    out0=PatchSaveBuffer            // out0 = source buffer
        addl    out1=PATCH_ENTRY_OFFSET, r2     // out1 = destination buffer
        mov     out2=0x40;;                     // out2 = number of bundles to copy... restore entire IVT entry
        br.call.sptk.few b0 = CopyBundles

UnchainHandlerDone:
        mov     r8=loc3;;                       // r8 = 1 on success, 0 on relocation failure
        NESTED_RETURN

.endp   UnchainHandler
275 | \r |
276 | \r |
277 | /////////////////////////////////////////////\r |
278 | //\r |
279 | // Name:\r |
280 | // CopyBundles\r |
281 | //\r |
282 | // Description:\r |
283 | // Copies instruction bundles - flushes icache as necessary\r |
284 | //\r |
285 | // Arguments:\r |
286 | // in0 - Bundle source\r |
287 | // in1 - Bundle destination\r |
288 | // in2 - Bundle count\r |
289 | //\r |
290 | // Returns:\r |
291 | //\r |
292 | // Notes:\r |
293 | // This procedure is a leaf routine\r |
294 | //\r |
.proc   CopyBundles
//
// CopyBundles (in0 = source, in1 = destination, in2 = bundle count)
//
// Copies in2 bundles as 8-byte words.  After every word stored, the
// destination is flushed (fc) and the flush is synchronized (sync.i, srlz.i).
// in0, in1 and in2 are consumed; loc2 holds the word in flight.
//
CopyBundles:

        NESTED_SETUP(3,2+1,0,0)

        shl     in2=in2, 1;;                    // in2 = number of 8-byte words (two per bundle)
        cmp.eq  p14, p15 = 0, in2;;             // nothing to copy?
 (p14)  br.sptk.few CopyBundlesDone;;

CopyBundlesLoop:

        ld8     loc2=[in0], 0x8;;               // loc2 = next source word; source pointer advances
        st8     [in1]=loc2;;                    // write it to the destination
        fc      in1;;                           // flush the word just written
        sync.i;;                                // ensure local and remote data/inst caches in sync
        srlz.i;;                                // ensure sync has been observed
        add     in1=0x8, in1;;                  // in1 = next destination word
        add     in2=-1, in2;;                   // one fewer word remaining
        cmp.eq  p14, p15 = 0, in2;;             // p15 = words still remaining
 (p15)  br.sptk.few CopyBundlesLoop;;

CopyBundlesDone:
        NESTED_RETURN

.endp   CopyBundles
321 | \r |
322 | \r |
323 | /////////////////////////////////////////////\r |
324 | //\r |
325 | // Name:\r |
326 | // RelocateBundle\r |
327 | //\r |
328 | // Description:\r |
329 | // Relocates an instruction bundle by updating any ip-relative branch instructions.\r |
330 | //\r |
331 | // Arguments:\r |
332 | // in0 - Runtime address of bundle\r |
333 | // in1 - IP address of previous location of bundle\r |
334 | // in2 - IP address of new location of bundle\r |
335 | //\r |
336 | // Returns:\r |
337 | // in0 - 1 if successful or 0 if unsuccessful\r |
338 | //\r |
339 | // Notes:\r |
340 | // This routine examines all slots in the given bundle that are destined for the\r |
341 | // branch execution unit. If any of these slots contain an IP-relative branch\r |
342 | // namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative\r |
343 | // address. Errors can occur if a branch cannot be reached.\r |
344 | //\r |
.proc   RelocateBundle
//
// RelocateBundle (in0 = bundle address, in1 = previous IP, in2 = new IP)
//
// Walks slots SLOT0..SLOT2 of the bundle.  Every slot that IsSlotBranch
// reports as a branch slot is passed through RelocateSlot and written back
// with SetSlot.  The walk stops at the first slot RelocateSlot rejects.
//
// On return in0 = 1 (all slots handled) or 0 (a slot could not be relocated).
//
// Locals:
//   loc2 - current slot index
//   loc3 - bundle template
//   loc4 - encoding of the current slot
//   loc5 - bundle address (in0 is reused for the result)
//
RelocateBundle:

        NESTED_SETUP(3,2+4,3,0)

        mov     loc5=in0                        // keep the bundle address
        mov     loc2=SLOT0;;                    // start with the first slot
        mov     in0=1;;                         // result defaults to success

RelocateBundleNextSlot:

        cmp.ge  p14, p15 = SLOT2, loc2;;        // p15 = slot index is past SLOT2
 (p15)  br.sptk.few RelocateBundleDone

        mov     out0=loc5;;                     // out0 = bundle address
        br.call.sptk.few b0 = GetTemplate
        mov     loc3=out0;;                     // loc3 = template

        mov     out0=loc5                       // out0 = bundle address
        mov     out1=loc2;;                     // out1 = slot index
        br.call.sptk.few b0 = GetSlot
        mov     loc4=out0;;                     // loc4 = slot encoding (out0 still holds it too)

        mov     out1=loc2                       // out1 = slot index
        mov     out2=loc3;;                     // out2 = template
        br.call.sptk.few b0 = IsSlotBranch
        cmp.eq  p14, p15 = 1, out0;;            // p15 = not a branch slot
 (p15)  br.sptk.few RelocateBundleAdvance

        mov     out0=loc4                       // out0 = slot encoding
        mov     out1=in1                        // out1 = previous IP
        mov     out2=in2;;                      // out2 = new IP
        br.call.sptk.few b0 = RelocateSlot
        cmp.eq  p14, p15 = 1, out1;;            // p15 = RelocateSlot reported failure
 (p15)  mov     in0=0                           // result = failure
 (p15)  br.sptk.few RelocateBundleDone

        mov     out2=out0;;                     // out2 = relocated encoding
        mov     out0=loc5                       // out0 = bundle address
        mov     out1=loc2;;                     // out1 = slot index
        br.call.sptk.few b0 = SetSlot

RelocateBundleAdvance:
        add     loc2=1,loc2;;                   // next slot
        br.sptk.few RelocateBundleNextSlot

RelocateBundleDone:
        NESTED_RETURN

.endp   RelocateBundle
392 | \r |
393 | \r |
394 | /////////////////////////////////////////////\r |
395 | //\r |
396 | // Name:\r |
397 | // RelocateSlot\r |
398 | //\r |
399 | // Description:\r |
// Relocates a single instruction slot by updating its ip-relative branch offset, if it has one.
401 | //\r |
402 | // Arguments:\r |
403 | // in0 - Instruction encoding (41-bits, right justified)\r |
404 | // in1 - IP address of previous location of bundle\r |
405 | // in2 - IP address of new location of bundle\r |
406 | //\r |
407 | // Returns:\r |
408 | // in0 - Instruction encoding (41-bits, right justified)\r |
409 | // in1 - 1 if successful otherwise 0\r |
410 | //\r |
411 | // Notes:\r |
412 | // This procedure is a leaf routine\r |
413 | //\r |
.proc   RelocateSlot
//
// RelocateSlot (in0 = 41-bit slot encoding, in1 = previous IP, in2 = new IP)
//
// If the slot's opcode field (bits 40-37) is 4, 5 or 7, the 21-bit signed
// bundle offset (sign in bit 36, low 20 bits in bits 32-13) is rewritten so
// that the instruction reaches the same target from the new IP.  Any other
// opcode is returned untouched.
//
// On return:
//   in0 - slot encoding (updated when a fix-up was applied)
//   in1 - 1 on success, 0 if the new offset is out of range
//
// The sign of the new offset is taken from bit 63 of the byte displacement,
// so a displacement of exactly zero is encoded with sign = 0.
//
// Locals:
//   loc2 - opcode, then old offset
//   loc3 - sign-extension mask, then branch target address
//   loc4 - new displacement (bytes, then bundles)
//   loc5 - absolute value of the new byte displacement
//   loc6 - range limit
//
RelocateSlot:
        NESTED_SETUP(3,2+5,0,0)
        extr.u  loc2=in0, 37, 4;;               // loc2 = instruction opcode
        cmp.eq  p14, p15 = 4, loc2;;            // IP-relative branch (B1) or
                                                // IP-relative counted branch (B2)
 (p15)  cmp.eq  p14, p15 = 5, loc2;;            // IP-relative call (B3)
 (p15)  cmp.eq  p14, p15 = 7, loc2;;            // IP-relative predict (B6)
 (p15)  mov     in1=1                           // instruction did not need to be reencoded
 (p15)  br.sptk.few RelocateSlotDone

        // Decode the old offset and compute the absolute branch target
        tbit.nz p14, p15 = in0, 36;;            // p14 = old offset is negative
        extr.u  loc2=in0, 13, 20;;              // loc2 = low 20 bits of the old offset
 (p14)  movl    loc3=0xfffffffffff00000;;       // extend sign
 (p14)  or      loc2=loc2, loc3;;
        shl     loc2=loc2,4;;                   // convert to byte offset instead of bundle offset
        add     loc3=loc2, in1;;                // loc3 = address of branch target

        // Compute the new displacement and range-check its magnitude
        sub     loc4=loc3,in2;;                 // loc4 = byte offset from new ip to branch target
        tbit.nz p14, p15 = loc4, 63;;           // p14 = new offset is negative (zero counts as positive)
 (p14)  sub     loc5=r0,loc4                    // loc5 = absolute value of offset
 (p15)  mov     loc5=loc4;;
        movl    loc6=0x0FFFFFF;;                // maximum offset in bytes for ip-rel branch
        cmp.gt  p14, p15 = loc5, loc6;;         // out of range for an ip-relative branch?
 (p14)  br.sptk.few RelocateSlotError

        // Encode the new offset
        tbit.nz p14, p15 = loc4, 63;;           // p14 = negative again (predicates were reused above)
 (p14)  dep     in0=1,in0,36,1                  // store sign bit in instruction
 (p15)  dep     in0=0,in0,36,1
        shr     loc4=loc4, 4;;                  // convert back to bundle offset
        dep     in0=loc4,in0,13,16;;            // put first 16 bits of new offset into instruction
        shr     loc4=loc4,16;;
        dep     in0=loc4,in0,13+16,4            // put last 4 bits of new offset into instruction
        mov     in1=1;;                         // in1 = success
        br.sptk.few RelocateSlotDone;;

RelocateSlotError:
        mov     in1=0;;                         // in1 = failure

RelocateSlotDone:
        NESTED_RETURN

.endp   RelocateSlot
456 | \r |
457 | \r |
458 | /////////////////////////////////////////////\r |
459 | //\r |
460 | // Name:\r |
461 | // IsSlotBranch\r |
462 | //\r |
463 | // Description:\r |
464 | // Determines if the given instruction is a branch instruction.\r |
465 | //\r |
466 | // Arguments:\r |
467 | // in0 - Instruction encoding (41-bits, right justified)\r |
468 | // in1 - Instruction slot number\r |
469 | // in2 - Bundle template\r |
470 | //\r |
471 | // Returns:\r |
472 | // in0 - 1 if branch or 0 if not branch\r |
473 | //\r |
474 | // Notes:\r |
475 | // This procedure is a leaf routine\r |
476 | //\r |
477 | // IsSlotBranch recognizes all branch instructions by looking at the provided template.\r |
478 | // The instruction encoding is only passed to this routine for future expansion.\r |
479 | //\r |
.proc   IsSlotBranch
//
// IsSlotBranch (in0 = slot encoding, in1 = slot number, in2 = template)
//
// Decides from the template and slot number alone whether the slot is a
// branch slot; the encoding in in0 is not examined and is overwritten with
// the result (1 = branch, 0 = not a branch).  in2 is modified.
//
// Bit 0 of the template is cleared first so that each odd template is
// tested as its even neighbour.
//
IsSlotBranch:

        NESTED_SETUP (3,2+0,0,0)

        and     in2=-2, in2                     // in2 = template with bit 0 cleared
        mov     in0=0;;                         // assume "not a branch"

        cmp.eq  p14, p15 = 0x16, in2;;          // BBB: every slot is a branch slot
 (p14)  br.sptk.few IsSlotBranchTrue
        cmp.eq  p14, p15 = SLOT0, in1;;         // no other template has a branch in slot 0
 (p14)  br.sptk.few IsSlotBranchDone

        cmp.eq  p14, p15 = 0x12, in2;;          // MBB: slots 1 and 2 are branch slots
 (p14)  br.sptk.few IsSlotBranchTrue
        cmp.eq  p14, p15 = SLOT1, in1;;         // no other template has a branch in slot 1
 (p14)  br.sptk.few IsSlotBranchDone

        // Only slot 2 is left: MIB, MMB and MFB end in a branch slot
        cmp.eq  p14, p15 = 0x10, in2;;          // MIB
 (p15)  cmp.eq  p14, p15 = 0x18, in2;;          // MMB
 (p15)  cmp.eq  p14, p15 = 0x1C, in2;;          // MFB
 (p15)  br.sptk.few IsSlotBranchDone            // none matched

IsSlotBranchTrue:
        mov     in0=1;;                         // in0 = branch

IsSlotBranchDone:
        NESTED_RETURN

.endp   IsSlotBranch
512 | \r |
513 | \r |
514 | /////////////////////////////////////////////\r |
515 | //\r |
516 | // Name:\r |
517 | // GetTemplate\r |
518 | //\r |
519 | // Description:\r |
520 | // Retrieves the instruction template for an instruction bundle\r |
521 | //\r |
522 | // Arguments:\r |
523 | // in0 - Runtime address of bundle\r |
524 | //\r |
525 | // Returns:\r |
526 | // in0 - Instruction template (5-bits, right-justified)\r |
527 | //\r |
528 | // Notes:\r |
529 | // This procedure is a leaf routine\r |
530 | //\r |
.proc   GetTemplate
//
// GetTemplate (in0 = bundle address)
//
// Loads the first 8 bytes of the bundle and returns them ANDed with
// MASK_0_4 in in0 (the template field, right-justified).
//
GetTemplate:

        NESTED_SETUP (1,2+2,0,0)

        movl    loc3=MASK_0_4                   // loc3 = template mask
        ld8     loc2=[in0];;                    // loc2 = first 8 bytes of the bundle
        and     in0=loc2, loc3;;                // in0 = template, right justified

        NESTED_RETURN

.endp   GetTemplate
545 | \r |
546 | \r |
547 | /////////////////////////////////////////////\r |
548 | //\r |
549 | // Name:\r |
550 | // GetSlot\r |
551 | //\r |
552 | // Description:\r |
553 | // Gets the instruction encoding for an instruction slot and bundle\r |
554 | //\r |
555 | // Arguments:\r |
556 | // in0 - Runtime address of bundle\r |
557 | // in1 - Instruction slot (either 0, 1, or 2)\r |
558 | //\r |
559 | // Returns:\r |
560 | // in0 - Instruction encoding (41-bits, right justified)\r |
561 | //\r |
562 | // Notes:\r |
563 | // This procedure is a leaf routine\r |
564 | //\r |
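// Bit positions below are within each 8-byte word; in0 is the bundle
// address as passed in by the caller.
//
// Slot0 - [in0] Bits 45-5
// Slot1 - [in0] Bits 63-46 (low 18 bits) and [in0 + 0x8] Bits 22-0 (high 23 bits)
// Slot2 - [in0 + 0x8] Bits 63-23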
568 | //\r |
.proc   GetSlot
//
// GetSlot (in0 = bundle address, in1 = slot number 0, 1 or 2)
//
// Returns the 41-bit encoding of the requested slot, right-justified, in in0.
// Any slot number other than 1 or 2 is treated as slot 0.
//
// Locals:
//   loc2 - first 8 bytes of the bundle  (template, slot 0, low 18 bits of slot 1)
//   loc3 - second 8 bytes of the bundle (high 23 bits of slot 1, slot 2)
//   loc4 - scratch for assembling slot 1
//
GetSlot:
        NESTED_SETUP (2,2+3,0,0)

        ld8     loc2=[in0], 0x8;;               // loc2 = first 8 bytes of branch bundle
        ld8     loc3=[in0];;                    // loc3 = second 8 bytes of branch bundle
        cmp.eq  p14, p15 = 2, in1;;             // check if slot 2 specified
 (p14)  br.cond.sptk.few GetSlot2;;             // get slot 2
        cmp.eq  p14, p15 = 1, in1;;             // check if slot 1 specified
 (p14)  br.cond.sptk.few GetSlot1;;             // get slot 1

GetSlot0:
        extr.u  in0=loc2, 5, 41                 // in0 = bits 45-5 of loc2: exactly the 41 bits of slot 0
        br.sptk.few GetSlotDone;;

GetSlot1:
        extr.u  in0=loc2, 46, 18                // in0 = bits 63-46 of loc2 right-justified
        extr.u  loc4=loc3, 0, 23;;              // loc4 = bits 22-0 of loc3 right-justified
        dep     in0=loc4, in0, 18, 15;;         // append the first 15 of those 23 bits...
        shr.u   loc4=loc4,15;;
        dep     in0=loc4, in0, 33, 8;;          // ...and the remaining 8: in0 = extracted slot 1
        br.sptk.few GetSlotDone;;

GetSlot2:
        extr.u  in0=loc3, 23, 41;;              // in0 = extracted slot 2

GetSlotDone:
        NESTED_RETURN

.endp   GetSlot
600 | \r |
601 | \r |
602 | /////////////////////////////////////////////\r |
603 | //\r |
604 | // Name:\r |
605 | // SetSlot\r |
606 | //\r |
607 | // Description:\r |
608 | // Sets the instruction encoding for an instruction slot and bundle\r |
609 | //\r |
610 | // Arguments:\r |
611 | // in0 - Runtime address of bundle\r |
612 | // in1 - Instruction slot (either 0, 1, or 2)\r |
613 | // in2 - Instruction encoding (41-bits, right justified)\r |
614 | //\r |
615 | // Returns:\r |
616 | //\r |
617 | // Notes:\r |
618 | // This procedure is a leaf routine\r |
619 | //\r |
.proc   SetSlot
//
// SetSlot (in0 = bundle address, in1 = slot number 0, 1 or 2,
//          in2 = 41-bit instruction encoding, right-justified)
//
// Replaces one slot of the bundle in memory, then flushes the bundle from
// the instruction cache (fc / sync.i / srlz.i) because the bundle is code.
// Any slot number other than 1 or 2 is treated as slot 0.
//
// The first ld8 post-increments in0, so for the rest of the routine in0
// addresses the SECOND 8 bytes of the bundle and the first 8 bytes live at
// in0 - 8.
//
// Locals:
//   loc2 - first 8 bytes of the bundle  (template, slot 0, low 18 bits of slot 1)
//   loc3 - second 8 bytes of the bundle (high 23 bits of slot 1, slot 2)
//   loc4 - scratch: positioned instruction bits / store address
//
SetSlot:
        NESTED_SETUP (3,2+3,0,0)

        ld8     loc2=[in0], 0x8;;               // loc2 = first 8 bytes of bundle; in0 -> second 8 bytes
        ld8     loc3=[in0];;                    // loc3 = second 8 bytes of bundle
        cmp.eq  p14, p15 = 2, in1;;             // check if slot 2 specified
 (p14)  br.cond.sptk.few SetSlot2;;             // set slot 2
        cmp.eq  p14, p15 = 1, in1;;             // check if slot 1 specified
 (p14)  br.cond.sptk.few SetSlot1;;             // set slot 1

SetSlot0:
        dep     loc2=0, loc2, 5, 41;;           // remove old instruction from slot 0
        dep.z   loc4=in2, 5, 41;;               // loc4 = low 41 bits of new instruction, in position
        or      loc2=loc2, loc4;;               // loc2 = updated first 8 bytes of bundle
        add     loc4=-0x8, in0;;                // loc4 = address of first 8 bytes (in0 was advanced by 8)
        st8     [loc4]=loc2                     // write back first 8 bytes
        br.sptk.few SetSlotDone;;

SetSlot1:
        dep     loc2=0, loc2, 46, 18            // remove old instruction from slot 1
        dep     loc3=0, loc3, 0, 23;;
        dep.z   loc4=in2, 46, 18;;              // loc4 = low 18 bits of new instruction, in position
        or      loc2=loc2, loc4;;               // loc2 = updated first 8 bytes of bundle
        add     loc4=-0x8, in0;;                // loc4 = address of first 8 bytes (in0 was advanced by 8)
        st8     [loc4]=loc2;;                   // write back first 8 bytes
        extr.u  loc4=in2, 18, 23;;              // loc4 = high 23 bits of new instruction
        or      loc3=loc3, loc4;;               // loc3 = updated second 8 bytes of bundle
        st8     [in0]=loc3;;                    // write back second 8 bytes
        br.sptk.few SetSlotDone;;

SetSlot2:
        dep     loc3=0, loc3, 23, 41;;          // remove old instruction from slot 2
        dep.z   loc4=in2, 23, 41;;              // loc4 = new instruction, in position
        or      loc3=loc3, loc4;;               // loc3 = updated second 8 bytes of bundle
        st8     [in0]=loc3;;                    // write back second 8 bytes

SetSlotDone:
        fc      in0;;                           // in0 lies inside the bundle just modified
        sync.i;;                                // ensure local and remote data/inst caches in sync
        srlz.i;;                                // ensure sync has been observed

        NESTED_RETURN
.endp   SetSlot
663 | \r |
664 | \r |
665 | /////////////////////////////////////////////\r |
666 | //\r |
667 | // Name:\r |
668 | // GetIva\r |
669 | //\r |
670 | // Description:\r |
671 | // C callable function to obtain the current value of IVA\r |
672 | //\r |
673 | // Returns:\r |
// Current value of IVA
675 | \r |
.globl  GetIva
.proc   GetIva
//
// GetIva: returns the interruption vector address register (cr.iva, i.e. cr2) in r8.
//
GetIva:
        mov     r8=cr.iva;;                     // r8 = IVA
        br.ret.sptk.many b0

.endp   GetIva
683 | \r |
684 | \r |
685 | /////////////////////////////////////////////\r |
686 | //\r |
687 | // Name:\r |
688 | // ProgramInterruptFlags\r |
689 | //\r |
690 | // Description:\r |
691 | // C callable function to enable/disable interrupts\r |
692 | //\r |
693 | // Returns:\r |
// Previous state of psr.i and psr.ic (psr masked with 0x6000)
695 | //\r |
.globl  ProgramInterruptFlags
.proc   ProgramInterruptFlags
//
// ProgramInterruptFlags (in0 = new flag bits)
//
// Replaces the PSR bits selected by mask 0x6000 with the corresponding bits
// of in0 and returns the previous value of those bits in r8.  All other
// bits of in0 are ignored.
//
// Locals:
//   loc0 - PSR image being edited
//   loc1 - the 0x6000 mask
//
ProgramInterruptFlags:
        alloc   loc0=1,2,0,0;;
        mov     loc1=0x6000                     // loc1 = mask of the bits this routine manages
        mov     loc0=psr;;                      // loc0 = current psr
        and     in0=in0, loc1                   // drop any extra bits from the caller's value
        and     r8=loc0, loc1                   // r8 = previous state of the managed bits
        andcm   loc0=loc0,loc1;;                // clear the managed bits in the psr image
        or      loc0=loc0,in0;;                 // merge in the requested bits
        mov     psr.l=loc0;;                    // write new psr
        srlz.d
        br.ret.sptk.many b0                     // return

.endp   ProgramInterruptFlags
711 | \r |
712 | \r |
713 | /////////////////////////////////////////////\r |
714 | //\r |
715 | // Name:\r |
716 | // SpillContext\r |
717 | //\r |
718 | // Description:\r |
719 | // Saves system context to context record.\r |
720 | //\r |
721 | // Arguments:\r |
722 | // in0 = 512 byte aligned context record address\r |
723 | // in1 = original B0\r |
724 | // in2 = original ar.bsp\r |
725 | // in3 = original ar.bspstore\r |
726 | // in4 = original ar.rnat\r |
727 | // in5 = original ar.pfs\r |
728 | //\r |
729 | // Notes:\r |
730 | // loc0 - scratch\r |
731 | // loc1 - scratch\r |
732 | // loc2 - temporary application unat storage\r |
733 | // loc3 - temporary exception handler unat storage\r |
734 | \r |
.proc   SpillContext
//
// SpillContext
//
// Writes the processor context to the record addressed by in0, advancing in0
// after each item.  Record order: r0-r31, f2-f31, predicates, b0-b7, ar.rsc,
// ar.bsp, ar.bspstore, ar.rnat, the IA-32 application registers, ar.ccv,
// ar.unat, ar.fpsr, ar.pfs, ar.lc, ar.ec, the control registers, dbr0-dbr7,
// ibr0-ibr7, and finally the UNAT value produced by spilling r0-r31.
//
// Values the caller captured before calling arrive as arguments:
//   in1 = b0, in2 = ar.bsp, in3 = ar.bspstore, in4 = ar.rnat, in5 = ar.pfs
//
// Locals:
//   loc0 - scratch (value being stored)
//   loc1 - scratch (debug/breakpoint register index)
//   loc2 - ar.unat as it was on entry
//   loc3 - ar.unat after r0-r31 have been spilled
//
SpillContext:
        alloc   loc0=6,4,0,0;;                  // 6 inputs, 4 locals, 0 outputs
        mov     loc2=ar.unat;;                  // remember the incoming unat (stored further down)
        mov     ar.unat=r0;;                    // start the spills from UNAT = 0

        //
        // General registers r0 - r31
        //
        st8.spill [in0]=r0,8;;
        st8.spill [in0]=r1,8;;
        st8.spill [in0]=r2,8;;
        st8.spill [in0]=r3,8;;
        st8.spill [in0]=r4,8;;
        st8.spill [in0]=r5,8;;
        st8.spill [in0]=r6,8;;
        st8.spill [in0]=r7,8;;
        st8.spill [in0]=r8,8;;
        st8.spill [in0]=r9,8;;
        st8.spill [in0]=r10,8;;
        st8.spill [in0]=r11,8;;
        st8.spill [in0]=r12,8;;
        st8.spill [in0]=r13,8;;
        st8.spill [in0]=r14,8;;
        st8.spill [in0]=r15,8;;
        st8.spill [in0]=r16,8;;
        st8.spill [in0]=r17,8;;
        st8.spill [in0]=r18,8;;
        st8.spill [in0]=r19,8;;
        st8.spill [in0]=r20,8;;
        st8.spill [in0]=r21,8;;
        st8.spill [in0]=r22,8;;
        st8.spill [in0]=r23,8;;
        st8.spill [in0]=r24,8;;
        st8.spill [in0]=r25,8;;
        st8.spill [in0]=r26,8;;
        st8.spill [in0]=r27,8;;
        st8.spill [in0]=r28,8;;
        st8.spill [in0]=r29,8;;
        st8.spill [in0]=r30,8;;
        st8.spill [in0]=r31,8;;
        mov     loc3=ar.unat;;                  // UNAT describing the 32 spills above (stored last)

        //
        // Floating-point registers f2 - f31
        //
        stf.spill [in0]=f2,16;;
        stf.spill [in0]=f3,16;;
        stf.spill [in0]=f4,16;;
        stf.spill [in0]=f5,16;;
        stf.spill [in0]=f6,16;;
        stf.spill [in0]=f7,16;;
        stf.spill [in0]=f8,16;;
        stf.spill [in0]=f9,16;;
        stf.spill [in0]=f10,16;;
        stf.spill [in0]=f11,16;;
        stf.spill [in0]=f12,16;;
        stf.spill [in0]=f13,16;;
        stf.spill [in0]=f14,16;;
        stf.spill [in0]=f15,16;;
        stf.spill [in0]=f16,16;;
        stf.spill [in0]=f17,16;;
        stf.spill [in0]=f18,16;;
        stf.spill [in0]=f19,16;;
        stf.spill [in0]=f20,16;;
        stf.spill [in0]=f21,16;;
        stf.spill [in0]=f22,16;;
        stf.spill [in0]=f23,16;;
        stf.spill [in0]=f24,16;;
        stf.spill [in0]=f25,16;;
        stf.spill [in0]=f26,16;;
        stf.spill [in0]=f27,16;;
        stf.spill [in0]=f28,16;;
        stf.spill [in0]=f29,16;;
        stf.spill [in0]=f30,16;;
        stf.spill [in0]=f31,16;;

        //
        // Predicates and branch registers
        //
        mov     loc0=pr;;                       // all predicate registers
        st8.spill [in0]=loc0,8;;
        st8.spill [in0]=in1,8;;                 // b0 as supplied by the caller
        mov     loc0=b1;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b2;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b3;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b4;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b5;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b6;;
        st8.spill [in0]=loc0,8;;
        mov     loc0=b7;;
        st8.spill [in0]=loc0,8;;

        //
        // Application registers
        //
        mov     loc0=ar.rsc;;                   // ar.rsc
        st8.spill [in0]=loc0,8;;
        st8.spill [in0]=in2,8;;                 // ar.bsp as supplied by the caller
        st8.spill [in0]=in3,8;;                 // ar.bspstore as supplied by the caller
        st8.spill [in0]=in4,8;;                 // ar.rnat as supplied by the caller
        mov     loc0=ar.fcr;;                   // ar.fcr   (ar21 - IA32 floating-point control register)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.eflag;;                 // ar.eflag (ar24)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.csd;;                   // ar.csd   (ar25 - ia32 CS descriptor)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.ssd;;                   // ar.ssd   (ar26 - ia32 ss descriptor)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.cflg;;                  // ar.cflg  (ar27 - ia32 cr0 and cr4)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.fsr;;                   // ar.fsr   (ar28 - ia32 floating-point status register)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.fir;;                   // ar.fir   (ar29 - ia32 floating-point instruction register)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.fdr;;                   // ar.fdr   (ar30 - ia32 floating-point data register)
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.ccv;;                   // ar.ccv
        st8.spill [in0]=loc0,8;;
        st8.spill [in0]=loc2,8;;                // ar.unat as captured on entry
        mov     loc0=ar.fpsr;;                  // floating point status register
        st8.spill [in0]=loc0,8;;
        st8.spill [in0]=in5,8;;                 // ar.pfs as supplied by the caller
        mov     loc0=ar.lc;;                    // ar.lc
        st8.spill [in0]=loc0,8;;
        mov     loc0=ar.ec;;                    // ar.ec
        st8.spill [in0]=loc0,8;;

        //
        // Control registers
        //
        mov     loc0=cr.dcr;;                   // dcr
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.itm;;                   // itm
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.iva;;                   // iva
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.pta;;                   // pta
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.ipsr;;                  // ipsr
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.isr;;                   // isr
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.iip;;                   // iip
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.ifa;;                   // ifa
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.itir;;                  // itir
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.iipa;;                  // iipa
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.ifs;;                   // ifs
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.iim;;                   // iim
        st8.spill [in0]=loc0,8;;
        mov     loc0=cr.iha;;                   // iha
        st8.spill [in0]=loc0,8;;

        //
        // Data breakpoint registers dbr0 - dbr7 (loc1 = register index)
        //
        mov     loc0=dbr[r0];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=1;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=2;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=3;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=4;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=5;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=6;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=7;;
        mov     loc0=dbr[loc1];;
        st8.spill [in0]=loc0,8;;

        //
        // Instruction breakpoint registers ibr0 - ibr7 (loc1 = register index)
        //
        mov     loc0=ibr[r0];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=1;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=2;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=3;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=4;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=5;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=6;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;
        mov     loc1=7;;
        mov     loc0=ibr[loc1];;
        st8.spill [in0]=loc0,8;;

        //
        // Last entry: the UNAT captured right after r0 - r31 were spilled
        //
        st8.spill [in0]=loc3;;

        br.ret.sptk.few b0

.endp   SpillContext
932 | .endp SpillContext\r |
933 | \r |
934 | \r |
935 | /////////////////////////////////////////////\r |
936 | //\r |
937 | // Name:\r |
938 | // FillContext\r |
939 | //\r |
940 | // Description:\r |
941 | // Restores register context from the context record, reading the record backwards from its last element (in0 is post-decremented by every load).\r |
942 | // The stored b0, ar.bsp, ar.bspstore, ar.rnat and ar.pfs are only loaded into in1-in5; this routine does not write them to the registers themselves.\r |
943 | // Arguments:\r |
944 | // in0 = address of the last element of the 512 byte aligned context record (decremented as the record is consumed)\r |
945 | // in1 = overwritten with the (possibly modified) B0 from the context record\r |
946 | // in2 = overwritten with the (possibly modified) ar.bsp from the context record\r |
947 | // in3 = overwritten with the (possibly modified) ar.bspstore from the context record\r |
948 | // in4 = overwritten with the (possibly modified) ar.rnat from the context record\r |
949 | // in5 = overwritten with the (possibly modified) ar.pfs from the context record\r |
950 | //\r |
951 | // Notes:\r |
952 | // loc0 - scratch (value being moved from the record into a register)\r |
953 | // loc1 - scratch (index used for the ibr[] / dbr[] writes)\r |
954 | // loc2 - ar.unat value from the context record (application unat), written to ar.unat last\r |
955 | // loc3 - int_nat word from the context record, written to ar.unat before r31-r1 are filled\r |
956 | \r |
957 | .proc FillContext\r |
958 | FillContext:\r |
959 | alloc loc0=6,4,0,0;; // alloc 6 inputs, 4 locals, 0 outs; the ar.pfs value alloc places in loc0 is overwritten below\r |
960 | ld8.fill loc3=[in0],-8;; // int_nat (NaT bits for r1-r31), moved to ar.unat before the general register fills\r |
961 | movl loc1=7;; // ibr7\r |
962 | ld8.fill loc0=[in0],-8;;\r |
963 | mov ibr[loc1]=loc0;;\r |
964 | movl loc1=6;; // ibr6\r |
965 | ld8.fill loc0=[in0],-8;;\r |
966 | mov ibr[loc1]=loc0;;\r |
967 | movl loc1=5;; // ibr5\r |
968 | ld8.fill loc0=[in0],-8;;\r |
969 | mov ibr[loc1]=loc0;;\r |
970 | movl loc1=4;; // ibr4\r |
971 | ld8.fill loc0=[in0],-8;;\r |
972 | mov ibr[loc1]=loc0;;\r |
973 | movl loc1=3;; // ibr3\r |
974 | ld8.fill loc0=[in0],-8;;\r |
975 | mov ibr[loc1]=loc0;;\r |
976 | movl loc1=2;; // ibr2\r |
977 | ld8.fill loc0=[in0],-8;;\r |
978 | mov ibr[loc1]=loc0;;\r |
979 | movl loc1=1;; // ibr1\r |
980 | ld8.fill loc0=[in0],-8;;\r |
981 | mov ibr[loc1]=loc0;;\r |
982 | ld8.fill loc0=[in0],-8;; // ibr0 (indexed with r0, so no index load is needed)\r |
983 | mov ibr[r0]=loc0;;\r |
984 | movl loc1=7;; // dbr7\r |
985 | ld8.fill loc0=[in0],-8;;\r |
986 | mov dbr[loc1]=loc0;;\r |
987 | movl loc1=6;; // dbr6\r |
988 | ld8.fill loc0=[in0],-8;;\r |
989 | mov dbr[loc1]=loc0;;\r |
990 | movl loc1=5;; // dbr5\r |
991 | ld8.fill loc0=[in0],-8;;\r |
992 | mov dbr[loc1]=loc0;;\r |
993 | movl loc1=4;; // dbr4\r |
994 | ld8.fill loc0=[in0],-8;;\r |
995 | mov dbr[loc1]=loc0;;\r |
996 | movl loc1=3;; // dbr3\r |
997 | ld8.fill loc0=[in0],-8;;\r |
998 | mov dbr[loc1]=loc0;;\r |
999 | movl loc1=2;; // dbr2\r |
1000 | ld8.fill loc0=[in0],-8;;\r |
1001 | mov dbr[loc1]=loc0;;\r |
1002 | movl loc1=1;; // dbr1\r |
1003 | ld8.fill loc0=[in0],-8;;\r |
1004 | mov dbr[loc1]=loc0;;\r |
1005 | ld8.fill loc0=[in0],-8;; // dbr0 (indexed with r0, so no index load is needed)\r |
1006 | mov dbr[r0]=loc0;;\r |
1007 | ld8.fill loc0=[in0],-8;; // iha\r |
1008 | mov cr.iha=loc0;;\r |
1009 | ld8.fill loc0=[in0],-8;; // iim\r |
1010 | mov cr.iim=loc0;;\r |
1011 | ld8.fill loc0=[in0],-8;; // ifs\r |
1012 | mov cr.ifs=loc0;;\r |
1013 | ld8.fill loc0=[in0],-8;; // iipa\r |
1014 | mov cr.iipa=loc0;;\r |
1015 | ld8.fill loc0=[in0],-8;; // itir\r |
1016 | mov cr.itir=loc0;;\r |
1017 | ld8.fill loc0=[in0],-8;; // ifa\r |
1018 | mov cr.ifa=loc0;;\r |
1019 | ld8.fill loc0=[in0],-8;; // iip\r |
1020 | mov cr.iip=loc0;;\r |
1021 | ld8.fill loc0=[in0],-8;; // isr\r |
1022 | mov cr.isr=loc0;;\r |
1023 | ld8.fill loc0=[in0],-8;; // ipsr\r |
1024 | mov cr.ipsr=loc0;;\r |
1025 | ld8.fill loc0=[in0],-8;; // pta\r |
1026 | mov cr.pta=loc0;;\r |
1027 | ld8.fill loc0=[in0],-8;; // iva\r |
1028 | mov cr.iva=loc0;;\r |
1029 | ld8.fill loc0=[in0],-8;; // itm\r |
1030 | mov cr.itm=loc0;;\r |
1031 | ld8.fill loc0=[in0],-8;; // dcr\r |
1032 | mov cr.dcr=loc0;;\r |
1033 | ld8.fill loc0=[in0],-8;; // ec\r |
1034 | mov ar.ec=loc0;;\r |
1035 | ld8.fill loc0=[in0],-8;; // lc\r |
1036 | mov ar.lc=loc0;;\r |
1037 | ld8.fill in5=[in0],-8;; // ar.pfs - handed back in in5, not written to ar.pfs here\r |
1038 | ld8.fill loc0=[in0],-8;; // ar.fpsr\r |
1039 | mov ar.fpsr=loc0;;\r |
1040 | ld8.fill loc2=[in0],-8;; // ar.unat - kept in loc2 and written to ar.unat after the general register fills\r |
1041 | ld8.fill loc0=[in0],-8;; // ar.ccv\r |
1042 | mov ar.ccv=loc0;;\r |
1043 | ld8.fill loc0=[in0],-8;; // ar.fdr\r |
1044 | mov ar.fdr=loc0;;\r |
1045 | ld8.fill loc0=[in0],-8;; // ar.fir\r |
1046 | mov ar.fir=loc0;;\r |
1047 | ld8.fill loc0=[in0],-8;; // ar.fsr\r |
1048 | mov ar.fsr=loc0;;\r |
1049 | ld8.fill loc0=[in0],-8;; // ar.cflg\r |
1050 | mov ar.cflg=loc0;;\r |
1051 | ld8.fill loc0=[in0],-8;; // ar.ssd\r |
1052 | mov ar.ssd=loc0;;\r |
1053 | ld8.fill loc0=[in0],-8;; // ar.csd\r |
1054 | mov ar.csd=loc0;;\r |
1055 | ld8.fill loc0=[in0],-8;; // ar.eflag\r |
1056 | mov ar.eflag=loc0;;\r |
1057 | ld8.fill loc0=[in0],-8;; // ar.fcr\r |
1058 | mov ar.fcr=loc0;;\r |
1059 | ld8.fill in4=[in0],-8;; // ar.rnat - handed back in in4, not written to ar.rnat here\r |
1060 | ld8.fill in3=[in0],-8;; // bspstore - handed back in in3, not written to ar.bspstore here\r |
1061 | ld8.fill in2=[in0],-8;; // bsp - handed back in in2\r |
1062 | ld8.fill loc0=[in0],-8;; // ar.rsc\r |
1063 | mov ar.rsc=loc0;;\r |
1064 | ld8.fill loc0=[in0],-8;; // B7 down to B1 are written directly; B0 is handled separately below\r |
1065 | mov b7=loc0;;\r |
1066 | ld8.fill loc0=[in0],-8;;\r |
1067 | mov b6=loc0;;\r |
1068 | ld8.fill loc0=[in0],-8;;\r |
1069 | mov b5=loc0;;\r |
1070 | ld8.fill loc0=[in0],-8;;\r |
1071 | mov b4=loc0;;\r |
1072 | ld8.fill loc0=[in0],-8;;\r |
1073 | mov b3=loc0;;\r |
1074 | ld8.fill loc0=[in0],-8;;\r |
1075 | mov b2=loc0;;\r |
1076 | ld8.fill loc0=[in0],-8;;\r |
1077 | mov b1=loc0;;\r |
1078 | ld8.fill in1=[in0],-8;; // b0 is handed back in in1; the live b0 is still needed for this routine's br.ret\r |
1079 | ld8.fill loc0=[in0],-16;; // predicates; step back 16 bytes because the next (f31) slot is 16 bytes wide\r |
1080 | mov pr=loc0;;\r |
1081 | ldf.fill f31=[in0],-16;;\r |
1082 | ldf.fill f30=[in0],-16;;\r |
1083 | ldf.fill f29=[in0],-16;;\r |
1084 | ldf.fill f28=[in0],-16;;\r |
1085 | ldf.fill f27=[in0],-16;;\r |
1086 | ldf.fill f26=[in0],-16;;\r |
1087 | ldf.fill f25=[in0],-16;;\r |
1088 | ldf.fill f24=[in0],-16;;\r |
1089 | ldf.fill f23=[in0],-16;;\r |
1090 | ldf.fill f22=[in0],-16;;\r |
1091 | ldf.fill f21=[in0],-16;;\r |
1092 | ldf.fill f20=[in0],-16;;\r |
1093 | ldf.fill f19=[in0],-16;;\r |
1094 | ldf.fill f18=[in0],-16;;\r |
1095 | ldf.fill f17=[in0],-16;;\r |
1096 | ldf.fill f16=[in0],-16;;\r |
1097 | ldf.fill f15=[in0],-16;;\r |
1098 | ldf.fill f14=[in0],-16;;\r |
1099 | ldf.fill f13=[in0],-16;;\r |
1100 | ldf.fill f12=[in0],-16;;\r |
1101 | ldf.fill f11=[in0],-16;;\r |
1102 | ldf.fill f10=[in0],-16;;\r |
1103 | ldf.fill f9=[in0],-16;;\r |
1104 | ldf.fill f8=[in0],-16;;\r |
1105 | ldf.fill f7=[in0],-16;;\r |
1106 | ldf.fill f6=[in0],-16;;\r |
1107 | ldf.fill f5=[in0],-16;;\r |
1108 | ldf.fill f4=[in0],-16;;\r |
1109 | ldf.fill f3=[in0],-16;;\r |
1110 | ldf.fill f2=[in0],-8;;\r |
1111 | mov ar.unat=loc3;; // load int_nat into ar.unat so the ld8.fill of r31-r1 below picks up the saved NaT bits\r |
1112 | ld8.fill r31=[in0],-8;;\r |
1113 | ld8.fill r30=[in0],-8;;\r |
1114 | ld8.fill r29=[in0],-8;;\r |
1115 | ld8.fill r28=[in0],-8;;\r |
1116 | ld8.fill r27=[in0],-8;;\r |
1117 | ld8.fill r26=[in0],-8;;\r |
1118 | ld8.fill r25=[in0],-8;;\r |
1119 | ld8.fill r24=[in0],-8;;\r |
1120 | ld8.fill r23=[in0],-8;;\r |
1121 | ld8.fill r22=[in0],-8;;\r |
1122 | ld8.fill r21=[in0],-8;;\r |
1123 | ld8.fill r20=[in0],-8;;\r |
1124 | ld8.fill r19=[in0],-8;;\r |
1125 | ld8.fill r18=[in0],-8;;\r |
1126 | ld8.fill r17=[in0],-8;;\r |
1127 | ld8.fill r16=[in0],-8;;\r |
1128 | ld8.fill r15=[in0],-8;;\r |
1129 | ld8.fill r14=[in0],-8;;\r |
1130 | ld8.fill r13=[in0],-8;;\r |
1131 | ld8.fill r12=[in0],-8;;\r |
1132 | ld8.fill r11=[in0],-8;;\r |
1133 | ld8.fill r10=[in0],-8;;\r |
1134 | ld8.fill r9=[in0],-8;;\r |
1135 | ld8.fill r8=[in0],-8;;\r |
1136 | ld8.fill r7=[in0],-8;;\r |
1137 | ld8.fill r6=[in0],-8;;\r |
1138 | ld8.fill r5=[in0],-8;;\r |
1139 | ld8.fill r4=[in0],-8;;\r |
1140 | ld8.fill r3=[in0],-8;;\r |
1141 | ld8.fill r2=[in0],-8;;\r |
1142 | ld8.fill r1=[in0],-8;;\r |
1143 | mov ar.unat=loc2;; // restore application context unat (loaded into loc2 earlier)\r |
1144 | \r |
1145 | br.ret.sptk.many b0\r |
1146 | \r |
1147 | .endp FillContext\r |
1148 | \r |
1149 | \r |
1150 | /////////////////////////////////////////////\r |
1151 | //\r |
1152 | // Name:\r |
1153 | // HookHandler\r |
1154 | //\r |
1155 | // Description:\r |
1156 | // Common branch target from hooked IVT entries. Runs in interrupt context.\r |
1157 | // Responsible for saving context (SpillContext), calling the common C handler\r |
1158 | // (CommonHandler) and restoring context (FillContext). Banked registers running on bank 0 at entry.\r |
1159 | //\r |
1160 | // Arguments:\r |
1161 | // All arguments are passed in banked registers:\r |
1162 | // B0_REG = Original B0\r |
1163 | // SCRATCH_REG1 = IVT entry index (vector number)\r |
1164 | //\r |
1165 | // Returns:\r |
1166 | // Returns via rfi, or branches to cr.iva + PATCH_RETURN_OFFSET when SCRATCH_REG1 equals EXCPT_EXTERNAL_INTERRUPT\r |
1167 | //\r |
1168 | // Notes:\r |
1169 | // loc0 - scratch\r |
1170 | // loc1 - scratch\r |
1171 | // loc2 - copy of SCRATCH_REG1 (vector number); later reloaded with EXCPT_EXTERNAL_INTERRUPT for the chain test\r |
1172 | // loc3 - 512 byte aligned context record address; reused for the RSC.loadrs mask after FillContext\r |
1173 | // loc4 - temporary storage of last address in context record\r |
1174 | \r |
1175 | HookHandler:\r |
1176 | flushrs;; // Synch RSE with backing store\r |
1177 | mov SCRATCH_REG2=ar.bsp // save interrupted context bsp\r |
1178 | mov SCRATCH_REG3=ar.bspstore // save interrupted context bspstore\r |
1179 | mov SCRATCH_REG4=ar.rnat // save interrupted context rnat\r |
1180 | mov SCRATCH_REG6=cr.ifs;; // save IFS in case we need to chain (cover below overwrites cr.ifs)\r |
1181 | cover;; // creates new frame, moves old\r |
1182 | // CFM to IFS.\r |
1183 | alloc SCRATCH_REG5=0,5,6,0 // alloc 0 inputs, 5 locals, 6 outs; previous ar.pfs goes to SCRATCH_REG5\r |
1184 | ;;\r |
1185 | // copy banked registers into out1-out5 and loc2 before switching to bank 1\r |
1186 | mov out1=B0_REG // out1 = Original B0\r |
1187 | mov out2=SCRATCH_REG2 // out2 = original ar.bsp\r |
1188 | mov out3=SCRATCH_REG3 // out3 = original ar.bspstore\r |
1189 | mov out4=SCRATCH_REG4 // out4 = original ar.rnat\r |
1190 | mov out5=SCRATCH_REG5 // out5 = original ar.pfs\r |
1191 | mov loc2=SCRATCH_REG1;; // loc2 = vector number + chain flag\r |
1192 | bsw.1;; // switch banked registers to bank 1\r |
1193 | srlz.d // explicit serialize required\r |
1194 | // now fill in context record structure\r |
1195 | movl loc3=IpfContextBuf // Ensure context record is aligned\r |
1196 | add loc0=-0x200,r0;; // loc0 = mask that clears the lower 9 bits (align on 512 byte boundary)\r |
1197 | and loc3=loc3,loc0;;\r |
1198 | add loc3=0x200,loc3;; // move to next 512 byte boundary (always advances, even if IpfContextBuf was already aligned)\r |
1199 | // loc3 now contains the 512 byte aligned context record\r |
1200 | // spill register context into context record\r |
1201 | mov out0=loc3;; // Context record base in out0\r |
1202 | // original B0 in out1 already\r |
1203 | // original ar.bsp in out2 already\r |
1204 | // original ar.bspstore in out3 already\r |
1205 | br.call.sptk.few b0=SpillContext;; // spill context (original ar.rnat in out4 and ar.pfs in out5 are passed as well)\r |
1206 | mov loc4=out0 // save out0 as left by SpillContext; it is passed to FillContext below as the last-element address\r |
1207 | \r |
1208 | // At this point, the context has been saved to the context record and we're\r |
1209 | // ready to call the C part of the handler...\r |
1210 | \r |
1211 | movl loc0=CommonHandler;; // obtain address of plabel\r |
1212 | ld8 loc1=[loc0];; // get entry point of CommonHandler\r |
1213 | mov b6=loc1;; // put it in a branch register\r |
1214 | adds loc1= 8, loc0;; // index to GP in plabel\r |
1215 | ld8 r1=[loc1];; // set up gp for C call\r |
1216 | mov loc1=0xfffff;; // mask off so only vector bits (low 20 bits) are present\r |
1217 | and out0=loc2,loc1;; // pass vector number (exception type)\r |
1218 | mov out1=loc3;; // pass context record address\r |
1219 | br.call.sptk.few b0=b6;; // call C handler\r |
1220 | \r |
1221 | // We've returned from the C call, so restore the context and either rfi\r |
1222 | // back to interrupted thread, or chain into the SAL if this was an external interrupt\r |
1223 | mov out0=loc4;; // pass address of last element in context record\r |
1224 | br.call.sptk.few b0=FillContext;; // Fill context; b0, rnat and pfs come back in out1, out4 and out5\r |
1225 | mov b0=out1 // fill in b0\r |
1226 | mov ar.rnat=out4\r |
1227 | mov ar.pfs=out5\r |
1228 | \r |
1229 | // Loadrs is necessary because the debugger may have changed some values in\r |
1230 | // the backing store. The processor, however may not be aware that the\r |
1231 | // stacked registers need to be reloaded from the backing store. Therefore,\r |
1232 | // we explicitly cause the RSE to refresh the stacked register's contents\r |
1233 | // from the backing store. \r |
1234 | mov loc0=ar.rsc // get RSC value\r |
1235 | mov loc1=ar.rsc // save it so we can restore it\r |
1236 | movl loc3=0xffffffffc000ffff;; // create mask for clearing RSC.loadrs (zeroes bits 16-29, keeps everything else)\r |
1237 | and loc0=loc0,loc3;; // create value for RSC with RSC.loadrs==0\r |
1238 | mov ar.rsc=loc0;; // modify RSC\r |
1239 | loadrs;; // invalidate register stack\r |
1240 | mov ar.rsc=loc1;; // restore original RSC\r |
1241 | \r |
1242 | bsw.0;; // switch banked registers back to bank 0\r |
1243 | srlz.d;; // explicit serialize required\r |
1244 | mov PR_REG=pr // save predicates - to be restored after chaining decision\r |
1245 | mov B0_REG=b0 // save b0 - required by chain code\r |
1246 | mov loc2=EXCPT_EXTERNAL_INTERRUPT;;\r |
1247 | cmp.eq p7,p0=SCRATCH_REG1,loc2;; // p7 = this is the external interrupt vector (the timer tick), so chain\r |
1248 | (p7) br.cond.dpnt.few DO_CHAIN;;\r |
1249 | \r |
1250 | NO_CHAIN:\r |
1251 | mov pr=PR_REG;;\r |
1252 | rfi;; // we're outa here.\r |
1253 | \r |
1254 | DO_CHAIN:\r |
1255 | mov pr=PR_REG\r |
1256 | mov SCRATCH_REG1=cr.iva\r |
1257 | mov SCRATCH_REG2=PATCH_RETURN_OFFSET;;\r |
1258 | add SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;;\r |
1259 | mov b0=SCRATCH_REG1;;\r |
1260 | br.cond.sptk.few b0;;\r |
1261 | \r |
1262 | EndHookHandler:\r |
1263 | \r |
1264 | \r |
1265 | /////////////////////////////////////////////\r |
1266 | //\r |
1267 | // Name:\r |
1268 | // HookStub\r |
1269 | //\r |
1270 | // Description:\r |
1271 | // HookStub will be copied from its loaded location into the IVT when\r |
1272 | // an IVT entry is hooked. The IVT entry does an indirect jump via B0 to\r |
1273 | // HookHandler, which in turn calls into the default C handler, which calls\r |
1274 | // the user-installed C handler. The calls return and HookHandler executes\r |
1275 | // an rfi.\r |
1276 | //\r |
1277 | // Notes:\r |
1278 | // Saves B0 to B0_REG (b0 itself is then overwritten with the address of HookHandler)\r |
1279 | // Saves IVT index to SCRATCH_REG1 (immediate value is fixed up when code is copied\r |
1280 | // to the IVT entry). In this template the immediate is the placeholder 0.\r |
1281 | \r |
1282 | .globl HookStub\r |
1283 | .proc HookStub\r |
1284 | HookStub:\r |
1285 | \r |
1286 | mov B0_REG=b0\r |
1287 | movl SCRATCH_REG1=HookHandler;;\r |
1288 | mov b0=SCRATCH_REG1;;\r |
1289 | mov SCRATCH_REG1=0;;// placeholder 0: this immediate is fixed up during install of the handler to be the vector number\r |
1290 | br.cond.sptk.few b0\r |
1291 | \r |
1292 | .endp HookStub\r |
1293 | \r |
1294 | \r |
1295 | /////////////////////////////////////////////\r |
1296 | // The following code is moved into IVT entry 14 (offset 0x3400) which is reserved\r |
1297 | // in the Itanium architecture. The patch code is located at the end of the\r |
1298 | // IVT entry. It filters external interrupts: most are passed straight through (Stub_IVT_Passthru), the rest are sent to HookHandler.\r |
1299 | \r |
1300 | PatchCode:\r |
1301 | mov SCRATCH_REG0=psr\r |
1302 | mov SCRATCH_REG6=cr.ipsr\r |
1303 | mov PR_REG=pr\r |
1304 | mov B0_REG=b0;;\r |
1305 | \r |
1306 | // turn off virtual translations: clear the PSR_DT and PSR_RT bits in the psr value read above\r |
1307 | movl SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;\r |
1308 | and SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;\r |
1309 | mov psr.l = SCRATCH_REG1;;\r |
1310 | srlz.d \r |
1311 | tbit.z p14, p15 = SCRATCH_REG6, PSR_IS;; // p15 = ipsr.is set. Check to see if we were\r |
1312 | // interrupted from IA32\r |
1313 | // context. If so, bail out\r |
1314 | // and chain to SAL immediately\r |
1315 | (p15) br.cond.sptk.few Stub_IVT_Passthru;;\r |
1316 | // we only want to take about 1 out of 32 external interrupts to minimize the\r |
1317 | // impact to system performance. Check our interrupt count and bail\r |
1318 | // out until bit 5 of the count is set (count has reached 32): 32 interrupts are passed through, then the next one is taken\r |
1319 | movl SCRATCH_REG1=ExternalInterruptCount;;\r |
1320 | ld8 SCRATCH_REG2=[SCRATCH_REG1];; // ExternalInterruptCount\r |
1321 | tbit.z p14, p15 = SCRATCH_REG2, 5;; // bit 5 set? p14 = no, p15 = yes\r |
1322 | (p14) add SCRATCH_REG2=1, SCRATCH_REG2;; // No? Then increment\r |
1323 | // ExternalInterruptCount\r |
1324 | // and Chain to SAL\r |
1325 | // immediately\r |
1326 | (p14) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r |
1327 | (p14) br.cond.sptk.few Stub_IVT_Passthru;;\r |
1328 | (p15) mov SCRATCH_REG2=0;; // Yes? Then reset\r |
1329 | // ExternalInterruptCount\r |
1330 | // and branch to\r |
1331 | // HookHandler\r |
1332 | (p15) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r |
1333 | mov pr=PR_REG\r |
1334 | movl SCRATCH_REG1=HookHandler;; // SCRATCH_REG1 = entrypoint of HookHandler\r |
1335 | mov b0=SCRATCH_REG1;; // b0 = entrypoint of HookHandler\r |
1336 | mov SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;; // vector number argument for HookHandler\r |
1337 | br.sptk.few b0;; // branch to HookHandler\r |
1338 | \r |
1339 | PatchCodeRet:\r |
1340 | // fake-up an rfi to get RSE back to being coherent and ensure psr has\r |
1341 | // original contents when interrupt occurred, then exit to SAL\r |
1342 | // at this point:\r |
1343 | // cr.ifs has been modified by previous "cover"\r |
1344 | // SCRATCH_REG6 has original cr.ifs; current cr.ipsr and cr.iip are parked in SCRATCH_REG5 and SCRATCH_REG4 across the rfi\r |
1345 | \r |
1346 | mov SCRATCH_REG5=cr.ipsr\r |
1347 | mov SCRATCH_REG4=cr.iip;;\r |
1348 | mov cr.ipsr=SCRATCH_REG0\r |
1349 | mov SCRATCH_REG1=ip;;\r |
1350 | add SCRATCH_REG1=0x30, SCRATCH_REG1;;\r |
1351 | mov cr.iip=SCRATCH_REG1;;\r |
1352 | rfi;; // rfi to cr.iip = ip + 0x30 set above. NOTE(review): presumably Stub_RfiTarget - depends on bundle layout, confirm before editing\r |
1353 | \r |
1354 | Stub_RfiTarget:\r |
1355 | mov cr.ifs=SCRATCH_REG6\r |
1356 | mov cr.ipsr=SCRATCH_REG5\r |
1357 | mov cr.iip=SCRATCH_REG4;;\r |
1358 | \r |
1359 | Stub_IVT_Passthru:\r |
1360 | mov pr=PR_REG // pr = saved predicate registers\r |
1361 | mov b0=B0_REG;; // b0 = saved b0; execution then falls through past EndPatchCode\r |
1362 | EndPatchCode:\r |
1363 | \r |
1364 | \r |
1365 | /////////////////////////////////////////////\r |
1366 | // The following bundle is moved into IVT entry 14 (offset 0x3400) which is reserved\r |
1367 | // in the Itanium architecture. This bundle will be the last bundle and will\r |
1368 | // be located at offset 0x37F0 in the IVT. It branches backwards by (FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10) bytes. NOTE(review): presumably to the bundle after the first one of the external interrupt entry - confirm against the constant definitions.\r |
1369 | \r |
1370 | FailsafeBranch:\r |
1371 | {\r |
1372 | .mib\r |
1373 | nop.m 0\r |
1374 | nop.i 0\r |
1375 | br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10)\r |
1376 | }\r |
1377 | \r |
1378 | \r |
1379 | /////////////////////////////////////////////\r |
1380 | // The following bundle is moved into IVT entry 13 (offset 0x3000) which is the\r |
1381 | // external interrupt. It branches to the patch code via PATCH_BRANCH, which is defined outside this section (presumably the displacement to PatchCode in IVT entry 14 - confirm).\r |
1382 | \r |
1383 | PatchCodeNewBun0:\r |
1384 | {\r |
1385 | .mib\r |
1386 | nop.m 0\r |
1387 | nop.i 0\r |
1388 | br.cond.sptk.few PATCH_BRANCH\r |
1389 | }\r |