+//++\r
+// Copyright (c) 2006, Intel Corporation \r
+// All rights reserved. This program and the accompanying materials \r
+// are licensed and made available under the terms and conditions of the BSD License \r
+// which accompanies this distribution. The full text of the license may be found at \r
+// http://opensource.org/licenses/bsd-license.php \r
+// \r
+// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, \r
+// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. \r
+// \r
+// Module Name:\r
+//\r
+// AsmFuncs.s\r
+//\r
+// Abstract:\r
+//\r
+// Low level IPF routines used by the debug support driver\r
+//\r
+// Revision History:\r
+//\r
+//--\r
+\r
+\r
+#include "common.i"\r
+#include "Ds64Macros.i"\r
+\r
+.globl PatchSaveBuffer\r
+.globl IpfContextBuf\r
+.globl CommonHandler\r
+.globl ExternalInterruptCount\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// InstructionCacheFlush\r
+//\r
+// Description:\r
+// Flushes instruction cache for specified number of bytes\r
+//\r
+ .globl InstructionCacheFlush\r
+ .proc InstructionCacheFlush\r
+ .align 32\r
+//\r
+// Register usage, as established by the code below:\r
+// r32 - first input: address handed to fc; advanced by 32 on every pass\r
+// r33 - second input: byte count, handled as an unsigned 32-bit value\r
+// r31 - end address = r32 + (byte count rounded up to a multiple of 32)\r
+//\r
+// NOTE(review): the start address is not rounded down to a 32-byte boundary,\r
+// so with an unaligned start the loop may stop before the last bytes of the\r
+// range have been covered - confirm callers always pass aligned addresses.\r
+//\r
+InstructionCacheFlush::\r
+ { .mii\r
+ alloc r3=2, 0, 0, 0 // two inputs (r32, r33), no locals or outputs\r
+ cmp4.leu p0,p6=32, r33;; // p6 = (byte count < 32)\r
+ (p6) mov r33=32;; // always flush at least 32 bytes\r
+ }\r
+ { .mii\r
+ nop.m 0\r
+ zxt4 r29=r33;; // r29 = byte count zero-extended to 64 bits\r
+ dep.z r30=r29, 0, 5;; // r30 = byte count modulo 32\r
+ }\r
+ { .mii\r
+ cmp4.eq p0,p7=r0, r30 // p7 = count is not a multiple of 32\r
+ shr.u r28=r29, 5;; // r28 = number of whole 32-byte chunks\r
+ (p7) adds r28=1, r28;; // round up for a partial trailing chunk\r
+ }\r
+ { .mii\r
+ nop.m 0\r
+ shl r27=r28, 5;; // r27 = rounded-up byte count\r
+ zxt4 r26=r27;;\r
+ }\r
+ { .mfb\r
+ add r31=r26, r32 // r31 = end address of the flush loop\r
+ nop.f 0\r
+ nop.b 0\r
+ }\r
+LoopBack: // $L143:\r
+ { .mii\r
+ fc r32 // flush the cache line containing r32\r
+ adds r32=32, r32;; // step to the next 32-byte chunk\r
+ cmp.ltu p14,p15=r32, r31 // p14 = more chunks remain (unsigned compare)\r
+ }\r
+ { .mfb\r
+ nop.m 0\r
+ nop.f 0\r
+ //(p14) br.cond.dptk.few $L143#;;\r
+ (p14) br.cond.dptk.few LoopBack;;\r
+ }\r
+ { .mmi\r
+ sync.i;; // wait for the preceding fc operations to complete\r
+ srlz.i // serialize the instruction stream before returning\r
+ nop.i 0;;\r
+ }\r
+ { .mfb\r
+ nop.m 0\r
+ nop.f 0\r
+ br.ret.sptk.few b0;;\r
+ }\r
+ .endp InstructionCacheFlush\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// ChainHandler\r
+//\r
+// Description:\r
+// Chains an interrupt handler\r
+//\r
+// The purpose of this function is to enable chaining of the external interrupt.\r
+// Since there's no clean SAL abstraction for doing this, we must do it\r
+// surreptitiously.\r
+//\r
+// The reserved IVT entry at offset 0x3400 is coopted for use by this handler.\r
+// According to Itanium architecture, it is reserved. Strictly speaking, this is\r
+// not safe, as we're cheating and violating the Itanium architecture. However,\r
+// as long as we're the only ones cheating, we should be OK. Without hooks in\r
+// the SAL to enable IVT management, there aren't many good options.\r
+//\r
+// The strategy is to replace the first bundle of the external interrupt handler\r
+// with our own that will branch into a piece of code we've supplied and located\r
+// in the reserved IVT entry. Only the first bundle of the external interrupt\r
+// IVT entry is modified.\r
+//\r
+// The original bundle is moved and relocated to space\r
+// allocated within the reserved IVT entry. The bundle that follows it\r
+// is generated as a hard coded branch back to the second bundle of the\r
+// external interrupt IVT entry, just in case the first bundle had no branch.\r
+//\r
+// Our new code will execute our handler, and then fall through to the\r
+// original bundle after restoring all context appropriately.\r
+//\r
+// The following is a representation of what the IVT memory map looks like with\r
+// our chained handler installed:\r
+//\r
+//\r
+// \r
+// \r
+// \r
+// This IVT entry is Failsafe bundle \r
+// reserved by the \r
+// Itanium architecture Original bundle 0 \r
+// and is used for \r
+// for locating our \r
+// handler and the \r
+// original bundle Patch code... \r
+// zero of the ext \r
+// interrupt handler \r
+// \r
+// RSVD (3400) Unused \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// \r
+// EXT_INT (3000) Bundle 0 Bundle zero - This one is\r
+// modified, all other bundles\r
+// in the EXT_INT entry are\r
+// untouched.\r
+//\r
+//\r
+// Arguments:\r
+//\r
+// Returns:\r
+//\r
+// Notes:\r
+//\r
+//\r
+ .globl ChainHandler\r
+ .proc ChainHandler\r
+//\r
+// Installs the chained external interrupt handler by copying bundles inside\r
+// the IVT. Takes no inputs; r8 is set to 1 on entry and never changed, so the\r
+// routine always reports success.\r
+//\r
+// r2 holds cr.iva and is read again after every br.call below, so it must\r
+// survive those calls. None of the callees visible in this file write r2;\r
+// NESTED_SETUP / NESTED_RETURN are defined elsewhere - TODO confirm they do\r
+// not touch r2 either.\r
+//\r
+ChainHandler:\r
+\r
+ NESTED_SETUP( 0,2+3,3,0 )\r
+\r
+ mov r8=1 // r8 = success\r
+ mov r2=cr.iva;; // r2 = base address of the IVT\r
+//\r
+// NOTE: There's a potential hazard here in that we're simply stealing a bunch of\r
+// bundles (memory) from the IVT and assuming there's no catastrophic side effect.\r
+//\r
+// First, save IVT area we're taking over with the patch so we can restore it later\r
+//\r
+ addl out0=PATCH_ENTRY_OFFSET, r2 // out0 = source buffer\r
+ movl out1=PatchSaveBuffer // out1 = destination buffer\r
+ mov out2=0x40;; // out2 = number of bundles to copy... save entire IVT entry\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+// Next, copy the patch code into the IVT\r
+ movl out0=PatchCode // out0 = source buffer of patch code\r
+ addl out1=PATCH_OFFSET, r2 // out1 = destination buffer - in IVT\r
+ mov out2=NUM_PATCH_BUNDLES;; // out2 = number of bundles to copy\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+\r
+// copy original bundle 0 from the external interrupt handler to the\r
+// appropriate place in the reserved IVT interrupt slot\r
+ addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = source buffer\r
+ addl out1=RELOCATED_EXT_INT, r2 // out1 = destination buffer - in reserved IVT\r
+ mov out2=1;; // out2 = copy 1 bundle\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+// Now relocate it there because it very likely had a branch instruction\r
+// that must now be fixed up.\r
+// The status RelocateBundle leaves in out0 is not checked here.\r
+ addl out0=RELOCATED_EXT_INT, r2 // out0 = new runtime address of bundle - in reserved IVT\r
+ addl out1=EXT_INT_ENTRY_OFFSET, r2;;// out1 = IP address of previous location\r
+ mov out2=out0;; // out2 = IP address of new location\r
+ br.call.sptk.few b0 = RelocateBundle\r
+\r
+// Now copy the failsafe branch into the next bundle just in case\r
+// the original ext int bundle 0 did not contain a branch instruction\r
+ movl out0=FailsafeBranch // out0 = source buffer\r
+ addl out1=FAILSAFE_BRANCH_OFFSET, r2 // out1 = destination buffer - in reserved IVT\r
+ mov out2=1;; // out2 = copy 1 bundle\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+// Last, copy in our replacement for the external interrupt IVT entry bundle 0\r
+ movl out0=PatchCodeNewBun0 // out0 = source buffer - our replacement bundle 0\r
+ addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - bundle 0 of External interrupt entry\r
+ mov out2=1;; // out2 = copy 1 bundle\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+ChainHandlerDone:\r
+ NESTED_RETURN\r
+\r
+ .endp ChainHandler\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// UnchainHandler\r
+//\r
+// Description:\r
+// Unchains an interrupt handler\r
+//\r
+// Arguments:\r
+//\r
+// Returns:\r
+//\r
+// Notes:\r
+//\r
+//\r
+ .globl UnchainHandler\r
+ .proc UnchainHandler\r
+//\r
+// Undoes the chaining performed on the IVT: moves the relocated bundle 0 back\r
+// into the external interrupt entry, fixes it up for that address, and then\r
+// restores the patch area from PatchSaveBuffer. Takes no inputs; r8 is set to\r
+// 1 on entry and never changed.\r
+//\r
+// r2 holds cr.iva and is read again after each br.call, so it must survive\r
+// those calls - TODO confirm NESTED_SETUP / NESTED_RETURN do not write r2.\r
+//\r
+\r
+UnchainHandler:\r
+\r
+ NESTED_SETUP( 0,2+3,3,0 )\r
+\r
+ mov r8=1 // r8 = success\r
+ mov r2=cr.iva;; // r2 = interrupt vector address\r
+\r
+// First copy original Ext Int bundle 0 back to its proper home...\r
+ addl out0=RELOCATED_EXT_INT, r2 // out0 = source - in reserved IVT\r
+ addl out1=EXT_INT_ENTRY_OFFSET, r2 // out1 = destination buffer - first bundle of Ext Int entry\r
+ mov out2=1;; // out2 = copy 1 bundle\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+// Now, relocate it again...\r
+// The status RelocateBundle leaves in out0 is not checked here.\r
+ addl out0=EXT_INT_ENTRY_OFFSET, r2 // out0 = new runtime address of bundle\r
+ addl out1=RELOCATED_EXT_INT, r2;; // out1 = IP address of previous location\r
+ mov out2=out0;; // out2 = IP address of new location\r
+ br.call.sptk.few b0 = RelocateBundle\r
+\r
+// Last, restore the patch area\r
+ movl out0=PatchSaveBuffer // out0 = source buffer\r
+ addl out1=PATCH_ENTRY_OFFSET, r2 // out1 = destination buffer\r
+ mov out2=0x40;; // out2 = number of bundles to copy... restore entire IVT entry\r
+ br.call.sptk.few b0 = CopyBundles\r
+\r
+UnchainHandlerDone:\r
+ NESTED_RETURN\r
+\r
+ .endp UnchainHandler\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// CopyBundles\r
+//\r
+// Description:\r
+// Copies instruction bundles - flushes icache as necessary\r
+//\r
+// Arguments:\r
+// in0 - Bundle source\r
+// in1 - Bundle destination\r
+// in2 - Bundle count\r
+//\r
+// Returns:\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+ .proc CopyBundles\r
+\r
+CopyBundles:\r
+\r
+ NESTED_SETUP(3,2+1,0,0)\r
+\r
+ // A bundle is 16 bytes and is moved as two 8-byte words, so the\r
+ // bundle count is doubled to get the number of words to move.\r
+ add in2=in2, in2;; // in2 = 8-byte words still to copy\r
+\r
+CopyBundlesLoop:\r
+\r
+ cmp.ne p15, p14 = 0, in2;; // p14 = no words left, p15 = keep going\r
+(p14) br.sptk.few CopyBundlesDone;;\r
+\r
+ ld8 loc2=[in0], 0x8;; // fetch next source word, advance source\r
+ add in2=-1, in2 // one word fewer outstanding\r
+ st8 [in1]=loc2;; // write it to the destination\r
+ fc in1;; // flush the cache line just written\r
+ sync.i;; // make the flush visible to instruction fetch\r
+ srlz.i;; // and serialize before continuing\r
+ add in1=0x8, in1;; // advance destination\r
+ br.sptk.few CopyBundlesLoop;;\r
+\r
+CopyBundlesDone:\r
+ NESTED_RETURN\r
+\r
+ .endp CopyBundles\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// RelocateBundle\r
+//\r
+// Description:\r
+// Relocates an instruction bundle by updating any ip-relative branch instructions.\r
+//\r
+// Arguments:\r
+// in0 - Runtime address of bundle\r
+// in1 - IP address of previous location of bundle\r
+// in2 - IP address of new location of bundle\r
+//\r
+// Returns:\r
+// in0 - 1 if successful or 0 if unsuccessful\r
+//\r
+// Notes:\r
+// This routine examines all slots in the given bundle that are destined for the\r
+// branch execution unit. If any of these slots contain an IP-relative branch\r
+// namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative\r
+// address. Errors can occur if a branch cannot be reached.\r
+//\r
+ .proc RelocateBundle\r
+//\r
+// Walks slots SLOT0..SLOT2 of the bundle at in0. For each slot that\r
+// IsSlotBranch reports as a branch slot, the encoding is passed through\r
+// RelocateSlot and written back with SetSlot.\r
+//\r
+// Helper results are read back from out0 / out1 after each call, i.e. the\r
+// helpers return values in their own in0 / in1 registers.\r
+//\r
+// Local register usage:\r
+// loc2 - current slot index\r
+// loc3 - bundle template\r
+// loc4 - encoding of the current slot\r
+// loc5 - runtime address of the bundle (copied from in0 on entry)\r
+//\r
+\r
+RelocateBundle:\r
+\r
+ NESTED_SETUP(3,2+4,3,0)\r
+\r
+ mov loc2=SLOT0 // loc2 = slot index\r
+ mov loc5=in0;; // loc5 = runtime address of bundle\r
+ mov in0=1;; // in0 = success\r
+\r
+RelocateBundleNextSlot:\r
+\r
+ cmp.ge p14, p15 = SLOT2, loc2;; // Check if maximum slot (p15 = slot index is past SLOT2)\r
+(p15) br.sptk.few RelocateBundleDone\r
+\r
+ mov out0=loc5;; // out0 = runtime address of bundle\r
+ br.call.sptk.few b0 = GetTemplate\r
+ mov loc3=out0;; // loc3 = instruction template\r
+ mov out0=loc5 // out0 = runtime address of bundle\r
+ mov out1=loc2;; // out1 = instruction slot number\r
+ br.call.sptk.few b0 = GetSlot\r
+ mov loc4=out0;; // loc4 = instruction encoding\r
+ mov out0=loc4 // out0 = instruction encoding\r
+ mov out1=loc2 // out1 = instruction slot number\r
+ mov out2=loc3;; // out2 = instruction template\r
+ br.call.sptk.few b0 = IsSlotBranch\r
+ cmp.eq p14, p15 = 1, out0;; // Check if branch slot (p15 = not a branch)\r
+(p15) add loc2=1,loc2 // Increment slot\r
+(p15) br.sptk.few RelocateBundleNextSlot\r
+ mov out0=loc4 // out0 = instruction encoding\r
+ mov out1=in1 // out1 = IP address of previous location\r
+ mov out2=in2;; // out2 = IP address of new location\r
+ br.call.sptk.few b0 = RelocateSlot\r
+ cmp.eq p14, p15 = 1, out1;; // Check if relocated slot (p15 = relocation failed)\r
+(p15) mov in0=0 // in0 = failure\r
+(p15) br.sptk.few RelocateBundleDone // stop at the first slot that cannot be relocated\r
+ mov out2=out0;; // out2 = instruction encoding\r
+ mov out0=loc5 // out0 = runtime address of bundle\r
+ mov out1=loc2;; // out1 = instruction slot number\r
+ br.call.sptk.few b0 = SetSlot\r
+ add loc2=1,loc2;; // Increment slot\r
+ br.sptk.few RelocateBundleNextSlot\r
+\r
+RelocateBundleDone:\r
+ NESTED_RETURN\r
+\r
+ .endp RelocateBundle\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// RelocateSlot\r
+//\r
+// Description:\r
+// Relocates a single instruction slot by re-encoding its ip-relative branch offset, if it has one.\r
+//\r
+// Arguments:\r
+// in0 - Instruction encoding (41-bits, right justified)\r
+// in1 - IP address of previous location of bundle\r
+// in2 - IP address of new location of bundle\r
+//\r
+// Returns:\r
+// in0 - Instruction encoding (41-bits, right justified)\r
+// in1 - 1 if successful otherwise 0\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+ .proc RelocateSlot\r
+//\r
+// Re-encodes the IP-relative displacement of one branch-unit instruction so\r
+// that it still reaches the same target after its bundle has moved.\r
+//\r
+// in0 - instruction encoding (41 bits, right justified); updated in place\r
+// in1 - on entry: IP of the bundle's previous location\r
+// on exit: 1 = encoding is valid for the new location, 0 = target is\r
+// out of range for an IP-relative branch\r
+// in2 - IP of the bundle's new location\r
+//\r
+// Encoding fields used: opcode in bits 40-37, sign in bit 36, 20-bit bundle\r
+// displacement in bits 32-13. Opcodes other than 4, 5 and 7 are returned\r
+// unchanged with in1 = 1.\r
+//\r
+// Locals: loc2 = opcode, then old byte displacement; loc3 = branch target;\r
+// loc4 = new displacement; loc5 = |new displacement|; loc6 = limit.\r
+//\r
+\r
+RelocateSlot:\r
+ NESTED_SETUP(3,2+5,0,0)\r
+ extr.u loc2=in0, 37, 4;; // loc2 = instruction opcode\r
+ cmp.eq p14, p15 = 4, loc2;; // IP-relative branch (B1) or\r
+ // IP-relative counted branch (B2)\r
+(p15) cmp.eq p14, p15 = 5, loc2;; // IP-relative call (B3)\r
+(p15) cmp.eq p14, p15 = 7, loc2;; // IP-relative predict (B6)\r
+(p15) mov in1=1 // Instruction did not need to be reencoded\r
+(p15) br.sptk.few RelocateSlotDone\r
+ tbit.nz p14, p15 = in0, 36;; // p14 = old displacement is negative\r
+ extr.u loc2=in0, 13, 20;; // loc2 = relative offset in instruction\r
+(p14) movl loc3=0xfffffffffff00000;; // extend sign\r
+(p14) or loc2=loc2, loc3;;\r
+ shl loc2=loc2,4;; // convert to byte offset instead of bundle offset\r
+ add loc3=loc2, in1;; // loc3 = address of branch target\r
+ sub loc4=loc3,in2;; // loc4 = relative offset from new ip to branch target\r
+ cmp.lt p14, p15 = loc4, r0;; // p14 = new offset is strictly negative\r
+(p14) sub loc5=r0,loc4 // get absolute value of offset\r
+(p15) mov loc5=loc4;;\r
+ movl loc6=0x0FFFFFF;; // maximum offset in bytes for ip-rel branch\r
+ cmp.gt p14, p15 = loc5, loc6;; // check to see we're not out of range for an ip-relative branch\r
+(p14) br.sptk.few RelocateSlotError\r
+ // The sign bit must only be set for a strictly negative offset: an offset\r
+ // of exactly zero with the sign bit set would encode -2^20 bundles.\r
+ cmp.lt p14, p15 = loc4, r0;; // p14 = negative, p15 = zero or positive\r
+(p14) dep in0=1,in0,36,1 // store sign bit in instruction\r
+(p15) dep in0=0,in0,36,1\r
+ shr loc4=loc4, 4;; // convert back to bundle offset\r
+ dep in0=loc4,in0,13,16;; // put first 16 bits of new offset into instruction\r
+ shr loc4=loc4,16;;\r
+ dep in0=loc4,in0,13+16,4 // put last 4 bits of new offset into instruction\r
+ mov in1=1;; // in1 = success\r
+ br.sptk.few RelocateSlotDone;;\r
+\r
+RelocateSlotError:\r
+ mov in1=0;; // in1 = failure\r
+\r
+RelocateSlotDone:\r
+ NESTED_RETURN\r
+\r
+ .endp RelocateSlot\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// IsSlotBranch\r
+//\r
+// Description:\r
+// Determines if the given instruction is a branch instruction.\r
+//\r
+// Arguments:\r
+// in0 - Instruction encoding (41-bits, right justified)\r
+// in1 - Instruction slot number\r
+// in2 - Bundle template\r
+//\r
+// Returns:\r
+// in0 - 1 if branch or 0 if not branch\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+// IsSlotBranch recognizes all branch instructions by looking at the provided template.\r
+// The instruction encoding is only passed to this routine for future expansion.\r
+//\r
+ .proc IsSlotBranch\r
+\r
+IsSlotBranch:\r
+\r
+ NESTED_SETUP (3,2+0,0,0)\r
+\r
+ // Templates come in even/odd pairs with the same slot types, so bit 0\r
+ // is cleared up front and only the even values are compared.\r
+ and in2=-2,in2 // in2 = even form of the template\r
+ mov in0=0;; // in0 = default answer: not a branch\r
+\r
+ // Any slot of a BBB bundle is a branch slot.\r
+ cmp.eq p14, p15 = 0x16, in2;; // Template 0x16 is BBB\r
+(p14) br.sptk.few IsSlotBranchTrue\r
+\r
+ // Slot 0 is a branch slot only in BBB.\r
+ cmp.eq p14, p15 = SLOT0, in1;;\r
+(p14) br.sptk.few IsSlotBranchDone\r
+\r
+ // Slots 1 and 2 of an MBB bundle are branch slots.\r
+ cmp.eq p14, p15 = 0x12, in2;; // Template 0x12 is MBB\r
+(p14) br.sptk.few IsSlotBranchTrue\r
+\r
+ // Slot 1 is a branch slot only in BBB and MBB.\r
+ cmp.eq p14, p15 = SLOT1, in1;;\r
+(p14) br.sptk.few IsSlotBranchDone\r
+\r
+ // Remaining case is slot 2: a branch slot in MIB, MMB and MFB.\r
+ cmp.eq p14, p15 = 0x10, in2;; // Template 0x10 is MIB\r
+(p15) cmp.eq p14, p15 = 0x18, in2;; // Template 0x18 is MMB\r
+(p15) cmp.eq p14, p15 = 0x1C, in2;; // Template 0x1C is MFB\r
+(p15) br.sptk.few IsSlotBranchDone\r
+\r
+IsSlotBranchTrue:\r
+ mov in0=1;; // in0 = branch\r
+\r
+IsSlotBranchDone:\r
+ NESTED_RETURN\r
+\r
+ .endp IsSlotBranch\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// GetTemplate\r
+//\r
+// Description:\r
+// Retrieves the instruction template for an instruction bundle\r
+//\r
+// Arguments:\r
+// in0 - Runtime address of bundle\r
+//\r
+// Returns:\r
+// in0 - Instruction template (5-bits, right-justified)\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+ .proc GetTemplate\r
+\r
+GetTemplate:\r
+\r
+ NESTED_SETUP (1,2+2,0,0)\r
+\r
+ // The template lives in the low bits of the bundle's first 8 bytes.\r
+ movl loc3=MASK_0_4 // loc3 = mask selecting the template field\r
+ ld8 loc2=[in0];; // loc2 = low 8 bytes of the bundle\r
+ and in0=loc3,loc2;; // in0 = template, right justified\r
+\r
+ NESTED_RETURN\r
+\r
+ .endp GetTemplate\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// GetSlot\r
+//\r
+// Description:\r
+// Gets the instruction encoding for an instruction slot and bundle\r
+//\r
+// Arguments:\r
+// in0 - Runtime address of bundle\r
+// in1 - Instruction slot (either 0, 1, or 2)\r
+//\r
+// Returns:\r
+// in0 - Instruction encoding (41-bits, right justified)\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+// With in0 being the bundle address passed in by the caller:\r
+// Slot0 - [in0] Bits 45-5\r
+// Slot1 - [in0] Bits 63-46 and [in0 + 0x8] Bits 22-0\r
+// Slot2 - [in0 + 0x8] Bits 63-23\r
+//\r
+ .proc GetSlot\r
+//\r
+// Returns in in0 the 41-bit encoding, right justified, of slot in1 of the\r
+// bundle whose address is passed in in0. Any slot number other than 1 or 2\r
+// is treated as slot 0.\r
+//\r
+// loc2 - first 8 bytes of the bundle (template, slot 0, low 18 bits of slot 1)\r
+// loc3 - second 8 bytes of the bundle (high 23 bits of slot 1, slot 2)\r
+// loc4 - scratch while assembling slot 1\r
+//\r
+\r
+GetSlot:\r
+ NESTED_SETUP (2,2+3,0,0)\r
+\r
+ ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of branch bundle\r
+ ld8 loc3=[in0];; // loc3 = second 8 bytes of branch bundle\r
+ cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r
+ (p14) br.cond.sptk.few GetSlot2;; // get slot 2\r
+ cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r
+ (p14) br.cond.sptk.few GetSlot1;; // get slot 1\r
+\r
+GetSlot0:\r
+ // Slot 0 occupies bits 45-5, i.e. 41 bits. Extracting more than 41\r
+ // bits would leak the low bits of slot 1 into the returned encoding.\r
+ extr.u in0=loc2, 5, 41 // in0 = extracted slot 0\r
+ br.sptk.few GetSlotDone;;\r
+\r
+GetSlot1:\r
+ extr.u in0=loc2, 46, 18 // in0 = bits 63-46 of loc2 right-justified\r
+ extr.u loc4=loc3, 0, 23;; // loc4 = bits 22-0 of loc3 right-justified\r
+ // The 23 high bits are inserted in two steps (15 bits, then 8 bits).\r
+ dep in0=loc4, in0, 18, 15;;\r
+ shr.u loc4=loc4,15;;\r
+ dep in0=loc4, in0, 33, 8;; // in0 = extracted slot 1\r
+ br.sptk.few GetSlotDone;;\r
+\r
+GetSlot2:\r
+ extr.u in0=loc3, 23, 41;; // in0 = extracted slot 2\r
+\r
+GetSlotDone:\r
+ NESTED_RETURN\r
+\r
+ .endp GetSlot\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// SetSlot\r
+//\r
+// Description:\r
+// Sets the instruction encoding for an instruction slot and bundle\r
+//\r
+// Arguments:\r
+// in0 - Runtime address of bundle\r
+// in1 - Instruction slot (either 0, 1, or 2)\r
+// in2 - Instruction encoding (41-bits, right justified)\r
+//\r
+// Returns:\r
+//\r
+// Notes:\r
+// This procedure is a leaf routine\r
+//\r
+ .proc SetSlot\r
+//\r
+// Replaces slot in1 of the bundle at in0 with the 41-bit encoding in in2.\r
+// Any slot number other than 1 or 2 is treated as slot 0.\r
+//\r
+// The first load post-increments in0, so from then on:\r
+// in0 - 8 = address of the first 8 bytes of the bundle\r
+// in0 = address of the second 8 bytes of the bundle\r
+//\r
+// loc2 - first 8 bytes of the bundle (template, slot 0, low 18 bits of slot 1)\r
+// loc3 - second 8 bytes of the bundle (high 23 bits of slot 1, slot 2)\r
+// loc4 - scratch: shifted encoding, then store address\r
+//\r
+// The bundle is instruction memory, so the modified cache line is flushed\r
+// and the instruction stream serialized before returning, as CopyBundles does.\r
+//\r
+\r
+SetSlot:\r
+ NESTED_SETUP (3,2+3,0,0)\r
+\r
+ ld8 loc2=[in0], 0x8;; // loc2 = first 8 bytes of bundle; in0 now = bundle + 8\r
+ ld8 loc3=[in0];; // loc3 = second 8 bytes of bundle\r
+ cmp.eq p14, p15 = 2, in1;; // check if slot 2 specified\r
+ (p14) br.cond.sptk.few SetSlot2;; // set slot 2\r
+ cmp.eq p14, p15 = 1, in1;; // check if slot 1 specified\r
+ (p14) br.cond.sptk.few SetSlot1;; // set slot 1\r
+\r
+SetSlot0:\r
+ dep loc2=0, loc2, 5, 41;; // remove old instruction from slot 0\r
+ shl loc4=in2, 5;; // loc4 = new instruction ready to be inserted\r
+ or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r
+ add loc4=-8,in0;; // loc4 = address of first 8 bytes (in0 was advanced by 8)\r
+ st8 [loc4]=loc2 // [loc4] = updated bundle\r
+ br.sptk.few SetSlotDone;;\r
+\r
+SetSlot1:\r
+ dep loc2=0, loc2, 46, 18 // remove old instruction from slot 1\r
+ dep loc3=0, loc3, 0, 23;;\r
+ shl loc4=in2, 46;; // loc4 = low 18 bits of instruction, positioned at bit 46\r
+ or loc2=loc2, loc4;; // loc2 = updated first 8 bytes of bundle\r
+ add loc4=-8,in0;; // loc4 = address of first 8 bytes (in0 was advanced by 8)\r
+ st8 [loc4]=loc2;; // [loc4] = updated bundle\r
+ shr.u loc4=in2, 18;; // loc4 = high 23 bits of instruction, right justified\r
+ or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r
+ st8 [in0]=loc3;; // [in0] = updated bundle\r
+ br.sptk.few SetSlotDone;;\r
+\r
+SetSlot2:\r
+ dep loc3=0, loc3, 23, 41;; // remove old instruction from slot 2\r
+ shl loc4=in2, 23;; // loc4 = instruction ready to be inserted\r
+ or loc3=loc3, loc4;; // loc3 = updated second 8 bytes of bundle\r
+ st8 [in0]=loc3;; // [in0] = updated bundle\r
+\r
+SetSlotDone:\r
+ // in0 points into the bundle on every path, so one flush covers the\r
+ // cache line holding the patched instruction.\r
+ fc in0;; // flush the modified line\r
+ sync.i;; // make the flush visible to instruction fetch\r
+ srlz.i;; // serialize before returning\r
+\r
+ NESTED_RETURN\r
+ .endp SetSlot\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// GetIva\r
+//\r
+// Description:\r
+// C callable function to obtain the current value of IVA\r
+//\r
+// Returns:\r
+// Current value of IVA\r
+\r
+ .globl GetIva\r
+ .proc GetIva\r
+GetIva:\r
+ // cr.iva is control register 2; the value is returned in r8.\r
+ mov r8=cr.iva;;\r
+ br.ret.sptk.many b0\r
+\r
+ .endp GetIva\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// ProgramInterruptFlags\r
+//\r
+// Description:\r
+// C callable function to enable/disable interrupts\r
+//\r
+// Returns:\r
+// Previous state of psr.i and psr.ic (PSR masked with 0x6000)\r
+//\r
+ .globl ProgramInterruptFlags\r
+ .proc ProgramInterruptFlags\r
+//\r
+// in0 - new flag values; only the bits selected by mask 0x6000 are used\r
+// r8 - returned: the same two PSR bits as they were on entry\r
+//\r
+// loc0 is first the alloc target and is then reused to build the new PSR\r
+// value; loc1 holds the 0x6000 mask (bits 13 and 14, which the comments below\r
+// identify as psr.ic and psr.i).\r
+//\r
+ProgramInterruptFlags:\r
+ alloc loc0=1,2,0,0;; // one input, two locals\r
+ mov loc0=psr // loc0 = current PSR\r
+ mov loc1=0x6000;; // loc1 = mask of the two flag bits\r
+ and r8=loc0, loc1 // obtain current psr.ic and psr.i state\r
+ and in0=in0, loc1 // insure no extra bits set in input\r
+ andcm loc0=loc0,loc1;; // clear original psr.i and psr.ic\r
+ or loc0=loc0,in0;; // OR in new psr.i and psr.ic values\r
+ mov psr.l=loc0;; // write new psr\r
+ srlz.d // data-serialize so the PSR write takes effect\r
+ br.ret.sptk.many b0 // return\r
+\r
+ .endp ProgramInterruptFlags\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// SpillContext\r
+//\r
+// Description:\r
+// Saves system context to context record.\r
+//\r
+// Arguments:\r
+// in0 = 512 byte aligned context record address\r
+// in1 = original B0\r
+// in2 = original ar.bsp\r
+// in3 = original ar.bspstore\r
+// in4 = original ar.rnat\r
+// in5 = original ar.pfs\r
+//\r
+// Notes:\r
+// loc0 - scratch\r
+// loc1 - scratch\r
+// loc2 - temporary application unat storage\r
+// loc3 - temporary exception handler unat storage\r
+\r
+ .proc SpillContext\r
+//\r
+// Writes the context record sequentially through in0, which is\r
+// post-incremented by every store except the last one. Record layout, in\r
+// store order:\r
+// r0-r31 (8 bytes each)\r
+// f2-f31 (16 bytes each)\r
+// pr, b0-b7\r
+// ar.rsc, ar.bsp, ar.bspstore, ar.rnat, ar.fcr, ar.eflag, ar.csd,\r
+// ar.ssd, ar.cflg, ar.fsr, ar.fir, ar.fdr, ar.ccv, ar.unat, ar.fpsr,\r
+// ar.pfs, ar.lc, ar.ec\r
+// cr.dcr, cr.itm, cr.iva, cr.pta, cr.ipsr, cr.isr, cr.iip, cr.ifa,\r
+// cr.itir, cr.iipa, cr.ifs, cr.iim, cr.iha\r
+// dbr0-dbr7, ibr0-ibr7\r
+// ar.unat as captured right after the r0-r31 spills (NaT bits of r0-r31)\r
+//\r
+// b0, ar.bsp, ar.bspstore, ar.rnat and ar.pfs are taken from in1-in5 rather\r
+// than read from the registers themselves.\r
+//\r
+// On return in0 points at the last element written. ar.unat is left as\r
+// modified by the spills; it is not restored here.\r
+//\r
+\r
+SpillContext:\r
+ alloc loc0=6,4,0,0;; // alloc 6 input, 4 locals, 0 outs\r
+ mov loc2=ar.unat;; // save application context unat (spilled later)\r
+ mov ar.unat=r0;; // set UNAT=0\r
+ st8.spill [in0]=r0,8;;\r
+ st8.spill [in0]=r1,8;; // save R1 - R31\r
+ st8.spill [in0]=r2,8;;\r
+ st8.spill [in0]=r3,8;;\r
+ st8.spill [in0]=r4,8;;\r
+ st8.spill [in0]=r5,8;;\r
+ st8.spill [in0]=r6,8;;\r
+ st8.spill [in0]=r7,8;;\r
+ st8.spill [in0]=r8,8;;\r
+ st8.spill [in0]=r9,8;;\r
+ st8.spill [in0]=r10,8;;\r
+ st8.spill [in0]=r11,8;;\r
+ st8.spill [in0]=r12,8;;\r
+ st8.spill [in0]=r13,8;;\r
+ st8.spill [in0]=r14,8;;\r
+ st8.spill [in0]=r15,8;;\r
+ st8.spill [in0]=r16,8;;\r
+ st8.spill [in0]=r17,8;;\r
+ st8.spill [in0]=r18,8;;\r
+ st8.spill [in0]=r19,8;;\r
+ st8.spill [in0]=r20,8;;\r
+ st8.spill [in0]=r21,8;;\r
+ st8.spill [in0]=r22,8;;\r
+ st8.spill [in0]=r23,8;;\r
+ st8.spill [in0]=r24,8;;\r
+ st8.spill [in0]=r25,8;;\r
+ st8.spill [in0]=r26,8;;\r
+ st8.spill [in0]=r27,8;;\r
+ st8.spill [in0]=r28,8;;\r
+ st8.spill [in0]=r29,8;;\r
+ st8.spill [in0]=r30,8;;\r
+ st8.spill [in0]=r31,8;;\r
+ mov loc3=ar.unat;; // save debugger context unat (spilled later)\r
+ stf.spill [in0]=f2,16;; // save f2 - f31\r
+ stf.spill [in0]=f3,16;;\r
+ stf.spill [in0]=f4,16;;\r
+ stf.spill [in0]=f5,16;;\r
+ stf.spill [in0]=f6,16;;\r
+ stf.spill [in0]=f7,16;;\r
+ stf.spill [in0]=f8,16;;\r
+ stf.spill [in0]=f9,16;;\r
+ stf.spill [in0]=f10,16;;\r
+ stf.spill [in0]=f11,16;;\r
+ stf.spill [in0]=f12,16;;\r
+ stf.spill [in0]=f13,16;;\r
+ stf.spill [in0]=f14,16;;\r
+ stf.spill [in0]=f15,16;;\r
+ stf.spill [in0]=f16,16;;\r
+ stf.spill [in0]=f17,16;;\r
+ stf.spill [in0]=f18,16;;\r
+ stf.spill [in0]=f19,16;;\r
+ stf.spill [in0]=f20,16;;\r
+ stf.spill [in0]=f21,16;;\r
+ stf.spill [in0]=f22,16;;\r
+ stf.spill [in0]=f23,16;;\r
+ stf.spill [in0]=f24,16;;\r
+ stf.spill [in0]=f25,16;;\r
+ stf.spill [in0]=f26,16;;\r
+ stf.spill [in0]=f27,16;;\r
+ stf.spill [in0]=f28,16;;\r
+ stf.spill [in0]=f29,16;;\r
+ stf.spill [in0]=f30,16;;\r
+ stf.spill [in0]=f31,16;;\r
+ mov loc0=pr;; // save predicates\r
+ st8.spill [in0]=loc0,8;;\r
+ st8.spill [in0]=in1,8;; // save b0 - b7... in1 already equals saved b0\r
+ mov loc0=b1;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b2;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b3;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b4;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b5;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b6;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=b7;;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.rsc;; // save ar.rsc\r
+ st8.spill [in0]=loc0,8;;\r
+ st8.spill [in0]=in2,8;; // save ar.bsp (in2)\r
+ st8.spill [in0]=in3,8;; // save ar.bspstore (in3)\r
+ st8.spill [in0]=in4,8;; // save ar.rnat (in4)\r
+ mov loc0=ar.fcr;; // save ar.fcr (ar21 - IA32 floating-point control register)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.eflag;; // save ar.eflag (ar24)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.csd;; // save ar.csd (ar25 - ia32 CS descriptor)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.ssd;; // save ar.ssd (ar26 - ia32 ss descriptor)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.cflg;; // save ar.cflg (ar27 - ia32 cr0 and cr4)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.fsr;; // save ar.fsr (ar28 - ia32 floating-point status register)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.fir;; // save ar.fir (ar29 - ia32 floating-point instruction register)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.fdr;; // save ar.fdr (ar30 - ia32 floating-point data register)\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.ccv;; // save ar.ccv\r
+ st8.spill [in0]=loc0,8;;\r
+ st8.spill [in0]=loc2,8;; // save ar.unat (saved to loc2 earlier)\r
+ mov loc0=ar.fpsr;; // save floating point status register\r
+ st8.spill [in0]=loc0,8;;\r
+ st8.spill [in0]=in5,8;; // save ar.pfs\r
+ mov loc0=ar.lc;; // save ar.lc\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ar.ec;; // save ar.ec\r
+ st8.spill [in0]=loc0,8;;\r
+\r
+ // save control registers\r
+ mov loc0=cr.dcr;; // save dcr\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.itm;; // save itm\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.iva;; // save iva\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.pta;; // save pta\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.ipsr;; // save ipsr\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.isr;; // save isr\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.iip;; // save iip\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.ifa;; // save ifa\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.itir;; // save itir\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.iipa;; // save iipa\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.ifs;; // save ifs\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.iim;; // save iim\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=cr.iha;; // save iha\r
+ st8.spill [in0]=loc0,8;;\r
+\r
+ // save debug registers\r
+ mov loc0=dbr[r0];; // save dbr0 - dbr7\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=1;; // loc1 = index of the next indirect register\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=2;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=3;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=4;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=5;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=6;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=7;;\r
+ mov loc0=dbr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ mov loc0=ibr[r0];; // save ibr0 - ibr7\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=1;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=2;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=3;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=4;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=5;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=6;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ movl loc1=7;;\r
+ mov loc0=ibr[loc1];;\r
+ st8.spill [in0]=loc0,8;;\r
+ st8.spill [in0]=loc3;; // last element: unat captured after the r0-r31 spills; in0 is not advanced\r
+\r
+ br.ret.sptk.few b0\r
+\r
+ .endp SpillContext\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// FillContext\r
+//\r
+// Description:\r
+// Restores register context from context record.\r
+//\r
+// Arguments:\r
+// in0 = address of last element 512 byte aligned context record address\r
+// in1 = modified B0\r
+// in2 = modified ar.bsp\r
+// in3 = modified ar.bspstore\r
+// in4 = modified ar.rnat\r
+// in5 = modified ar.pfs\r
+//\r
+// Notes:\r
+// loc0 - scratch\r
+// loc1 - scratch\r
+// loc2 - temporary application unat storage\r
+// loc3 - temporary exception handler unat storage\r
+\r
+ .proc FillContext\r
+FillContext:\r
+ alloc loc0=6,4,0,0;; // alloc 6 inputs, 4 locals, 0 outs\r
+ ld8.fill loc3=[in0],-8;; // int_nat (nat bits for R1-31)\r
+ movl loc1=7;; // ibr7\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=6;; // ibr6\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=5;; // ibr5\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=4;; // ibr4\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=3;; // ibr3\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=2;; // ibr2\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ movl loc1=1;; // ibr1\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov ibr[loc1]=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ibr0\r
+ mov ibr[r0]=loc0;;\r
+ movl loc1=7;; // dbr7\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=6;; // dbr6\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=5;; // dbr5\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=4;; // dbr4\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=3;; // dbr3\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=2;; // dbr2\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ movl loc1=1;; // dbr1\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov dbr[loc1]=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // dbr0\r
+ mov dbr[r0]=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // iha\r
+ mov cr.iha=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // iim\r
+ mov cr.iim=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ifs\r
+ mov cr.ifs=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // iipa\r
+ mov cr.iipa=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // itir\r
+ mov cr.itir=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ifa\r
+ mov cr.ifa=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // iip\r
+ mov cr.iip=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // isr\r
+ mov cr.isr=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ipsr\r
+ mov cr.ipsr=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // pta\r
+ mov cr.pta=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // iva\r
+ mov cr.iva=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // itm\r
+ mov cr.itm=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // dcr\r
+ mov cr.dcr=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ec\r
+ mov ar.ec=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // lc\r
+ mov ar.lc=loc0;;\r
+ ld8.fill in5=[in0],-8;; // ar.pfs\r
+ ld8.fill loc0=[in0],-8;; // ar.fpsr\r
+ mov ar.fpsr=loc0;;\r
+ ld8.fill loc2=[in0],-8;; // ar.unat - restored later...\r
+ ld8.fill loc0=[in0],-8;; // ar.ccv\r
+ mov ar.ccv=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.fdr\r
+ mov ar.fdr=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.fir\r
+ mov ar.fir=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.fsr\r
+ mov ar.fsr=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.cflg\r
+ mov ar.cflg=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.ssd\r
+ mov ar.ssd=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.csd\r
+ mov ar.csd=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.eflag\r
+ mov ar.eflag=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // ar.fcr\r
+ mov ar.fcr=loc0;;\r
+ ld8.fill in4=[in0],-8;; // ar.rnat\r
+ ld8.fill in3=[in0],-8;; // bspstore\r
+ ld8.fill in2=[in0],-8;; // bsp\r
+ ld8.fill loc0=[in0],-8;; // ar.rsc\r
+ mov ar.rsc=loc0;;\r
+ ld8.fill loc0=[in0],-8;; // B7 - B0\r
+ mov b7=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b6=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b5=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b4=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b3=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b2=loc0;;\r
+ ld8.fill loc0=[in0],-8;;\r
+ mov b1=loc0;;\r
+ ld8.fill in1=[in0],-8;; // b0 is temporarily stored in in1\r
+ ld8.fill loc0=[in0],-16;; // predicates\r
+ mov pr=loc0;;\r
+ ldf.fill f31=[in0],-16;;\r
+ ldf.fill f30=[in0],-16;;\r
+ ldf.fill f29=[in0],-16;;\r
+ ldf.fill f28=[in0],-16;;\r
+ ldf.fill f27=[in0],-16;;\r
+ ldf.fill f26=[in0],-16;;\r
+ ldf.fill f25=[in0],-16;;\r
+ ldf.fill f24=[in0],-16;;\r
+ ldf.fill f23=[in0],-16;;\r
+ ldf.fill f22=[in0],-16;;\r
+ ldf.fill f21=[in0],-16;;\r
+ ldf.fill f20=[in0],-16;;\r
+ ldf.fill f19=[in0],-16;;\r
+ ldf.fill f18=[in0],-16;;\r
+ ldf.fill f17=[in0],-16;;\r
+ ldf.fill f16=[in0],-16;;\r
+ ldf.fill f15=[in0],-16;;\r
+ ldf.fill f14=[in0],-16;;\r
+ ldf.fill f13=[in0],-16;;\r
+ ldf.fill f12=[in0],-16;;\r
+ ldf.fill f11=[in0],-16;;\r
+ ldf.fill f10=[in0],-16;;\r
+ ldf.fill f9=[in0],-16;;\r
+ ldf.fill f8=[in0],-16;;\r
+ ldf.fill f7=[in0],-16;;\r
+ ldf.fill f6=[in0],-16;;\r
+ ldf.fill f5=[in0],-16;;\r
+ ldf.fill f4=[in0],-16;;\r
+ ldf.fill f3=[in0],-16;;\r
+ ldf.fill f2=[in0],-8;;\r
+ mov ar.unat=loc3;; // restore unat (int_nat) before fill of general registers\r
+ ld8.fill r31=[in0],-8;;\r
+ ld8.fill r30=[in0],-8;;\r
+ ld8.fill r29=[in0],-8;;\r
+ ld8.fill r28=[in0],-8;;\r
+ ld8.fill r27=[in0],-8;;\r
+ ld8.fill r26=[in0],-8;;\r
+ ld8.fill r25=[in0],-8;;\r
+ ld8.fill r24=[in0],-8;;\r
+ ld8.fill r23=[in0],-8;;\r
+ ld8.fill r22=[in0],-8;;\r
+ ld8.fill r21=[in0],-8;;\r
+ ld8.fill r20=[in0],-8;;\r
+ ld8.fill r19=[in0],-8;;\r
+ ld8.fill r18=[in0],-8;;\r
+ ld8.fill r17=[in0],-8;;\r
+ ld8.fill r16=[in0],-8;;\r
+ ld8.fill r15=[in0],-8;;\r
+ ld8.fill r14=[in0],-8;;\r
+ ld8.fill r13=[in0],-8;;\r
+ ld8.fill r12=[in0],-8;;\r
+ ld8.fill r11=[in0],-8;;\r
+ ld8.fill r10=[in0],-8;;\r
+ ld8.fill r9=[in0],-8;;\r
+ ld8.fill r8=[in0],-8;;\r
+ ld8.fill r7=[in0],-8;;\r
+ ld8.fill r6=[in0],-8;;\r
+ ld8.fill r5=[in0],-8;;\r
+ ld8.fill r4=[in0],-8;;\r
+ ld8.fill r3=[in0],-8;;\r
+ ld8.fill r2=[in0],-8;;\r
+ ld8.fill r1=[in0],-8;;\r
+ mov ar.unat=loc2;; // restore application context unat\r
+\r
+ br.ret.sptk.many b0\r
+\r
+ .endp FillContext\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// HookHandler\r
+//\r
+// Description:\r
+// Common branch target from hooked IVT entries. Runs in interrupt context.\r
+// Responsible for saving and restoring context and calling the common C\r
+// handler (CommonHandler). Banked registers are on bank 0 at entry.\r
+//\r
+// Arguments:\r
+// All arguments are passed in banked registers:\r
+// B0_REG = Original B0\r
+// SCRATCH_REG1 = IVT entry index\r
+//\r
+// Returns:\r
+// Returns via rfi, or branches to cr.iva + PATCH_RETURN_OFFSET when SCRATCH_REG1 equals EXCPT_EXTERNAL_INTERRUPT\r
+//\r
+// Notes:\r
+// loc0 - scratch\r
+// loc1 - scratch\r
+// loc2 - copy of SCRATCH_REG1 (vector number + chain flag); later a compare temporary\r
+// loc3 - 512 byte aligned context record address; later the RSC.loadrs clearing mask\r
+// loc4 - temporary storage of last address in context record\r
+\r
+HookHandler:\r
+ flushrs;; // Sync RSE with backing store\r
+ mov SCRATCH_REG2=ar.bsp // save interrupted context bsp\r
+ mov SCRATCH_REG3=ar.bspstore // save interrupted context bspstore\r
+ mov SCRATCH_REG4=ar.rnat // save interrupted context rnat\r
+ mov SCRATCH_REG6=cr.ifs;; // save IFS in case we need to chain...\r
+ cover;; // creates new frame, moves old\r
+ // CFM to IFS.\r
+ alloc SCRATCH_REG5=0,5,6,0 // alloc 0 ins, 5 locals, 6 outs; SCRATCH_REG5 receives ar.pfs\r
+ ;;\r
+ // copy the bank 0 values into stacked registers (outs and loc2) before switching banks\r
+ mov out1=B0_REG // out1 = Original B0\r
+ mov out2=SCRATCH_REG2 // out2 = original ar.bsp\r
+ mov out3=SCRATCH_REG3 // out3 = original ar.bspstore\r
+ mov out4=SCRATCH_REG4 // out4 = original ar.rnat\r
+ mov out5=SCRATCH_REG5 // out5 = original ar.pfs\r
+ mov loc2=SCRATCH_REG1;; // loc2 = vector number + chain flag\r
+ bsw.1;; // switch banked registers to bank 1\r
+ srlz.d // explicit serialize required\r
+ // now fill in context record structure\r
+ movl loc3=IpfContextBuf // Ensure context record is aligned\r
+ add loc0=-0x200,r0;; // loc0 = mask that clears the lower 9 bits (align on 512 byte boundary)\r
+ and loc3=loc3,loc0;;\r
+ add loc3=0x200,loc3;; // move to next 512 byte boundary\r
+ // loc3 now contains the 512 byte aligned context record\r
+ // spill register context into context record\r
+ mov out0=loc3;; // Context record base in out0\r
+ // original B0 in out1 already\r
+ // original ar.bsp in out2 already\r
+ // original ar.bspstore in out3 already (ar.rnat and ar.pfs likewise in out4 / out5)\r
+ br.call.sptk.few b0=SpillContext;; // spill context\r
+ mov loc4=out0 // save the address SpillContext left in out0 (passed to FillContext below)\r
+\r
+ // At this point, the context has been saved to the context record and we're\r
+ // ready to call the C part of the handler...\r
+\r
+ movl loc0=CommonHandler;; // obtain address of plabel\r
+ ld8 loc1=[loc0];; // get entry point of CommonHandler\r
+ mov b6=loc1;; // put it in a branch register\r
+ adds loc1= 8, loc0;; // index to GP in plabel\r
+ ld8 r1=[loc1];; // set up gp for C call\r
+ mov loc1=0xfffff;; // loc1 = 0xfffff mask so only vector bits are present\r
+ and out0=loc2,loc1;; // pass vector number (exception type)\r
+ mov out1=loc3;; // pass context record address\r
+ br.call.sptk.few b0=b6;; // call C handler\r
+\r
+ // We've returned from the C call, so restore the context and either rfi\r
+ // back to interrupted thread, or chain into the SAL if this was an external interrupt\r
+ mov out0=loc4;; // pass address of last element in context record\r
+ br.call.sptk.few b0=FillContext;; // Fill context\r
+ mov b0=out1 // fill in b0; FillContext hands back b0, ar.rnat and ar.pfs in its in1, in4 and in5 (our out1, out4, out5)\r
+ mov ar.rnat=out4\r
+ mov ar.pfs=out5\r
+\r
+ // Loadrs is necessary because the debugger may have changed some values in\r
+ // the backing store. The processor, however, may not be aware that the\r
+ // stacked registers need to be reloaded from the backing store. Therefore,\r
+ // we explicitly cause the RSE to refresh the stacked registers' contents\r
+ // from the backing store.\r
+ mov loc0=ar.rsc // get RSC value\r
+ mov loc1=ar.rsc // save it so we can restore it\r
+ movl loc3=0xffffffffc000ffff;; // create mask for clearing RSC.loadrs (mask has bits 16-29 clear)\r
+ and loc0=loc0,loc3;; // create value for RSC with RSC.loadrs==0\r
+ mov ar.rsc=loc0;; // modify RSC\r
+ loadrs;; // invalidate register stack\r
+ mov ar.rsc=loc1;; // restore original RSC\r
+\r
+ bsw.0;; // switch banked registers back to bank 0\r
+ srlz.d;; // explicit serialize required\r
+ mov PR_REG=pr // save predicates - to be restored after chaining decision\r
+ mov B0_REG=b0 // save b0 - required by chain code\r
+ mov loc2=EXCPT_EXTERNAL_INTERRUPT;;\r
+ cmp.eq p7,p0=SCRATCH_REG1,loc2;; // p7 = vector is the external interrupt (the timer tick)\r
+ (p7) br.cond.dpnt.few DO_CHAIN;;\r
+\r
+NO_CHAIN:\r
+ mov pr=PR_REG;;\r
+ rfi;; // not chaining: return to the interrupted context\r
+\r
+DO_CHAIN:\r
+ mov pr=PR_REG\r
+ mov SCRATCH_REG1=cr.iva\r
+ mov SCRATCH_REG2=PATCH_RETURN_OFFSET;;\r
+ add SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;;\r
+ mov b0=SCRATCH_REG1;;\r
+ br.cond.sptk.few b0;;\r
+\r
+EndHookHandler:\r
+\r
+\r
+/////////////////////////////////////////////\r
+//\r
+// Name:\r
+// HookStub\r
+//\r
+// Description:\r
+// HookStub will be copied from its loaded location into the IVT when\r
+// an IVT entry is hooked. The IVT entry does an indirect jump via B0 to\r
+// HookHandler, which in turn calls into the default C handler, which calls\r
+// the user-installed C handler. The calls return and HookHandler executes\r
+// an rfi.\r
+//\r
+// Notes:\r
+// Saves B0 to B0_REG before b0 is overwritten with the address of HookHandler\r
+// Saves IVT index to SCRATCH_REG1 (the immediate value is fixed up when the code is copied\r
+// to the IVT entry).\r
+\r
+ .globl HookStub\r
+ .proc HookStub\r
+HookStub:\r
+\r
+ mov B0_REG=b0\r
+ movl SCRATCH_REG1=HookHandler;;\r
+ mov b0=SCRATCH_REG1;;\r
+ mov SCRATCH_REG1=0;;// placeholder 0: immediate value is fixed up during install of handler to be the vector number\r
+ br.cond.sptk.few b0\r
+\r
+ .endp HookStub\r
+\r
+\r
+/////////////////////////////////////////////\r
+// The following code is moved into IVT entry 14 (offset 0x3400) which is reserved\r
+// in the Itanium architecture. The patch code is located at the end of the\r
+// IVT entry. It saves psr, cr.ipsr, pr and b0 in SCRATCH_REG0, SCRATCH_REG6, PR_REG and B0_REG.\r
+\r
+PatchCode:\r
+ mov SCRATCH_REG0=psr\r
+ mov SCRATCH_REG6=cr.ipsr\r
+ mov PR_REG=pr\r
+ mov B0_REG=b0;;\r
+\r
+ // turn off virtual translations by clearing the PSR_DT and PSR_RT bits in psr.l\r
+ movl SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;\r
+ and SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;\r
+ mov psr.l = SCRATCH_REG1;;\r
+ srlz.d \r
+ tbit.z p14, p15 = SCRATCH_REG6, PSR_IS;; // Check the saved ipsr to see if we were\r
+ // interrupted from IA32\r
+ // context (p15 = PSR_IS set). If so, bail out\r
+ // and chain to SAL immediately\r
+ (p15) br.cond.sptk.few Stub_IVT_Passthru;;\r
+ // we only want to take about 1 out of 32 external interrupts to minimize the\r
+ // impact to system performance. Check our interrupt count and bail\r
+ // out while bit 5 of the count is still clear\r
+ movl SCRATCH_REG1=ExternalInterruptCount;;\r
+ ld8 SCRATCH_REG2=[SCRATCH_REG1];; // SCRATCH_REG2 = current ExternalInterruptCount\r
+ tbit.z p14, p15 = SCRATCH_REG2, 5;; // bit 5 set? p14 = no, p15 = yes\r
+ (p14) add SCRATCH_REG2=1, SCRATCH_REG2;; // No? Then increment\r
+ // ExternalInterruptCount\r
+ // and Chain to SAL\r
+ // immediately\r
+ (p14) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
+ (p14) br.cond.sptk.few Stub_IVT_Passthru;;\r
+ (p15) mov SCRATCH_REG2=0;; // Yes? Then reset\r
+ // ExternalInterruptCount\r
+ // and branch to\r
+ // HookHandler\r
+ (p15) st8 [SCRATCH_REG1]=SCRATCH_REG2;;\r
+ mov pr=PR_REG\r
+ movl SCRATCH_REG1=HookHandler;; // SCRATCH_REG1 = entrypoint of HookHandler\r
+ mov b0=SCRATCH_REG1;; // b0 = entrypoint of HookHandler\r
+ mov SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;;\r
+ br.sptk.few b0;; // branch to HookHandler\r
+\r
+PatchCodeRet:\r
+ // fake-up an rfi to get RSE back to being coherent and ensure psr has\r
+ // original contents when interrupt occurred, then exit to SAL\r
+ // at this point:\r
+ // cr.ifs has been modified by previous "cover"\r
+ // SCRATCH_REG6 has original cr.ifs (as saved by HookHandler before its "cover")\r
+\r
+ mov SCRATCH_REG5=cr.ipsr\r
+ mov SCRATCH_REG4=cr.iip;;\r
+ mov cr.ipsr=SCRATCH_REG0\r
+ mov SCRATCH_REG1=ip;;\r
+ add SCRATCH_REG1=0x30, SCRATCH_REG1;;\r
+ mov cr.iip=SCRATCH_REG1;;\r
+ rfi;; // rfi to next instruction: iip = ip + 0x30, presumably Stub_RfiTarget - depends on bundle layout, TODO confirm\r
+\r
+Stub_RfiTarget:\r
+ mov cr.ifs=SCRATCH_REG6\r
+ mov cr.ipsr=SCRATCH_REG5\r
+ mov cr.iip=SCRATCH_REG4;;\r
+\r
+Stub_IVT_Passthru:\r
+ mov pr=PR_REG // pr = saved predicate registers\r
+ mov b0=B0_REG;; // b0 = saved b0\r
+EndPatchCode:\r
+\r
+\r
+/////////////////////////////////////////////\r
+// The following bundle is moved into IVT entry 14 (offset 0x3400) which is reserved\r
+// in the Itanium architecture. This bundle will be the last bundle and will\r
+// be located at offset 0x37F0 in the IVT. It branches backwards by (FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10) bytes; presumably that lands just past the first bundle of the external interrupt entry - TODO confirm against the constant definitions.\r
+\r
+FailsafeBranch:\r
+{\r
+ .mib\r
+ nop.m 0\r
+ nop.i 0\r
+ br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10)\r
+}\r
+\r
+\r
+/////////////////////////////////////////////\r
+// The following bundle is moved into IVT entry 13 (offset 0x3000) which is the\r
+// external interrupt. It branches to the patch code through the PATCH_BRANCH target, which is defined outside this part of the file.\r
+\r
+PatchCodeNewBun0:\r
+{\r
+ .mib\r
+ nop.m 0\r
+ nop.i 0\r
+ br.cond.sptk.few PATCH_BRANCH\r
+}\r