1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a printer that converts from our internal representation
11 // of machine-dependent LLVM code to NVPTX assembly language.
13 //===----------------------------------------------------------------------===//
15 #include "NVPTXAsmPrinter.h"
16 #include "InstPrinter/NVPTXInstPrinter.h"
17 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
19 #include "NVPTXInstrInfo.h"
20 #include "NVPTXMCExpr.h"
21 #include "NVPTXMachineFunctionInfo.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXTargetMachine.h"
24 #include "NVPTXUtilities.h"
25 #include "cl_common_defines.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/Analysis/ConstantFolding.h"
28 #include "llvm/CodeGen/Analysis.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/IR/DebugInfo.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Mangler.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCStreamer.h"
40 #include "llvm/MC/MCSymbol.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/FormattedStream.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/TargetRegistry.h"
46 #include "llvm/Support/TimeValue.h"
47 #include "llvm/Target/TargetLoweringObjectFile.h"
51 #define DEPOTNAME "__local_depot"
54 EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden
,
55 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
59 InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore
, cl::Hidden
,
60 cl::desc("NVPTX Specific: Emit source line in ptx file"),
64 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
66 void DiscoverDependentGlobals(const Value
*V
,
67 DenseSet
<const GlobalVariable
*> &Globals
) {
68 if (const GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(V
))
71 if (const User
*U
= dyn_cast
<User
>(V
)) {
72 for (unsigned i
= 0, e
= U
->getNumOperands(); i
!= e
; ++i
) {
73 DiscoverDependentGlobals(U
->getOperand(i
), Globals
);
79 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
80 /// instances to be emitted, but only after any dependents have been added
82 void VisitGlobalVariableForEmission(
83 const GlobalVariable
*GV
, SmallVectorImpl
<const GlobalVariable
*> &Order
,
84 DenseSet
<const GlobalVariable
*> &Visited
,
85 DenseSet
<const GlobalVariable
*> &Visiting
) {
86 // Have we already visited this one?
87 if (Visited
.count(GV
))
90 // Do we have a circular dependency?
91 if (!Visiting
.insert(GV
).second
)
92 report_fatal_error("Circular dependency found in global variable set");
94 // Make sure we visit all dependents first
95 DenseSet
<const GlobalVariable
*> Others
;
96 for (unsigned i
= 0, e
= GV
->getNumOperands(); i
!= e
; ++i
)
97 DiscoverDependentGlobals(GV
->getOperand(i
), Others
);
99 for (DenseSet
<const GlobalVariable
*>::iterator I
= Others
.begin(),
102 VisitGlobalVariableForEmission(*I
, Order
, Visited
, Visiting
);
104 // Now we can visit ourself
111 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr
&MI
) {
112 if (!EmitLineNumbers
)
117 DebugLoc curLoc
= MI
.getDebugLoc();
119 if (prevDebugLoc
.isUnknown() && curLoc
.isUnknown())
122 if (prevDebugLoc
== curLoc
)
125 prevDebugLoc
= curLoc
;
127 if (curLoc
.isUnknown())
130 const MachineFunction
*MF
= MI
.getParent()->getParent();
131 //const TargetMachine &TM = MF->getTarget();
133 const LLVMContext
&ctx
= MF
->getFunction()->getContext();
134 DIScope
Scope(curLoc
.getScope(ctx
));
136 assert((!Scope
|| Scope
.isScope()) &&
137 "Scope of a DebugLoc should be null or a DIScope.");
141 StringRef
fileName(Scope
.getFilename());
142 StringRef
dirName(Scope
.getDirectory());
143 SmallString
<128> FullPathName
= dirName
;
144 if (!dirName
.empty() && !sys::path::is_absolute(fileName
)) {
145 sys::path::append(FullPathName
, fileName
);
146 fileName
= FullPathName
.str();
149 if (filenameMap
.find(fileName
.str()) == filenameMap
.end())
152 // Emit the line from the source file.
154 this->emitSrcInText(fileName
.str(), curLoc
.getLine());
156 std::stringstream temp
;
157 temp
<< "\t.loc " << filenameMap
[fileName
.str()] << " " << curLoc
.getLine()
158 << " " << curLoc
.getCol();
159 OutStreamer
.EmitRawText(Twine(temp
.str().c_str()));
162 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr
*MI
) {
163 SmallString
<128> Str
;
164 raw_svector_ostream
OS(Str
);
165 if (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
)
166 emitLineNumberAsDotLoc(*MI
);
169 lowerToMCInst(MI
, Inst
);
170 EmitToStreamer(OutStreamer
, Inst
);
173 // Handle symbol backtracking for targets that do not support image handles
174 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr
*MI
,
175 unsigned OpNo
, MCOperand
&MCOp
) {
176 const MachineOperand
&MO
= MI
->getOperand(OpNo
);
177 const MCInstrDesc
&MCID
= MI
->getDesc();
179 if (MCID
.TSFlags
& NVPTXII::IsTexFlag
) {
180 // This is a texture fetch, so operand 4 is a texref and operand 5 is
182 if (OpNo
== 4 && MO
.isImm()) {
183 lowerImageHandleSymbol(MO
.getImm(), MCOp
);
186 if (OpNo
== 5 && MO
.isImm() && !(MCID
.TSFlags
& NVPTXII::IsTexModeUnifiedFlag
)) {
187 lowerImageHandleSymbol(MO
.getImm(), MCOp
);
192 } else if (MCID
.TSFlags
& NVPTXII::IsSuldMask
) {
194 1 << (((MCID
.TSFlags
& NVPTXII::IsSuldMask
) >> NVPTXII::IsSuldShift
) - 1);
196 // For a surface load of vector size N, the Nth operand will be the surfref
197 if (OpNo
== VecSize
&& MO
.isImm()) {
198 lowerImageHandleSymbol(MO
.getImm(), MCOp
);
203 } else if (MCID
.TSFlags
& NVPTXII::IsSustFlag
) {
204 // This is a surface store, so operand 0 is a surfref
205 if (OpNo
== 0 && MO
.isImm()) {
206 lowerImageHandleSymbol(MO
.getImm(), MCOp
);
211 } else if (MCID
.TSFlags
& NVPTXII::IsSurfTexQueryFlag
) {
212 // This is a query, so operand 1 is a surfref/texref
213 if (OpNo
== 1 && MO
.isImm()) {
214 lowerImageHandleSymbol(MO
.getImm(), MCOp
);
224 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index
, MCOperand
&MCOp
) {
226 TargetMachine
&TM
= const_cast<TargetMachine
&>(MF
->getTarget());
227 NVPTXTargetMachine
&nvTM
= static_cast<NVPTXTargetMachine
&>(TM
);
228 const NVPTXMachineFunctionInfo
*MFI
= MF
->getInfo
<NVPTXMachineFunctionInfo
>();
229 const char *Sym
= MFI
->getImageHandleSymbol(Index
);
230 std::string
*SymNamePtr
=
231 nvTM
.getManagedStrPool()->getManagedString(Sym
);
232 MCOp
= GetSymbolRef(OutContext
.GetOrCreateSymbol(
233 StringRef(SymNamePtr
->c_str())));
236 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr
*MI
, MCInst
&OutMI
) {
237 OutMI
.setOpcode(MI
->getOpcode());
238 const NVPTXSubtarget
&ST
= TM
.getSubtarget
<NVPTXSubtarget
>();
240 // Special: Do not mangle symbol operand of CALL_PROTOTYPE
241 if (MI
->getOpcode() == NVPTX::CALL_PROTOTYPE
) {
242 const MachineOperand
&MO
= MI
->getOperand(0);
243 OutMI
.addOperand(GetSymbolRef(
244 OutContext
.GetOrCreateSymbol(Twine(MO
.getSymbolName()))));
248 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
249 const MachineOperand
&MO
= MI
->getOperand(i
);
252 if (!ST
.hasImageHandles()) {
253 if (lowerImageHandleOperand(MI
, i
, MCOp
)) {
254 OutMI
.addOperand(MCOp
);
259 if (lowerOperand(MO
, MCOp
))
260 OutMI
.addOperand(MCOp
);
264 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand
&MO
,
266 switch (MO
.getType()) {
267 default: llvm_unreachable("unknown operand type");
268 case MachineOperand::MO_Register
:
269 MCOp
= MCOperand::CreateReg(encodeVirtualRegister(MO
.getReg()));
271 case MachineOperand::MO_Immediate
:
272 MCOp
= MCOperand::CreateImm(MO
.getImm());
274 case MachineOperand::MO_MachineBasicBlock
:
275 MCOp
= MCOperand::CreateExpr(MCSymbolRefExpr::Create(
276 MO
.getMBB()->getSymbol(), OutContext
));
278 case MachineOperand::MO_ExternalSymbol
:
279 MCOp
= GetSymbolRef(GetExternalSymbolSymbol(MO
.getSymbolName()));
281 case MachineOperand::MO_GlobalAddress
:
282 MCOp
= GetSymbolRef(getSymbol(MO
.getGlobal()));
284 case MachineOperand::MO_FPImmediate
: {
285 const ConstantFP
*Cnt
= MO
.getFPImm();
286 APFloat Val
= Cnt
->getValueAPF();
288 switch (Cnt
->getType()->getTypeID()) {
289 default: report_fatal_error("Unsupported FP type"); break;
290 case Type::FloatTyID
:
291 MCOp
= MCOperand::CreateExpr(
292 NVPTXFloatMCExpr::CreateConstantFPSingle(Val
, OutContext
));
294 case Type::DoubleTyID
:
295 MCOp
= MCOperand::CreateExpr(
296 NVPTXFloatMCExpr::CreateConstantFPDouble(Val
, OutContext
));
305 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg
) {
306 if (TargetRegisterInfo::isVirtualRegister(Reg
)) {
307 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
309 DenseMap
<unsigned, unsigned> &RegMap
= VRegMapping
[RC
];
310 unsigned RegNum
= RegMap
[Reg
];
312 // Encode the register class in the upper 4 bits
313 // Must be kept in sync with NVPTXInstPrinter::printRegName
315 if (RC
== &NVPTX::Int1RegsRegClass
) {
317 } else if (RC
== &NVPTX::Int16RegsRegClass
) {
319 } else if (RC
== &NVPTX::Int32RegsRegClass
) {
321 } else if (RC
== &NVPTX::Int64RegsRegClass
) {
323 } else if (RC
== &NVPTX::Float32RegsRegClass
) {
325 } else if (RC
== &NVPTX::Float64RegsRegClass
) {
328 report_fatal_error("Bad register class");
331 // Insert the vreg number
332 Ret
|= (RegNum
& 0x0FFFFFFF);
335 // Some special-use registers are actually physical registers.
336 // Encode this as the register class ID of 0 and the real register ID.
337 return Reg
& 0x0FFFFFFF;
341 MCOperand
NVPTXAsmPrinter::GetSymbolRef(const MCSymbol
*Symbol
) {
343 Expr
= MCSymbolRefExpr::Create(Symbol
, MCSymbolRefExpr::VK_None
,
345 return MCOperand::CreateExpr(Expr
);
348 void NVPTXAsmPrinter::printReturnValStr(const Function
*F
, raw_ostream
&O
) {
349 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
350 const TargetLowering
*TLI
= TM
.getSubtargetImpl()->getTargetLowering();
352 Type
*Ty
= F
->getReturnType();
354 bool isABI
= (nvptxSubtarget
.getSmVersion() >= 20);
356 if (Ty
->getTypeID() == Type::VoidTyID
)
362 if (Ty
->isFloatingPointTy() || Ty
->isIntegerTy()) {
364 if (const IntegerType
*ITy
= dyn_cast
<IntegerType
>(Ty
)) {
365 size
= ITy
->getBitWidth();
369 assert(Ty
->isFloatingPointTy() && "Floating point type expected here");
370 size
= Ty
->getPrimitiveSizeInBits();
373 O
<< ".param .b" << size
<< " func_retval0";
374 } else if (isa
<PointerType
>(Ty
)) {
375 O
<< ".param .b" << TLI
->getPointerTy().getSizeInBits()
377 } else if ((Ty
->getTypeID() == Type::StructTyID
) || isa
<VectorType
>(Ty
)) {
378 unsigned totalsz
= TD
->getTypeAllocSize(Ty
);
379 unsigned retAlignment
= 0;
380 if (!llvm::getAlign(*F
, 0, retAlignment
))
381 retAlignment
= TD
->getABITypeAlignment(Ty
);
382 O
<< ".param .align " << retAlignment
<< " .b8 func_retval0[" << totalsz
385 llvm_unreachable("Unknown return type");
387 SmallVector
<EVT
, 16> vtparts
;
388 ComputeValueVTs(*TLI
, Ty
, vtparts
);
390 for (unsigned i
= 0, e
= vtparts
.size(); i
!= e
; ++i
) {
392 EVT elemtype
= vtparts
[i
];
393 if (vtparts
[i
].isVector()) {
394 elems
= vtparts
[i
].getVectorNumElements();
395 elemtype
= vtparts
[i
].getVectorElementType();
398 for (unsigned j
= 0, je
= elems
; j
!= je
; ++j
) {
399 unsigned sz
= elemtype
.getSizeInBits();
400 if (elemtype
.isInteger() && (sz
< 32))
402 O
<< ".reg .b" << sz
<< " func_retval" << idx
;
415 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction
&MF
,
417 const Function
*F
= MF
.getFunction();
418 printReturnValStr(F
, O
);
421 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
422 SmallString
<128> Str
;
423 raw_svector_ostream
O(Str
);
425 if (!GlobalsEmitted
) {
426 emitGlobals(*MF
->getFunction()->getParent());
427 GlobalsEmitted
= true;
431 MRI
= &MF
->getRegInfo();
432 F
= MF
->getFunction();
433 emitLinkageDirective(F
, O
);
434 if (llvm::isKernelFunction(*F
))
438 printReturnValStr(*MF
, O
);
443 emitFunctionParamList(*MF
, O
);
445 if (llvm::isKernelFunction(*F
))
446 emitKernelFunctionDirectives(*F
, O
);
448 OutStreamer
.EmitRawText(O
.str());
450 prevDebugLoc
= DebugLoc();
453 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
455 OutStreamer
.EmitRawText(StringRef("{\n"));
456 setAndEmitFunctionVirtualRegisters(*MF
);
458 SmallString
<128> Str
;
459 raw_svector_ostream
O(Str
);
460 emitDemotedVars(MF
->getFunction(), O
);
461 OutStreamer
.EmitRawText(O
.str());
464 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
465 OutStreamer
.EmitRawText(StringRef("}\n"));
469 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr
*MI
) const {
470 unsigned RegNo
= MI
->getOperand(0).getReg();
471 const TargetRegisterInfo
*TRI
= TM
.getSubtargetImpl()->getRegisterInfo();
472 if (TRI
->isVirtualRegister(RegNo
)) {
473 OutStreamer
.AddComment(Twine("implicit-def: ") +
474 getVirtualRegisterName(RegNo
));
476 OutStreamer
.AddComment(
477 Twine("implicit-def: ") +
478 TM
.getSubtargetImpl()->getRegisterInfo()->getName(RegNo
));
480 OutStreamer
.AddBlankLine();
483 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function
&F
,
484 raw_ostream
&O
) const {
485 // If the NVVM IR has some of reqntid* specified, then output
486 // the reqntid directive, and set the unspecified ones to 1.
487 // If none of reqntid* is specified, don't output reqntid directive.
488 unsigned reqntidx
, reqntidy
, reqntidz
;
489 bool specified
= false;
490 if (llvm::getReqNTIDx(F
, reqntidx
) == false)
494 if (llvm::getReqNTIDy(F
, reqntidy
) == false)
498 if (llvm::getReqNTIDz(F
, reqntidz
) == false)
504 O
<< ".reqntid " << reqntidx
<< ", " << reqntidy
<< ", " << reqntidz
507 // If the NVVM IR has some of maxntid* specified, then output
508 // the maxntid directive, and set the unspecified ones to 1.
509 // If none of maxntid* is specified, don't output maxntid directive.
510 unsigned maxntidx
, maxntidy
, maxntidz
;
512 if (llvm::getMaxNTIDx(F
, maxntidx
) == false)
516 if (llvm::getMaxNTIDy(F
, maxntidy
) == false)
520 if (llvm::getMaxNTIDz(F
, maxntidz
) == false)
526 O
<< ".maxntid " << maxntidx
<< ", " << maxntidy
<< ", " << maxntidz
530 if (llvm::getMinCTASm(F
, mincta
))
531 O
<< ".minnctapersm " << mincta
<< "\n";
535 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg
) const {
536 const TargetRegisterClass
*RC
= MRI
->getRegClass(Reg
);
539 raw_string_ostream
NameStr(Name
);
541 VRegRCMap::const_iterator I
= VRegMapping
.find(RC
);
542 assert(I
!= VRegMapping
.end() && "Bad register class");
543 const DenseMap
<unsigned, unsigned> &RegMap
= I
->second
;
545 VRegMap::const_iterator VI
= RegMap
.find(Reg
);
546 assert(VI
!= RegMap
.end() && "Bad virtual register");
547 unsigned MappedVR
= VI
->second
;
549 NameStr
<< getNVPTXRegClassStr(RC
) << MappedVR
;
555 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr
,
557 O
<< getVirtualRegisterName(vr
);
560 void NVPTXAsmPrinter::printVecModifiedImmediate(
561 const MachineOperand
&MO
, const char *Modifier
, raw_ostream
&O
) {
562 static const char vecelem
[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
563 int Imm
= (int) MO
.getImm();
564 if (0 == strcmp(Modifier
, "vecelem"))
565 O
<< "_" << vecelem
[Imm
];
566 else if (0 == strcmp(Modifier
, "vecv4comm1")) {
567 if ((Imm
< 0) || (Imm
> 3))
569 } else if (0 == strcmp(Modifier
, "vecv4comm2")) {
570 if ((Imm
< 4) || (Imm
> 7))
572 } else if (0 == strcmp(Modifier
, "vecv4pos")) {
575 O
<< "_" << vecelem
[Imm
% 4];
576 } else if (0 == strcmp(Modifier
, "vecv2comm1")) {
577 if ((Imm
< 0) || (Imm
> 1))
579 } else if (0 == strcmp(Modifier
, "vecv2comm2")) {
580 if ((Imm
< 2) || (Imm
> 3))
582 } else if (0 == strcmp(Modifier
, "vecv2pos")) {
585 O
<< "_" << vecelem
[Imm
% 2];
587 llvm_unreachable("Unknown Modifier on immediate operand");
592 void NVPTXAsmPrinter::emitDeclaration(const Function
*F
, raw_ostream
&O
) {
594 emitLinkageDirective(F
, O
);
595 if (llvm::isKernelFunction(*F
))
599 printReturnValStr(F
, O
);
600 O
<< *getSymbol(F
) << "\n";
601 emitFunctionParamList(F
, O
);
605 static bool usedInGlobalVarDef(const Constant
*C
) {
609 if (const GlobalVariable
*GV
= dyn_cast
<GlobalVariable
>(C
)) {
610 if (GV
->getName().str() == "llvm.used")
615 for (const User
*U
: C
->users())
616 if (const Constant
*C
= dyn_cast
<Constant
>(U
))
617 if (usedInGlobalVarDef(C
))
623 static bool usedInOneFunc(const User
*U
, Function
const *&oneFunc
) {
624 if (const GlobalVariable
*othergv
= dyn_cast
<GlobalVariable
>(U
)) {
625 if (othergv
->getName().str() == "llvm.used")
629 if (const Instruction
*instr
= dyn_cast
<Instruction
>(U
)) {
630 if (instr
->getParent() && instr
->getParent()->getParent()) {
631 const Function
*curFunc
= instr
->getParent()->getParent();
632 if (oneFunc
&& (curFunc
!= oneFunc
))
640 for (const User
*UU
: U
->users())
641 if (usedInOneFunc(UU
, oneFunc
) == false)
647 /* Find out if a global variable can be demoted to local scope.
648 * Currently, this is valid for CUDA shared variables, which have local
649 * scope and global lifetime. So the conditions to check are :
650 * 1. Is the global variable in shared address space?
651 * 2. Does it have internal linkage?
652 * 3. Is the global variable referenced only in one function?
654 static bool canDemoteGlobalVar(const GlobalVariable
*gv
, Function
const *&f
) {
655 if (gv
->hasInternalLinkage() == false)
657 const PointerType
*Pty
= gv
->getType();
658 if (Pty
->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED
)
661 const Function
*oneFunc
= nullptr;
663 bool flag
= usedInOneFunc(gv
, oneFunc
);
672 static bool useFuncSeen(const Constant
*C
,
673 llvm::DenseMap
<const Function
*, bool> &seenMap
) {
674 for (const User
*U
: C
->users()) {
675 if (const Constant
*cu
= dyn_cast
<Constant
>(U
)) {
676 if (useFuncSeen(cu
, seenMap
))
678 } else if (const Instruction
*I
= dyn_cast
<Instruction
>(U
)) {
679 const BasicBlock
*bb
= I
->getParent();
682 const Function
*caller
= bb
->getParent();
685 if (seenMap
.find(caller
) != seenMap
.end())
692 void NVPTXAsmPrinter::emitDeclarations(const Module
&M
, raw_ostream
&O
) {
693 llvm::DenseMap
<const Function
*, bool> seenMap
;
694 for (Module::const_iterator FI
= M
.begin(), FE
= M
.end(); FI
!= FE
; ++FI
) {
695 const Function
*F
= FI
;
697 if (F
->isDeclaration()) {
700 if (F
->getIntrinsicID())
702 emitDeclaration(F
, O
);
705 for (const User
*U
: F
->users()) {
706 if (const Constant
*C
= dyn_cast
<Constant
>(U
)) {
707 if (usedInGlobalVarDef(C
)) {
708 // The use is in the initialization of a global variable
709 // that is a function pointer, so print a declaration
710 // for the original function
711 emitDeclaration(F
, O
);
714 // Emit a declaration of this function if the function that
715 // uses this constant expr has already been seen.
716 if (useFuncSeen(C
, seenMap
)) {
717 emitDeclaration(F
, O
);
722 if (!isa
<Instruction
>(U
))
724 const Instruction
*instr
= cast
<Instruction
>(U
);
725 const BasicBlock
*bb
= instr
->getParent();
728 const Function
*caller
= bb
->getParent();
732 // If a caller has already been seen, then the caller is
733 // appearing in the module before the callee. so print out
734 // a declaration for the callee.
735 if (seenMap
.find(caller
) != seenMap
.end()) {
736 emitDeclaration(F
, O
);
744 void NVPTXAsmPrinter::recordAndEmitFilenames(Module
&M
) {
745 DebugInfoFinder DbgFinder
;
746 DbgFinder
.processModule(M
);
749 for (DICompileUnit DIUnit
: DbgFinder
.compile_units()) {
750 StringRef
Filename(DIUnit
.getFilename());
751 StringRef
Dirname(DIUnit
.getDirectory());
752 SmallString
<128> FullPathName
= Dirname
;
753 if (!Dirname
.empty() && !sys::path::is_absolute(Filename
)) {
754 sys::path::append(FullPathName
, Filename
);
755 Filename
= FullPathName
.str();
757 if (filenameMap
.find(Filename
.str()) != filenameMap
.end())
759 filenameMap
[Filename
.str()] = i
;
760 OutStreamer
.EmitDwarfFileDirective(i
, "", Filename
.str());
764 for (DISubprogram SP
: DbgFinder
.subprograms()) {
765 StringRef
Filename(SP
.getFilename());
766 StringRef
Dirname(SP
.getDirectory());
767 SmallString
<128> FullPathName
= Dirname
;
768 if (!Dirname
.empty() && !sys::path::is_absolute(Filename
)) {
769 sys::path::append(FullPathName
, Filename
);
770 Filename
= FullPathName
.str();
772 if (filenameMap
.find(Filename
.str()) != filenameMap
.end())
774 filenameMap
[Filename
.str()] = i
;
779 bool NVPTXAsmPrinter::doInitialization(Module
&M
) {
781 SmallString
<128> Str1
;
782 raw_svector_ostream
OS1(Str1
);
784 MMI
= getAnalysisIfAvailable
<MachineModuleInfo
>();
785 MMI
->AnalyzeModule(M
);
787 // We need to call the parent's one explicitly.
788 //bool Result = AsmPrinter::doInitialization(M);
790 // Initialize TargetLoweringObjectFile.
791 const_cast<TargetLoweringObjectFile
&>(getObjFileLowering())
792 .Initialize(OutContext
, TM
);
794 Mang
= new Mangler(TM
.getSubtargetImpl()->getDataLayout());
796 // Emit header before any dwarf directives are emitted below.
798 OutStreamer
.EmitRawText(OS1
.str());
800 // Already commented out
801 //bool Result = AsmPrinter::doInitialization(M);
803 // Emit module-level inline asm if it exists.
804 if (!M
.getModuleInlineAsm().empty()) {
805 OutStreamer
.AddComment("Start of file scope inline assembly");
806 OutStreamer
.AddBlankLine();
807 OutStreamer
.EmitRawText(StringRef(M
.getModuleInlineAsm()));
808 OutStreamer
.AddBlankLine();
809 OutStreamer
.AddComment("End of file scope inline assembly");
810 OutStreamer
.AddBlankLine();
813 if (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
)
814 recordAndEmitFilenames(M
);
816 GlobalsEmitted
= false;
818 return false; // success
821 void NVPTXAsmPrinter::emitGlobals(const Module
&M
) {
822 SmallString
<128> Str2
;
823 raw_svector_ostream
OS2(Str2
);
825 emitDeclarations(M
, OS2
);
827 // As ptxas does not support forward references of globals, we need to first
828 // sort the list of module-level globals in def-use order. We visit each
829 // global variable in order, and ensure that we emit it *after* its dependent
830 // globals. We use a little extra memory maintaining both a set and a list to
831 // have fast searches while maintaining a strict ordering.
832 SmallVector
<const GlobalVariable
*, 8> Globals
;
833 DenseSet
<const GlobalVariable
*> GVVisited
;
834 DenseSet
<const GlobalVariable
*> GVVisiting
;
836 // Visit each global variable, in order
837 for (Module::const_global_iterator I
= M
.global_begin(), E
= M
.global_end();
839 VisitGlobalVariableForEmission(I
, Globals
, GVVisited
, GVVisiting
);
841 assert(GVVisited
.size() == M
.getGlobalList().size() &&
842 "Missed a global variable");
843 assert(GVVisiting
.size() == 0 && "Did not fully process a global variable");
845 // Print out module-level global variables in proper order
846 for (unsigned i
= 0, e
= Globals
.size(); i
!= e
; ++i
)
847 printModuleLevelGV(Globals
[i
], OS2
);
851 OutStreamer
.EmitRawText(OS2
.str());
854 void NVPTXAsmPrinter::emitHeader(Module
&M
, raw_ostream
&O
) {
856 O
<< "// Generated by LLVM NVPTX Back-End\n";
860 unsigned PTXVersion
= nvptxSubtarget
.getPTXVersion();
861 O
<< ".version " << (PTXVersion
/ 10) << "." << (PTXVersion
% 10) << "\n";
864 O
<< nvptxSubtarget
.getTargetName();
866 if (nvptxSubtarget
.getDrvInterface() == NVPTX::NVCL
)
867 O
<< ", texmode_independent";
868 if (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
) {
869 if (!nvptxSubtarget
.hasDouble())
870 O
<< ", map_f64_to_f32";
873 if (MAI
->doesSupportDebugInformation())
878 O
<< ".address_size ";
879 if (nvptxSubtarget
.is64Bit())
888 bool NVPTXAsmPrinter::doFinalization(Module
&M
) {
890 // If we did not emit any functions, then the global declarations have not
892 if (!GlobalsEmitted
) {
894 GlobalsEmitted
= true;
897 // XXX Temproarily remove global variables so that doFinalization() will not
898 // emit them again (global variables are emitted at beginning).
900 Module::GlobalListType
&global_list
= M
.getGlobalList();
901 int i
, n
= global_list
.size();
902 GlobalVariable
**gv_array
= new GlobalVariable
*[n
];
904 // first, back-up GlobalVariable in gv_array
906 for (Module::global_iterator I
= global_list
.begin(), E
= global_list
.end();
910 // second, empty global_list
911 while (!global_list
.empty())
912 global_list
.remove(global_list
.begin());
914 // call doFinalization
915 bool ret
= AsmPrinter::doFinalization(M
);
917 // now we restore global variables
918 for (i
= 0; i
< n
; i
++)
919 global_list
.insert(global_list
.end(), gv_array
[i
]);
921 clearAnnotationCache(&M
);
926 //bool Result = AsmPrinter::doFinalization(M);
927 // Instead of calling the parents doFinalization, we may
928 // clone parents doFinalization and customize here.
929 // Currently, we if NVISA out the EmitGlobals() in
930 // parent's doFinalization, which is too intrusive.
932 // Same for the doInitialization.
936 // This function emits appropriate linkage directives for
937 // functions and global variables.
939 // extern function declaration -> .extern
940 // extern function definition -> .visible
941 // external global variable with init -> .visible
942 // external without init -> .extern
943 // appending -> not allowed, assert.
944 // for any linkage other than
945 // internal, private, linker_private,
946 // linker_private_weak, linker_private_weak_def_auto,
949 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue
*V
,
951 if (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
) {
952 if (V
->hasExternalLinkage()) {
953 if (isa
<GlobalVariable
>(V
)) {
954 const GlobalVariable
*GVar
= cast
<GlobalVariable
>(V
);
956 if (GVar
->hasInitializer())
961 } else if (V
->isDeclaration())
965 } else if (V
->hasAppendingLinkage()) {
967 msg
.append("Error: ");
968 msg
.append("Symbol ");
970 msg
.append(V
->getName().str());
971 msg
.append("has unsupported appending linkage type");
972 llvm_unreachable(msg
.c_str());
973 } else if (!V
->hasInternalLinkage() &&
974 !V
->hasPrivateLinkage()) {
980 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable
*GVar
,
982 bool processDemoted
) {
985 if (GVar
->hasSection()) {
986 if (GVar
->getSection() == StringRef("llvm.metadata"))
990 // Skip LLVM intrinsic global variables
991 if (GVar
->getName().startswith("llvm.") ||
992 GVar
->getName().startswith("nvvm."))
995 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
997 // GlobalVariables are always constant pointers themselves.
998 const PointerType
*PTy
= GVar
->getType();
999 Type
*ETy
= PTy
->getElementType();
1001 if (GVar
->hasExternalLinkage()) {
1002 if (GVar
->hasInitializer())
1006 } else if (GVar
->hasLinkOnceLinkage() || GVar
->hasWeakLinkage() ||
1007 GVar
->hasAvailableExternallyLinkage() ||
1008 GVar
->hasCommonLinkage()) {
1012 if (llvm::isTexture(*GVar
)) {
1013 O
<< ".global .texref " << llvm::getTextureName(*GVar
) << ";\n";
1017 if (llvm::isSurface(*GVar
)) {
1018 O
<< ".global .surfref " << llvm::getSurfaceName(*GVar
) << ";\n";
1022 if (GVar
->isDeclaration()) {
1023 // (extern) declarations, no definition or initializer
1024 // Currently the only known declaration is for an automatic __local
1025 // (.shared) promoted to global.
1026 emitPTXGlobalVariable(GVar
, O
);
1031 if (llvm::isSampler(*GVar
)) {
1032 O
<< ".global .samplerref " << llvm::getSamplerName(*GVar
);
1034 const Constant
*Initializer
= nullptr;
1035 if (GVar
->hasInitializer())
1036 Initializer
= GVar
->getInitializer();
1037 const ConstantInt
*CI
= nullptr;
1039 CI
= dyn_cast
<ConstantInt
>(Initializer
);
1041 unsigned sample
= CI
->getZExtValue();
1046 addr
= ((sample
& __CLK_ADDRESS_MASK
) >> __CLK_ADDRESS_BASE
);
1048 O
<< "addr_mode_" << i
<< " = ";
1054 O
<< "clamp_to_border";
1057 O
<< "clamp_to_edge";
1068 O
<< "filter_mode = ";
1069 switch ((sample
& __CLK_FILTER_MASK
) >> __CLK_FILTER_BASE
) {
1077 llvm_unreachable("Anisotropic filtering is not supported");
1082 if (!((sample
& __CLK_NORMALIZED_MASK
) >> __CLK_NORMALIZED_BASE
)) {
1083 O
<< ", force_unnormalized_coords = 1";
1092 if (GVar
->hasPrivateLinkage()) {
1094 if (!strncmp(GVar
->getName().data(), "unrollpragma", 12))
1097 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1098 if (!strncmp(GVar
->getName().data(), "filename", 8))
1100 if (GVar
->use_empty())
1104 const Function
*demotedFunc
= nullptr;
1105 if (!processDemoted
&& canDemoteGlobalVar(GVar
, demotedFunc
)) {
1106 O
<< "// " << GVar
->getName().str() << " has been demoted\n";
1107 if (localDecls
.find(demotedFunc
) != localDecls
.end())
1108 localDecls
[demotedFunc
].push_back(GVar
);
1110 std::vector
<const GlobalVariable
*> temp
;
1111 temp
.push_back(GVar
);
1112 localDecls
[demotedFunc
] = temp
;
1118 emitPTXAddressSpace(PTy
->getAddressSpace(), O
);
1120 if (isManaged(*GVar
)) {
1121 O
<< " .attribute(.managed)";
1124 if (GVar
->getAlignment() == 0)
1125 O
<< " .align " << (int) TD
->getPrefTypeAlignment(ETy
);
1127 O
<< " .align " << GVar
->getAlignment();
1129 if (ETy
->isFloatingPointTy() || ETy
->isIntegerTy() || ETy
->isPointerTy()) {
1131 // Special case: ABI requires that we use .u8 for predicates
1132 if (ETy
->isIntegerTy(1))
1135 O
<< getPTXFundamentalTypeStr(ETy
, false);
1137 O
<< *getSymbol(GVar
);
1139 // PTX allows variable initialization only for constant and global state
1141 if (GVar
->hasInitializer()) {
1142 if ((PTy
->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL
) ||
1143 (PTy
->getAddressSpace() == llvm::ADDRESS_SPACE_CONST
)) {
1144 const Constant
*Initializer
= GVar
->getInitializer();
1145 // 'undef' is treated as if no value were specified.
1146 if (!Initializer
->isNullValue() && !isa
<UndefValue
>(Initializer
)) {
1148 printScalarConstant(Initializer
, O
);
1151 // The frontend adds zero-initializer to variables that don't have an
1152 // initial value, so skip warning for this case.
1153 if (!GVar
->getInitializer()->isNullValue()) {
1154 std::string warnMsg
= "initial value of '" + GVar
->getName().str() +
1155 "' is not allowed in addrspace(" +
1156 llvm::utostr_32(PTy
->getAddressSpace()) + ")";
1157 report_fatal_error(warnMsg
.c_str());
1162 unsigned int ElementSize
= 0;
1164 // Although PTX has direct support for struct type and array type and
1165 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1166 // targets that support these high level field accesses. Structs, arrays
1167 // and vectors are lowered into arrays of bytes.
1168 switch (ETy
->getTypeID()) {
1169 case Type::StructTyID
:
1170 case Type::ArrayTyID
:
1171 case Type::VectorTyID
:
1172 ElementSize
= TD
->getTypeStoreSize(ETy
);
1173 // PTX allows variable initialization only for constant and
1174 // global state spaces.
1175 if (((PTy
->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL
) ||
1176 (PTy
->getAddressSpace() == llvm::ADDRESS_SPACE_CONST
)) &&
1177 GVar
->hasInitializer()) {
1178 const Constant
*Initializer
= GVar
->getInitializer();
1179 if (!isa
<UndefValue
>(Initializer
) && !Initializer
->isNullValue()) {
1180 AggBuffer
aggBuffer(ElementSize
, O
, *this);
1181 bufferAggregateConstant(Initializer
, &aggBuffer
);
1182 if (aggBuffer
.numSymbols
) {
1183 if (nvptxSubtarget
.is64Bit()) {
1184 O
<< " .u64 " << *getSymbol(GVar
) << "[";
1185 O
<< ElementSize
/ 8;
1187 O
<< " .u32 " << *getSymbol(GVar
) << "[";
1188 O
<< ElementSize
/ 4;
1192 O
<< " .b8 " << *getSymbol(GVar
) << "[";
1200 O
<< " .b8 " << *getSymbol(GVar
);
1208 O
<< " .b8 " << *getSymbol(GVar
);
1217 llvm_unreachable("type not supported yet");
// emitDemotedVars - Print the global variables that localDecls associates
// with function f. Each one is written to O through printModuleLevelGV (third
// argument true), preceded by a "// demoted variable" marker in the output.
//   f - function whose entry in localDecls is emitted.
//   O - stream receiving the text.
// NOTE(review): the statement guarded by the localDecls.find(f) test is not
// visible in this listing; presumably an early return when f has no entry.
1224 void NVPTXAsmPrinter::emitDemotedVars(const Function
*f
, raw_ostream
&O
) {
1225 if (localDecls
.find(f
) == localDecls
.end())
1228 std::vector
<const GlobalVariable
*> &gvars
= localDecls
[f
];
// Emit every variable recorded for f, in stored order.
1230 for (unsigned i
= 0, e
= gvars
.size(); i
!= e
; ++i
) {
1231 O
<< "\t// demoted variable\n\t";
1232 printModuleLevelGV(gvars
[i
], O
, true);
// emitPTXAddressSpace - Dispatch on AddressSpace and write to O. The switch
// recognises the local, global, const and shared address-space constants.
//   AddressSpace - numeric address space to translate.
//   O            - stream receiving the text.
// NOTE(review): the per-case output statements are not visible in this
// listing. The report_fatal_error call is presumably the default case for any
// other address space -- confirm.
1236 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace
,
1237 raw_ostream
&O
) const {
1238 switch (AddressSpace
) {
1239 case llvm::ADDRESS_SPACE_LOCAL
:
1242 case llvm::ADDRESS_SPACE_GLOBAL
:
1245 case llvm::ADDRESS_SPACE_CONST
:
1248 case llvm::ADDRESS_SPACE_SHARED
:
1252 report_fatal_error("Bad address space found while emitting PTX");
// getPTXFundamentalTypeStr - Produce a type string for Ty by switching on
// Ty->getTypeID(). The visible integer branch (NumBits <= 64) returns "u"
// followed by the bit width. Separate cases exist for float, double and
// pointer types; the pointer case consults nvptxSubtarget.is64Bit().
//   Ty       - type to translate.
//   useB4PTR - NOTE(review): its use is not visible in this listing.
// NOTE(review): the return-type line, the integer branch preceding the
// "else if", and the strings returned for float/double/pointer are not
// visible here.
1258 NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type
*Ty
, bool useB4PTR
) const {
1259 switch (Ty
->getTypeID()) {
1261 llvm_unreachable("unexpected type");
1263 case Type::IntegerTyID
: {
1264 unsigned NumBits
= cast
<IntegerType
>(Ty
)->getBitWidth();
1267 else if (NumBits
<= 64) {
1268 std::string name
= "u";
1269 return name
+ utostr(NumBits
);
1271 llvm_unreachable("Integer too large");
1276 case Type::FloatTyID
:
1278 case Type::DoubleTyID
:
1280 case Type::PointerTyID
:
1281 if (nvptxSubtarget
.is64Bit())
1291 llvm_unreachable("unexpected type");
// emitPTXGlobalVariable - Print a declaration for GVar to O: the address
// space (via emitPTXAddressSpace), an ".align" clause, and then the type and
// symbol. The alignment is GVar's own when GVar->getAlignment() is tested
// against 0, with the data layout's preferred alignment of the element type as
// the other choice. Floating-point, integer and pointer element types use
// getPTXFundamentalTypeStr; struct, array and vector element types are printed
// as a ".b8" byte array sized by getTypeStoreSize.
// NOTE(review): the second line of the signature and the branch separators
// between the two ".align" statements are not visible in this listing.
1295 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable
*GVar
,
1298 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
1300 // GlobalVariables are always constant pointers themselves.
1301 const PointerType
*PTy
= GVar
->getType();
1302 Type
*ETy
= PTy
->getElementType();
1305 emitPTXAddressSpace(PTy
->getAddressSpace(), O
);
1306 if (GVar
->getAlignment() == 0)
1307 O
<< " .align " << (int) TD
->getPrefTypeAlignment(ETy
);
1309 O
<< " .align " << GVar
->getAlignment();
// Scalar element types: fundamental type string followed by the symbol.
1311 if (ETy
->isFloatingPointTy() || ETy
->isIntegerTy() || ETy
->isPointerTy()) {
1313 O
<< getPTXFundamentalTypeStr(ETy
);
1315 O
<< *getSymbol(GVar
);
1319 int64_t ElementSize
= 0;
1321 // Although PTX has direct support for struct type and array type and LLVM IR
1322 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1323 // support these high level field accesses. Structs and arrays are lowered
1324 // into arrays of bytes.
1325 switch (ETy
->getTypeID()) {
1326 case Type::StructTyID
:
1327 case Type::ArrayTyID
:
1328 case Type::VectorTyID
:
1329 ElementSize
= TD
->getTypeStoreSize(ETy
);
1330 O
<< " .b8 " << *getSymbol(GVar
) << "[";
1332 O
<< itostr(ElementSize
);
1337 llvm_unreachable("type not supported yet");
// getOpenCLAlignment - Compute an alignment for Ty using the data layout TD.
//   * single-value types: TD->getPrefTypeAlignment(Ty).
//   * array types: recurse on the element type.
//   * struct types: the largest alignment found among the elements
//     (starting from 1), computed recursively.
//   * function types: TD->getPointerPrefAlignment().
//   * otherwise: TD->getPrefTypeAlignment(Ty).
// NOTE(review): the null checks following each dyn_cast and the return of
// alignStruct are not visible in this listing.
1342 static unsigned int getOpenCLAlignment(const DataLayout
*TD
, Type
*Ty
) {
1343 if (Ty
->isSingleValueType())
1344 return TD
->getPrefTypeAlignment(Ty
);
1346 const ArrayType
*ATy
= dyn_cast
<ArrayType
>(Ty
);
1348 return getOpenCLAlignment(TD
, ATy
->getElementType());
1350 const StructType
*STy
= dyn_cast
<StructType
>(Ty
);
1352 unsigned int alignStruct
= 1;
1353 // Go through each element of the struct and find the
1354 // largest alignment.
1355 for (unsigned i
= 0, e
= STy
->getNumElements(); i
!= e
; i
++) {
1356 Type
*ETy
= STy
->getElementType(i
);
1357 unsigned int align
= getOpenCLAlignment(TD
, ETy
);
1358 if (align
> alignStruct
)
1359 alignStruct
= align
;
1364 const FunctionType
*FTy
= dyn_cast
<FunctionType
>(Ty
);
1366 return TD
->getPointerPrefAlignment();
1367 return TD
->getPrefTypeAlignment(Ty
);
// printParamName - Print the name used for argument I to O. When the driver
// interface is NVCL or CUDA the name is "<function symbol>_param_<paramIndex>",
// where the symbol is that of I's parent function.
//   I          - argument being named.
//   paramIndex - position of the argument, used in the generated name.
//   O          - stream receiving the text.
// NOTE(review): the remaining path copies I->getName() into argName and walks
// it through the pointer p; the loop that consumes p is not visible in this
// listing.
1370 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I
,
1371 int paramIndex
, raw_ostream
&O
) {
1372 if ((nvptxSubtarget
.getDrvInterface() == NVPTX::NVCL
) ||
1373 (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
))
1374 O
<< *getSymbol(I
->getParent()) << "_param_" << paramIndex
;
1376 std::string argName
= I
->getName();
1377 const char *p
= argName
.c_str();
// printParamName - Print the name of the parameter at position paramIndex of
// the function F. For the NVCL and CUDA driver interfaces the name is
// "<CurrentFnSym>_param_<paramIndex>". Otherwise the arguments of F are
// scanned with a counter i and the iterator overload of printParamName is
// called for the argument whose position equals paramIndex.
// Reaching the end of the scan without a match hits llvm_unreachable.
// NOTE(review): the declarations of F and i are not visible in this listing.
1388 void NVPTXAsmPrinter::printParamName(int paramIndex
, raw_ostream
&O
) {
1389 Function::const_arg_iterator I
, E
;
1392 if ((nvptxSubtarget
.getDrvInterface() == NVPTX::NVCL
) ||
1393 (nvptxSubtarget
.getDrvInterface() == NVPTX::CUDA
)) {
1394 O
<< *CurrentFnSym
<< "_param_" << paramIndex
;
1398 for (I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
; ++I
, i
++) {
1399 if (i
== paramIndex
) {
1400 printParamName(I
, paramIndex
, O
);
1404 llvm_unreachable("paramIndex out of bound");
// emitFunctionParamList - Print the parameter list of F to O, one entry per
// argument, tracking the argument position in paramIndex.
//
// Forms visible in this listing:
//   * Kernel functions, sampler/image arguments: ".param .surfref",
//     ".param .texref" or ".param .samplerref" followed by
//     "<CurrentFnSym>_param_<index>"; when nvptxSubtarget.hasImageHandles()
//     is consulted a ".param .u64 .ptr .<kind>" spelling is also available.
//   * Arguments without the ByVal attribute:
//       - aggregate or vector types: ".param .align <a> .b8 <name>[<size>]"
//         with size = TD->getTypeAllocSize(Ty);
//       - pointer arguments: ".param .u<pointer bits>", and when the driver
//         interface is not CUDA a ".ptr .const/.shared/.global" qualifier plus
//         ".align" from getOpenCLAlignment of the pointee type;
//       - other scalars: the string from getPTXFundamentalTypeStr (i1 is
//         special-cased), or ".param .b<sz>" / ".reg .b<sz>".
//   * ByVal arguments (must be pointers): when isABI || isKernelFunc, an
//     aligned ".b8" array sized for the pointee type; otherwise the pointee is
//     split with ComputeValueVTs and each part (each vector element for
//     vector parts) is printed as ".reg .b<sz> <name>".
//
// isABI is true when nvptxSubtarget.getSmVersion() >= 20.
// Attribute queries use paramIndex + 1 (presumably because attribute slot 0
// belongs to the return value -- confirm).
// NOTE(review): many branch/else lines, separators between parameters and the
// declarations of sz/elems are not visible in this listing.
1407 void NVPTXAsmPrinter::emitFunctionParamList(const Function
*F
, raw_ostream
&O
) {
1408 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
1409 const AttributeSet
&PAL
= F
->getAttributes();
1410 const TargetLowering
*TLI
= TM
.getSubtargetImpl()->getTargetLowering();
1411 Function::const_arg_iterator I
, E
;
1412 unsigned paramIndex
= 0;
1414 bool isKernelFunc
= llvm::isKernelFunction(*F
);
1415 bool isABI
= (nvptxSubtarget
.getSmVersion() >= 20);
1416 MVT thePointerTy
= TLI
->getPointerTy();
1420 for (I
= F
->arg_begin(), E
= F
->arg_end(); I
!= E
; ++I
, paramIndex
++) {
1421 Type
*Ty
= I
->getType();
1428 // Handle image/sampler parameters
1429 if (isKernelFunction(*F
)) {
1430 if (isSampler(*I
) || isImage(*I
)) {
1432 std::string sname
= I
->getName();
1433 if (isImageWriteOnly(*I
) || isImageReadWrite(*I
)) {
1434 if (nvptxSubtarget
.hasImageHandles())
1435 O
<< "\t.param .u64 .ptr .surfref ";
1437 O
<< "\t.param .surfref ";
1438 O
<< *CurrentFnSym
<< "_param_" << paramIndex
;
1440 else { // Default image is read_only
1441 if (nvptxSubtarget
.hasImageHandles())
1442 O
<< "\t.param .u64 .ptr .texref ";
1444 O
<< "\t.param .texref ";
1445 O
<< *CurrentFnSym
<< "_param_" << paramIndex
;
1448 if (nvptxSubtarget
.hasImageHandles())
1449 O
<< "\t.param .u64 .ptr .samplerref ";
1451 O
<< "\t.param .samplerref ";
1452 O
<< *CurrentFnSym
<< "_param_" << paramIndex
;
// Arguments that are not passed byval.
1458 if (PAL
.hasAttribute(paramIndex
+ 1, Attribute::ByVal
) == false) {
1459 if (Ty
->isAggregateType() || Ty
->isVectorTy()) {
1460 // Just print .param .align <a> .b8 .param[size];
1461 // <a> = PAL.getparamalignment
1462 // size = typeallocsize of element type
1463 unsigned align
= PAL
.getParamAlignment(paramIndex
+ 1);
1465 align
= TD
->getABITypeAlignment(Ty
);
1467 unsigned sz
= TD
->getTypeAllocSize(Ty
);
1468 O
<< "\t.param .align " << align
<< " .b8 ";
1469 printParamName(I
, paramIndex
, O
);
1470 O
<< "[" << sz
<< "]";
1475 const PointerType
*PTy
= dyn_cast
<PointerType
>(Ty
);
1478 // Special handling for pointer arguments to kernel
1479 O
<< "\t.param .u" << thePointerTy
.getSizeInBits() << " ";
1481 if (nvptxSubtarget
.getDrvInterface() != NVPTX::CUDA
) {
1482 Type
*ETy
= PTy
->getElementType();
1483 int addrSpace
= PTy
->getAddressSpace();
1484 switch (addrSpace
) {
1488 case llvm::ADDRESS_SPACE_CONST
:
1489 O
<< ".ptr .const ";
1491 case llvm::ADDRESS_SPACE_SHARED
:
1492 O
<< ".ptr .shared ";
1494 case llvm::ADDRESS_SPACE_GLOBAL
:
1495 O
<< ".ptr .global ";
1498 O
<< ".align " << (int) getOpenCLAlignment(TD
, ETy
) << " ";
1500 printParamName(I
, paramIndex
, O
);
1504 // non-pointer scalar to kernel func
1506 // Special case: predicate operands become .u8 types
1507 if (Ty
->isIntegerTy(1))
1510 O
<< getPTXFundamentalTypeStr(Ty
);
1512 printParamName(I
, paramIndex
, O
);
1515 // Non-kernel function, just print .param .b<size> for ABI
1516 // and .reg .b<size> for non-ABI
1518 if (isa
<IntegerType
>(Ty
)) {
1519 sz
= cast
<IntegerType
>(Ty
)->getBitWidth();
1522 } else if (isa
<PointerType
>(Ty
))
1523 sz
= thePointerTy
.getSizeInBits();
1525 sz
= Ty
->getPrimitiveSizeInBits();
1527 O
<< "\t.param .b" << sz
<< " ";
1529 O
<< "\t.reg .b" << sz
<< " ";
1530 printParamName(I
, paramIndex
, O
);
1534 // param has byVal attribute. So should be a pointer
1535 const PointerType
*PTy
= dyn_cast
<PointerType
>(Ty
);
1536 assert(PTy
&& "Param with byval attribute should be a pointer type");
1537 Type
*ETy
= PTy
->getElementType();
1539 if (isABI
|| isKernelFunc
) {
1540 // Just print .param .align <a> .b8 .param[size];
1541 // <a> = PAL.getparamalignment
1542 // size = typeallocsize of element type
1543 unsigned align
= PAL
.getParamAlignment(paramIndex
+ 1);
1545 align
= TD
->getABITypeAlignment(ETy
);
1547 unsigned sz
= TD
->getTypeAllocSize(ETy
);
1548 O
<< "\t.param .align " << align
<< " .b8 ";
1549 printParamName(I
, paramIndex
, O
);
1550 O
<< "[" << sz
<< "]";
1553 // Split the ETy into constituent parts and
1554 // print .param .b<size> <name> for each part.
1555 // Further, if a part is vector, print the above for
1556 // each vector element.
1557 SmallVector
<EVT
, 16> vtparts
;
1558 ComputeValueVTs(*TLI
, ETy
, vtparts
);
1559 for (unsigned i
= 0, e
= vtparts
.size(); i
!= e
; ++i
) {
1561 EVT elemtype
= vtparts
[i
];
1562 if (vtparts
[i
].isVector()) {
1563 elems
= vtparts
[i
].getVectorNumElements();
1564 elemtype
= vtparts
[i
].getVectorElementType();
1567 for (unsigned j
= 0, je
= elems
; j
!= je
; ++j
) {
1568 unsigned sz
= elemtype
.getSizeInBits();
1569 if (elemtype
.isInteger() && (sz
< 32))
1571 O
<< "\t.reg .b" << sz
<< " ";
1572 printParamName(I
, paramIndex
, O
);
// emitFunctionParamList - MachineFunction overload: fetches the IR function
// with MF.getFunction() and forwards to the (const Function *, raw_ostream &)
// overload.
// NOTE(review): the second line of the signature (declaring O) is not visible
// in this listing.
1588 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction
&MF
,
1590 const Function
*F
= MF
.getFunction();
1591 emitFunctionParamList(F
, O
);
// setAndEmitFunctionVirtualRegisters - Build the per-register-class numbering
// of MF's virtual registers in VRegMapping and emit the matching declarations.
//
// Text is accumulated in a local SmallString through a raw_svector_ostream and
// handed to OutStreamer.EmitRawText at the end. Visible steps:
//   1. Emit the stack depot as a ".local .align <max align> .b8" array named
//      DEPOTNAME + function number with MFI->getStackSize() bytes, plus %SP and
//      %SPL registers (.b64 or .b32 depending on nvptxSubtarget.is64Bit()).
//   2. For every virtual register, insert it into the map of its register
//      class with the value (current map size + 1), so numbering within a
//      class starts at 1.
//   3. For every register class, emit a ".reg <class name> <prefix><n+1" entry
//      where n is the number of registers mapped for that class.
// NOTE(review): the condition guarding the depot emission, the declaration of
// MRI and the tail of the ".reg" statement are not visible in this listing.
1594 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1595 const MachineFunction
&MF
) {
1596 SmallString
<128> Str
;
1597 raw_svector_ostream
O(Str
);
1599 // Map the global virtual register number to a register class specific
1600 // virtual register number starting from 1 with that class.
1601 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
1602 //unsigned numRegClasses = TRI->getNumRegClasses();
1604 // Emit the Fake Stack Object
1605 const MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1606 int NumBytes
= (int) MFI
->getStackSize();
1608 O
<< "\t.local .align " << MFI
->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1609 << getFunctionNumber() << "[" << NumBytes
<< "];\n";
1610 if (nvptxSubtarget
.is64Bit()) {
1611 O
<< "\t.reg .b64 \t%SP;\n";
1612 O
<< "\t.reg .b64 \t%SPL;\n";
1614 O
<< "\t.reg .b32 \t%SP;\n";
1615 O
<< "\t.reg .b32 \t%SPL;\n";
1619 // Go through all virtual registers to establish the mapping between the
1621 // register number and the per class virtual register number.
1622 // We use the per class virtual register number in the ptx output.
1623 unsigned int numVRs
= MRI
->getNumVirtRegs();
1624 for (unsigned i
= 0; i
< numVRs
; i
++) {
1625 unsigned int vr
= TRI
->index2VirtReg(i
);
1626 const TargetRegisterClass
*RC
= MRI
->getRegClass(vr
);
// NOTE(review): "®map" below looks like a mis-encoded "&regmap" (the variable
// is used as "regmap" on the following lines) -- confirm against upstream.
1627 DenseMap
<unsigned, unsigned> ®map
= VRegMapping
[RC
];
1628 int n
= regmap
.size();
1629 regmap
.insert(std::make_pair(vr
, n
+ 1));
1632 // Emit register declarations
1633 // @TODO: Extract out the real register usage
1634 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1635 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1636 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1637 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1638 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1639 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1640 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1642 // Emit declaration of the virtual registers or 'physical' registers for
1643 // each register class
1644 for (unsigned i
=0; i
< TRI
->getNumRegClasses(); i
++) {
1645 const TargetRegisterClass
*RC
= TRI
->getRegClass(i
);
// NOTE(review): same suspected "&regmap" mis-encoding as in the loop above.
1646 DenseMap
<unsigned, unsigned> ®map
= VRegMapping
[RC
];
1647 std::string rcname
= getNVPTXRegClassName(RC
);
1648 std::string rcStr
= getNVPTXRegClassStr(RC
);
1649 int n
= regmap
.size();
1651 // Only declare those registers that may be used.
1653 O
<< "\t.reg " << rcname
<< " \t" << rcStr
<< "<" << (n
+1)
1658 OutStreamer
.EmitRawText(O
.str());
// printFPConstant - Print the bit pattern of the floating-point constant Fp
// to O as hexadecimal digits. A copy of the value is converted to IEEE single
// (for float types) or IEEE double (for double types) with round-to-nearest-
// even, bitcast to an integer, and written with leading '0' characters so the
// digit count reaches numHex. Any other type hits llvm_unreachable.
// NOTE(review): the assignments to numHex and any prefix written before the
// digits are not visible in this listing.
// NOTE(review): hexstr is only used for its length; the digits are produced by
// a second utohexstr(API.getZExtValue()) call.
1661 void NVPTXAsmPrinter::printFPConstant(const ConstantFP
*Fp
, raw_ostream
&O
) {
1662 APFloat APF
= APFloat(Fp
->getValueAPF()); // make a copy
1664 unsigned int numHex
;
1667 if (Fp
->getType()->getTypeID() == Type::FloatTyID
) {
1670 APF
.convert(APFloat::IEEEsingle
, APFloat::rmNearestTiesToEven
, &ignored
);
1671 } else if (Fp
->getType()->getTypeID() == Type::DoubleTyID
) {
1674 APF
.convert(APFloat::IEEEdouble
, APFloat::rmNearestTiesToEven
, &ignored
);
1676 llvm_unreachable("unsupported fp type");
1678 APInt API
= APF
.bitcastToAPInt();
1679 std::string
hexstr(utohexstr(API
.getZExtValue()));
// Left-pad with zeros up to numHex digits.
1681 if (hexstr
.length() < numHex
)
1682 O
<< std::string(numHex
- hexstr
.length(), '0');
1683 O
<< utohexstr(API
.getZExtValue());
// printScalarConstant - Print the scalar constant CPV to O, trying the
// following kinds in order:
//   * ConstantInt: its value.
//   * ConstantFP: via printFPConstant.
//   * ConstantPointerNull: (output statement not visible in this listing).
//   * GlobalValue: its symbol. Two print sites exist, selected by
//     EmitGeneric && !isa<Function> && the pointer being in address space 0.
//   * ConstantExpr: if stripping pointer casts yields a GlobalValue, the same
//     two-way symbol printing as above (address space taken from the
//     expression's type); otherwise the result of lowerConstant(CPV).
// Anything else hits llvm_unreachable.
// NOTE(review): what distinguishes the two symbol-printing forms (text around
// the symbol) is not visible in this listing.
1686 void NVPTXAsmPrinter::printScalarConstant(const Constant
*CPV
, raw_ostream
&O
) {
1687 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(CPV
)) {
1688 O
<< CI
->getValue();
1691 if (const ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(CPV
)) {
1692 printFPConstant(CFP
, O
);
1695 if (isa
<ConstantPointerNull
>(CPV
)) {
1699 if (const GlobalValue
*GVar
= dyn_cast
<GlobalValue
>(CPV
)) {
1700 PointerType
*PTy
= dyn_cast
<PointerType
>(GVar
->getType());
1701 bool IsNonGenericPointer
= false;
1702 if (PTy
&& PTy
->getAddressSpace() != 0) {
1703 IsNonGenericPointer
= true;
1705 if (EmitGeneric
&& !isa
<Function
>(CPV
) && !IsNonGenericPointer
) {
1707 O
<< *getSymbol(GVar
);
1710 O
<< *getSymbol(GVar
);
1714 if (const ConstantExpr
*Cexpr
= dyn_cast
<ConstantExpr
>(CPV
)) {
1715 const Value
*v
= Cexpr
->stripPointerCasts();
1716 PointerType
*PTy
= dyn_cast
<PointerType
>(Cexpr
->getType());
1717 bool IsNonGenericPointer
= false;
1718 if (PTy
&& PTy
->getAddressSpace() != 0) {
1719 IsNonGenericPointer
= true;
1721 if (const GlobalValue
*GVar
= dyn_cast
<GlobalValue
>(v
)) {
1722 if (EmitGeneric
&& !isa
<Function
>(v
) && !IsNonGenericPointer
) {
1724 O
<< *getSymbol(GVar
);
1727 O
<< *getSymbol(GVar
);
1731 O
<< *lowerConstant(CPV
);
1735 llvm_unreachable("Not scalar type found in printScalarConstant()");
// bufferLEByte - Append the bytes of constant CPV to aggBuffer.
//   CPV       - constant to serialise.
//   Bytes     - passed through to addBytes as its third argument; for
//               aggregates, if it exceeds the constant's alloc size the
//               difference is appended as zeros.
//   aggBuffer - destination buffer.
//
// Behaviour visible in this listing:
//   * undef or null values: zeros are added (s is the type's alloc size).
//   * i8/i16/i32/i64: the value is copied into a local of matching width and
//     its raw bytes are added, i.e. in host byte order.
//     For i32/i64 constant expressions the expression is constant-folded
//     first; a PtrToInt expression records the pointer operand (pointer casts
//     stripped) with addSymbol and reserves 4 or 8 zero bytes.
//   * float/double: the converted host float/double bytes are added.
//   * pointers: a GlobalValue, or the cast-stripped operand of a constant
//     expression, is recorded with addSymbol, followed by zeros covering the
//     pointer's alloc size.
//   * arrays/vectors/structs: delegated to bufferAggregateConstant, or zeros
//     for ConstantAggregateZero.
// Unsupported types hit llvm_unreachable.
// NOTE(review): the declaration of ptr and several branch lines are not
// visible in this listing.
1738 void NVPTXAsmPrinter::bufferLEByte(const Constant
*CPV
, int Bytes
,
1739 AggBuffer
*aggBuffer
) {
1741 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
1743 if (isa
<UndefValue
>(CPV
) || CPV
->isNullValue()) {
1744 int s
= TD
->getTypeAllocSize(CPV
->getType());
1747 aggBuffer
->addZeros(s
);
1752 switch (CPV
->getType()->getTypeID()) {
1754 case Type::IntegerTyID
: {
1755 const Type
*ETy
= CPV
->getType();
// NOTE(review): the i8 and i16 paths dereference the dyn_cast<ConstantInt>
// result without a null check.
1756 if (ETy
== Type::getInt8Ty(CPV
->getContext())) {
1758 (unsigned char)(dyn_cast
<ConstantInt
>(CPV
))->getZExtValue();
1760 aggBuffer
->addBytes(ptr
, 1, Bytes
);
1761 } else if (ETy
== Type::getInt16Ty(CPV
->getContext())) {
1762 short int16
= (short)(dyn_cast
<ConstantInt
>(CPV
))->getZExtValue();
1763 ptr
= (unsigned char *)&int16
;
1764 aggBuffer
->addBytes(ptr
, 2, Bytes
);
1765 } else if (ETy
== Type::getInt32Ty(CPV
->getContext())) {
1766 if (const ConstantInt
*constInt
= dyn_cast
<ConstantInt
>(CPV
)) {
1767 int int32
= (int)(constInt
->getZExtValue());
1768 ptr
= (unsigned char *)&int32
;
1769 aggBuffer
->addBytes(ptr
, 4, Bytes
);
1771 } else if (const ConstantExpr
*Cexpr
= dyn_cast
<ConstantExpr
>(CPV
)) {
1772 if (const ConstantInt
*constInt
= dyn_cast
<ConstantInt
>(
1773 ConstantFoldConstantExpression(Cexpr
, TD
))) {
1774 int int32
= (int)(constInt
->getZExtValue());
1775 ptr
= (unsigned char *)&int32
;
1776 aggBuffer
->addBytes(ptr
, 4, Bytes
);
1779 if (Cexpr
->getOpcode() == Instruction::PtrToInt
) {
1780 Value
*v
= Cexpr
->getOperand(0)->stripPointerCasts();
1781 aggBuffer
->addSymbol(v
);
1782 aggBuffer
->addZeros(4);
1786 llvm_unreachable("unsupported integer const type");
1787 } else if (ETy
== Type::getInt64Ty(CPV
->getContext())) {
1788 if (const ConstantInt
*constInt
= dyn_cast
<ConstantInt
>(CPV
)) {
1789 long long int64
= (long long)(constInt
->getZExtValue());
1790 ptr
= (unsigned char *)&int64
;
1791 aggBuffer
->addBytes(ptr
, 8, Bytes
);
1793 } else if (const ConstantExpr
*Cexpr
= dyn_cast
<ConstantExpr
>(CPV
)) {
1794 if (const ConstantInt
*constInt
= dyn_cast
<ConstantInt
>(
1795 ConstantFoldConstantExpression(Cexpr
, TD
))) {
1796 long long int64
= (long long)(constInt
->getZExtValue());
1797 ptr
= (unsigned char *)&int64
;
1798 aggBuffer
->addBytes(ptr
, 8, Bytes
);
1801 if (Cexpr
->getOpcode() == Instruction::PtrToInt
) {
1802 Value
*v
= Cexpr
->getOperand(0)->stripPointerCasts();
1803 aggBuffer
->addSymbol(v
);
1804 aggBuffer
->addZeros(8);
1808 llvm_unreachable("unsupported integer const type");
1810 llvm_unreachable("unsupported integer const type");
1813 case Type::FloatTyID
:
1814 case Type::DoubleTyID
: {
// NOTE(review): CFP comes from dyn_cast and is dereferenced unchecked.
1815 const ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(CPV
);
1816 const Type
*Ty
= CFP
->getType();
1817 if (Ty
== Type::getFloatTy(CPV
->getContext())) {
1818 float float32
= (float) CFP
->getValueAPF().convertToFloat();
1819 ptr
= (unsigned char *)&float32
;
1820 aggBuffer
->addBytes(ptr
, 4, Bytes
);
1821 } else if (Ty
== Type::getDoubleTy(CPV
->getContext())) {
1822 double float64
= CFP
->getValueAPF().convertToDouble();
1823 ptr
= (unsigned char *)&float64
;
1824 aggBuffer
->addBytes(ptr
, 8, Bytes
);
1826 llvm_unreachable("unsupported fp const type");
1830 case Type::PointerTyID
: {
1831 if (const GlobalValue
*GVar
= dyn_cast
<GlobalValue
>(CPV
)) {
1832 aggBuffer
->addSymbol(GVar
);
1833 } else if (const ConstantExpr
*Cexpr
= dyn_cast
<ConstantExpr
>(CPV
)) {
1834 const Value
*v
= Cexpr
->stripPointerCasts();
1835 aggBuffer
->addSymbol(v
);
// Reserve space for the pointer itself.
1837 unsigned int s
= TD
->getTypeAllocSize(CPV
->getType());
1838 aggBuffer
->addZeros(s
);
1842 case Type::ArrayTyID
:
1843 case Type::VectorTyID
:
1844 case Type::StructTyID
: {
1845 if (isa
<ConstantArray
>(CPV
) || isa
<ConstantVector
>(CPV
) ||
1846 isa
<ConstantStruct
>(CPV
) || isa
<ConstantDataSequential
>(CPV
)) {
1847 int ElementSize
= TD
->getTypeAllocSize(CPV
->getType());
1848 bufferAggregateConstant(CPV
, aggBuffer
);
// Pad with zeros when the caller asked for more bytes than the aggregate has.
1849 if (Bytes
> ElementSize
)
1850 aggBuffer
->addZeros(Bytes
- ElementSize
);
1851 } else if (isa
<ConstantAggregateZero
>(CPV
))
1852 aggBuffer
->addZeros(Bytes
);
1854 llvm_unreachable("Unexpected Constant type");
1859 llvm_unreachable("unsupported type");
// bufferAggregateConstant - Append the elements of the aggregate constant CPV
// to aggBuffer by calling bufferLEByte on each element.
//   * ConstantArray / ConstantVector: each operand, with Bytes = 0.
//   * ConstantDataSequential: each element (getElementAsConstant), Bytes = 0.
//   * ConstantStruct: each operand, with Bytes derived from the struct layout:
//     offset(i + 1) - offset(i), or, in the other visible formula,
//     offset(0) + alloc size of the struct - offset(i).
// Any other constant kind hits llvm_unreachable.
// NOTE(review): the condition choosing between the two struct formulas is not
// visible in this listing (presumably the last element -- confirm).
// NOTE(review): the unreachable message names "printAggregateConstant()",
// not this function.
1863 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant
*CPV
,
1864 AggBuffer
*aggBuffer
) {
1865 const DataLayout
*TD
= TM
.getSubtargetImpl()->getDataLayout();
1869 if (isa
<ConstantArray
>(CPV
) || isa
<ConstantVector
>(CPV
)) {
1870 if (CPV
->getNumOperands())
1871 for (unsigned i
= 0, e
= CPV
->getNumOperands(); i
!= e
; ++i
)
1872 bufferLEByte(cast
<Constant
>(CPV
->getOperand(i
)), 0, aggBuffer
);
1876 if (const ConstantDataSequential
*CDS
=
1877 dyn_cast
<ConstantDataSequential
>(CPV
)) {
1878 if (CDS
->getNumElements())
1879 for (unsigned i
= 0; i
< CDS
->getNumElements(); ++i
)
1880 bufferLEByte(cast
<Constant
>(CDS
->getElementAsConstant(i
)), 0,
1885 if (isa
<ConstantStruct
>(CPV
)) {
1886 if (CPV
->getNumOperands()) {
1887 StructType
*ST
= cast
<StructType
>(CPV
->getType());
1888 for (unsigned i
= 0, e
= CPV
->getNumOperands(); i
!= e
; ++i
) {
1890 Bytes
= TD
->getStructLayout(ST
)->getElementOffset(0) +
1891 TD
->getTypeAllocSize(ST
) -
1892 TD
->getStructLayout(ST
)->getElementOffset(i
);
1894 Bytes
= TD
->getStructLayout(ST
)->getElementOffset(i
+ 1) -
1895 TD
->getStructLayout(ST
)->getElementOffset(i
);
1896 bufferLEByte(cast
<Constant
>(CPV
->getOperand(i
)), Bytes
, aggBuffer
);
1901 llvm_unreachable("unsupported constant type in printAggregateConstant()");
1904 // buildTypeNameMap - Run through symbol table looking for type names.
// isImageType - Look Ty up in TypeNameMap and test whether the recorded name
// is exactly "struct._image1d_t", "struct._image2d_t" or "struct._image3d_t"
// (std::string::compare returns 0 on equality, hence the negations).
// NOTE(review): the return statements are not visible in this listing;
// presumably true on a match and false otherwise.
1907 bool NVPTXAsmPrinter::isImageType(const Type
*Ty
) {
1909 std::map
<const Type
*, std::string
>::iterator PI
= TypeNameMap
.find(Ty
);
1911 if (PI
!= TypeNameMap
.end() && (!PI
->second
.compare("struct._image1d_t") ||
1912 !PI
->second
.compare("struct._image2d_t") ||
1913 !PI
->second
.compare("struct._image3d_t")))
// ignoreLoc - Classify MI by opcode. The listed opcodes (call-sequence
// pseudo instructions, parameter/return-value declarations, loads and stores,
// PrototypeInst and DBG_VALUE) all share one case body.
// NOTE(review): the return statements are not visible in this listing;
// presumably the listed opcodes return true and everything else false.
1920 bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr
&MI
) {
1921 switch (MI
.getOpcode()) {
1924 case NVPTX::CallArgBeginInst
:
1925 case NVPTX::CallArgEndInst0
:
1926 case NVPTX::CallArgEndInst1
:
1927 case NVPTX::CallArgF32
:
1928 case NVPTX::CallArgF64
:
1929 case NVPTX::CallArgI16
:
1930 case NVPTX::CallArgI32
:
1931 case NVPTX::CallArgI32imm
:
1932 case NVPTX::CallArgI64
:
1933 case NVPTX::CallArgParam
:
1934 case NVPTX::CallVoidInst
:
1935 case NVPTX::CallVoidInstReg
:
1936 case NVPTX::Callseq_End
:
1937 case NVPTX::CallVoidInstReg64
:
1938 case NVPTX::DeclareParamInst
:
1939 case NVPTX::DeclareRetMemInst
:
1940 case NVPTX::DeclareRetRegInst
:
1941 case NVPTX::DeclareRetScalarInst
:
1942 case NVPTX::DeclareScalarParamInst
:
1943 case NVPTX::DeclareScalarRegInst
:
1944 case NVPTX::StoreParamF32
:
1945 case NVPTX::StoreParamF64
:
1946 case NVPTX::StoreParamI16
:
1947 case NVPTX::StoreParamI32
:
1948 case NVPTX::StoreParamI64
:
1949 case NVPTX::StoreParamI8
:
1950 case NVPTX::StoreRetvalF32
:
1951 case NVPTX::StoreRetvalF64
:
1952 case NVPTX::StoreRetvalI16
:
1953 case NVPTX::StoreRetvalI32
:
1954 case NVPTX::StoreRetvalI64
:
1955 case NVPTX::StoreRetvalI8
:
1956 case NVPTX::LastCallArgF32
:
1957 case NVPTX::LastCallArgF64
:
1958 case NVPTX::LastCallArgI16
:
1959 case NVPTX::LastCallArgI32
:
1960 case NVPTX::LastCallArgI32imm
:
1961 case NVPTX::LastCallArgI64
:
1962 case NVPTX::LastCallArgParam
:
1963 case NVPTX::LoadParamMemF32
:
1964 case NVPTX::LoadParamMemF64
:
1965 case NVPTX::LoadParamMemI16
:
1966 case NVPTX::LoadParamMemI32
:
1967 case NVPTX::LoadParamMemI64
:
1968 case NVPTX::LoadParamMemI8
:
1969 case NVPTX::PrototypeInst
:
1970 case NVPTX::DBG_VALUE
:
1976 /// PrintAsmOperand - Print out an operand for an inline asm expression.
///
/// A non-empty ExtraCode longer than one character is rejected by returning
/// true ("unknown modifier"). Single-character modifiers are dispatched in a
/// switch whose visible arm defers to the generic AsmPrinter::PrintAsmOperand.
/// Without a modifier the operand is printed with printOperand.
/// NOTE(review): the other switch arms and the final return value are not
/// visible in this listing.
1978 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr
*MI
, unsigned OpNo
,
1979 unsigned AsmVariant
,
1980 const char *ExtraCode
, raw_ostream
&O
) {
1981 if (ExtraCode
&& ExtraCode
[0]) {
1982 if (ExtraCode
[1] != 0)
1983 return true; // Unknown modifier.
1985 switch (ExtraCode
[0]) {
1987 // See if this is a generic print operand
1988 return AsmPrinter::PrintAsmOperand(MI
, OpNo
, AsmVariant
, ExtraCode
, O
);
1994 printOperand(MI
, OpNo
, O
);
// PrintAsmMemoryOperand - Print operand OpNo of MI as a memory operand for
// inline asm, using printMemOperand. Any non-empty ExtraCode is rejected by
// returning true ("unknown modifier").
// NOTE(review): text written around the operand and the final return value
// are not visible in this listing.
1999 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2000 const MachineInstr
*MI
, unsigned OpNo
, unsigned AsmVariant
,
2001 const char *ExtraCode
, raw_ostream
&O
) {
2002 if (ExtraCode
&& ExtraCode
[0])
2003 return true; // Unknown modifier
2006 printMemOperand(MI
, OpNo
, O
);
// printOperand - Print operand opNum of MI to O according to its kind:
//   * register: physical registers print their NVPTXInstPrinter name, except
//     NVPTX::VRDepot which prints DEPOTNAME followed by the function number;
//     the remaining register path goes through emitVirtualRegister.
//   * immediate: when Modifier starts with "vec" the value is printed by
//     printVecModifiedImmediate; other modifiers are reported as unhandled.
//   * FP immediate: via printFPConstant.
//   * global address: the global's symbol.
//   * basic block: the block's symbol.
// Other operand kinds hit llvm_unreachable.
// NOTE(review): the unmodified-immediate path is not visible in this listing.
2012 void NVPTXAsmPrinter::printOperand(const MachineInstr
*MI
, int opNum
,
2013 raw_ostream
&O
, const char *Modifier
) {
2014 const MachineOperand
&MO
= MI
->getOperand(opNum
);
2015 switch (MO
.getType()) {
2016 case MachineOperand::MO_Register
:
2017 if (TargetRegisterInfo::isPhysicalRegister(MO
.getReg())) {
2018 if (MO
.getReg() == NVPTX::VRDepot
)
2019 O
<< DEPOTNAME
<< getFunctionNumber();
2021 O
<< NVPTXInstPrinter::getRegisterName(MO
.getReg());
2023 emitVirtualRegister(MO
.getReg(), O
);
2027 case MachineOperand::MO_Immediate
:
// strstr(Modifier, "vec") == Modifier is true when Modifier begins with "vec".
2030 else if (strstr(Modifier
, "vec") == Modifier
)
2031 printVecModifiedImmediate(MO
, Modifier
, O
);
2034 "Don't know how to handle modifier on immediate operand");
2037 case MachineOperand::MO_FPImmediate
:
2038 printFPConstant(MO
.getFPImm(), O
);
2041 case MachineOperand::MO_GlobalAddress
:
2042 O
<< *getSymbol(MO
.getGlobal());
2045 case MachineOperand::MO_MachineBasicBlock
:
2046 O
<< *MO
.getMBB()->getSymbol();
2050 llvm_unreachable("Operand type not supported.");
// printMemOperand - Print a two-part memory operand: operand opNum followed
// by operand opNum + 1. With Modifier equal to "add" the second operand is
// always printed. Otherwise the second operand is skipped when it is an
// immediate equal to 0, so no ",0" / "+0" suffix is produced.
// NOTE(review): the separator text written between the two operands is not
// visible in this listing.
2054 void NVPTXAsmPrinter::printMemOperand(const MachineInstr
*MI
, int opNum
,
2055 raw_ostream
&O
, const char *Modifier
) {
2056 printOperand(MI
, opNum
, O
);
2058 if (Modifier
&& !strcmp(Modifier
, "add")) {
2060 printOperand(MI
, opNum
+ 1, O
);
2062 if (MI
->getOperand(opNum
+ 1).isImm() &&
2063 MI
->getOperand(opNum
+ 1).getImm() == 0)
2064 return; // don't print ',0' or '+0'
2066 printOperand(MI
, opNum
+ 1, O
);
2071 // Force static initialization.
// Registers NVPTXAsmPrinter for both TheNVPTXTarget32 and TheNVPTXTarget64 by
// constructing RegisterAsmPrinter objects. Declared extern "C".
2072 extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
2073 RegisterAsmPrinter
<NVPTXAsmPrinter
> X(TheNVPTXTarget32
);
2074 RegisterAsmPrinter
<NVPTXAsmPrinter
> Y(TheNVPTXTarget64
);
// emitSrcInText - Build a text fragment from the file name and the source
// line `line` (read through the LineReader returned by getReader) and emit it
// with OutStreamer.EmitRawText.
//   filename - source file to read from.
//   line     - line number passed to LineReader::readLine.
// NOTE(review): the literal text written around the file name and the line
// contents is not visible in this listing.
2077 void NVPTXAsmPrinter::emitSrcInText(StringRef filename
, unsigned line
) {
2078 std::stringstream temp
;
2079 LineReader
*reader
= this->getReader(filename
.str());
2081 temp
<< filename
.str();
2085 temp
<< reader
->readLine(line
);
2087 this->OutStreamer
.EmitRawText(Twine(temp
.str()));
// getReader - Return a LineReader for filename. A new LineReader is allocated
// at two sites: one initial creation, and one taken when the current reader's
// fileName() differs from filename.
// NOTE(review): the condition guarding the first allocation, any deletion of
// the previous reader and the return statement are not visible in this
// listing; ownership of the raw `new` result should be confirmed.
2090 LineReader
*NVPTXAsmPrinter::getReader(std::string filename
) {
2092 reader
= new LineReader(filename
);
2095 if (reader
->fileName() != filename
) {
2097 reader
= new LineReader(filename
);
// LineReader::readLine - Advance the underlying stream fstr to line lineNum.
// If lineNum is before the current position (theCurLine) the stream is
// rewound to the beginning with seekg. Lines are then read with
// fstr.getline(buff, 500) while theCurLine < lineNum, so at most 499
// characters of a line are stored in buff.
// NOTE(review): the updates to theCurLine and the return statement are not
// visible in this listing; presumably the contents of buff are returned.
// NOTE(review): no stream-state reset (clear()) is visible before seekg or
// after a getline that fails -- confirm that read failures are handled.
2103 std::string
LineReader::readLine(unsigned lineNum
) {
2104 if (lineNum
< theCurLine
) {
2106 fstr
.seekg(0, std::ios::beg
);
2108 while (theCurLine
< lineNum
) {
2109 fstr
.getline(buff
, 500);
2115 // Force static initialization.
// Registers NVPTXAsmPrinter for both TheNVPTXTarget32 and TheNVPTXTarget64 by
// constructing RegisterAsmPrinter objects. Declared extern "C".
2116 extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2117 RegisterAsmPrinter
<NVPTXAsmPrinter
> X(TheNVPTXTarget32
);
2118 RegisterAsmPrinter
<NVPTXAsmPrinter
> Y(TheNVPTXTarget64
);