// Source: git.proxmox.com Git - rustc.git/blob - src/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
// Imported Upstream version 1.0.0+dfsg1
// [rustc.git] / src / llvm / lib / Target / NVPTX / NVPTXIntrinsics.td
//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
// Floating-point immediate leaves used by the min/max -> saturate patterns
// in this file. Each predicate reads the matched fpimm node's APFloat value
// and compares it numerically against a constant; because the comparison is a
// C++ `==`, the zero leaves accept -0.0 as well as +0.0.

// fpimm whose single-precision value compares equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

// fpimm whose single-precision value compares equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

// fpimm whose double-precision value compares equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

// fpimm whose double-precision value compares equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
29
30
31
//-----------------------------------
// Synchronization Functions
//-----------------------------------

// The CUDA-named and the NVVM-named barrier intrinsics are emitted with the
// same instruction text. Neither definition has operands or a result.
def INT_CUDA_SYNCTHREADS
  : NVPTXInst<(outs), (ins),
              "bar.sync \t0;",
              [(int_cuda_syncthreads)]>;
def INT_BARRIER0
  : NVPTXInst<(outs), (ins),
              "bar.sync \t0;",
              [(int_nvvm_barrier0)]>;
// int_nvvm_barrier0_popc: inside a `{{ ... }}` scope that declares its own
// predicate temporary %p1, the 32-bit $pred input is tested for non-zero
// (setp.ne.u32) and the resulting predicate is passed to bar.red.popc.u32,
// which writes the 32-bit $dst.
def INT_BARRIER0_POPC
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: inside a `{{ ... }}` scope with two predicate
// temporaries, $pred is tested for non-zero into %p1, bar.red.and.pred
// produces %p2 from it, and selp.u32 turns %p2 back into a 0/1 value in $dst.
def INT_BARRIER0_AND
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         ".reg .pred \t%p2; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.and.pred \t%p2, 0, %p1; \n\t",
                         "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: inside a `{{ ... }}` scope with two predicate
// temporaries, $pred is tested for non-zero into %p1, bar.red.or.pred
// produces %p2 from it, and selp.u32 turns %p2 back into a 0/1 value in $dst.
def INT_BARRIER0_OR
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
              !strconcat("{{ \n\t",
                         ".reg .pred \t%p1; \n\t",
                         ".reg .pred \t%p2; \n\t",
                         "setp.ne.u32 \t%p1, $pred, 0; \n\t",
                         "bar.red.or.pred \t%p2, 0, %p1; \n\t",
                         "selp.u32 \t$dst, 1, 0, %p2; \n\t",
                         "}}"),
              [(set Int32Regs:$dst,
                    (int_nvvm_barrier0_or Int32Regs:$pred))]>;
66
67
//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// Template for a fence instruction: no operands, no result. StrOp is printed
// verbatim as the assembly text and the instruction is selected for IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
78
79
//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
//
// The opposite nesting, max(0.0, min(x, 1.0)), must NOT be mapped to sat(x):
// when x is NaN it evaluates to 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// For each type there are four patterns, covering both operand orders of the
// outer fmin and both operand orders of the inner fmax.

// f32
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
115
116
// One-source instruction template: $dst = IntOP($src0).
// OpcStr is the complete assembly string (mnemonic and operand list), not
// just a mnemonic, because we need to deal with cases like INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
124
// Two-source instruction template: $dst = IntOP($src0, $src1).
// OpcStr is the complete assembly string (mnemonic and operand list), not
// just a mnemonic, because we need to deal with cases like
// INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
133
// Three-source instruction template: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete assembly string (mnemonic and operand list).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;
142
//
// MISC
//

// Count-leading-zeros and population count. Note that the 64-bit (_LL)
// variants take an Int64Regs source but still produce an Int32Regs result.
def INT_NVVM_CLZ_I
  : F_MATH_1<"clz.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_clz_i>;
def INT_NVVM_CLZ_LL
  : F_MATH_1<"clz.b64 \t$dst, $src0;",
             Int32Regs, Int64Regs, int_nvvm_clz_ll>;

def INT_NVVM_POPC_I
  : F_MATH_1<"popc.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_popc_i>;
def INT_NVVM_POPC_LL
  : F_MATH_1<"popc.b64 \t$dst, $src0;",
             Int32Regs, Int64Regs, int_nvvm_popc_ll>;

// prmt.b32 with three 32-bit sources.
def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
159
//
// Min Max
//

// Integer min/max: the signedness is carried by the asm type suffix
// (.s32/.u32/.s64/.u64); signed and unsigned share the same register class.
def INT_NVVM_MIN_I
  : F_MATH_2<"min.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_i>;
def INT_NVVM_MIN_UI
  : F_MATH_2<"min.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_min_ui>;

def INT_NVVM_MIN_LL
  : F_MATH_2<"min.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ll>;
def INT_NVVM_MIN_ULL
  : F_MATH_2<"min.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_min_ull>;

def INT_NVVM_MAX_I
  : F_MATH_2<"max.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_i>;
def INT_NVVM_MAX_UI
  : F_MATH_2<"max.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_max_ui>;

def INT_NVVM_MAX_LL
  : F_MATH_2<"max.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ll>;
def INT_NVVM_MAX_ULL
  : F_MATH_2<"max.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_max_ull>;

// Floating-point min/max; f32 has an additional .ftz form, f64 does not.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
198
//
// Multiplication
//

// mul.hi for signed/unsigned 32- and 64-bit integers.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply, one definition per rounding suffix (rn/rz/rm/rp), each with
// and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply, one definition per rounding suffix.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, signed and unsigned.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
243
//
// Div
//

// f32 approximate divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide, one definition per rounding suffix (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide, one definition per rounding suffix.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
279
//
// Brev
//

// brev on 32- and 64-bit integers; result width equals source width.
def INT_NVVM_BREV32
  : F_MATH_1<"brev.b32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_brev32>;
def INT_NVVM_BREV64
  : F_MATH_1<"brev.b64 \t$dst, $src0;",
             Int64Regs, Int64Regs, int_nvvm_brev64>;
288
//
// Sad
//

// Three-operand sad, signed and unsigned 32-bit.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
297
//
// Floor Ceil
//

// floor and ceil have no instruction of their own here: they are selected to
// a same-type CVT with the CvtRMI (floor) or CvtRPI (ceil) mode operand; the
// _FTZ mode variants are used for the ftz intrinsics.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
315
//
// Abs
//

// Integer absolute value (signed 32- and 64-bit).
def INT_NVVM_ABS_I
  : F_MATH_1<"abs.s32 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_abs_i>;
def INT_NVVM_ABS_LL
  : F_MATH_1<"abs.s64 \t$dst, $src0;",
             Int64Regs, Int64Regs, int_nvvm_abs_ll>;

// Floating-point absolute value; f32 has an additional .ftz form.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
332
//
// Round
//

// round is selected to a same-type CVT with the CvtRNI mode operand.
// NOTE(review): confirm that the tie-breaking behaviour of the CvtRNI mode
// matches what callers of the round intrinsics expect for halfway values.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
343
//
// Trunc
//

// trunc is selected to a same-type CVT with the CvtRZI mode operand.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
354
//
// Saturate
//

// saturate is selected to a same-type CVT with the CvtSAT mode operand.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
365
//
// Exp2 Log2
//

// ex2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
383
//
// Sin Cos
//

// Only f32 approximate forms are defined here, each with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
397
//
// Fma
//

// f32 fused multiply-add, one definition per rounding suffix (rn/rz/rm/rp),
// each with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;

// f64 fused multiply-add, one definition per rounding suffix.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;
431
//
// Rcp
//

// f32 reciprocal, one definition per rounding suffix (rn/rz/rm/rp), each with
// and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal, one definition per rounding suffix.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 approximate reciprocal (.ftz form only).
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
464
//
// Sqrt
//

// f32 square root: one definition per rounding suffix (rn/rz/rm/rp) plus an
// approximate form, each with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root, one definition per rounding suffix.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
498
// nvvm_sqrt intrinsic
//
// int_nvvm_sqrt_f carries no rounding or ftz suffix of its own, so the
// concrete instruction is chosen by predicates. The patterns are listed from
// most to least constrained: both predicates, do_SQRTF32_RN only, doF32FTZ
// only, then an unconditional fallback to the plain approximate form.
// NOTE(review): doF32FTZ and do_SQRTF32_RN are defined outside this file;
// their exact conditions should be confirmed there.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
508
//
// Rsqrt
//

// rsqrt.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
520
//
// Add
//

// f32 add, one definition per rounding suffix (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one definition per rounding suffix.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
550
//
// Convert
//

// f64 -> f32: each d2f intrinsic is selected to CVT_f32_f64 with the mode
// operand named after its rounding suffix; the _ftz intrinsics use the _FTZ
// mode variants.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
571
// f64 -> signed 32-bit integer. Float-to-integer conversions use the
// integer-rounding modes (CvtRNI/CvtRZI/CvtRMI/CvtRPI).
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> unsigned 32-bit integer.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
589
// Signed 32-bit integer -> f64. Integer-to-float conversions use the plain
// rounding modes (CvtRN/CvtRZ/CvtRM/CvtRP).
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// Unsigned 32-bit integer -> f64.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
607
// f32 -> signed 32-bit integer, one pattern per rounding suffix, each with
// and without ftz.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> unsigned 32-bit integer.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
641
// Signed 32-bit integer -> f32.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// Unsigned 32-bit integer -> f32.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
659
// Build an f64 register from two 32-bit registers with a single mov.b64 whose
// source is the brace-enclosed pair ($src0, $src1).
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Extract one 32-bit half of an f64 register. Both definitions open a
// `{{ ... }}` scope, declare a scratch register %temp, and mov.b64 the source
// into a two-element destination pair; $dst is the first element for _LO and
// the second for _HI, and %temp takes the half that is discarded.
// NOTE(review): the inner pair is written with single braces here, whereas
// INT_NVVM_LOHI_I2D (which has no enclosing scope) uses doubled braces. This
// is reproduced as-is; check the asm-string brace rules before changing it.
def INT_NVVM_D2I_LO
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
673
// f32 -> signed 64-bit integer, one pattern per rounding suffix, each with
// and without ftz.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> unsigned 64-bit integer.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
707
// f64 -> signed 64-bit integer.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> unsigned 64-bit integer.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
725
// int_nvvm_ll2f_{rn,rz,rm,rp}: Int64Regs input, selected to CVT_f32_s64 with
// the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the suffix.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ull2f_{rn,rz,rm,rp}: Int64Regs input, selected to CVT_f32_u64 with
// the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the suffix.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ll2d_{rn,rz,rm,rp}: Int64Regs input, selected to CVT_f64_s64 with
// the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the suffix.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ull2d_{rn,rz,rm,rp}: Int64Regs input, selected to CVT_f64_u64 with
// the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the suffix.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// FIXME: Ideally, we could use these patterns instead of the scope-creating
// patterns, but ptxas does not like these since .s16 is not compatible with
// .f16. The solution is to use .bXX for all integer register types, but we
// are not there yet.
//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
//          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
//def : Pat<(int_nvvm_h2f Int16Regs:$a),
//          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;

// int_nvvm_f2h_rn_ftz (Float32Regs -> Int16Regs). The asm opens a {{ }} scope,
// declares a .b16 temporary, converts $src0 into it with cvt.rn.ftz.f16.f32
// and then moves the temporary into $dst with mov.b16.
def INT_NVVM_F2H_RN_FTZ
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b16 %temp;\n\t",
                          "cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
                          "mov.b16 \t$dst, %temp;\n",
                          "}}"),
               Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
// int_nvvm_f2h_rn (Float32Regs -> Int16Regs). Same scoped sequence as the
// _ftz form, but the conversion emitted is cvt.rn.f16.f32: declare a .b16
// temporary, convert $src0 into it, then mov.b16 it into $dst.
def INT_NVVM_F2H_RN
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b16 %temp;\n\t",
                          "cvt.rn.f16.f32 \t%temp, $src0;\n\t",
                          "mov.b16 \t$dst, %temp;\n",
                          "}}"),
               Int16Regs, Float32Regs, int_nvvm_f2h_rn>;

// int_nvvm_h2f (Int16Regs -> Float32Regs). Inside a {{ }} scope the input is
// first copied into a .b16 temporary with mov.b16, then cvt.f32.f16 converts
// the temporary into $dst.
def INT_NVVM_H2F
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b16 %temp;\n\t",
                          "mov.b16 \t%temp, $src0;\n\t",
                          "cvt.f32.f16 \t$dst, %temp;\n\t",
                          "}}"),
               Float32Regs, Int16Regs, int_nvvm_h2f>;

// Generic f16 <-> f32 nodes, with the half value carried in Int16Regs.
// f16_to_fp selects CVT_f32_f16 with CvtNONE. fp_to_f16 selects CVT_f16_f32
// with CvtRN_FTZ when doF32FTZ holds and with CvtRN otherwise.
// NOTE(review): the two fp_to_f16 patterns match the same node and differ only
// by the Requires<> predicate; their relative order is kept as in the
// original -- confirm before reordering.
def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
          (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

// Generic f16 <-> f64 nodes: CVT_f64_f16 with CvtNONE and CVT_f16_f64 with
// CvtRN.
def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
          (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
          (CVT_f16_f64 Float64Regs:$a, CvtRN)>;

//
// Bitcast
//

// 32-bit bitcasts between Float32Regs and Int32Regs, emitted as mov.b32.
def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, Float32Regs,
               int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, Int32Regs,
               int_nvvm_bitcast_i2f>;

// 64-bit bitcasts between Float64Regs and Int64Regs, emitted as mov.b64.
def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, Int64Regs,
               int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs,
               int_nvvm_bitcast_d2ll>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrapper whose predicate code accepts node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL) returns true.
//   ops  - operand list of the fragment
//   frag - the wrapped pattern
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
}]>;
// PatFrag wrapper whose predicate code accepts node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED) returns true.
//   ops  - operand list of the fragment
//   frag - the wrapped pattern
class ATOMIC_SHARED_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
}]>;
// PatFrag wrapper whose predicate code accepts node N only when
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC) returns true.
//   ops  - operand list of the fragment
//   frag - the wrapped pattern
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, [{
  return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
}]>;

// Two-operand atomic (address + one value) for a single pointer width.
// The asm string is "atom" # SpaceStr # OpcStr # TypeStr followed by
// " \t$dst, [$addr], $b;".
//   ptrclass - register class of $addr
//   regclass - register class of $dst and of the register form of $b
//   IntOp    - PatFrag matched by both records
//   IMMType  - operand type of $b in the immediate form
//   IMM      - node used to match $b in the immediate form
//   Pred     - predicate required by both records
// Defines two records: "reg" ($b in a register) and "imm" ($b immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                 " \t$dst, [$addr], $b;"),
                      [(set regclass:$dst,
                            (IntOp ptrclass:$addr, regclass:$b))]>,
            Requires<[Pred]>;
  def imm : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, IMMType:$b),
                      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                 " \t$dst, [$addr], $b;"),
                      [(set regclass:$dst,
                            (IntOp ptrclass:$addr, IMM:$b))]>,
            Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_imp twice: "p32" with Int32Regs as the pointer
// register class and "p64" with Int64Regs. All other arguments are forwarded
// unchanged.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, SDNode IMM, Predicate Pred> {
  defm p32
      : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                       IMMType, IMM, Pred>;
  defm p64
      : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                       IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
//
// Emits a {{ }} scope that declares a ".s<TypeStr>" register named temp,
// writes neg.s<TypeStr> of $b into it, and then issues
// "atom" # SpaceStr # OpcStr # ".u<TypeStr>" with temp as the value operand.
// TypeStr is therefore the bare bit width ("32", "64"), not a dotted type.
// Only a register form ("reg") is defined; IMMType is accepted but not used
// in the body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      !strconcat("{{ \n\t",
                                 ".reg \t.s", TypeStr, " temp; \n\t",
                                 "neg.s", TypeStr, " \ttemp, $b; \n\t",
                                 "atom", SpaceStr, OpcStr, ".u", TypeStr,
                                 " \t$dst, [$addr], temp; \n\t",
                                 "}}"),
                      [(set regclass:$dst,
                            (IntOp ptrclass:$addr, regclass:$b))]>,
            Requires<[Pred]>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice: "p32" with Int32Regs as the pointer
// register class and "p64" with Int64Regs. All other arguments are forwarded
// unchanged.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, Predicate Pred> {
  defm p32
      : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                           IntOp, IMMType, Pred>;
  defm p64
      : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                           IntOp, IMMType, Pred>;
}

// has 3 operands
//
// Three-operand atomic (address + two values) for a single pointer width.
// Every record uses the asm string
// "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;".
// Four records cover the register/immediate combinations of $b and $c:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared with IMMType and matched with imm.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, Predicate Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b, regclass:$c),
                      !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                 " \t$dst, [$addr], $b, $c;"),
                      [(set regclass:$dst,
                            (IntOp ptrclass:$addr, regclass:$b,
                                   regclass:$c))]>,
            Requires<[Pred]>;
  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                  " \t$dst, [$addr], $b, $c;"),
                       [(set regclass:$dst,
                             (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
             Requires<[Pred]>;
  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                  " \t$dst, [$addr], $b, $c;"),
                       [(set regclass:$dst,
                             (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
             Requires<[Pred]>;
  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                       !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                                  " \t$dst, [$addr], $b, $c;"),
                       [(set regclass:$dst,
                             (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
             Requires<[Pred]>;
}
// Instantiates F_ATOMIC_3_imp twice: "p32" with Int32Regs as the pointer
// register class and "p64" with Int64Regs. All other arguments are forwarded
// unchanged.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, Predicate Pred> {
  defm p32
      : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                       IMMType, Pred>;
  defm p64
      : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                       IMMType, Pred>;
}

// atom_add

// Address-space-checked fragments: _g / _s / _gen wrap the same node in
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK. The 32- and
// 64-bit integer forms wrap atomic_load_add_{32,64}; the f32 form wraps the
// int_nvvm_atomic_load_add_f32 intrinsic.
def atomic_load_add_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_f32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;

// 32-bit integer add (.u32). The *_GEN_32_USE_G record matches the generic
// fragment but emits the ".global" form, gated by useAtomRedG32forGen32.
defm INT_PTX_ATOM_ADD_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_ADD_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
                 atomic_load_add_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_ADD_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// 64-bit integer add (.u64), same layout as the 32-bit group.
defm INT_PTX_ATOM_ADD_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_ADD_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
                 atomic_load_add_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_ADD_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// f32 add (.f32): immediates use f32imm / fpimm, and all three address spaces
// are gated by hasAtomAddF32. There is no *_USE_G record for f32.
defm INT_PTX_ATOM_ADD_G_F32
    : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
                 atomic_load_add_f32_g, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_S_F32
    : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
                 atomic_load_add_f32_s, f32imm, fpimm, hasAtomAddF32>;
defm INT_PTX_ATOM_ADD_GEN_F32
    : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
                 atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;

// atom_sub

// Address-space-checked fragments over atomic_load_sub_{32,64}.
def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction is instantiated through F_ATOMIC_2_NEG with OpcStr ".add" and a
// bare bit-width TypeStr ("32" / "64"), i.e. an add of the negated operand.
// The *_USE_G records match the generic fragment but pass ".global".
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen, i64imm, useAtomRedG64forGen64>;

// atom_swap

// Address-space-checked fragments over atomic_swap_{32,64}.
def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;

// Swap is emitted with OpcStr ".exch" on .b32 / .b64. The *_USE_G records
// match the generic fragment but pass ".global".
defm INT_PTX_ATOM_SWAP_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_SWAP_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                 atomic_swap_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_SWAP_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_SWAP_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_SWAP_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                 atomic_swap_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_SWAP_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_max

// Address-space-checked fragments over atomic_load_max_{32,64} and
// atomic_load_umax_{32,64}.
def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;

// max fragments use the .s32 / .s64 types, umax fragments use .u32 / .u64;
// all pass OpcStr ".max". The *_USE_G records match the generic fragment but
// pass ".global".
// NOTE(review): the PTX ISA lists 64-bit atom.min/atom.max as requiring
// sm_32 or higher; the 64-bit predicates used here are defined outside this
// section -- confirm they are strict enough.
defm INT_PTX_ATOM_LOAD_MAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                 atomic_load_max_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                 atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                 atomic_load_umax_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                 atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_min

// Address-space-checked fragments over atomic_load_min_{32,64} and
// atomic_load_umin_{32,64}.
def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;

// min fragments use the .s32 / .s64 types, umin fragments use .u32 / .u64;
// all pass OpcStr ".min". The *_USE_G records match the generic fragment but
// pass ".global".
// NOTE(review): the PTX ISA lists 64-bit atom.min/atom.max as requiring
// sm_32 or higher; the 64-bit predicates used here are defined outside this
// section -- confirm they are strict enough.
defm INT_PTX_ATOM_LOAD_MIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                 atomic_load_min_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                 atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                 atomic_load_umin_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
                 atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_inc atom_dec

// Address-space-checked fragments over the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics. Only 32-bit forms are defined.
def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

// Both operations use the .u32 type with OpcStr ".inc" / ".dec". The *_USE_G
// records match the generic fragment but pass ".global".
defm INT_PTX_ATOM_INC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_INC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
                 atomic_load_inc_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_INC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_DEC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_DEC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
                 atomic_load_dec_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_DEC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm, useAtomRedG32forGen32>;

// atom_and

// Address-space-checked fragments over atomic_load_and_{32,64}.
def atomic_load_and_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;

// Bitwise and on .b32 / .b64 with OpcStr ".and". The *_USE_G records match
// the generic fragment but pass ".global".
// NOTE(review): the PTX ISA lists 64-bit atom.and/or/xor as requiring sm_32
// or higher; the 64-bit predicates used here are defined outside this
// section -- confirm they are strict enough.
defm INT_PTX_ATOM_AND_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_AND_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
                 atomic_load_and_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_AND_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_AND_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_AND_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
                 atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_AND_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_or

// Address-space-checked fragments over atomic_load_or_{32,64}.
def atomic_load_or_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;

// Bitwise or on .b32 / .b64 with OpcStr ".or". The *_USE_G records match the
// generic fragment but pass ".global". Record order (G, GEN, GEN_USE_G, S)
// is kept exactly as in the original.
// NOTE(review): the PTX ISA lists 64-bit atom.and/or/xor as requiring sm_32
// or higher; the 64-bit predicates used here are defined outside this
// section -- confirm they are strict enough.
defm INT_PTX_ATOM_OR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_OR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
                 atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_OR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_OR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_OR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
                 atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;

// atom_xor

// Address-space-checked fragments over atomic_load_xor_{32,64}.
def atomic_load_xor_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;

// Bitwise xor on .b32 / .b64 with OpcStr ".xor". The *_USE_G records match
// the generic fragment but pass ".global".
// NOTE(review): the PTX ISA lists 64-bit atom.and/or/xor as requiring sm_32
// or higher; the 64-bit predicates used here are defined outside this
// section -- confirm they are strict enough.
defm INT_PTX_ATOM_XOR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_XOR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
                 atomic_load_xor_32_s, i32imm, imm, hasAtomRedS32>;
defm INT_PTX_ATOM_XOR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_XOR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
defm INT_PTX_ATOM_XOR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
                 atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
defm INT_PTX_ATOM_XOR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;

// atom_cas

// Address-space-checked three-operand fragments over
// atomic_cmp_swap_{32,64}.
def atomic_cmp_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

// Compare-and-swap goes through F_ATOMIC_3 with OpcStr ".cas" on .b32 / .b64.
// The *_USE_G records match the generic fragment but pass ".global".
defm INT_PTX_ATOM_CAS_G_32
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_g, i32imm, hasAtomRedG32>;
defm INT_PTX_ATOM_CAS_S_32
    : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
                 atomic_cmp_swap_32_s, i32imm, hasAtomRedS32>;
defm INT_PTX_ATOM_CAS_GEN_32
    : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_CAS_G_64
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_g, i64imm, hasAtomRedG64>;
defm INT_PTX_ATOM_CAS_S_64
    : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
                 atomic_cmp_swap_64_s, i64imm, hasAtomRedS64>;
defm INT_PTX_ATOM_CAS_GEN_64
    : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm, hasAtomRedGen64>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;


1338 //-----------------------------------
1339 // Read Special Registers
1340 //-----------------------------------
// Instruction with no inputs that defines a single output register $dst.
//   OpStr       - complete asm string for the instruction.
//   regclassOut - register class of the $dst output.
//   IntOp       - intrinsic matched with no arguments; its result is set
//                 into $dst.
1341 class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
1342 NVPTXInst<(outs regclassOut:$dst), (ins),
1343 OpStr,
1344 [(set regclassOut:$dst, (IntOp))]>;
1345
// int_nvvm_read_ptx_sreg_tid_{x,y,z}: mov.u32 from %tid.{x,y,z} into an
// Int32Regs register.
1346 def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
1347 int_nvvm_read_ptx_sreg_tid_x>;
1348 def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
1349 int_nvvm_read_ptx_sreg_tid_y>;
1350 def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
1351 int_nvvm_read_ptx_sreg_tid_z>;
1352
// int_nvvm_read_ptx_sreg_ntid_{x,y,z}: mov.u32 from %ntid.{x,y,z} into an
// Int32Regs register.
1353 def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
1354 int_nvvm_read_ptx_sreg_ntid_x>;
1355 def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
1356 int_nvvm_read_ptx_sreg_ntid_y>;
1357 def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
1358 int_nvvm_read_ptx_sreg_ntid_z>;
1359
// int_nvvm_read_ptx_sreg_ctaid_{x,y,z}: mov.u32 from %ctaid.{x,y,z} into an
// Int32Regs register.
1360 def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
1361 int_nvvm_read_ptx_sreg_ctaid_x>;
1362 def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
1363 int_nvvm_read_ptx_sreg_ctaid_y>;
1364 def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
1365 int_nvvm_read_ptx_sreg_ctaid_z>;
1366
// int_nvvm_read_ptx_sreg_nctaid_{x,y,z}: mov.u32 from %nctaid.{x,y,z} into an
// Int32Regs register.
1367 def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
1368 int_nvvm_read_ptx_sreg_nctaid_x>;
1369 def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
1370 int_nvvm_read_ptx_sreg_nctaid_y>;
1371 def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
1372 int_nvvm_read_ptx_sreg_nctaid_z>;
1373
// int_nvvm_read_ptx_sreg_warpsize: mov.u32 from the symbol WARP_SZ, which is
// written without a '%' prefix in the asm string.
1374 def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
1375 int_nvvm_read_ptx_sreg_warpsize>;
1376
1377
1378 //-----------------------------------
1379 // Support for ldu on sm_20 or later
1380 //-----------------------------------
1381
1382 // Scalar
// Scalar "ldu.global" instruction family.
//   TyStr    - remainder of the asm string after "ldu.global." (type suffix
//              plus operand list), supplied by each defm.
//   regclass - register class of the $result output.
// Generates five defs that differ only in the class of the $src address
// operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny), ari (MEMri)
// and ari64 (MEMri64).  The def names have no leading underscore, so they are
// appended directly to the defm name.  Every def has an empty pattern list
// and requires hasLDU.
// NOTE(review): with no patterns these are presumably selected from C++ code
// -- confirm in the instruction selector.
1383 multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1384 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1385 !strconcat("ldu.global.", TyStr),
1386 []>, Requires<[hasLDU]>;
1387 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1388 !strconcat("ldu.global.", TyStr),
1389 []>, Requires<[hasLDU]>;
1390 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1391 !strconcat("ldu.global.", TyStr),
1392 []>, Requires<[hasLDU]>;
1393 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1394 !strconcat("ldu.global.", TyStr),
1395 []>, Requires<[hasLDU]>;
1396 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1397 !strconcat("ldu.global.", TyStr),
1398 []>, Requires<[hasLDU]>;
1399 }
1400
// Scalar ldu instantiations, one per element type.  The i8 variant loads with
// .u8 but, like i16, produces its result in Int16Regs.  The p32/p64 variants
// use the same asm string and register class as i32/i64.
1401 defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1402 defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1403 defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1404 defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1405 defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1406 defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1407 defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1408 defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1409
1410 // vector
1411
1412 // Elementized vector ldu
// Two-element vector "ldu.global" family; each element gets its own output
// register ($dst1, $dst2) of class regclass.
//   TyStr - remainder of the asm string after "ldu.global.".
// Generates _areg32, _areg64, _ari32, _ari64 and _avar, differing only in the
// class of the $src address operand.  All have empty pattern lists.
// NOTE(review): no Requires<[hasLDU]> is attached here -- confirm whether the
// predicate is checked wherever these are selected.
1413 multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1414 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1415 (ins Int32Regs:$src),
1416 !strconcat("ldu.global.", TyStr), []>;
1417 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1418 (ins Int64Regs:$src),
1419 !strconcat("ldu.global.", TyStr), []>;
1420 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1421 (ins MEMri:$src),
1422 !strconcat("ldu.global.", TyStr), []>;
1423 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1424 (ins MEMri64:$src),
1425 !strconcat("ldu.global.", TyStr), []>;
1426 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1427 (ins imemAny:$src),
1428 !strconcat("ldu.global.", TyStr), []>;
1429 }
1430
// Four-element vector "ldu.global" family; each element gets its own output
// register ($dst1..$dst4) of class regclass.
//   TyStr - remainder of the asm string after "ldu.global.".
// Generates _areg32, _areg64, _ari32, _ari64 and _avar, differing only in the
// class of the $src address operand.  All have empty pattern lists.
// NOTE(review): no Requires<[hasLDU]> is attached here -- confirm whether the
// predicate is checked wherever these are selected.
1431 multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1432 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1433 regclass:$dst4), (ins Int32Regs:$src),
1434 !strconcat("ldu.global.", TyStr), []>;
1435 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1436 regclass:$dst4), (ins Int64Regs:$src),
1437 !strconcat("ldu.global.", TyStr), []>;
1438 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1439 regclass:$dst4), (ins MEMri:$src),
1440 !strconcat("ldu.global.", TyStr), []>;
1441 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1442 regclass:$dst4), (ins MEMri64:$src),
1443 !strconcat("ldu.global.", TyStr), []>;
1444 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1445 regclass:$dst4), (ins imemAny:$src),
1446 !strconcat("ldu.global.", TyStr), []>;
1447 }
1448
// Vector ldu instantiations.  v2 exists for i8/i16/i32/f32/i64/f64; v4 exists
// only for i8/i16/i32/f32.  The i8 variants load with .u8 but, like i16,
// produce their results in Int16Regs.
1449 defm INT_PTX_LDU_G_v2i8_ELE
1450 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1451 defm INT_PTX_LDU_G_v2i16_ELE
1452 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1453 defm INT_PTX_LDU_G_v2i32_ELE
1454 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1455 defm INT_PTX_LDU_G_v2f32_ELE
1456 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1457 defm INT_PTX_LDU_G_v2i64_ELE
1458 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1459 defm INT_PTX_LDU_G_v2f64_ELE
1460 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1461 defm INT_PTX_LDU_G_v4i8_ELE
1462 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1463 defm INT_PTX_LDU_G_v4i16_ELE
1464 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1465 Int16Regs>;
1466 defm INT_PTX_LDU_G_v4i32_ELE
1467 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1468 Int32Regs>;
1469 defm INT_PTX_LDU_G_v4f32_ELE
1470 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
1471 Float32Regs>;
1472
1473
1474 //-----------------------------------
1475 // Support for ldg on sm_35 or later
1476 //-----------------------------------
1477
// Scalar "ld.global.nc" instruction family.
//   TyStr    - remainder of the asm string after "ld.global.nc." (type suffix
//              plus operand list), supplied by each defm.
//   regclass - register class of the $result output.
// Generates five defs that differ only in the class of the $src address
// operand: areg (Int32Regs), areg64 (Int64Regs), avar (imemAny), ari (MEMri)
// and ari64 (MEMri64).  The def names have no leading underscore, so they are
// appended directly to the defm name.  Every def has an empty pattern list
// and requires hasLDG.
1478 multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
1479 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1480 !strconcat("ld.global.nc.", TyStr),
1481 []>, Requires<[hasLDG]>;
1482 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1483 !strconcat("ld.global.nc.", TyStr),
1484 []>, Requires<[hasLDG]>;
1485 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1486 !strconcat("ld.global.nc.", TyStr),
1487 []>, Requires<[hasLDG]>;
1488 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1489 !strconcat("ld.global.nc.", TyStr),
1490 []>, Requires<[hasLDG]>;
1491 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1492 !strconcat("ld.global.nc.", TyStr),
1493 []>, Requires<[hasLDG]>;
1494 }
1495
// Scalar ldg instantiations, one per element type.  The i8 variant loads with
// .u8 but, like i16, produces its result in Int16Regs.  The p32/p64 variants
// use the same asm string and register class as i32/i64.
1496 defm INT_PTX_LDG_GLOBAL_i8
1497 : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
1498 defm INT_PTX_LDG_GLOBAL_i16
1499 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
1500 defm INT_PTX_LDG_GLOBAL_i32
1501 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1502 defm INT_PTX_LDG_GLOBAL_i64
1503 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1504 defm INT_PTX_LDG_GLOBAL_f32
1505 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
1506 defm INT_PTX_LDG_GLOBAL_f64
1507 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
1508 defm INT_PTX_LDG_GLOBAL_p32
1509 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
1510 defm INT_PTX_LDG_GLOBAL_p64
1511 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1512
1513 // vector
1514
1515 // Elementized vector ldg
// Two-element vector "ld.global.nc" family; each element gets its own output
// register ($dst1, $dst2) of class regclass.
//   TyStr - remainder of the asm string after "ld.global.nc.".
// Generates _areg32, _areg64, _ari32, _ari64 and _avar, differing only in the
// class of the $src address operand.  All have empty pattern lists.
// NOTE(review): no Requires<[hasLDG]> is attached here -- confirm whether the
// predicate is checked wherever these are selected.
1516 multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1517 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1518 (ins Int32Regs:$src),
1519 !strconcat("ld.global.nc.", TyStr), []>;
1520 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1521 (ins Int64Regs:$src),
1522 !strconcat("ld.global.nc.", TyStr), []>;
1523 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1524 (ins MEMri:$src),
1525 !strconcat("ld.global.nc.", TyStr), []>;
1526 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1527 (ins MEMri64:$src),
1528 !strconcat("ld.global.nc.", TyStr), []>;
1529 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1530 (ins imemAny:$src),
1531 !strconcat("ld.global.nc.", TyStr), []>;
1532 }
1533
// Four-element vector "ld.global.nc" family; each element gets its own output
// register ($dst1..$dst4) of class regclass.
//   TyStr - remainder of the asm string after "ld.global.nc.".
// Generates _areg32, _areg64, _ari32, _ari64 and _avar, differing only in the
// class of the $src address operand.  All have empty pattern lists.
// NOTE(review): no Requires<[hasLDG]> is attached here -- confirm whether the
// predicate is checked wherever these are selected.
1534 multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1535 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1536 regclass:$dst4), (ins Int32Regs:$src),
1537 !strconcat("ld.global.nc.", TyStr), []>;
1538 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1539 regclass:$dst4), (ins Int64Regs:$src),
1540 !strconcat("ld.global.nc.", TyStr), []>;
1541 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1542 regclass:$dst4), (ins MEMri:$src),
1543 !strconcat("ld.global.nc.", TyStr), []>;
1544 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1545 regclass:$dst4), (ins MEMri64:$src),
1546 !strconcat("ld.global.nc.", TyStr), []>;
1547 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1548 regclass:$dst4), (ins imemAny:$src),
1549 !strconcat("ld.global.nc.", TyStr), []>;
1550 }
1551
// Vector ldg instantiations.  v2 exists for i8/i16/i32/f32/i64/f64; v4 exists
// only for i8/i16/i32/f32.  The i8 variants load with .u8 but produce their
// results in Int16Regs (see the FIXME below).
1552 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
1553 defm INT_PTX_LDG_G_v2i8_ELE
1554 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1555 defm INT_PTX_LDG_G_v2i16_ELE
1556 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1557 defm INT_PTX_LDG_G_v2i32_ELE
1558 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
1559 defm INT_PTX_LDG_G_v2f32_ELE
1560 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
1561 defm INT_PTX_LDG_G_v2i64_ELE
1562 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
1563 defm INT_PTX_LDG_G_v2f64_ELE
1564 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
1565 defm INT_PTX_LDG_G_v4i8_ELE
1566 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1567 defm INT_PTX_LDG_G_v4i16_ELE
1568 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
1569 defm INT_PTX_LDG_G_v4i32_ELE
1570 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
1571 defm INT_PTX_LDG_G_v4f32_ELE
1572 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
1573
1574
// Address conversion emitted as "cvta.<Str>" (used below for the
// *_to_gen intrinsics).
//   Str    - address-space name spliced into the asm string.
//   Intrin - intrinsic matched; one register in, one register out.
// _yes / _yes_64 emit cvta.<Str>.u32 / .u64 and require hasGenericLdSt.
// _no / _no_64 match the identical pattern with no predicate and emit a plain
// mov.u32 / mov.u64.
// NOTE(review): nothing in this block decides between _yes and _no when
// hasGenericLdSt holds -- confirm how the selector orders them.
1575 multiclass NG_TO_G<string Str, Intrinsic Intrin> {
1576 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1577 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1578 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1579 Requires<[hasGenericLdSt]>;
1580 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1581 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1582 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1583 Requires<[hasGenericLdSt]>;
1584
1585 // @TODO: Are these actually needed? I believe global addresses will be copied
1586 // to register values anyway.
1587 /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
1588 !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
1589 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1590 Requires<[hasGenericLdSt]>;
1591 def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
1592 !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
1593 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>,
1594 Requires<[hasGenericLdSt]>;*/
1595
1596 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1597 "mov.u32 \t$result, $src;",
1598 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1599 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1600 "mov.u64 \t$result, $src;",
1601 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1602
1603 // @TODO: Are these actually needed? I believe global addresses will be copied
1604 // to register values anyway.
1605 /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src),
1606 "mov.u32 \t$result, $src;",
1607 [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;
1608 def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1609 "mov.u64 \t$result, $src;",
1610 [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/
1611 }
1612
// Address conversion emitted as "cvta.to.<Str>" (used below for the
// gen_to_* intrinsics).
//   Str    - address-space name spliced into the asm string.
//   Intrin - intrinsic matched; one register in, one register out.
// _yes / _yes_64 emit cvta.to.<Str>.u32 / .u64 and require hasGenericLdSt.
// _no / _no_64 match the identical pattern with no predicate and emit a plain
// mov.u32 / mov.u64.
1613 multiclass G_TO_NG<string Str, Intrinsic Intrin> {
1614 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1615 !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
1616 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
1617 Requires<[hasGenericLdSt]>;
1618 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1619 !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
1620 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
1621 Requires<[hasGenericLdSt]>;
1622 def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
1623 "mov.u32 \t$result, $src;",
1624 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
1625 def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
1626 "mov.u64 \t$result, $src;",
1627 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
1628 }
1629
// <space>-to-generic conversions.  Note the asm name for the constant space
// is "const" while the intrinsic name spells it "constant".
1630 defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1631 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1632 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1633 defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1634
// generic-to-<space> conversions.  Note the asm name for the constant space
// is "const" while the intrinsic name spells it "constant".
1635 defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1636 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1637 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1638 defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1639
1640
1641 // nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is lowered to a plain register-to-register mov
// (mov.u32 for Int32Regs, mov.u64 for Int64Regs); no cvta is emitted and no
// predicate is attached.
1642 def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1643 (ins Int32Regs:$src),
1644 "mov.u32 \t$result, $src;",
1645 [(set Int32Regs:$result,
1646 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1647 def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1648 (ins Int64Regs:$src),
1649 "mov.u64 \t$result, $src;",
1650 [(set Int64Regs:$result,
1651 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1652
1653
1654 // nvvm.move intrinsics
// One register-to-register mov per int_nvvm_move_* intrinsic and register
// class.  The integer variants use the untyped .b16/.b32/.b64 suffixes, the
// float/double variants use .f32/.f64, and the two int_nvvm_move_ptr variants
// (32- and 64-bit registers) use .u32/.u64.
1655 def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1656 "mov.b16 \t$r, $s;",
1657 [(set Int16Regs:$r,
1658 (int_nvvm_move_i16 Int16Regs:$s))]>;
1659 def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1660 "mov.b32 \t$r, $s;",
1661 [(set Int32Regs:$r,
1662 (int_nvvm_move_i32 Int32Regs:$s))]>;
1663 def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1664 "mov.b64 \t$r, $s;",
1665 [(set Int64Regs:$r,
1666 (int_nvvm_move_i64 Int64Regs:$s))]>;
1667 def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1668 "mov.f32 \t$r, $s;",
1669 [(set Float32Regs:$r,
1670 (int_nvvm_move_float Float32Regs:$s))]>;
1671 def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1672 "mov.f64 \t$r, $s;",
1673 [(set Float64Regs:$r,
1674 (int_nvvm_move_double Float64Regs:$s))]>;
1675 def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1676 "mov.u32 \t$r, $s;",
1677 [(set Int32Regs:$r,
1678 (int_nvvm_move_ptr Int32Regs:$s))]>;
1679 def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1680 "mov.u64 \t$r, $s;",
1681 [(set Int64Regs:$r,
1682 (int_nvvm_move_ptr Int64Regs:$s))]>;
1683
1684 // @TODO: Are these actually needed, or will we always just see symbols
1685 // copied to registers first?
1686 /*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1687 "mov.u32 \t$r, $s;",
1688 [(set Int32Regs:$r,
1689 (int_nvvm_move_ptr texternalsym:$s))]>;
1690 def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1691 "mov.u64 \t$r, $s;",
1692 [(set Int64Regs:$r,
1693 (int_nvvm_move_ptr texternalsym:$s))]>;*/
1694
1695
1696 // MoveParam %r1, param
1697 // ptr_local_to_gen %r2, %r1
1698 // ptr_gen_to_local %r3, %r2
1699 // ->
1700 // mov %r1, param
1701
1702 // @TODO: Revisit this. There is a type
1703 // contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1704 // instructions are not currently defined. However, we can use the ptr
1705 // variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single
// nvvm_move_ptr64 / nvvm_move_ptr32 whose operand is the external symbol
// itself; the i64/i32 result type selects the variant.
1706 def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1707 (MoveParam texternalsym:$src)))),
1708 (nvvm_move_ptr64 texternalsym:$src)>;
1709 def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
1710 (MoveParam texternalsym:$src)))),
1711 (nvvm_move_ptr32 texternalsym:$src)>;
1712
// mov.u64 of an imem operand into an Int64Regs register.  The pattern list is
// empty, so TableGen generates no selection pattern for this instruction.
// NOTE(review): presumably created directly from C++ for texture/surface
// handle symbols -- confirm at the use site.
1713 def texsurf_handles
1714 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
1715 "mov.u64 \t$result, $src;", []>;
1716
1717 //-----------------------------------
1718 // Compiler Error Warn
1719 // - Just ignore them in codegen
1720 //-----------------------------------
1721
// int_nvvm_compiler_warn / int_nvvm_compiler_error for 32- and 64-bit
// register operands.  The asm string is only a "//" comment, and the $a
// operand is accepted by the pattern but never referenced in the output.
1722 def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1723 "// llvm.nvvm.compiler.warn()",
1724 [(int_nvvm_compiler_warn Int32Regs:$a)]>;
1725 def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1726 "// llvm.nvvm.compiler.warn()",
1727 [(int_nvvm_compiler_warn Int64Regs:$a)]>;
1728 def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
1729 "// llvm.nvvm.compiler.error()",
1730 [(int_nvvm_compiler_error Int32Regs:$a)]>;
1731 def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
1732 "// llvm.nvvm.compiler.error()",
1733 [(int_nvvm_compiler_error Int64Regs:$a)]>;
1734
1735
1736 // isspacep
1737
// isspacep.<space>: sets an Int1Regs result $d from the address in $a, with
// one def per address space (const, global, local, shared) and per address
// register width (Int32Regs, Int64Regs).
// Only the two .const forms carry Requires<[hasPTX31]>; the others have no
// predicate.
1738 def ISSPACEP_CONST_32
1739 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1740 "isspacep.const \t$d, $a;",
1741 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
1742 Requires<[hasPTX31]>;
1743 def ISSPACEP_CONST_64
1744 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1745 "isspacep.const \t$d, $a;",
1746 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
1747 Requires<[hasPTX31]>;
1748 def ISSPACEP_GLOBAL_32
1749 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1750 "isspacep.global \t$d, $a;",
1751 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
1752 def ISSPACEP_GLOBAL_64
1753 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1754 "isspacep.global \t$d, $a;",
1755 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
1756 def ISSPACEP_LOCAL_32
1757 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1758 "isspacep.local \t$d, $a;",
1759 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
1760 def ISSPACEP_LOCAL_64
1761 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1762 "isspacep.local \t$d, $a;",
1763 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
1764 def ISSPACEP_SHARED_32
1765 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
1766 "isspacep.shared \t$d, $a;",
1767 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
1768 def ISSPACEP_SHARED_64
1769 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
1770 "isspacep.shared \t$d, $a;",
1771 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
1772
1773
1774 // Special register reads
// mov.b32 from a SpecialRegs operand $r into an Int32Regs register $d.
// It has no pattern of its own; the Pat records that follow reference it.
1775 def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
1776 (ins SpecialRegs:$r),
1777 "mov.b32\t$d, $r;", []>;
1778
// int_nvvm_read_ptx_sreg_envreg<N> (N = 0..31) selects MOV_SPECIAL applied to
// the matching ENVREG<N> register.
1779 def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
1780 def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
1781 def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
1782 def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
1783 def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
1784 def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
1785 def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
1786 def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
1787 def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
1788 def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
1789 def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
1790 def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
1791 def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
1792 def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
1793 def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
1794 def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
1795 def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
1796 def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
1797 def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
1798 def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
1799 def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
1800 def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
1801 def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
1802 def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
1803 def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
1804 def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
1805 def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
1806 def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
1807 def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
1808 def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
1809 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
1810 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
1811
1812
1813 // rotate builtin support
1814
// int_nvvm_rotate_b32 with an immediate amount, when hasHWROT32 holds.
// Emitted as shf.l.wrap.b32 with $src supplied for both source operands.
1815 def ROTATE_B32_HW_IMM
1816 : NVPTXInst<(outs Int32Regs:$dst),
1817 (ins Int32Regs:$src, i32imm:$amt),
1818 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1819 [(set Int32Regs:$dst,
1820 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
1821 Requires<[hasHWROT32]> ;
1822
// int_nvvm_rotate_b32 with a register amount, when hasHWROT32 holds.
// Emitted as shf.l.wrap.b32 with $src supplied for both source operands.
1823 def ROTATE_B32_HW_REG
1824 : NVPTXInst<(outs Int32Regs:$dst),
1825 (ins Int32Regs:$src, Int32Regs:$amt),
1826 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1827 [(set Int32Regs:$dst,
1828 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
1829 Requires<[hasHWROT32]> ;
1830
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 by an immediate:
// ROT32imm_sw receives the amount and SUB_FRM_32 applied to the amount.
// NOTE(review): ROT32imm_sw and SUB_FRM_32 are defined outside this section;
// SUB_FRM_32 presumably yields 32 - amt -- confirm.
1831 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
1832 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1833 Requires<[noHWROT32]> ;
1834
// Software fallback (noHWROT32) for int_nvvm_rotate_b32 by a register amount:
// forwarded to ROTL32reg_sw (defined outside this section).
1835 def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
1836 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
1837 Requires<[noHWROT32]> ;
1838
// Extracts one 32-bit half of the 64-bit register $src.  The asm opens a
// braced scope, declares a scratch register %dummy, and unpacks $src with
// mov.b64 into the pair {$dst, %dummy}: $dst takes the first element and
// %dummy absorbs the second.  No selection pattern.
// NOTE(review): per the def name the first element is the low half --
// confirm against the PTX mov (unpack) specification.
1839 def GET_LO_INT64
1840 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1841 !strconcat("{{\n\t",
1842 !strconcat(".reg .b32 %dummy;\n\t",
1843 !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
1844 !strconcat("}}", "")))),
1845 []> ;
1846
// Extracts one 32-bit half of the 64-bit register $src.  The asm opens a
// braced scope, declares a scratch register %dummy, and unpacks $src with
// mov.b64 into the pair {%dummy, $dst}: %dummy absorbs the first element and
// $dst takes the second.  No selection pattern.
// NOTE(review): per the def name the second element is the high half --
// confirm against the PTX mov (unpack) specification.
1847 def GET_HI_INT64
1848 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
1849 !strconcat("{{\n\t",
1850 !strconcat(".reg .b32 %dummy;\n\t",
1851 !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
1852 !strconcat("}}", "")))),
1853 []> ;
1854
// Packs two 32-bit registers into one 64-bit register with mov.b64; $lo is
// the first element of the source pair and $hi the second.  No selection
// pattern.
1855 def PACK_TWO_INT32
1856 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
1857 "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
1858
// int_nvvm_swap_lo_hi_b64: repack $src with its halves exchanged, i.e. the
// GET_HI result goes in PACK_TWO_INT32's $lo slot and the GET_LO result in
// its $hi slot.
1859 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
1860 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
1861 (GET_LO_INT64 Int64Regs:$src))> ;
1862
1863 // funnel shift, requires >= sm_32
// Four pattern-less defs: shf.l.wrap.b32 and shf.r.wrap.b32, each with an
// immediate (i32imm) or register (Int32Regs) shift amount.  Operands are
// emitted in the order $dst, $lo, $hi, $amt, and every def is gated by
// hasHWROT32.
1864 def SHF_L_WRAP_B32_IMM
1865 : NVPTXInst<(outs Int32Regs:$dst),
1866 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
1867 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1868 Requires<[hasHWROT32]>;
1869
1870 def SHF_L_WRAP_B32_REG
1871 : NVPTXInst<(outs Int32Regs:$dst),
1872 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1873 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1874 Requires<[hasHWROT32]>;
1875
1876 def SHF_R_WRAP_B32_IMM
1877 : NVPTXInst<(outs Int32Regs:$dst),
1878 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
1879 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1880 Requires<[hasHWROT32]>;
1881
1882 def SHF_R_WRAP_B32_REG
1883 : NVPTXInst<(outs Int32Regs:$dst),
1884 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1885 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
1886 Requires<[hasHWROT32]>;
1887
1888 // HW version of rotate 64
// Rotate-left of a 64-bit value built from two 32-bit funnel shifts:
//   new low  = SHF_L(lo = HI(src), hi = LO(src), amt)
//   new high = SHF_L(lo = LO(src), hi = HI(src), amt)
// and the two results are recombined with PACK_TWO_INT32(low, high).
// NOTE(review): shf.*.wrap presumably uses the amount modulo 32, in which
// case this expansion looks correct only for amt < 32 -- confirm against the
// PTX shf specification.
1889 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
1890 (PACK_TWO_INT32
1891 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1892 (GET_LO_INT64 Int64Regs:$src), imm:$amt),
1893 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1894 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
1895 Requires<[hasHWROT32]>;
1896
1897 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
1898 (PACK_TWO_INT32
1899 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1900 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
1901 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1902 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1903 Requires<[hasHWROT32]>;
1904
1905
// Hardware (hasHWROT32) rotate-right of a 64-bit value built from two 32-bit
// funnel shifts:
//   new low  = SHF_R(lo = LO(src), hi = HI(src), amt)
//   new high = SHF_R(lo = HI(src), hi = LO(src), amt)
// and the two results are recombined with PACK_TWO_INT32(low, high).
// NOTE(review): shf.*.wrap presumably uses the amount modulo 32, in which
// case this expansion looks correct only for amt < 32 -- confirm against the
// PTX shf specification.
1906 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
1907 (PACK_TWO_INT32
1908 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
1909 (GET_HI_INT64 Int64Regs:$src), imm:$amt),
1910 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
1911 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
1912 Requires<[hasHWROT32]>;
1913
1914 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
1915 (PACK_TWO_INT32
1916 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
1917 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
1918 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
1919 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
1920 Requires<[hasHWROT32]>;
1921
1922 // SW version of rotate 64
// Fallbacks used when noHWROT32 holds.  The immediate forms pass two shift
// counts to ROT64imm_sw: the rotate amount and its 64-bit complement.
// A 64-bit rotate-left by amt is (src << amt) | (src >> (64 - amt)), so the
// complement must come from SUB_FRM_64, exactly as the rotate-right immediate
// pattern does with the operands mirrored.  Using SUB_FRM_32 here would pair
// the left shift with a right shift by 32 - amt, which is not a 64-bit rotate.
// NOTE(review): ROT64imm_sw and SUB_FRM_64 are defined outside this section;
// ROT64imm_sw is assumed to take (src, left-shift count, right-shift count),
// as the operand order of the rotate-right pattern implies.
1923 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
1924 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
1925 Requires<[noHWROT32]>;
1926 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
1927 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
1928 Requires<[noHWROT32]>;
1929 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
1930 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
1931 Requires<[noHWROT32]>;
1932 def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
1933 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
1934 Requires<[noHWROT32]>;
1935
1936
1937 //-----------------------------------
1938 // Texture Intrinsics
1939 //-----------------------------------
1940
1941 // NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
1942 // also defined in NVPTXReplaceImageHandles.cpp
1943
1944 // texmode_independent
1945 let IsTex = 1, IsTexModeUnified = 0 in {
1946 // Texture fetch instructions using handles
// 1D texture fetches producing four Float32Regs results ($r, $g, $b, $a).
// Inputs are two Int64Regs operands $t and $s followed by the coordinate $x.
// The def name encodes <result type>_<coordinate type>, mirroring the
// ".f32.s32" / ".f32.f32" suffix in the asm string.
//   _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   _GRAD  adds Float32Regs $gradx and $grady and emits "tex.grad".
// All pattern lists are empty.
// NOTE(review): $t and $s are presumably the texture and sampler handles --
// confirm at the code that creates these instructions.
1947 def TEX_1D_F32_S32
1948 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1949 Float32Regs:$b, Float32Regs:$a),
1950 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1951 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1952 []>;
1953 def TEX_1D_F32_F32
1954 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1955 Float32Regs:$b, Float32Regs:$a),
1956 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
1957 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1958 []>;
1959 def TEX_1D_F32_F32_LEVEL
1960 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1961 Float32Regs:$b, Float32Regs:$a),
1962 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
1963 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1964 "[$t, $s, \\{$x\\}], $lod;",
1965 []>;
1966 def TEX_1D_F32_F32_GRAD
1967 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
1968 Float32Regs:$b, Float32Regs:$a),
1969 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1970 Float32Regs:$gradx, Float32Regs:$grady),
1971 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
1972 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
1973 []>;
// 1D texture fetches with the ".s32" result type: four Int32Regs results
// ($r, $g, $b, $a) from Int64Regs operands $t and $s plus the coordinate $x
// (Int32Regs for _S32, Float32Regs for _F32).
//   _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   _GRAD  adds Float32Regs $gradx and $grady and emits "tex.grad".
// All pattern lists are empty.
1974 def TEX_1D_S32_S32
1975 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1976 Int32Regs:$b, Int32Regs:$a),
1977 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
1978 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1979 []>;
1980 def TEX_1D_S32_F32
1981 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1982 Int32Regs:$b, Int32Regs:$a),
1983 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
1984 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
1985 []>;
1986 def TEX_1D_S32_F32_LEVEL
1987 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1988 Int32Regs:$b, Int32Regs:$a),
1989 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1990 Float32Regs:$lod),
1991 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
1992 "[$t, $s, \\{$x\\}], $lod;",
1993 []>;
1994 def TEX_1D_S32_F32_GRAD
1995 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
1996 Int32Regs:$b, Int32Regs:$a),
1997 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
1998 Float32Regs:$gradx, Float32Regs:$grady),
1999 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2000 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2001 []>;
// 1D texture fetches with the ".u32" result type.  Operands and register
// classes are the same as the ".s32" result variants (four Int32Regs
// results); only the result-type suffix in the asm string differs.
//   _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   _GRAD  adds Float32Regs $gradx and $grady and emits "tex.grad".
// All pattern lists are empty.
2002 def TEX_1D_U32_S32
2003 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2004 Int32Regs:$b, Int32Regs:$a),
2005 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2006 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2007 []>;
2008 def TEX_1D_U32_F32
2009 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2010 Int32Regs:$b, Int32Regs:$a),
2011 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2012 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2013 []>;
2014 def TEX_1D_U32_F32_LEVEL
2015 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2016 Int32Regs:$b, Int32Regs:$a),
2017 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2018 Float32Regs:$lod),
2019 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2020 "[$t, $s, \\{$x\\}], $lod;",
2021 []>;
2022 def TEX_1D_U32_F32_GRAD
2023 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2024 Int32Regs:$b, Int32Regs:$a),
2025 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2026 Float32Regs:$gradx, Float32Regs:$grady),
2027 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2028 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2029 []>;
2030
// 1D-array ("a1d") texture fetches producing four Float32Regs results.
// Compared with the non-array forms there is an extra Int32Regs operand $l,
// emitted as the first element of the coordinate vector {$l, $x}.
//   _LEVEL adds a Float32Regs $lod operand and emits "tex.level".
//   _GRAD  adds Float32Regs $gradx and $grady and emits "tex.grad".
// All pattern lists are empty.
// NOTE(review): $l is presumably the array layer index -- confirm.
2031 def TEX_1D_ARRAY_F32_S32
2032 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2033 Float32Regs:$b, Float32Regs:$a),
2034 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2035 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2036 "[$t, $s, \\{$l, $x\\}];",
2037 []>;
2038 def TEX_1D_ARRAY_F32_F32
2039 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2040 Float32Regs:$b, Float32Regs:$a),
2041 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2042 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2043 "[$t, $s, \\{$l, $x\\}];",
2044 []>;
2045 def TEX_1D_ARRAY_F32_F32_LEVEL
2046 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2047 Float32Regs:$b, Float32Regs:$a),
2048 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2049 Float32Regs:$lod),
2050 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2051 "[$t, $s, \\{$l, $x\\}], $lod;",
2052 []>;
2053 def TEX_1D_ARRAY_F32_F32_GRAD
2054 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2055 Float32Regs:$b, Float32Regs:$a),
2056 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2057 Float32Regs:$gradx, Float32Regs:$grady),
2058 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2059 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2060 []>;
// 1D-array texture fetches (tex.*.a1d.v4.s32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Operand layout matches the other a1d forms:
// $t, $s (Int64Regs), Int32Regs $l printed first in the coordinate vector, then
// $x (Int32Regs for _S32, Float32Regs for _F32).  _LEVEL appends $lod; _GRAD
// appends {$gradx}, {$grady}.  All pattern lists are empty.
2061 def TEX_1D_ARRAY_S32_S32
2062 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2063 Int32Regs:$b, Int32Regs:$a),
2064 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2065 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2066 "[$t, $s, \\{$l, $x\\}];",
2067 []>;
2068 def TEX_1D_ARRAY_S32_F32
2069 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2070 Int32Regs:$b, Int32Regs:$a),
2071 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2072 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2073 "[$t, $s, \\{$l, $x\\}];",
2074 []>;
2075 def TEX_1D_ARRAY_S32_F32_LEVEL
2076 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2077 Int32Regs:$b, Int32Regs:$a),
2078 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2079 Float32Regs:$lod),
2080 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2081 "[$t, $s, \\{$l, $x\\}], $lod;",
2082 []>;
2083 def TEX_1D_ARRAY_S32_F32_GRAD
2084 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2085 Int32Regs:$b, Int32Regs:$a),
2086 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2087 Float32Regs:$gradx, Float32Regs:$grady),
2088 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2089 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2090 []>;
// 1D-array texture fetches (tex.*.a1d.v4.u32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Register classes are identical to the s32 forms;
// only the printed result-type suffix (.u32) differs.  Inputs are $t, $s
// (Int64Regs), Int32Regs $l, and $x (Int32Regs for _S32, Float32Regs for _F32).
// _LEVEL appends $lod; _GRAD appends {$gradx}, {$grady}.  Pattern lists are empty.
2091 def TEX_1D_ARRAY_U32_S32
2092 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2093 Int32Regs:$b, Int32Regs:$a),
2094 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2095 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2096 "[$t, $s, \\{$l, $x\\}];",
2097 []>;
2098 def TEX_1D_ARRAY_U32_F32
2099 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2100 Int32Regs:$b, Int32Regs:$a),
2101 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2102 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2103 "[$t, $s, \\{$l, $x\\}];",
2104 []>;
2105 def TEX_1D_ARRAY_U32_F32_LEVEL
2106 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2107 Int32Regs:$b, Int32Regs:$a),
2108 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2109 Float32Regs:$lod),
2110 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2111 "[$t, $s, \\{$l, $x\\}], $lod;",
2112 []>;
2113 def TEX_1D_ARRAY_U32_F32_GRAD
2114 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2115 Int32Regs:$b, Int32Regs:$a),
2116 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2117 Float32Regs:$gradx, Float32Regs:$grady),
2118 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2119 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2120 []>;
2121
// 2D texture fetches (tex.*.2d.v4.f32.*) producing four Float32Regs results
// ($r, $g, $b, $a).  Inputs are $t and $s (Int64Regs; presumably texture and
// sampler handles -- confirm) and the coordinate pair {$x, $y} (Int32Regs for
// _S32, Float32Regs for _F32).  _LEVEL appends $lod; _GRAD appends two
// two-element vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
// All pattern lists are empty.
2122 def TEX_2D_F32_S32
2123 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2124 Float32Regs:$b, Float32Regs:$a),
2125 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2126 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2127 "[$t, $s, \\{$x, $y\\}];",
2128 []>;
2129 def TEX_2D_F32_F32
2130 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2131 Float32Regs:$b, Float32Regs:$a),
2132 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2133 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2134 "[$t, $s, \\{$x, $y\\}];",
2135 []>;
2136 def TEX_2D_F32_F32_LEVEL
2137 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2138 Float32Regs:$b, Float32Regs:$a),
2139 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2140 Float32Regs:$lod),
2141 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2142 "[$t, $s, \\{$x, $y\\}], $lod;",
2143 []>;
2144 def TEX_2D_F32_F32_GRAD
2145 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2146 Float32Regs:$b, Float32Regs:$a),
2147 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2148 Float32Regs:$gradx0, Float32Regs:$gradx1,
2149 Float32Regs:$grady0, Float32Regs:$grady1),
2150 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2151 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2152 "\\{$grady0, $grady1\\};",
2153 []>;
// 2D texture fetches (tex.*.2d.v4.s32.*) producing four Int32Regs results
// ($r, $g, $b, $a).  Inputs are $t, $s (Int64Regs) and the coordinate pair
// {$x, $y} (Int32Regs for _S32, Float32Regs for _F32).  _LEVEL appends $lod;
// _GRAD appends {$gradx0, $gradx1} and {$grady0, $grady1}.
// All pattern lists are empty.
2154 def TEX_2D_S32_S32
2155 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2156 Int32Regs:$b, Int32Regs:$a),
2157 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2158 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2159 "[$t, $s, \\{$x, $y\\}];",
2160 []>;
2161 def TEX_2D_S32_F32
2162 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2163 Int32Regs:$b, Int32Regs:$a),
2164 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2165 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2166 "[$t, $s, \\{$x, $y\\}];",
2167 []>;
2168 def TEX_2D_S32_F32_LEVEL
2169 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2170 Int32Regs:$b, Int32Regs:$a),
2171 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2172 Float32Regs:$lod),
2173 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2174 "[$t, $s, \\{$x, $y\\}], $lod;",
2175 []>;
2176 def TEX_2D_S32_F32_GRAD
2177 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2178 Int32Regs:$b, Int32Regs:$a),
2179 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2180 Float32Regs:$gradx0, Float32Regs:$gradx1,
2181 Float32Regs:$grady0, Float32Regs:$grady1),
2182 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2183 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2184 "\\{$grady0, $grady1\\};",
2185 []>;
// 2D texture fetches (tex.*.2d.v4.u32.*) producing four Int32Regs results
// ($r, $g, $b, $a).  Register classes match the s32 forms; only the printed
// result-type suffix (.u32) differs.  Inputs are $t, $s (Int64Regs) and
// {$x, $y}.  _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1} and
// {$grady0, $grady1}.  All pattern lists are empty.
2186 def TEX_2D_U32_S32
2187 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2188 Int32Regs:$b, Int32Regs:$a),
2189 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2190 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2191 "[$t, $s, \\{$x, $y\\}];",
2192 []>;
2193 def TEX_2D_U32_F32
2194 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2195 Int32Regs:$b, Int32Regs:$a),
2196 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2197 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2198 "[$t, $s, \\{$x, $y\\}];",
2199 []>;
2200 def TEX_2D_U32_F32_LEVEL
2201 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2202 Int32Regs:$b, Int32Regs:$a),
2203 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2204 Float32Regs:$lod),
2205 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2206 "[$t, $s, \\{$x, $y\\}], $lod;",
2207 []>;
2208 def TEX_2D_U32_F32_GRAD
2209 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2210 Int32Regs:$b, Int32Regs:$a),
2211 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2212 Float32Regs:$gradx0, Float32Regs:$gradx1,
2213 Float32Regs:$grady0, Float32Regs:$grady1),
2214 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2215 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2216 "\\{$grady0, $grady1\\};",
2217 []>;
2218
// 2D-array texture fetches (tex.*.a2d.v4.f32.*) producing four Float32Regs
// results ($r, $g, $b, $a).  Inputs are $t, $s (Int64Regs), Int32Regs $l
// (printed first; presumably the array layer -- confirm) and coordinates $x, $y.
// The coordinate vector is printed as {$l, $x, $y, $y}: $y is repeated so that
// four elements are emitted.  NOTE(review): presumably the fourth element is
// padding ignored by PTX -- confirm against the PTX ISA.
// _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1}, {$grady0, $grady1}.
// All pattern lists are empty.
2219 def TEX_2D_ARRAY_F32_S32
2220 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2221 Float32Regs:$b, Float32Regs:$a),
2222 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2223 Int32Regs:$y),
2224 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2225 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2226 []>;
2227 def TEX_2D_ARRAY_F32_F32
2228 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2229 Float32Regs:$b, Float32Regs:$a),
2230 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2231 Float32Regs:$y),
2232 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2233 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2234 []>;
2235 def TEX_2D_ARRAY_F32_F32_LEVEL
2236 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2237 Float32Regs:$b, Float32Regs:$a),
2238 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2239 Float32Regs:$y, Float32Regs:$lod),
2240 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2241 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2242 []>;
2243 def TEX_2D_ARRAY_F32_F32_GRAD
2244 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2245 Float32Regs:$b, Float32Regs:$a),
2246 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2247 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2248 Float32Regs:$grady0, Float32Regs:$grady1),
2249 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2250 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2251 "\\{$grady0, $grady1\\};",
2252 []>;
// 2D-array texture fetches (tex.*.a2d.v4.s32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Inputs are $t, $s (Int64Regs), Int32Regs $l and
// coordinates $x, $y.  The coordinate vector is printed as {$l, $x, $y, $y},
// repeating $y to emit four elements (presumably padding -- confirm against the
// PTX ISA).  _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1},
// {$grady0, $grady1}.  All pattern lists are empty.
2253 def TEX_2D_ARRAY_S32_S32
2254 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2255 Int32Regs:$b, Int32Regs:$a),
2256 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2257 Int32Regs:$y),
2258 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2259 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2260 []>;
2261 def TEX_2D_ARRAY_S32_F32
2262 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2263 Int32Regs:$b, Int32Regs:$a),
2264 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2265 Float32Regs:$y),
2266 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2267 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2268 []>;
2269 def TEX_2D_ARRAY_S32_F32_LEVEL
2270 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2271 Int32Regs:$b, Int32Regs:$a),
2272 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2273 Float32Regs:$y, Float32Regs:$lod),
2274 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2275 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2276 []>;
2277 def TEX_2D_ARRAY_S32_F32_GRAD
2278 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2279 Int32Regs:$b, Int32Regs:$a),
2280 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2281 Float32Regs:$y,
2282 Float32Regs:$gradx0, Float32Regs:$gradx1,
2283 Float32Regs:$grady0, Float32Regs:$grady1),
2284 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2285 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2286 "\\{$grady0, $grady1\\};",
2287 []>;
// 2D-array texture fetches (tex.*.a2d.v4.u32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Register classes match the s32 forms; only the
// printed result-type suffix (.u32) differs.  The coordinate vector is printed
// as {$l, $x, $y, $y}, repeating $y to emit four elements (presumably padding
// -- confirm against the PTX ISA).  _LEVEL appends $lod; _GRAD appends
// {$gradx0, $gradx1}, {$grady0, $grady1}.  All pattern lists are empty.
2288 def TEX_2D_ARRAY_U32_S32
2289 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2290 Int32Regs:$b, Int32Regs:$a),
2291 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2292 Int32Regs:$y),
2293 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2294 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2295 []>;
2296 def TEX_2D_ARRAY_U32_F32
2297 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2298 Int32Regs:$b, Int32Regs:$a),
2299 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2300 Float32Regs:$y),
2301 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2302 "[$t, $s, \\{$l, $x, $y, $y\\}];",
2303 []>;
2304 def TEX_2D_ARRAY_U32_F32_LEVEL
2305 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2306 Int32Regs:$b, Int32Regs:$a),
2307 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2308 Float32Regs:$y, Float32Regs:$lod),
2309 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2310 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2311 []>;
2312 def TEX_2D_ARRAY_U32_F32_GRAD
2313 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2314 Int32Regs:$b, Int32Regs:$a),
2315 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2316 Float32Regs:$y,
2317 Float32Regs:$gradx0, Float32Regs:$gradx1,
2318 Float32Regs:$grady0, Float32Regs:$grady1),
2319 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2320 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2321 "\\{$grady0, $grady1\\};",
2322 []>;
2323
// 3D texture fetches (tex.*.3d.v4.f32.*) producing four Float32Regs results
// ($r, $g, $b, $a).  Inputs are $t, $s (Int64Regs; presumably texture and
// sampler handles -- confirm) and coordinates $x, $y, $z.  The coordinate
// vector is printed as {$x, $y, $z, $z}: $z is repeated so that four elements
// are emitted.  NOTE(review): presumably the fourth element is padding ignored
// by PTX -- confirm against the PTX ISA.
// _LEVEL appends $lod.  _GRAD takes three components per gradient and prints
// each as a four-element vector with the last component repeated.
// All pattern lists are empty.
2324 def TEX_3D_F32_S32
2325 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2326 Float32Regs:$b, Float32Regs:$a),
2327 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2328 Int32Regs:$z),
2329 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2330 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2331 []>;
2332 def TEX_3D_F32_F32
2333 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2334 Float32Regs:$b, Float32Regs:$a),
2335 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2336 Float32Regs:$z),
2337 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2338 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2339 []>;
2340 def TEX_3D_F32_F32_LEVEL
2341 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2342 Float32Regs:$b, Float32Regs:$a),
2343 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2344 Float32Regs:$z, Float32Regs:$lod),
2345 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2346 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2347 []>;
2348 def TEX_3D_F32_F32_GRAD
2349 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2350 Float32Regs:$b, Float32Regs:$a),
2351 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2352 Float32Regs:$z,
2353 Float32Regs:$gradx0, Float32Regs:$gradx1,
2354 Float32Regs:$gradx2, Float32Regs:$grady0,
2355 Float32Regs:$grady1, Float32Regs:$grady2),
2356 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2357 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2358 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2359 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2360 []>;
// 3D texture fetches (tex.*.3d.v4.s32.*) producing four Int32Regs results
// ($r, $g, $b, $a).  Inputs are $t, $s (Int64Regs) and coordinates $x, $y, $z,
// printed as {$x, $y, $z, $z} with $z repeated to emit four elements
// (presumably padding -- confirm against the PTX ISA).  _LEVEL appends $lod;
// _GRAD prints each three-component gradient as a four-element vector with the
// last component repeated.  All pattern lists are empty.
2361 def TEX_3D_S32_S32
2362 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363 Int32Regs:$b, Int32Regs:$a),
2364 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2365 Int32Regs:$z),
2366 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2367 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2368 []>;
2369 def TEX_3D_S32_F32
2370 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2371 Int32Regs:$b, Int32Regs:$a),
2372 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2373 Float32Regs:$z),
2374 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2375 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2376 []>;
2377 def TEX_3D_S32_F32_LEVEL
2378 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2379 Int32Regs:$b, Int32Regs:$a),
2380 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2381 Float32Regs:$z, Float32Regs:$lod),
2382 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2383 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2384 []>;
2385 def TEX_3D_S32_F32_GRAD
2386 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2387 Int32Regs:$b, Int32Regs:$a),
2388 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2389 Float32Regs:$z,
2390 Float32Regs:$gradx0, Float32Regs:$gradx1,
2391 Float32Regs:$gradx2, Float32Regs:$grady0,
2392 Float32Regs:$grady1, Float32Regs:$grady2),
2393 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2394 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2395 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2396 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2397 []>;
// 3D texture fetches (tex.*.3d.v4.u32.*) producing four Int32Regs results
// ($r, $g, $b, $a).  Register classes match the s32 forms; only the printed
// result-type suffix (.u32) differs.  Coordinates are printed as
// {$x, $y, $z, $z} with $z repeated to emit four elements (presumably padding
// -- confirm against the PTX ISA).  _LEVEL appends $lod; _GRAD prints each
// three-component gradient as a four-element vector with the last component
// repeated.  All pattern lists are empty.
2398 def TEX_3D_U32_S32
2399 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2400 Int32Regs:$b, Int32Regs:$a),
2401 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
2402 Int32Regs:$z),
2403 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2404 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2405 []>;
2406 def TEX_3D_U32_F32
2407 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2408 Int32Regs:$b, Int32Regs:$a),
2409 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2410 Float32Regs:$z),
2411 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2412 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2413 []>;
2414 def TEX_3D_U32_F32_LEVEL
2415 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2416 Int32Regs:$b, Int32Regs:$a),
2417 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2418 Float32Regs:$z, Float32Regs:$lod),
2419 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2420 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2421 []>;
2422 def TEX_3D_U32_F32_GRAD
2423 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2424 Int32Regs:$b, Int32Regs:$a),
2425 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2426 Float32Regs:$z,
2427 Float32Regs:$gradx0, Float32Regs:$gradx1,
2428 Float32Regs:$gradx2, Float32Regs:$grady0,
2429 Float32Regs:$grady1, Float32Regs:$grady2),
2430 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2431 "[$t, $s, \\{$x, $y, $z, $z\\}], "
2432 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
2433 "\\{$grady0, $grady1, $grady2, $grady2\\};",
2434 []>;
2435
// Cube texture fetches (tex[.level].cube.v4.{f32,s32,u32}.f32).  Only
// Float32Regs coordinates are defined in this group, and only the plain and
// _LEVEL variants (no _GRAD).  Results are four Float32Regs for the F32 forms
// and four Int32Regs for the S32/U32 forms.  Inputs are $t, $s (Int64Regs;
// presumably texture and sampler handles -- confirm) and coordinates $x, $y, $z
// printed as {$x, $y, $z, $z} with $z repeated to emit four elements
// (presumably padding -- confirm against the PTX ISA).  _LEVEL appends $lod.
// All pattern lists are empty.
2436 def TEX_CUBE_F32_F32
2437 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2438 Float32Regs:$b, Float32Regs:$a),
2439 (ins Int64Regs:$t, Int64Regs:$s,
2440 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2441 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2442 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2443 []>;
2444 def TEX_CUBE_F32_F32_LEVEL
2445 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2446 Float32Regs:$b, Float32Regs:$a),
2447 (ins Int64Regs:$t, Int64Regs:$s,
2448 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2449 Float32Regs:$lod),
2450 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2451 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2452 []>;
2453 def TEX_CUBE_S32_F32
2454 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2455 Int32Regs:$b, Int32Regs:$a),
2456 (ins Int64Regs:$t, Int64Regs:$s,
2457 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2458 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2459 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2460 []>;
2461 def TEX_CUBE_S32_F32_LEVEL
2462 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2463 Int32Regs:$b, Int32Regs:$a),
2464 (ins Int64Regs:$t, Int64Regs:$s,
2465 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2466 Float32Regs:$lod),
2467 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2468 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2469 []>;
2470 def TEX_CUBE_U32_F32
2471 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2472 Int32Regs:$b, Int32Regs:$a),
2473 (ins Int64Regs:$t, Int64Regs:$s,
2474 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2475 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2476 "[$t, $s, \\{$x, $y, $z, $z\\}];",
2477 []>;
2478 def TEX_CUBE_U32_F32_LEVEL
2479 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2480 Int32Regs:$b, Int32Regs:$a),
2481 (ins Int64Regs:$t, Int64Regs:$s,
2482 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2483 Float32Regs:$lod),
2484 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2485 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2486 []>;
2487
// Cube-array texture fetches (tex[.level].acube.v4.{f32,s32,u32}.f32).  Only
// Float32Regs coordinates are defined in this group, and only the plain and
// _LEVEL variants.  Results are four Float32Regs for the F32 forms and four
// Int32Regs for the S32/U32 forms.  Inputs are $t, $s (Int64Regs), Int32Regs $l
// (printed first; presumably the array layer -- confirm) and $x, $y, $z.
// Here the coordinate vector {$l, $x, $y, $z} already has four distinct
// elements, so no operand is repeated.  _LEVEL appends $lod.
// All pattern lists are empty.
2488 def TEX_CUBE_ARRAY_F32_F32
2489 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2490 Float32Regs:$b, Float32Regs:$a),
2491 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2492 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2493 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2494 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2495 []>;
2496 def TEX_CUBE_ARRAY_F32_F32_LEVEL
2497 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2498 Float32Regs:$b, Float32Regs:$a),
2499 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2500 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2501 Float32Regs:$lod),
2502 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2503 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2504 []>;
2505 def TEX_CUBE_ARRAY_S32_F32
2506 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2507 Int32Regs:$b, Int32Regs:$a),
2508 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2509 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2510 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2511 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2512 []>;
2513 def TEX_CUBE_ARRAY_S32_F32_LEVEL
2514 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2515 Int32Regs:$b, Int32Regs:$a),
2516 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2517 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2518 Float32Regs:$lod),
2519 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2520 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2521 []>;
2522 def TEX_CUBE_ARRAY_U32_F32
2523 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2524 Int32Regs:$b, Int32Regs:$a),
2525 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2526 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
2527 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2528 "[$t, $s, \\{$l, $x, $y, $z\\}];",
2529 []>;
2530 def TEX_CUBE_ARRAY_U32_F32_LEVEL
2531 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2532 Int32Regs:$b, Int32Regs:$a),
2533 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
2534 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
2535 Float32Regs:$lod),
2536 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2537 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
2538 []>;
2539
// tld4 instructions on 2D textures returning four Float32Regs values
// ($v0..$v3).  One definition per component selector printed in the mnemonic
// (tld4.r / .g / .b / .a); the operand lists are otherwise identical:
// $t, $s (Int64Regs; presumably texture and sampler handles -- confirm) and
// Float32Regs coordinates {$x, $y}.  All pattern lists are empty.
2540 def TLD4_R_2D_F32_F32
2541 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2542 Float32Regs:$v2, Float32Regs:$v3),
2543 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2544 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2545 "[$t, $s, \\{$x, $y\\}];",
2546 []>;
2547 def TLD4_G_2D_F32_F32
2548 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2549 Float32Regs:$v2, Float32Regs:$v3),
2550 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2551 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2552 "[$t, $s, \\{$x, $y\\}];",
2553 []>;
2554 def TLD4_B_2D_F32_F32
2555 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2556 Float32Regs:$v2, Float32Regs:$v3),
2557 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2558 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2559 "[$t, $s, \\{$x, $y\\}];",
2560 []>;
2561 def TLD4_A_2D_F32_F32
2562 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
2563 Float32Regs:$v2, Float32Regs:$v3),
2564 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2565 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2566 "[$t, $s, \\{$x, $y\\}];",
2567 []>;
// tld4 instructions on 2D textures returning four Int32Regs values ($v0..$v3)
// with the .s32 result suffix.  One definition per component selector
// (tld4.r / .g / .b / .a); inputs are $t, $s (Int64Regs) and Float32Regs
// coordinates {$x, $y}.  All pattern lists are empty.
2568 def TLD4_R_2D_S32_F32
2569 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2570 Int32Regs:$v2, Int32Regs:$v3),
2571 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2572 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2573 "[$t, $s, \\{$x, $y\\}];",
2574 []>;
2575 def TLD4_G_2D_S32_F32
2576 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2577 Int32Regs:$v2, Int32Regs:$v3),
2578 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2579 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2580 "[$t, $s, \\{$x, $y\\}];",
2581 []>;
2582 def TLD4_B_2D_S32_F32
2583 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2584 Int32Regs:$v2, Int32Regs:$v3),
2585 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2586 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2587 "[$t, $s, \\{$x, $y\\}];",
2588 []>;
2589 def TLD4_A_2D_S32_F32
2590 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2591 Int32Regs:$v2, Int32Regs:$v3),
2592 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2593 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2594 "[$t, $s, \\{$x, $y\\}];",
2595 []>;
// tld4 instructions on 2D textures returning four Int32Regs values ($v0..$v3)
// with the .u32 result suffix.  Register classes match the s32 forms; only the
// printed suffix differs.  One definition per component selector
// (tld4.r / .g / .b / .a); inputs are $t, $s (Int64Regs) and Float32Regs
// coordinates {$x, $y}.  All pattern lists are empty.
2596 def TLD4_R_2D_U32_F32
2597 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2598 Int32Regs:$v2, Int32Regs:$v3),
2599 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2600 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2601 "[$t, $s, \\{$x, $y\\}];",
2602 []>;
2603 def TLD4_G_2D_U32_F32
2604 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2605 Int32Regs:$v2, Int32Regs:$v3),
2606 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2607 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2608 "[$t, $s, \\{$x, $y\\}];",
2609 []>;
2610 def TLD4_B_2D_U32_F32
2611 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2612 Int32Regs:$v2, Int32Regs:$v3),
2613 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2614 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2615 "[$t, $s, \\{$x, $y\\}];",
2616 []>;
2617 def TLD4_A_2D_U32_F32
2618 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
2619 Int32Regs:$v2, Int32Regs:$v3),
2620 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2621 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
2622 "[$t, $s, \\{$x, $y\\}];",
2623 []>;
2624 }
2625
2626
2627 // texmode_unified
2628 let IsTex = 1, IsTexModeUnified = 1 in {
2629 // Texture fetch instructions using handles
// Unified-mode 1D texture fetches (tex.*.1d.v4.f32.*) producing four
// Float32Regs results ($r, $g, $b, $a).  These take a single Int64Regs handle
// $t and no $s operand; the address is printed as [$t, {$x}].  $x is Int32Regs
// for _S32 and Float32Regs for _F32.  _LEVEL appends $lod; _GRAD appends
// {$gradx}, {$grady}.  All pattern lists are empty.
2630 def TEX_UNIFIED_1D_F32_S32
2631 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2632 Float32Regs:$b, Float32Regs:$a),
2633 (ins Int64Regs:$t, Int32Regs:$x),
2634 "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2635 []>;
2636 def TEX_UNIFIED_1D_F32_F32
2637 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2638 Float32Regs:$b, Float32Regs:$a),
2639 (ins Int64Regs:$t, Float32Regs:$x),
2640 "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2641 []>;
2642 def TEX_UNIFIED_1D_F32_F32_LEVEL
2643 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2644 Float32Regs:$b, Float32Regs:$a),
2645 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
2646 "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2647 "[$t, \\{$x\\}], $lod;",
2648 []>;
2649 def TEX_UNIFIED_1D_F32_F32_GRAD
2650 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2651 Float32Regs:$b, Float32Regs:$a),
2652 (ins Int64Regs:$t, Float32Regs:$x,
2653 Float32Regs:$gradx, Float32Regs:$grady),
2654 "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2655 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2656 []>;
// Unified-mode 1D texture fetches (tex.*.1d.v4.s32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Single Int64Regs handle $t, no $s operand; the
// address is printed as [$t, {$x}].  $x is Int32Regs for _S32 and Float32Regs
// for _F32.  _LEVEL appends $lod; _GRAD appends {$gradx}, {$grady}.
// All pattern lists are empty.
2657 def TEX_UNIFIED_1D_S32_S32
2658 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2659 Int32Regs:$b, Int32Regs:$a),
2660 (ins Int64Regs:$t, Int32Regs:$x),
2661 "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2662 []>;
2663 def TEX_UNIFIED_1D_S32_F32
2664 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2665 Int32Regs:$b, Int32Regs:$a),
2666 (ins Int64Regs:$t, Float32Regs:$x),
2667 "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2668 []>;
2669 def TEX_UNIFIED_1D_S32_F32_LEVEL
2670 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2671 Int32Regs:$b, Int32Regs:$a),
2672 (ins Int64Regs:$t, Float32Regs:$x,
2673 Float32Regs:$lod),
2674 "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2675 "[$t, \\{$x\\}], $lod;",
2676 []>;
2677 def TEX_UNIFIED_1D_S32_F32_GRAD
2678 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2679 Int32Regs:$b, Int32Regs:$a),
2680 (ins Int64Regs:$t, Float32Regs:$x,
2681 Float32Regs:$gradx, Float32Regs:$grady),
2682 "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2683 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2684 []>;
// Unified-mode 1D texture fetches (tex.*.1d.v4.u32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Register classes match the s32 forms; only the
// printed result-type suffix (.u32) differs.  Single Int64Regs handle $t, no $s
// operand.  _LEVEL appends $lod; _GRAD appends {$gradx}, {$grady}.
// All pattern lists are empty.
2685 def TEX_UNIFIED_1D_U32_S32
2686 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2687 Int32Regs:$b, Int32Regs:$a),
2688 (ins Int64Regs:$t, Int32Regs:$x),
2689 "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2690 []>;
2691 def TEX_UNIFIED_1D_U32_F32
2692 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2693 Int32Regs:$b, Int32Regs:$a),
2694 (ins Int64Regs:$t, Float32Regs:$x),
2695 "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
2696 []>;
2697 def TEX_UNIFIED_1D_U32_F32_LEVEL
2698 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2699 Int32Regs:$b, Int32Regs:$a),
2700 (ins Int64Regs:$t, Float32Regs:$x,
2701 Float32Regs:$lod),
2702 "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2703 "[$t, \\{$x\\}], $lod;",
2704 []>;
2705 def TEX_UNIFIED_1D_U32_F32_GRAD
2706 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2707 Int32Regs:$b, Int32Regs:$a),
2708 (ins Int64Regs:$t, Float32Regs:$x,
2709 Float32Regs:$gradx, Float32Regs:$grady),
2710 "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2711 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2712 []>;
2713
// Unified-mode 1D-array texture fetches (tex.*.a1d.v4.f32.*) producing four
// Float32Regs results ($r, $g, $b, $a).  Single Int64Regs handle $t, no $s
// operand.  Int32Regs $l is printed first in the coordinate vector (presumably
// the array layer -- confirm), followed by $x (Int32Regs for _S32, Float32Regs
// for _F32).  _LEVEL appends $lod; _GRAD appends {$gradx}, {$grady}.
// All pattern lists are empty.
2714 def TEX_UNIFIED_1D_ARRAY_F32_S32
2715 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2716 Float32Regs:$b, Float32Regs:$a),
2717 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2718 "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2719 "[$t, \\{$l, $x\\}];",
2720 []>;
2721 def TEX_UNIFIED_1D_ARRAY_F32_F32
2722 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2723 Float32Regs:$b, Float32Regs:$a),
2724 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2725 "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2726 "[$t, \\{$l, $x\\}];",
2727 []>;
2728 def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
2729 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2730 Float32Regs:$b, Float32Regs:$a),
2731 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2732 Float32Regs:$lod),
2733 "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2734 "[$t, \\{$l, $x\\}], $lod;",
2735 []>;
2736 def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
2737 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2738 Float32Regs:$b, Float32Regs:$a),
2739 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2740 Float32Regs:$gradx, Float32Regs:$grady),
2741 "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2742 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2743 []>;
// Unified-mode 1D-array texture fetches (tex.*.a1d.v4.s32.*) producing four
// Int32Regs results ($r, $g, $b, $a).  Single Int64Regs handle $t, no $s
// operand; the coordinate vector is printed as {$l, $x} with Int32Regs $l first.
// $x is Int32Regs for _S32 and Float32Regs for _F32.  _LEVEL appends $lod;
// _GRAD appends {$gradx}, {$grady}.  All pattern lists are empty.
2744 def TEX_UNIFIED_1D_ARRAY_S32_S32
2745 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2746 Int32Regs:$b, Int32Regs:$a),
2747 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2748 "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2749 "[$t, \\{$l, $x\\}];",
2750 []>;
2751 def TEX_UNIFIED_1D_ARRAY_S32_F32
2752 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2753 Int32Regs:$b, Int32Regs:$a),
2754 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2755 "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2756 "[$t, \\{$l, $x\\}];",
2757 []>;
2758 def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
2759 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2760 Int32Regs:$b, Int32Regs:$a),
2761 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2762 Float32Regs:$lod),
2763 "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2764 "[$t, \\{$l, $x\\}], $lod;",
2765 []>;
2766 def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
2767 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2768 Int32Regs:$b, Int32Regs:$a),
2769 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2770 Float32Regs:$gradx, Float32Regs:$grady),
2771 "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2772 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2773 []>;
// Unified-mode 1D-array texture fetches (tex.*.a1d.v4.u32.*) producing four
// Int32Regs results ($r, $g, $b, $a).  Register classes match the s32 forms;
// only the printed result-type suffix (.u32) differs.  Single Int64Regs handle
// $t, no $s operand; coordinates are printed as {$l, $x}.  _LEVEL appends $lod;
// _GRAD appends {$gradx}, {$grady}.  All pattern lists are empty.
2774 def TEX_UNIFIED_1D_ARRAY_U32_S32
2775 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2776 Int32Regs:$b, Int32Regs:$a),
2777 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
2778 "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2779 "[$t, \\{$l, $x\\}];",
2780 []>;
2781 def TEX_UNIFIED_1D_ARRAY_U32_F32
2782 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2783 Int32Regs:$b, Int32Regs:$a),
2784 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
2785 "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2786 "[$t, \\{$l, $x\\}];",
2787 []>;
2788 def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
2789 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2790 Int32Regs:$b, Int32Regs:$a),
2791 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2792 Float32Regs:$lod),
2793 "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2794 "[$t, \\{$l, $x\\}], $lod;",
2795 []>;
2796 def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
2797 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2798 Int32Regs:$b, Int32Regs:$a),
2799 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2800 Float32Regs:$gradx, Float32Regs:$grady),
2801 "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2802 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2803 []>;
2804
// Unified-mode 2D texture fetches (tex.*.2d.v4.f32.*) producing four
// Float32Regs results ($r, $g, $b, $a).  Single Int64Regs handle $t, no $s
// operand; the address is printed as [$t, {$x, $y}].  Coordinates are Int32Regs
// for _S32 and Float32Regs for _F32.  _LEVEL appends $lod; _GRAD appends
// {$gradx0, $gradx1} and {$grady0, $grady1}.  All pattern lists are empty.
2805 def TEX_UNIFIED_2D_F32_S32
2806 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2807 Float32Regs:$b, Float32Regs:$a),
2808 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2809 "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2810 "[$t, \\{$x, $y\\}];",
2811 []>;
2812 def TEX_UNIFIED_2D_F32_F32
2813 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2814 Float32Regs:$b, Float32Regs:$a),
2815 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2816 "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2817 "[$t, \\{$x, $y\\}];",
2818 []>;
2819 def TEX_UNIFIED_2D_F32_F32_LEVEL
2820 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2821 Float32Regs:$b, Float32Regs:$a),
2822 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2823 Float32Regs:$lod),
2824 "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2825 "[$t, \\{$x, $y\\}], $lod;",
2826 []>;
2827 def TEX_UNIFIED_2D_F32_F32_GRAD
2828 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2829 Float32Regs:$b, Float32Regs:$a),
2830 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2831 Float32Regs:$gradx0, Float32Regs:$gradx1,
2832 Float32Regs:$grady0, Float32Regs:$grady1),
2833 "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2834 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2835 "\\{$grady0, $grady1\\};",
2836 []>;
// Unified-mode 2D texture fetches (tex.*.2d.v4.s32.*) producing four Int32Regs
// results ($r, $g, $b, $a).  Single Int64Regs handle $t, no $s operand; the
// address is printed as [$t, {$x, $y}].  Coordinates are Int32Regs for _S32 and
// Float32Regs for _F32.  _LEVEL appends $lod; _GRAD appends {$gradx0, $gradx1}
// and {$grady0, $grady1}.  All pattern lists are empty.
2837 def TEX_UNIFIED_2D_S32_S32
2838 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2839 Int32Regs:$b, Int32Regs:$a),
2840 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2841 "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2842 "[$t, \\{$x, $y\\}];",
2843 []>;
2844 def TEX_UNIFIED_2D_S32_F32
2845 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2846 Int32Regs:$b, Int32Regs:$a),
2847 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2848 "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2849 "[$t, \\{$x, $y\\}];",
2850 []>;
2851 def TEX_UNIFIED_2D_S32_F32_LEVEL
2852 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2853 Int32Regs:$b, Int32Regs:$a),
2854 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2855 Float32Regs:$lod),
2856 "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2857 "[$t, \\{$x, $y\\}], $lod;",
2858 []>;
2859 def TEX_UNIFIED_2D_S32_F32_GRAD
2860 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2861 Int32Regs:$b, Int32Regs:$a),
2862 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2863 Float32Regs:$gradx0, Float32Regs:$gradx1,
2864 Float32Regs:$grady0, Float32Regs:$grady1),
2865 "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2866 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2867 "\\{$grady0, $grady1\\};",
2868 []>;
2869 def TEX_UNIFIED_2D_U32_S32
2870 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2871 Int32Regs:$b, Int32Regs:$a),
2872 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
2873 "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2874 "[$t, \\{$x, $y\\}];",
2875 []>;
2876 def TEX_UNIFIED_2D_U32_F32
2877 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2878 Int32Regs:$b, Int32Regs:$a),
2879 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
2880 "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2881 "[$t, \\{$x, $y\\}];",
2882 []>;
2883 def TEX_UNIFIED_2D_U32_F32_LEVEL
2884 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2885 Int32Regs:$b, Int32Regs:$a),
2886 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2887 Float32Regs:$lod),
2888 "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2889 "[$t, \\{$x, $y\\}], $lod;",
2890 []>;
2891 def TEX_UNIFIED_2D_U32_F32_GRAD
2892 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2893 Int32Regs:$b, Int32Regs:$a),
2894 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
2895 Float32Regs:$gradx0, Float32Regs:$gradx1,
2896 Float32Regs:$grady0, Float32Regs:$grady1),
2897 "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2898 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2899 "\\{$grady0, $grady1\\};",
2900 []>;
2901
// TEX_UNIFIED_2D_ARRAY_<result>_<coord>[_LEVEL|_GRAD]: records that print the
// PTX "tex.a2d.v4.<result>.<coord>" family.
//   outs : four result components $r, $g, $b, $a (Float32Regs for f32
//          results, Int32Regs for both s32 and u32 results).
//   ins  : $t (Int64Regs), $l (always Int32Regs, printed first in the
//          coordinate vector), then $x and $y in the coordinate type.
//   _LEVEL adds a trailing $lod operand; _GRAD adds the 2-element vectors
//   {$gradx0, $gradx1} and {$grady0, $grady1}.
// The coordinate vector is printed with four elements, {$l, $x, $y, $y}: $y
// is repeated to fill the last slot.
// NOTE(review): presumably PTX requires a 4-element vector here and ignores
// the fourth element, and $l is the array layer index -- confirm against the
// PTX ISA description of tex with .a2d geometry.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.

// --- f32 results ---
2902 def TEX_UNIFIED_2D_ARRAY_F32_S32
2903 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2904 Float32Regs:$b, Float32Regs:$a),
2905 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2906 Int32Regs:$y),
2907 "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
2908 "[$t, \\{$l, $x, $y, $y\\}];",
2909 []>;
2910 def TEX_UNIFIED_2D_ARRAY_F32_F32
2911 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2912 Float32Regs:$b, Float32Regs:$a),
2913 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2914 Float32Regs:$y),
2915 "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2916 "[$t, \\{$l, $x, $y, $y\\}];",
2917 []>;
2918 def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
2919 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2920 Float32Regs:$b, Float32Regs:$a),
2921 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2922 Float32Regs:$y, Float32Regs:$lod),
2923 "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2924 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
2925 []>;
2926 def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
2927 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2928 Float32Regs:$b, Float32Regs:$a),
2929 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2930 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
2931 Float32Regs:$grady0, Float32Regs:$grady1),
2932 "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
2933 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2934 "\\{$grady0, $grady1\\};",
2935 []>;
// --- s32 results ---
2936 def TEX_UNIFIED_2D_ARRAY_S32_S32
2937 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2938 Int32Regs:$b, Int32Regs:$a),
2939 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2940 Int32Regs:$y),
2941 "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
2942 "[$t, \\{$l, $x, $y, $y\\}];",
2943 []>;
2944 def TEX_UNIFIED_2D_ARRAY_S32_F32
2945 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2946 Int32Regs:$b, Int32Regs:$a),
2947 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2948 Float32Regs:$y),
2949 "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2950 "[$t, \\{$l, $x, $y, $y\\}];",
2951 []>;
2952 def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
2953 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2954 Int32Regs:$b, Int32Regs:$a),
2955 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2956 Float32Regs:$y, Float32Regs:$lod),
2957 "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2958 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
2959 []>;
2960 def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
2961 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2962 Int32Regs:$b, Int32Regs:$a),
2963 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2964 Float32Regs:$y,
2965 Float32Regs:$gradx0, Float32Regs:$gradx1,
2966 Float32Regs:$grady0, Float32Regs:$grady1),
2967 "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
2968 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
2969 "\\{$grady0, $grady1\\};",
2970 []>;
// --- u32 results ---
2971 def TEX_UNIFIED_2D_ARRAY_U32_S32
2972 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2973 Int32Regs:$b, Int32Regs:$a),
2974 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
2975 Int32Regs:$y),
2976 "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
2977 "[$t, \\{$l, $x, $y, $y\\}];",
2978 []>;
2979 def TEX_UNIFIED_2D_ARRAY_U32_F32
2980 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2981 Int32Regs:$b, Int32Regs:$a),
2982 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2983 Float32Regs:$y),
2984 "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2985 "[$t, \\{$l, $x, $y, $y\\}];",
2986 []>;
2987 def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
2988 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2989 Int32Regs:$b, Int32Regs:$a),
2990 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2991 Float32Regs:$y, Float32Regs:$lod),
2992 "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
2993 "[$t, \\{$l, $x, $y, $y\\}], $lod;",
2994 []>;
2995 def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
2996 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2997 Int32Regs:$b, Int32Regs:$a),
2998 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
2999 Float32Regs:$y,
3000 Float32Regs:$gradx0, Float32Regs:$gradx1,
3001 Float32Regs:$grady0, Float32Regs:$grady1),
3002 "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3003 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
3004 "\\{$grady0, $grady1\\};",
3005 []>;
3006
// TEX_UNIFIED_3D_<result>_<coord>[_LEVEL|_GRAD]: records that print the PTX
// "tex.3d.v4.<result>.<coord>" family.
//   outs : four result components $r, $g, $b, $a (Float32Regs for f32
//          results, Int32Regs for both s32 and u32 results).
//   ins  : $t (Int64Regs) followed by the coordinates $x, $y, $z.
//   _LEVEL adds a trailing $lod operand; _GRAD adds six gradient operands
//   ($gradx0..2, $grady0..2).
// All vectors are printed with four elements even though only three operands
// exist: coordinates as {$x, $y, $z, $z} and gradients as
// {$gradx0, $gradx1, $gradx2, $gradx2} / {$grady0, $grady1, $grady2, $grady2}.
// The last operand is repeated to fill the fourth slot.
// NOTE(review): presumably PTX requires 4-element vectors for .3d and ignores
// the fourth element -- confirm against the PTX ISA description of tex.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.

// --- f32 results ---
3007 def TEX_UNIFIED_3D_F32_S32
3008 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3009 Float32Regs:$b, Float32Regs:$a),
3010 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3011 Int32Regs:$z),
3012 "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
3013 "[$t, \\{$x, $y, $z, $z\\}];",
3014 []>;
3015 def TEX_UNIFIED_3D_F32_F32
3016 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3017 Float32Regs:$b, Float32Regs:$a),
3018 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3019 Float32Regs:$z),
3020 "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3021 "[$t, \\{$x, $y, $z, $z\\}];",
3022 []>;
3023 def TEX_UNIFIED_3D_F32_F32_LEVEL
3024 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3025 Float32Regs:$b, Float32Regs:$a),
3026 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3027 Float32Regs:$z, Float32Regs:$lod),
3028 "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3029 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3030 []>;
3031 def TEX_UNIFIED_3D_F32_F32_GRAD
3032 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3033 Float32Regs:$b, Float32Regs:$a),
3034 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3035 Float32Regs:$z,
3036 Float32Regs:$gradx0, Float32Regs:$gradx1,
3037 Float32Regs:$gradx2, Float32Regs:$grady0,
3038 Float32Regs:$grady1, Float32Regs:$grady2),
3039 "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3040 "[$t, \\{$x, $y, $z, $z\\}], "
3041 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3042 "\\{$grady0, $grady1, $grady2, $grady2\\};",
3043 []>;
// --- s32 results ---
3044 def TEX_UNIFIED_3D_S32_S32
3045 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3046 Int32Regs:$b, Int32Regs:$a),
3047 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3048 Int32Regs:$z),
3049 "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
3050 "[$t, \\{$x, $y, $z, $z\\}];",
3051 []>;
3052 def TEX_UNIFIED_3D_S32_F32
3053 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3054 Int32Regs:$b, Int32Regs:$a),
3055 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3056 Float32Regs:$z),
3057 "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3058 "[$t, \\{$x, $y, $z, $z\\}];",
3059 []>;
3060 def TEX_UNIFIED_3D_S32_F32_LEVEL
3061 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3062 Int32Regs:$b, Int32Regs:$a),
3063 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3064 Float32Regs:$z, Float32Regs:$lod),
3065 "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3066 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3067 []>;
3068 def TEX_UNIFIED_3D_S32_F32_GRAD
3069 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3070 Int32Regs:$b, Int32Regs:$a),
3071 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3072 Float32Regs:$z,
3073 Float32Regs:$gradx0, Float32Regs:$gradx1,
3074 Float32Regs:$gradx2, Float32Regs:$grady0,
3075 Float32Regs:$grady1, Float32Regs:$grady2),
3076 "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3077 "[$t, \\{$x, $y, $z, $z\\}], "
3078 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3079 "\\{$grady0, $grady1, $grady2, $grady2\\};",
3080 []>;
// --- u32 results ---
3081 def TEX_UNIFIED_3D_U32_S32
3082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3083 Int32Regs:$b, Int32Regs:$a),
3084 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
3085 Int32Regs:$z),
3086 "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
3087 "[$t, \\{$x, $y, $z, $z\\}];",
3088 []>;
3089 def TEX_UNIFIED_3D_U32_F32
3090 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3091 Int32Regs:$b, Int32Regs:$a),
3092 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3093 Float32Regs:$z),
3094 "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3095 "[$t, \\{$x, $y, $z, $z\\}];",
3096 []>;
3097 def TEX_UNIFIED_3D_U32_F32_LEVEL
3098 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3099 Int32Regs:$b, Int32Regs:$a),
3100 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3101 Float32Regs:$z, Float32Regs:$lod),
3102 "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3103 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3104 []>;
3105 def TEX_UNIFIED_3D_U32_F32_GRAD
3106 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3107 Int32Regs:$b, Int32Regs:$a),
3108 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
3109 Float32Regs:$z,
3110 Float32Regs:$gradx0, Float32Regs:$gradx1,
3111 Float32Regs:$gradx2, Float32Regs:$grady0,
3112 Float32Regs:$grady1, Float32Regs:$grady2),
3113 "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3114 "[$t, \\{$x, $y, $z, $z\\}], "
3115 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
3116 "\\{$grady0, $grady1, $grady2, $grady2\\};",
3117 []>;
3118
// TEX_UNIFIED_CUBE_<result>_F32[_LEVEL]: records that print the PTX
// "tex.cube.v4.<result>.f32" family.
//   outs : four result components $r, $g, $b, $a (Float32Regs for f32
//          results, Int32Regs for both s32 and u32 results).
//   ins  : $t (Int64Regs) followed by three Float32Regs coordinates
//          $x, $y, $z. This group defines only f32-coordinate forms, and only
//          the plain and _LEVEL (extra $lod, "tex.level.cube") variants.
// The coordinate vector is printed as {$x, $y, $z, $z}: $z is repeated to
// fill the fourth slot.
// NOTE(review): presumably PTX requires a 4-element vector for .cube and
// ignores the fourth element -- confirm against the PTX ISA.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.
3119 def TEX_UNIFIED_CUBE_F32_F32
3120 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3121 Float32Regs:$b, Float32Regs:$a),
3122 (ins Int64Regs:$t,
3123 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3124 "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3125 "[$t, \\{$x, $y, $z, $z\\}];",
3126 []>;
3127 def TEX_UNIFIED_CUBE_F32_F32_LEVEL
3128 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3129 Float32Regs:$b, Float32Regs:$a),
3130 (ins Int64Regs:$t,
3131 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3132 Float32Regs:$lod),
3133 "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3134 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3135 []>;
3136 def TEX_UNIFIED_CUBE_S32_F32
3137 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3138 Int32Regs:$b, Int32Regs:$a),
3139 (ins Int64Regs:$t,
3140 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3141 "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3142 "[$t, \\{$x, $y, $z, $z\\}];",
3143 []>;
3144 def TEX_UNIFIED_CUBE_S32_F32_LEVEL
3145 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3146 Int32Regs:$b, Int32Regs:$a),
3147 (ins Int64Regs:$t,
3148 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3149 Float32Regs:$lod),
3150 "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3151 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3152 []>;
3153 def TEX_UNIFIED_CUBE_U32_F32
3154 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3155 Int32Regs:$b, Int32Regs:$a),
3156 (ins Int64Regs:$t,
3157 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3158 "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3159 "[$t, \\{$x, $y, $z, $z\\}];",
3160 []>;
3161 def TEX_UNIFIED_CUBE_U32_F32_LEVEL
3162 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3163 Int32Regs:$b, Int32Regs:$a),
3164 (ins Int64Regs:$t,
3165 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3166 Float32Regs:$lod),
3167 "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3168 "[$t, \\{$x, $y, $z, $z\\}], $lod;",
3169 []>;
3170
// TEX_UNIFIED_CUBE_ARRAY_<result>_F32[_LEVEL]: records that print the PTX
// "tex.acube.v4.<result>.f32" family.
//   outs : four result components $r, $g, $b, $a (Float32Regs for f32
//          results, Int32Regs for both s32 and u32 results).
//   ins  : $t (Int64Regs), $l (Int32Regs), then three Float32Regs coordinates
//          $x, $y, $z. Only the plain and _LEVEL (extra $lod,
//          "tex.level.acube") variants are defined in this group.
// The coordinate vector {$l, $x, $y, $z} already has four distinct elements,
// so no operand is repeated here.
// NOTE(review): $l is presumably the array layer index -- confirm against the
// PTX ISA description of the .acube geometry.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.
3171 def TEX_UNIFIED_CUBE_ARRAY_F32_F32
3172 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3173 Float32Regs:$b, Float32Regs:$a),
3174 (ins Int64Regs:$t, Int32Regs:$l,
3175 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3176 "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3177 "[$t, \\{$l, $x, $y, $z\\}];",
3178 []>;
3179 def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3180 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
3181 Float32Regs:$b, Float32Regs:$a),
3182 (ins Int64Regs:$t, Int32Regs:$l,
3183 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3184 Float32Regs:$lod),
3185 "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
3186 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3187 []>;
3188 def TEX_UNIFIED_CUBE_ARRAY_S32_F32
3189 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3190 Int32Regs:$b, Int32Regs:$a),
3191 (ins Int64Regs:$t, Int32Regs:$l,
3192 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3193 "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3194 "[$t, \\{$l, $x, $y, $z\\}];",
3195 []>;
3196 def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3197 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3198 Int32Regs:$b, Int32Regs:$a),
3199 (ins Int64Regs:$t, Int32Regs:$l,
3200 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3201 Float32Regs:$lod),
3202 "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
3203 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3204 []>;
3205 def TEX_UNIFIED_CUBE_ARRAY_U32_F32
3206 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3207 Int32Regs:$b, Int32Regs:$a),
3208 (ins Int64Regs:$t, Int32Regs:$l,
3209 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
3210 "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3211 "[$t, \\{$l, $x, $y, $z\\}];",
3212 []>;
3213 def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3214 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
3215 Int32Regs:$b, Int32Regs:$a),
3216 (ins Int64Regs:$t, Int32Regs:$l,
3217 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
3218 Float32Regs:$lod),
3219 "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
3220 "[$t, \\{$l, $x, $y, $z\\}], $lod;",
3221 []>;
3222
// TLD4_UNIFIED_<comp>_2D_<result>_F32: records that print the PTX
// "tld4.<comp>.2d.v4.<result>.f32" family.
//   <comp>   : R, G, B or A in the record name maps to the .r/.g/.b/.a
//              selector in the asm string.
//   outs     : four values $v0..$v3 (Float32Regs for f32 results, Int32Regs
//              for both s32 and u32 results).
//   ins      : $t (Int64Regs) and two Float32Regs coordinates, printed as the
//              2-element vector {$x, $y}.
// The twelve records are the full cross product of four selectors and three
// result types; within one result type they differ only in the selector
// letter.
// NOTE(review): presumably each record gathers the selected component from
// four neighbouring texels -- confirm against the PTX ISA description of tld4.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.

// --- f32 results ---
3223 def TLD4_UNIFIED_R_2D_F32_F32
3224 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3225 Float32Regs:$v2, Float32Regs:$v3),
3226 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3227 "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3228 "[$t, \\{$x, $y\\}];",
3229 []>;
3230 def TLD4_UNIFIED_G_2D_F32_F32
3231 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3232 Float32Regs:$v2, Float32Regs:$v3),
3233 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3234 "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3235 "[$t, \\{$x, $y\\}];",
3236 []>;
3237 def TLD4_UNIFIED_B_2D_F32_F32
3238 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3239 Float32Regs:$v2, Float32Regs:$v3),
3240 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3241 "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3242 "[$t, \\{$x, $y\\}];",
3243 []>;
3244 def TLD4_UNIFIED_A_2D_F32_F32
3245 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
3246 Float32Regs:$v2, Float32Regs:$v3),
3247 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3248 "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3249 "[$t, \\{$x, $y\\}];",
3250 []>;
// --- s32 results ---
3251 def TLD4_UNIFIED_R_2D_S32_F32
3252 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3253 Int32Regs:$v2, Int32Regs:$v3),
3254 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3255 "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3256 "[$t, \\{$x, $y\\}];",
3257 []>;
3258 def TLD4_UNIFIED_G_2D_S32_F32
3259 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3260 Int32Regs:$v2, Int32Regs:$v3),
3261 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3262 "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3263 "[$t, \\{$x, $y\\}];",
3264 []>;
3265 def TLD4_UNIFIED_B_2D_S32_F32
3266 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3267 Int32Regs:$v2, Int32Regs:$v3),
3268 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3269 "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3270 "[$t, \\{$x, $y\\}];",
3271 []>;
3272 def TLD4_UNIFIED_A_2D_S32_F32
3273 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3274 Int32Regs:$v2, Int32Regs:$v3),
3275 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3276 "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3277 "[$t, \\{$x, $y\\}];",
3278 []>;
// --- u32 results ---
3279 def TLD4_UNIFIED_R_2D_U32_F32
3280 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3281 Int32Regs:$v2, Int32Regs:$v3),
3282 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3283 "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3284 "[$t, \\{$x, $y\\}];",
3285 []>;
3286 def TLD4_UNIFIED_G_2D_U32_F32
3287 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3288 Int32Regs:$v2, Int32Regs:$v3),
3289 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3290 "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3291 "[$t, \\{$x, $y\\}];",
3292 []>;
3293 def TLD4_UNIFIED_B_2D_U32_F32
3294 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3295 Int32Regs:$v2, Int32Regs:$v3),
3296 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3297 "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3298 "[$t, \\{$x, $y\\}];",
3299 []>;
3300 def TLD4_UNIFIED_A_2D_U32_F32
3301 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
3302 Int32Regs:$v2, Int32Regs:$v3),
3303 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
3304 "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
3305 "[$t, \\{$x, $y\\}];",
3306 []>;
3307 }
3308
3309
3310
3311 //=== Surface load instructions
3312 // .clamp variant
// Scalar (single-result) surface loads with the ".clamp" suffix.
// Record names follow SULD_<geometry>_I<bits>_CLAMP and print
// "suld.b.<geom>.b<bits>.clamp {$r}, [$s, {coords}]".
//   outs   : one result $r. Both the .b8 and the .b16 forms use Int16Regs;
//            .b32 uses Int32Regs and .b64 uses Int64Regs.
//   ins    : $s (Int64Regs), printed first inside the brackets, followed by
//            Int32Regs coordinates.
//   coords : 1d {$x}; a1d {$l, $x}; 2d {$x, $y}; a2d {$l, $x, $y, $y};
//            3d {$x, $y, $z, $z}. The a2d and 3d forms repeat their last
//            operand to print a 4-element vector.
// The enclosing `let` sets IsSuld = 1 on every record in the braces.
// NOTE(review): IsSuld is presumably a field of NVPTXInst that marks surface
// loads; the three values used for the .clamp groups in this file line up
// with scalar (1), .v2 (2) and .v4 (3) -- confirm in the instruction format
// definitions. The repeated operand presumably fills a slot PTX ignores, and
// b8 results presumably use Int16Regs for lack of an 8-bit register class --
// confirm both.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.
3313 let IsSuld = 1 in {
// 1D
3314 def SULD_1D_I8_CLAMP
3315 : NVPTXInst<(outs Int16Regs:$r),
3316 (ins Int64Regs:$s, Int32Regs:$x),
3317 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
3318 []>;
3319 def SULD_1D_I16_CLAMP
3320 : NVPTXInst<(outs Int16Regs:$r),
3321 (ins Int64Regs:$s, Int32Regs:$x),
3322 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
3323 []>;
3324 def SULD_1D_I32_CLAMP
3325 : NVPTXInst<(outs Int32Regs:$r),
3326 (ins Int64Regs:$s, Int32Regs:$x),
3327 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
3328 []>;
3329 def SULD_1D_I64_CLAMP
3330 : NVPTXInst<(outs Int64Regs:$r),
3331 (ins Int64Regs:$s, Int32Regs:$x),
3332 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
3333 []>;
3334
// 1D array
3335 def SULD_1D_ARRAY_I8_CLAMP
3336 : NVPTXInst<(outs Int16Regs:$r),
3337 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3338 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3339 []>;
3340 def SULD_1D_ARRAY_I16_CLAMP
3341 : NVPTXInst<(outs Int16Regs:$r),
3342 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3343 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3344 []>;
3345 def SULD_1D_ARRAY_I32_CLAMP
3346 : NVPTXInst<(outs Int32Regs:$r),
3347 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3348 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3349 []>;
3350 def SULD_1D_ARRAY_I64_CLAMP
3351 : NVPTXInst<(outs Int64Regs:$r),
3352 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3353 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
3354 []>;
3355
// 2D
3356 def SULD_2D_I8_CLAMP
3357 : NVPTXInst<(outs Int16Regs:$r),
3358 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3359 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3360 []>;
3361 def SULD_2D_I16_CLAMP
3362 : NVPTXInst<(outs Int16Regs:$r),
3363 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3364 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3365 []>;
3366 def SULD_2D_I32_CLAMP
3367 : NVPTXInst<(outs Int32Regs:$r),
3368 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3369 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3370 []>;
3371 def SULD_2D_I64_CLAMP
3372 : NVPTXInst<(outs Int64Regs:$r),
3373 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3374 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
3375 []>;
3376
// 2D array
3377 def SULD_2D_ARRAY_I8_CLAMP
3378 : NVPTXInst<(outs Int16Regs:$r),
3379 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3380 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3381 []>;
3382 def SULD_2D_ARRAY_I16_CLAMP
3383 : NVPTXInst<(outs Int16Regs:$r),
3384 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3385 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3386 []>;
3387 def SULD_2D_ARRAY_I32_CLAMP
3388 : NVPTXInst<(outs Int32Regs:$r),
3389 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3390 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3391 []>;
3392 def SULD_2D_ARRAY_I64_CLAMP
3393 : NVPTXInst<(outs Int64Regs:$r),
3394 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3395 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3396 []>;
3397
// 3D
3398 def SULD_3D_I8_CLAMP
3399 : NVPTXInst<(outs Int16Regs:$r),
3400 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3401 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3402 []>;
3403 def SULD_3D_I16_CLAMP
3404 : NVPTXInst<(outs Int16Regs:$r),
3405 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3406 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3407 []>;
3408 def SULD_3D_I32_CLAMP
3409 : NVPTXInst<(outs Int32Regs:$r),
3410 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3411 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3412 []>;
3413 def SULD_3D_I64_CLAMP
3414 : NVPTXInst<(outs Int64Regs:$r),
3415 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3416 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3417 []>;
3418 }
3419
// Two-element (.v2) surface loads with the ".clamp" suffix.
// Record names follow SULD_<geometry>_V2I<bits>_CLAMP and print
// "suld.b.<geom>.v2.b<bits>.clamp {$r, $g}, [$s, {coords}]".
//   outs   : two results $r, $g. Both the .b8 and the .b16 forms use
//            Int16Regs; .b32 uses Int32Regs and .b64 uses Int64Regs.
//   ins    : $s (Int64Regs), printed first inside the brackets, followed by
//            Int32Regs coordinates.
//   coords : 1d {$x}; a1d {$l, $x}; 2d {$x, $y}; a2d {$l, $x, $y, $y};
//            3d {$x, $y, $z, $z}. The a2d and 3d forms repeat their last
//            operand to print a 4-element vector.
// The enclosing `let` sets IsSuld = 2 on every record in the braces.
// NOTE(review): IsSuld is presumably a field of NVPTXInst that marks surface
// loads, with 2 denoting the two-result form -- confirm in the instruction
// format definitions. The repeated operand presumably fills a slot PTX
// ignores -- confirm against the PTX ISA description of suld.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.
3420 let IsSuld = 2 in {
// 1D
3421 def SULD_1D_V2I8_CLAMP
3422 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3423 (ins Int64Regs:$s, Int32Regs:$x),
3424 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3425 []>;
3426 def SULD_1D_V2I16_CLAMP
3427 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3428 (ins Int64Regs:$s, Int32Regs:$x),
3429 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3430 []>;
3431 def SULD_1D_V2I32_CLAMP
3432 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3433 (ins Int64Regs:$s, Int32Regs:$x),
3434 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3435 []>;
3436 def SULD_1D_V2I64_CLAMP
3437 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3438 (ins Int64Regs:$s, Int32Regs:$x),
3439 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
3440 []>;
3441
// 1D array
3442 def SULD_1D_ARRAY_V2I8_CLAMP
3443 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3444 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3445 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3446 []>;
3447 def SULD_1D_ARRAY_V2I16_CLAMP
3448 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3449 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3450 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3451 []>;
3452 def SULD_1D_ARRAY_V2I32_CLAMP
3453 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3454 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3455 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3456 []>;
3457 def SULD_1D_ARRAY_V2I64_CLAMP
3458 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3459 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3460 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3461 []>;
3462
// 2D
3463 def SULD_2D_V2I8_CLAMP
3464 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3465 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3466 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3467 []>;
3468 def SULD_2D_V2I16_CLAMP
3469 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3470 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3471 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3472 []>;
3473 def SULD_2D_V2I32_CLAMP
3474 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3475 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3476 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3477 []>;
3478 def SULD_2D_V2I64_CLAMP
3479 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3480 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3481 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3482 []>;
3483
// 2D array
3484 def SULD_2D_ARRAY_V2I8_CLAMP
3485 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3486 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3487 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
3488 "[$s, \\{$l, $x, $y, $y\\}];",
3489 []>;
3490 def SULD_2D_ARRAY_V2I16_CLAMP
3491 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3492 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3493 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
3494 "[$s, \\{$l, $x, $y, $y\\}];",
3495 []>;
3496 def SULD_2D_ARRAY_V2I32_CLAMP
3497 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3498 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3499 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
3500 "[$s, \\{$l, $x, $y, $y\\}];",
3501 []>;
3502 def SULD_2D_ARRAY_V2I64_CLAMP
3503 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3504 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3505 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
3506 "[$s, \\{$l, $x, $y, $y\\}];",
3507 []>;
3508
// 3D
3509 def SULD_3D_V2I8_CLAMP
3510 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3511 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3512 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3513 []>;
3514 def SULD_3D_V2I16_CLAMP
3515 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3516 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3517 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3518 []>;
3519 def SULD_3D_V2I32_CLAMP
3520 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3521 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3522 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3523 []>;
3524 def SULD_3D_V2I64_CLAMP
3525 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3526 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3527 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3528 []>;
3529 }
3530
// Four-element (.v4) surface loads with the ".clamp" suffix.
// Record names follow SULD_<geometry>_V4I<bits>_CLAMP and print
// "suld.b.<geom>.v4.b<bits>.clamp {$r, $g, $b, $a}, [$s, {coords}]".
//   outs   : four results $r, $g, $b, $a. Both the .b8 and the .b16 forms use
//            Int16Regs; .b32 uses Int32Regs. This group defines no 64-bit
//            record.
//   ins    : $s (Int64Regs), printed first inside the brackets, followed by
//            Int32Regs coordinates.
//   coords : 1d {$x}; a1d {$l, $x}; 2d {$x, $y}; a2d {$l, $x, $y, $y};
//            3d {$x, $y, $z, $z}. The a2d and 3d forms repeat their last
//            operand to print a 4-element vector.
// The enclosing `let` sets IsSuld = 3 on every record in the braces.
// NOTE(review): IsSuld is presumably a field of NVPTXInst that marks surface
// loads, with 3 denoting the four-result form -- confirm in the instruction
// format definitions. The missing .v4.b64 forms are presumably not available
// in PTX -- confirm against the PTX ISA description of suld.
// Every record has an empty pattern list ([]), so no selection pattern is
// attached here.
3531 let IsSuld = 3 in {
// 1D
3532 def SULD_1D_V4I8_CLAMP
3533 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3534 (ins Int64Regs:$s, Int32Regs:$x),
3535 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3536 []>;
3537 def SULD_1D_V4I16_CLAMP
3538 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3539 (ins Int64Regs:$s, Int32Regs:$x),
3540 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3541 []>;
3542 def SULD_1D_V4I32_CLAMP
3543 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3544 (ins Int64Regs:$s, Int32Regs:$x),
3545 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3546 []>;
3547
// 1D array
3548 def SULD_1D_ARRAY_V4I8_CLAMP
3549 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3550 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3551 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3552 "[$s, \\{$l, $x\\}];",
3553 []>;
3554 def SULD_1D_ARRAY_V4I16_CLAMP
3555 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3556 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3557 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3558 "[$s, \\{$l, $x\\}];",
3559 []>;
3560 def SULD_1D_ARRAY_V4I32_CLAMP
3561 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3562 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3563 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3564 "[$s, \\{$l, $x\\}];",
3565 []>;
3566
// 2D
3567 def SULD_2D_V4I8_CLAMP
3568 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3569 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3570 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3571 []>;
3572 def SULD_2D_V4I16_CLAMP
3573 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3574 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3575 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3576 []>;
3577 def SULD_2D_V4I32_CLAMP
3578 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3579 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3580 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3581 []>;
3582
// 2D array
3583 def SULD_2D_ARRAY_V4I8_CLAMP
3584 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3585 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3586 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3587 "[$s, \\{$l, $x, $y, $y\\}];",
3588 []>;
3589 def SULD_2D_ARRAY_V4I16_CLAMP
3590 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3591 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3592 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3593 "[$s, \\{$l, $x, $y, $y\\}];",
3594 []>;
3595 def SULD_2D_ARRAY_V4I32_CLAMP
3596 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3597 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3598 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3599 "[$s, \\{$l, $x, $y, $y\\}];",
3600 []>;
3601
3602
// 3D
3603 def SULD_3D_V4I8_CLAMP
3604 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3605 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3606 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3607 "[$s, \\{$x, $y, $z, $z\\}];",
3608 []>;
3609 def SULD_3D_V4I16_CLAMP
3610 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3611 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3612 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3613 "[$s, \\{$x, $y, $z, $z\\}];",
3614 []>;
3615 def SULD_3D_V4I32_CLAMP
3616 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3617 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3618 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3619 "[$s, \\{$x, $y, $z, $z\\}];",
3620 []>;
3621 }
3622
3623
3624 // .trap variant
3625 let IsSuld = 1 in {
3626 def SULD_1D_I8_TRAP
3627 : NVPTXInst<(outs Int16Regs:$r),
3628 (ins Int64Regs:$s, Int32Regs:$x),
3629 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3630 []>;
3631 def SULD_1D_I16_TRAP
3632 : NVPTXInst<(outs Int16Regs:$r),
3633 (ins Int64Regs:$s, Int32Regs:$x),
3634 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3635 []>;
3636 def SULD_1D_I32_TRAP
3637 : NVPTXInst<(outs Int32Regs:$r),
3638 (ins Int64Regs:$s, Int32Regs:$x),
3639 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3640 []>;
3641 def SULD_1D_I64_TRAP
3642 : NVPTXInst<(outs Int64Regs:$r),
3643 (ins Int64Regs:$s, Int32Regs:$x),
3644 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3645 []>;
3646
3647 def SULD_1D_ARRAY_I8_TRAP
3648 : NVPTXInst<(outs Int16Regs:$r),
3649 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3650 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3651 []>;
3652 def SULD_1D_ARRAY_I16_TRAP
3653 : NVPTXInst<(outs Int16Regs:$r),
3654 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3655 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3656 []>;
3657 def SULD_1D_ARRAY_I32_TRAP
3658 : NVPTXInst<(outs Int32Regs:$r),
3659 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3660 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3661 []>;
3662 def SULD_1D_ARRAY_I64_TRAP
3663 : NVPTXInst<(outs Int64Regs:$r),
3664 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3665 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3666 []>;
3667
3668 def SULD_2D_I8_TRAP
3669 : NVPTXInst<(outs Int16Regs:$r),
3670 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3671 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3672 []>;
3673 def SULD_2D_I16_TRAP
3674 : NVPTXInst<(outs Int16Regs:$r),
3675 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3676 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3677 []>;
3678 def SULD_2D_I32_TRAP
3679 : NVPTXInst<(outs Int32Regs:$r),
3680 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3681 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3682 []>;
3683 def SULD_2D_I64_TRAP
3684 : NVPTXInst<(outs Int64Regs:$r),
3685 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3686 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3687 []>;
3688
// 2D-array surface loads (mnemonic geometry "a2d"), .trap variant, b8..b64.
// There are three 32-bit coordinate operands ($l, $x, $y), but the asm string
// prints a four-element vector {$l, $x, $y, $y}: $y is emitted twice.
// NOTE(review): the duplicate looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA before "fixing" it.
3689 def SULD_2D_ARRAY_I8_TRAP
3690 : NVPTXInst<(outs Int16Regs:$r),
3691 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3692 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3693 []>;
3694 def SULD_2D_ARRAY_I16_TRAP
3695 : NVPTXInst<(outs Int16Regs:$r),
3696 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3697 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3698 []>;
3699 def SULD_2D_ARRAY_I32_TRAP
3700 : NVPTXInst<(outs Int32Regs:$r),
3701 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3702 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3703 []>;
3704 def SULD_2D_ARRAY_I64_TRAP
3705 : NVPTXInst<(outs Int64Regs:$r),
3706 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3707 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3708 []>;
3709
// 3D surface loads, .trap variant, b8..b64.
// There are three 32-bit coordinate operands ($x, $y, $z), but the asm string
// prints a four-element vector {$x, $y, $z, $z}: $z is emitted twice.
// NOTE(review): the duplicate looks like padding to a 4-element coordinate
// vector -- confirm against the PTX ISA before "fixing" it.
3710 def SULD_3D_I8_TRAP
3711 : NVPTXInst<(outs Int16Regs:$r),
3712 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3713 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3714 []>;
3715 def SULD_3D_I16_TRAP
3716 : NVPTXInst<(outs Int16Regs:$r),
3717 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3718 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3719 []>;
3720 def SULD_3D_I32_TRAP
3721 : NVPTXInst<(outs Int32Regs:$r),
3722 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3723 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3724 []>;
3725 def SULD_3D_I64_TRAP
3726 : NVPTXInst<(outs Int64Regs:$r),
3727 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3728 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3729 []>;
3730 }
3731
// Two-element (.v2) surface loads, .trap variant.
// Every record in this scope gets IsSuld = 2 and has two result registers
// ($r, $g) of the same class. In this file IsSuld = 1/2/3 lines up with
// scalar/.v2/.v4 records; NOTE(review): confirm the field's meaning where
// NVPTXInst is declared.
// The b8 and b16 forms both use Int16Regs results. All pattern lists are
// empty ([]).
3732 let IsSuld = 2 in {
// 1D: single coordinate {$x}.
3733 def SULD_1D_V2I8_TRAP
3734 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3735 (ins Int64Regs:$s, Int32Regs:$x),
3736 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3737 []>;
3738 def SULD_1D_V2I16_TRAP
3739 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3740 (ins Int64Regs:$s, Int32Regs:$x),
3741 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3742 []>;
3743 def SULD_1D_V2I32_TRAP
3744 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3745 (ins Int64Regs:$s, Int32Regs:$x),
3746 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3747 []>;
3748 def SULD_1D_V2I64_TRAP
3749 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3750 (ins Int64Regs:$s, Int32Regs:$x),
3751 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
3752 []>;
3753
// 1D array ("a1d"): coordinates printed as {$l, $x}.
3754 def SULD_1D_ARRAY_V2I8_TRAP
3755 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3756 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3757 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3758 []>;
3759 def SULD_1D_ARRAY_V2I16_TRAP
3760 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3761 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3762 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3763 []>;
3764 def SULD_1D_ARRAY_V2I32_TRAP
3765 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3766 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3767 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3768 []>;
3769 def SULD_1D_ARRAY_V2I64_TRAP
3770 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3771 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3772 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3773 []>;
3774
// 2D: coordinates printed as {$x, $y}.
3775 def SULD_2D_V2I8_TRAP
3776 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3777 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3778 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3779 []>;
3780 def SULD_2D_V2I16_TRAP
3781 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3782 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3783 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3784 []>;
3785 def SULD_2D_V2I32_TRAP
3786 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3787 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3788 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3789 []>;
3790 def SULD_2D_V2I64_TRAP
3791 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3792 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3793 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3794 []>;
3795
// 2D array ("a2d"): three coordinate operands, printed as the four-element
// vector {$l, $x, $y, $y} ($y repeated; presumably padding -- confirm).
// The asm string is split across two adjacent literals.
3796 def SULD_2D_ARRAY_V2I8_TRAP
3797 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3798 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3799 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
3800 "[$s, \\{$l, $x, $y, $y\\}];",
3801 []>;
3802 def SULD_2D_ARRAY_V2I16_TRAP
3803 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3804 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3805 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
3806 "[$s, \\{$l, $x, $y, $y\\}];",
3807 []>;
3808 def SULD_2D_ARRAY_V2I32_TRAP
3809 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3810 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3811 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
3812 "[$s, \\{$l, $x, $y, $y\\}];",
3813 []>;
3814 def SULD_2D_ARRAY_V2I64_TRAP
3815 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3816 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3817 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
3818 "[$s, \\{$l, $x, $y, $y\\}];",
3819 []>;
3820
// 3D: three coordinate operands, printed as {$x, $y, $z, $z} ($z repeated;
// presumably padding -- confirm).
3821 def SULD_3D_V2I8_TRAP
3822 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3823 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3824 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3825 []>;
3826 def SULD_3D_V2I16_TRAP
3827 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
3828 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3829 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3830 []>;
3831 def SULD_3D_V2I32_TRAP
3832 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
3833 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3834 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3835 []>;
3836 def SULD_3D_V2I64_TRAP
3837 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
3838 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3839 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3840 []>;
3841 }
3842
// Four-element (.v4) surface loads, .trap variant.
// Every record in this scope gets IsSuld = 3 and has four result registers
// ($r, $g, $b, $a) of the same class. Only b8, b16 and b32 element sizes are
// defined here; there is no 64-bit .v4 record in this scope.
// The b8 and b16 forms both use Int16Regs results. All pattern lists are
// empty ([]).
3843 let IsSuld = 3 in {
// 1D: single coordinate {$x}.
3844 def SULD_1D_V4I8_TRAP
3845 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3846 (ins Int64Regs:$s, Int32Regs:$x),
3847 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3848 []>;
3849 def SULD_1D_V4I16_TRAP
3850 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3851 (ins Int64Regs:$s, Int32Regs:$x),
3852 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3853 []>;
3854 def SULD_1D_V4I32_TRAP
3855 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3856 (ins Int64Regs:$s, Int32Regs:$x),
3857 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3858 []>;
3859
// 1D array ("a1d"): coordinates printed as {$l, $x}.
3860 def SULD_1D_ARRAY_V4I8_TRAP
3861 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3862 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3863 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3864 "[$s, \\{$l, $x\\}];",
3865 []>;
3866 def SULD_1D_ARRAY_V4I16_TRAP
3867 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3868 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3869 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3870 "[$s, \\{$l, $x\\}];",
3871 []>;
3872 def SULD_1D_ARRAY_V4I32_TRAP
3873 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3874 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3875 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3876 "[$s, \\{$l, $x\\}];",
3877 []>;
3878
// 2D: coordinates printed as {$x, $y}.
3879 def SULD_2D_V4I8_TRAP
3880 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3881 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3882 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3883 []>;
3884 def SULD_2D_V4I16_TRAP
3885 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3886 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3887 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3888 []>;
3889 def SULD_2D_V4I32_TRAP
3890 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3891 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3892 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3893 []>;
3894
// 2D array ("a2d"): three coordinate operands, printed as the four-element
// vector {$l, $x, $y, $y} ($y repeated; presumably padding -- confirm).
3895 def SULD_2D_ARRAY_V4I8_TRAP
3896 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3897 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3898 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3899 "[$s, \\{$l, $x, $y, $y\\}];",
3900 []>;
3901 def SULD_2D_ARRAY_V4I16_TRAP
3902 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3903 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3904 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3905 "[$s, \\{$l, $x, $y, $y\\}];",
3906 []>;
3907 def SULD_2D_ARRAY_V4I32_TRAP
3908 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3909 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3910 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3911 "[$s, \\{$l, $x, $y, $y\\}];",
3912 []>;
3913
3914
// 3D: three coordinate operands, printed as {$x, $y, $z, $z} ($z repeated;
// presumably padding -- confirm).
3915 def SULD_3D_V4I8_TRAP
3916 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3918 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
3919 "[$s, \\{$x, $y, $z, $z\\}];",
3920 []>;
3921 def SULD_3D_V4I16_TRAP
3922 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3923 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3924 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
3925 "[$s, \\{$x, $y, $z, $z\\}];",
3926 []>;
3927 def SULD_3D_V4I32_TRAP
3928 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3929 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3930 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
3931 "[$s, \\{$x, $y, $z, $z\\}];",
3932 []>;
3933 }
3934
3935 // .zero variant
// Scalar surface loads, .zero variant.
// Every record in this scope gets IsSuld = 1 and has one result register
// ($r). Operand lists and geometry coverage mirror the *_TRAP scalar records
// above; only the mnemonic suffix (".zero") and the def names differ.
// The b8 and b16 forms both use Int16Regs results. All pattern lists are
// empty ([]).
3936 let IsSuld = 1 in {
// 1D: single coordinate {$x}.
3937 def SULD_1D_I8_ZERO
3938 : NVPTXInst<(outs Int16Regs:$r),
3939 (ins Int64Regs:$s, Int32Regs:$x),
3940 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
3941 []>;
3942 def SULD_1D_I16_ZERO
3943 : NVPTXInst<(outs Int16Regs:$r),
3944 (ins Int64Regs:$s, Int32Regs:$x),
3945 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
3946 []>;
3947 def SULD_1D_I32_ZERO
3948 : NVPTXInst<(outs Int32Regs:$r),
3949 (ins Int64Regs:$s, Int32Regs:$x),
3950 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
3951 []>;
3952 def SULD_1D_I64_ZERO
3953 : NVPTXInst<(outs Int64Regs:$r),
3954 (ins Int64Regs:$s, Int32Regs:$x),
3955 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
3956 []>;
3957
// 1D array ("a1d"): coordinates printed as {$l, $x}.
3958 def SULD_1D_ARRAY_I8_ZERO
3959 : NVPTXInst<(outs Int16Regs:$r),
3960 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3961 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3962 []>;
3963 def SULD_1D_ARRAY_I16_ZERO
3964 : NVPTXInst<(outs Int16Regs:$r),
3965 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3966 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3967 []>;
3968 def SULD_1D_ARRAY_I32_ZERO
3969 : NVPTXInst<(outs Int32Regs:$r),
3970 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3971 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3972 []>;
3973 def SULD_1D_ARRAY_I64_ZERO
3974 : NVPTXInst<(outs Int64Regs:$r),
3975 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3976 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
3977 []>;
3978
// 2D: coordinates printed as {$x, $y}.
3979 def SULD_2D_I8_ZERO
3980 : NVPTXInst<(outs Int16Regs:$r),
3981 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3982 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3983 []>;
3984 def SULD_2D_I16_ZERO
3985 : NVPTXInst<(outs Int16Regs:$r),
3986 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3987 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3988 []>;
3989 def SULD_2D_I32_ZERO
3990 : NVPTXInst<(outs Int32Regs:$r),
3991 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3992 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3993 []>;
3994 def SULD_2D_I64_ZERO
3995 : NVPTXInst<(outs Int64Regs:$r),
3996 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3997 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
3998 []>;
3999
// 2D array ("a2d"): three coordinate operands, printed as the four-element
// vector {$l, $x, $y, $y} ($y repeated; presumably padding -- confirm).
4000 def SULD_2D_ARRAY_I8_ZERO
4001 : NVPTXInst<(outs Int16Regs:$r),
4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4003 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4004 []>;
4005 def SULD_2D_ARRAY_I16_ZERO
4006 : NVPTXInst<(outs Int16Regs:$r),
4007 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4008 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4009 []>;
4010 def SULD_2D_ARRAY_I32_ZERO
4011 : NVPTXInst<(outs Int32Regs:$r),
4012 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4013 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4014 []>;
4015 def SULD_2D_ARRAY_I64_ZERO
4016 : NVPTXInst<(outs Int64Regs:$r),
4017 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4018 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4019 []>;
4020
// 3D: three coordinate operands, printed as {$x, $y, $z, $z} ($z repeated;
// presumably padding -- confirm).
4021 def SULD_3D_I8_ZERO
4022 : NVPTXInst<(outs Int16Regs:$r),
4023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4024 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4025 []>;
4026 def SULD_3D_I16_ZERO
4027 : NVPTXInst<(outs Int16Regs:$r),
4028 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4029 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4030 []>;
4031 def SULD_3D_I32_ZERO
4032 : NVPTXInst<(outs Int32Regs:$r),
4033 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4034 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4035 []>;
4036 def SULD_3D_I64_ZERO
4037 : NVPTXInst<(outs Int64Regs:$r),
4038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4039 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4040 []>;
4041 }
4042
// Two-element (.v2) surface loads, .zero variant.
// Every record in this scope gets IsSuld = 2 and has two result registers
// ($r, $g) of the same class. Operand lists mirror the .v2 *_TRAP records;
// only the mnemonic suffix (".zero") and the def names differ.
// The b8 and b16 forms both use Int16Regs results. All pattern lists are
// empty ([]).
4043 let IsSuld = 2 in {
// 1D: single coordinate {$x}.
4044 def SULD_1D_V2I8_ZERO
4045 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4046 (ins Int64Regs:$s, Int32Regs:$x),
4047 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4048 []>;
4049 def SULD_1D_V2I16_ZERO
4050 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4051 (ins Int64Regs:$s, Int32Regs:$x),
4052 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4053 []>;
4054 def SULD_1D_V2I32_ZERO
4055 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4056 (ins Int64Regs:$s, Int32Regs:$x),
4057 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4058 []>;
4059 def SULD_1D_V2I64_ZERO
4060 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4061 (ins Int64Regs:$s, Int32Regs:$x),
4062 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4063 []>;
4064
// 1D array ("a1d"): coordinates printed as {$l, $x}.
4065 def SULD_1D_ARRAY_V2I8_ZERO
4066 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4067 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4068 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4069 []>;
4070 def SULD_1D_ARRAY_V2I16_ZERO
4071 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4072 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4073 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4074 []>;
4075 def SULD_1D_ARRAY_V2I32_ZERO
4076 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4077 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4078 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4079 []>;
4080 def SULD_1D_ARRAY_V2I64_ZERO
4081 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4082 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4083 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4084 []>;
4085
// 2D: coordinates printed as {$x, $y}.
4086 def SULD_2D_V2I8_ZERO
4087 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4088 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4089 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4090 []>;
4091 def SULD_2D_V2I16_ZERO
4092 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4093 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4094 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4095 []>;
4096 def SULD_2D_V2I32_ZERO
4097 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4098 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4099 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4100 []>;
4101 def SULD_2D_V2I64_ZERO
4102 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4103 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4104 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4105 []>;
4106
// 2D array ("a2d"): three coordinate operands, printed as the four-element
// vector {$l, $x, $y, $y} ($y repeated; presumably padding -- confirm).
4107 def SULD_2D_ARRAY_V2I8_ZERO
4108 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4110 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4111 "[$s, \\{$l, $x, $y, $y\\}];",
4112 []>;
4113 def SULD_2D_ARRAY_V2I16_ZERO
4114 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4115 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4116 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4117 "[$s, \\{$l, $x, $y, $y\\}];",
4118 []>;
4119 def SULD_2D_ARRAY_V2I32_ZERO
4120 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4121 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4122 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4123 "[$s, \\{$l, $x, $y, $y\\}];",
4124 []>;
4125 def SULD_2D_ARRAY_V2I64_ZERO
4126 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4127 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4128 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4129 "[$s, \\{$l, $x, $y, $y\\}];",
4130 []>;
4131
// 3D: three coordinate operands, printed as {$x, $y, $z, $z} ($z repeated;
// presumably padding -- confirm).
4132 def SULD_3D_V2I8_ZERO
4133 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4134 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4135 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4136 []>;
4137 def SULD_3D_V2I16_ZERO
4138 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4139 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4140 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4141 []>;
4142 def SULD_3D_V2I32_ZERO
4143 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4144 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4145 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4146 []>;
4147 def SULD_3D_V2I64_ZERO
4148 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4149 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4150 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4151 []>;
4152 }
4153
// Four-element (.v4) surface loads, .zero variant.
// Every record in this scope gets IsSuld = 3 and has four result registers
// ($r, $g, $b, $a) of the same class. Only b8, b16 and b32 element sizes are
// defined here; there is no 64-bit .v4 record in this scope.
// The b8 and b16 forms both use Int16Regs results. All pattern lists are
// empty ([]).
4154 let IsSuld = 3 in {
// 1D: single coordinate {$x}.
4155 def SULD_1D_V4I8_ZERO
4156 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4157 (ins Int64Regs:$s, Int32Regs:$x),
4158 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4159 []>;
4160 def SULD_1D_V4I16_ZERO
4161 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4162 (ins Int64Regs:$s, Int32Regs:$x),
4163 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4164 []>;
4165 def SULD_1D_V4I32_ZERO
4166 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4167 (ins Int64Regs:$s, Int32Regs:$x),
4168 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4169 []>;
4170
// 1D array ("a1d"): coordinates printed as {$l, $x}.
4171 def SULD_1D_ARRAY_V4I8_ZERO
4172 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4173 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4174 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4175 "[$s, \\{$l, $x\\}];",
4176 []>;
4177 def SULD_1D_ARRAY_V4I16_ZERO
4178 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4179 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4180 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4181 "[$s, \\{$l, $x\\}];",
4182 []>;
4183 def SULD_1D_ARRAY_V4I32_ZERO
4184 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4185 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4186 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4187 "[$s, \\{$l, $x\\}];",
4188 []>;
4189
// 2D: coordinates printed as {$x, $y}.
4190 def SULD_2D_V4I8_ZERO
4191 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4192 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4193 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4194 []>;
4195 def SULD_2D_V4I16_ZERO
4196 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4197 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4198 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4199 []>;
4200 def SULD_2D_V4I32_ZERO
4201 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4202 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4203 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4204 []>;
4205
// 2D array ("a2d"): three coordinate operands, printed as the four-element
// vector {$l, $x, $y, $y} ($y repeated; presumably padding -- confirm).
4206 def SULD_2D_ARRAY_V4I8_ZERO
4207 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4208 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4209 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4210 "[$s, \\{$l, $x, $y, $y\\}];",
4211 []>;
4212 def SULD_2D_ARRAY_V4I16_ZERO
4213 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4214 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4215 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4216 "[$s, \\{$l, $x, $y, $y\\}];",
4217 []>;
4218 def SULD_2D_ARRAY_V4I32_ZERO
4219 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4220 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4221 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4222 "[$s, \\{$l, $x, $y, $y\\}];",
4223 []>;
4224
4225
// 3D: three coordinate operands, printed as {$x, $y, $z, $z} ($z repeated;
// presumably padding -- confirm).
4226 def SULD_3D_V4I8_ZERO
4227 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4228 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4229 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4230 "[$s, \\{$x, $y, $z, $z\\}];",
4231 []>;
4232 def SULD_3D_V4I16_ZERO
4233 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4235 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4236 "[$s, \\{$x, $y, $z, $z\\}];",
4237 []>;
4238 def SULD_3D_V4I32_ZERO
4239 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4240 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4241 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4242 "[$s, \\{$x, $y, $z, $z\\}];",
4243 []>;
4244 }
4245
4246 //-----------------------------------
4247 // Texture Query Intrinsics
4248 //-----------------------------------
4249
// Texture query instructions. Every record in this scope gets
// IsSurfTexQuery = 1, takes a single 64-bit operand $a (printed in brackets),
// and writes a 32-bit result $d. Each def prints one "txq.<attribute>.b32"
// mnemonic. Pattern lists are empty here; the matching is done by the
// separate `def : Pat<...>` records that follow this scope.
4250 let IsSurfTexQuery = 1 in {
4251 def TXQ_CHANNEL_ORDER
4252 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4253 "txq.channel_order.b32 \t$d, [$a];",
4254 []>;
4255 def TXQ_CHANNEL_DATA_TYPE
4256 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4257 "txq.channel_data_type.b32 \t$d, [$a];",
4258 []>;
4259 def TXQ_WIDTH
4260 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4261 "txq.width.b32 \t$d, [$a];",
4262 []>;
4263 def TXQ_HEIGHT
4264 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4265 "txq.height.b32 \t$d, [$a];",
4266 []>;
4267 def TXQ_DEPTH
4268 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4269 "txq.depth.b32 \t$d, [$a];",
4270 []>;
4271 def TXQ_ARRAY_SIZE
4272 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4273 "txq.array_size.b32 \t$d, [$a];",
4274 []>;
4275 def TXQ_NUM_SAMPLES
4276 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4277 "txq.num_samples.b32 \t$d, [$a];",
4278 []>;
4279 def TXQ_NUM_MIPMAP_LEVELS
4280 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4281 "txq.num_mipmap_levels.b32 \t$d, [$a];",
4282 []>;
4283 }
4284
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand is rewritten to the TXQ_* instruction
// of the same attribute, with the operand passed through unchanged.
// There is one pattern per TXQ_* def (eight in total).
4285 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4286 (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
4287 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4288 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4289 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4290 (TXQ_WIDTH Int64Regs:$a)>;
4291 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4292 (TXQ_HEIGHT Int64Regs:$a)>;
4293 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4294 (TXQ_DEPTH Int64Regs:$a)>;
4295 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4296 (TXQ_ARRAY_SIZE Int64Regs:$a)>;
4297 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4298 (TXQ_NUM_SAMPLES Int64Regs:$a)>;
4299 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4300 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4301
4302
4303 //-----------------------------------
4304 // Surface Query Intrinsics
4305 //-----------------------------------
4306
// Surface query instructions. Every record in this scope gets
// IsSurfTexQuery = 1 (the same flag value the TXQ_* records use), takes a
// single 64-bit operand $a (printed in brackets), and writes a 32-bit result
// $d. Each def prints one "suq.<attribute>.b32" mnemonic.
// Six attributes are defined; unlike TXQ there are no num_samples or
// num_mipmap_levels records here. Pattern lists are empty; matching is done
// by the `def : Pat<...>` records that follow this scope.
4307 let IsSurfTexQuery = 1 in {
4308 def SUQ_CHANNEL_ORDER
4309 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4310 "suq.channel_order.b32 \t$d, [$a];",
4311 []>;
4312 def SUQ_CHANNEL_DATA_TYPE
4313 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4314 "suq.channel_data_type.b32 \t$d, [$a];",
4315 []>;
4316 def SUQ_WIDTH
4317 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4318 "suq.width.b32 \t$d, [$a];",
4319 []>;
4320 def SUQ_HEIGHT
4321 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4322 "suq.height.b32 \t$d, [$a];",
4323 []>;
4324 def SUQ_DEPTH
4325 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4326 "suq.depth.b32 \t$d, [$a];",
4327 []>;
4328 def SUQ_ARRAY_SIZE
4329 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4330 "suq.array_size.b32 \t$d, [$a];",
4331 []>;
4332 }
4333
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand is rewritten to the SUQ_* instruction
// of the same attribute, with the operand passed through unchanged.
// There is one pattern per SUQ_* def (six in total).
4334 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4335 (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
4336 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4337 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
4338 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4339 (SUQ_WIDTH Int64Regs:$a)>;
4340 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4341 (SUQ_HEIGHT Int64Regs:$a)>;
4342 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4343 (SUQ_DEPTH Int64Regs:$a)>;
4344 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4345 (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4346
4347
4348 //===- Handle Query -------------------------------------------------------===//
4349
4350 // TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicates. Each record takes a 64-bit operand $a and writes a
// 1-bit register $d, printing "istypep.samplerref", "istypep.surfref" or
// "istypep.texref" respectively.
// Unlike the TXQ/SUQ records, the selection pattern is given inline (the
// matching int_nvvm_istypep_* intrinsic) and $a is printed without brackets.
4351 def ISTYPEP_SAMPLER
4352 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4353 "istypep.samplerref \t$d, $a;",
4354 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4355 def ISTYPEP_SURFACE
4356 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4357 "istypep.surfref \t$d, $a;",
4358 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4359 def ISTYPEP_TEXTURE
4360 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4361 "istypep.texref \t$d, $a;",
4362 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4363
4364 //===- Surface Stores -----------------------------------------------------===//
4365
4366 let IsSust = 1 in {
4367 // Unformatted
4368 // .clamp variant
// 1D surface stores ("sust.b"), .clamp variant.
// These records have no outputs. Inputs are $s (64-bit), one 32-bit
// coordinate $x, then the value registers to store: $r for scalar, $r/$g for
// .v2, $r/$g/$b/$a for .v4.
// The b8 and b16 forms both take Int16Regs values. The .v4 forms stop at b32
// (no .v4.b64 record). All pattern lists are empty ([]).
4369 def SUST_B_1D_B8_CLAMP
4370 : NVPTXInst<(outs),
4371 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4372 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4373 []>;
4374 def SUST_B_1D_B16_CLAMP
4375 : NVPTXInst<(outs),
4376 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4377 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4378 []>;
4379 def SUST_B_1D_B32_CLAMP
4380 : NVPTXInst<(outs),
4381 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4382 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4383 []>;
4384 def SUST_B_1D_B64_CLAMP
4385 : NVPTXInst<(outs),
4386 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4387 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
4388 []>;
4389 def SUST_B_1D_V2B8_CLAMP
4390 : NVPTXInst<(outs),
4391 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4392 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4393 []>;
4394 def SUST_B_1D_V2B16_CLAMP
4395 : NVPTXInst<(outs),
4396 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4397 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4398 []>;
4399 def SUST_B_1D_V2B32_CLAMP
4400 : NVPTXInst<(outs),
4401 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4402 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4403 []>;
4404 def SUST_B_1D_V2B64_CLAMP
4405 : NVPTXInst<(outs),
4406 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4407 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
4408 []>;
4409 def SUST_B_1D_V4B8_CLAMP
4410 : NVPTXInst<(outs),
4411 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4412 Int16Regs:$b, Int16Regs:$a),
4413 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4414 []>;
4415 def SUST_B_1D_V4B16_CLAMP
4416 : NVPTXInst<(outs),
4417 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
4418 Int16Regs:$b, Int16Regs:$a),
4419 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4420 []>;
4421 def SUST_B_1D_V4B32_CLAMP
4422 : NVPTXInst<(outs),
4423 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
4424 Int32Regs:$b, Int32Regs:$a),
4425 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4426 []>;
4427
4428
// 1D-array surface stores ("sust.b.a1d"), .clamp variant.
// These records have no outputs. Inputs are $s (64-bit), then $idx and $x
// (32-bit each, printed as {$idx, $x}), then the value registers to store.
// Note the first coordinate operand is named $idx here, whereas the SULD
// array records name it $l.
// The b8 and b16 forms both take Int16Regs values. The .v4 forms stop at b32.
// All pattern lists are empty ([]).
4429 def SUST_B_1D_ARRAY_B8_CLAMP
4430 : NVPTXInst<(outs),
4431 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4432 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4433 []>;
4434 def SUST_B_1D_ARRAY_B16_CLAMP
4435 : NVPTXInst<(outs),
4436 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
4437 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4438 []>;
4439 def SUST_B_1D_ARRAY_B32_CLAMP
4440 : NVPTXInst<(outs),
4441 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
4442 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4443 []>;
4444 def SUST_B_1D_ARRAY_B64_CLAMP
4445 : NVPTXInst<(outs),
4446 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
4447 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4448 []>;
4449 def SUST_B_1D_ARRAY_V2B8_CLAMP
4450 : NVPTXInst<(outs),
4451 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4452 Int16Regs:$g),
4453 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4454 []>;
4455 def SUST_B_1D_ARRAY_V2B16_CLAMP
4456 : NVPTXInst<(outs),
4457 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4458 Int16Regs:$g),
4459 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4460 []>;
4461 def SUST_B_1D_ARRAY_V2B32_CLAMP
4462 : NVPTXInst<(outs),
4463 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4464 Int32Regs:$g),
4465 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4466 []>;
4467 def SUST_B_1D_ARRAY_V2B64_CLAMP
4468 : NVPTXInst<(outs),
4469 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
4470 Int64Regs:$g),
4471 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4472 []>;
4473 def SUST_B_1D_ARRAY_V4B8_CLAMP
4474 : NVPTXInst<(outs),
4475 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4476 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4477 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
4478 "\\{$r, $g, $b, $a\\};",
4479 []>;
4480 def SUST_B_1D_ARRAY_V4B16_CLAMP
4481 : NVPTXInst<(outs),
4482 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
4483 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4484 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
4485 "\\{$r, $g, $b, $a\\};",
4486 []>;
4487 def SUST_B_1D_ARRAY_V4B32_CLAMP
4488 : NVPTXInst<(outs),
4489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
4490 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4491 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
4492 "\\{$r, $g, $b, $a\\};",
4493 []>;
4494
4495
// 2D surface stores ("sust.b.2d"), .clamp variant.
// These records have no outputs. Inputs are $s (64-bit), two 32-bit
// coordinates printed as {$x, $y}, then the value registers to store: $r for
// scalar, $r/$g for .v2, $r/$g/$b/$a for .v4.
// The b8 and b16 forms both take Int16Regs values. The .v4 forms stop at b32.
// All pattern lists are empty ([]).
4496 def SUST_B_2D_B8_CLAMP
4497 : NVPTXInst<(outs),
4498 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4499 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4500 []>;
4501 def SUST_B_2D_B16_CLAMP
4502 : NVPTXInst<(outs),
4503 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4504 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4505 []>;
4506 def SUST_B_2D_B32_CLAMP
4507 : NVPTXInst<(outs),
4508 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4509 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4510 []>;
4511 def SUST_B_2D_B64_CLAMP
4512 : NVPTXInst<(outs),
4513 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4514 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
4515 []>;
4516 def SUST_B_2D_V2B8_CLAMP
4517 : NVPTXInst<(outs),
4518 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4519 Int16Regs:$g),
4520 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4521 []>;
4522 def SUST_B_2D_V2B16_CLAMP
4523 : NVPTXInst<(outs),
4524 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4525 Int16Regs:$g),
4526 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4527 []>;
4528 def SUST_B_2D_V2B32_CLAMP
4529 : NVPTXInst<(outs),
4530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4531 Int32Regs:$g),
4532 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4533 []>;
4534 def SUST_B_2D_V2B64_CLAMP
4535 : NVPTXInst<(outs),
4536 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4537 Int64Regs:$g),
4538 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4539 []>;
4540 def SUST_B_2D_V4B8_CLAMP
4541 : NVPTXInst<(outs),
4542 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4543 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4544 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
4545 "\\{$r, $g, $b, $a\\};",
4546 []>;
4547 def SUST_B_2D_V4B16_CLAMP
4548 : NVPTXInst<(outs),
4549 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
4550 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4551 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
4552 "\\{$r, $g, $b, $a\\};",
4553 []>;
4554 def SUST_B_2D_V4B32_CLAMP
4555 : NVPTXInst<(outs),
4556 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4557 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4558 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
4559 "\\{$r, $g, $b, $a\\};",
4560 []>;
4561
4562
// sust.b.a2d.*.clamp family.  Operands are the surface $s, the coordinates
// $idx, $x, $y, and one, two or four data registers of class `rc`; `opc` is
// the complete mnemonic.  The emitted coordinate vector has four slots and
// the last one repeats $y.
// NOTE(review): the repeated $y is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB2DArrayClampV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
              []>;
class SustB2DArrayClampV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB2DArrayClampV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_2D_ARRAY_B8_CLAMP
  : SustB2DArrayClampV1<"sust.b.a2d.b8.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_B16_CLAMP
  : SustB2DArrayClampV1<"sust.b.a2d.b16.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_B32_CLAMP
  : SustB2DArrayClampV1<"sust.b.a2d.b32.clamp", Int32Regs>;
def SUST_B_2D_ARRAY_B64_CLAMP
  : SustB2DArrayClampV1<"sust.b.a2d.b64.clamp", Int64Regs>;

// Two-element vector stores.
def SUST_B_2D_ARRAY_V2B8_CLAMP
  : SustB2DArrayClampV2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_CLAMP
  : SustB2DArrayClampV2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_CLAMP
  : SustB2DArrayClampV2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_CLAMP
  : SustB2DArrayClampV2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

// Four-element vector stores.
def SUST_B_2D_ARRAY_V4B8_CLAMP
  : SustB2DArrayClampV4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_CLAMP
  : SustB2DArrayClampV4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_CLAMP
  : SustB2DArrayClampV4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4636
4637
// sust.b.3d.*.clamp family.  Operands are the surface $s, the coordinates
// $x, $y, $z, and one, two or four data registers of class `rc`; `opc` is the
// complete mnemonic.  The emitted coordinate vector has four slots and the
// last one repeats $z.
// NOTE(review): the repeated $z is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB3DClampV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
              []>;
class SustB3DClampV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB3DClampV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_3D_B8_CLAMP  : SustB3DClampV1<"sust.b.3d.b8.clamp", Int16Regs>;
def SUST_B_3D_B16_CLAMP : SustB3DClampV1<"sust.b.3d.b16.clamp", Int16Regs>;
def SUST_B_3D_B32_CLAMP : SustB3DClampV1<"sust.b.3d.b32.clamp", Int32Regs>;
def SUST_B_3D_B64_CLAMP : SustB3DClampV1<"sust.b.3d.b64.clamp", Int64Regs>;

// Two-element vector stores.
def SUST_B_3D_V2B8_CLAMP
  : SustB3DClampV2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
def SUST_B_3D_V2B16_CLAMP
  : SustB3DClampV2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
def SUST_B_3D_V2B32_CLAMP
  : SustB3DClampV2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
def SUST_B_3D_V2B64_CLAMP
  : SustB3DClampV2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

// Four-element vector stores.
def SUST_B_3D_V4B8_CLAMP
  : SustB3DClampV4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
def SUST_B_3D_V4B16_CLAMP
  : SustB3DClampV4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
def SUST_B_3D_V4B32_CLAMP
  : SustB3DClampV4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
4711
4712
4713 // .trap variant
// sust.b.1d.*.trap family.  Every record takes the surface operand $s and a
// single coordinate $x; the helper classes differ only in the number of data
// registers that follow.  `opc` is the complete mnemonic, `rc` the register
// class of each data operand.  The pattern list is empty for all of them.
class SustB1DTrapV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r\\};"),
              []>;
class SustB1DTrapV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
              []>;
class SustB1DTrapV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_1D_B8_TRAP  : SustB1DTrapV1<"sust.b.1d.b8.trap", Int16Regs>;
def SUST_B_1D_B16_TRAP : SustB1DTrapV1<"sust.b.1d.b16.trap", Int16Regs>;
def SUST_B_1D_B32_TRAP : SustB1DTrapV1<"sust.b.1d.b32.trap", Int32Regs>;
def SUST_B_1D_B64_TRAP : SustB1DTrapV1<"sust.b.1d.b64.trap", Int64Regs>;

// Two-element vector stores.
def SUST_B_1D_V2B8_TRAP  : SustB1DTrapV2<"sust.b.1d.v2.b8.trap", Int16Regs>;
def SUST_B_1D_V2B16_TRAP : SustB1DTrapV2<"sust.b.1d.v2.b16.trap", Int16Regs>;
def SUST_B_1D_V2B32_TRAP : SustB1DTrapV2<"sust.b.1d.v2.b32.trap", Int32Regs>;
def SUST_B_1D_V2B64_TRAP : SustB1DTrapV2<"sust.b.1d.v2.b64.trap", Int64Regs>;

// Four-element vector stores.
def SUST_B_1D_V4B8_TRAP  : SustB1DTrapV4<"sust.b.1d.v4.b8.trap", Int16Regs>;
def SUST_B_1D_V4B16_TRAP : SustB1DTrapV4<"sust.b.1d.v4.b16.trap", Int16Regs>;
def SUST_B_1D_V4B32_TRAP : SustB1DTrapV4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4772
4773
// sust.b.a1d.*.trap family.  Every record takes the surface operand $s and
// the coordinates {$idx, $x}; the helper classes differ only in the number of
// data registers that follow.  `opc` is the complete mnemonic, `rc` the
// register class of each data operand.  The pattern list is empty for all.
class SustB1DArrayTrapV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
              []>;
class SustB1DArrayTrapV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
              []>;
class SustB1DArrayTrapV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_1D_ARRAY_B8_TRAP
  : SustB1DArrayTrapV1<"sust.b.a1d.b8.trap", Int16Regs>;
def SUST_B_1D_ARRAY_B16_TRAP
  : SustB1DArrayTrapV1<"sust.b.a1d.b16.trap", Int16Regs>;
def SUST_B_1D_ARRAY_B32_TRAP
  : SustB1DArrayTrapV1<"sust.b.a1d.b32.trap", Int32Regs>;
def SUST_B_1D_ARRAY_B64_TRAP
  : SustB1DArrayTrapV1<"sust.b.a1d.b64.trap", Int64Regs>;

// Two-element vector stores.
def SUST_B_1D_ARRAY_V2B8_TRAP
  : SustB1DArrayTrapV2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_TRAP
  : SustB1DArrayTrapV2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_TRAP
  : SustB1DArrayTrapV2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
def SUST_B_1D_ARRAY_V2B64_TRAP
  : SustB1DArrayTrapV2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

// Four-element vector stores.
def SUST_B_1D_ARRAY_V4B8_TRAP
  : SustB1DArrayTrapV4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
def SUST_B_1D_ARRAY_V4B16_TRAP
  : SustB1DArrayTrapV4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
def SUST_B_1D_ARRAY_V4B32_TRAP
  : SustB1DArrayTrapV4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4839
4840
// sust.b.2d.*.trap family.  Every record takes the surface operand $s and
// the coordinate pair {$x, $y}; the helper classes differ only in the number
// of data registers that follow.  `opc` is the complete mnemonic, `rc` the
// register class of each data operand.  The pattern list is empty for all.
class SustB2DTrapV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
              []>;
class SustB2DTrapV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
              []>;
class SustB2DTrapV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_2D_B8_TRAP  : SustB2DTrapV1<"sust.b.2d.b8.trap", Int16Regs>;
def SUST_B_2D_B16_TRAP : SustB2DTrapV1<"sust.b.2d.b16.trap", Int16Regs>;
def SUST_B_2D_B32_TRAP : SustB2DTrapV1<"sust.b.2d.b32.trap", Int32Regs>;
def SUST_B_2D_B64_TRAP : SustB2DTrapV1<"sust.b.2d.b64.trap", Int64Regs>;

// Two-element vector stores.
def SUST_B_2D_V2B8_TRAP  : SustB2DTrapV2<"sust.b.2d.v2.b8.trap", Int16Regs>;
def SUST_B_2D_V2B16_TRAP : SustB2DTrapV2<"sust.b.2d.v2.b16.trap", Int16Regs>;
def SUST_B_2D_V2B32_TRAP : SustB2DTrapV2<"sust.b.2d.v2.b32.trap", Int32Regs>;
def SUST_B_2D_V2B64_TRAP : SustB2DTrapV2<"sust.b.2d.v2.b64.trap", Int64Regs>;

// Four-element vector stores.
def SUST_B_2D_V4B8_TRAP  : SustB2DTrapV4<"sust.b.2d.v4.b8.trap", Int16Regs>;
def SUST_B_2D_V4B16_TRAP : SustB2DTrapV4<"sust.b.2d.v4.b16.trap", Int16Regs>;
def SUST_B_2D_V4B32_TRAP : SustB2DTrapV4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4906
4907
// sust.b.a2d.*.trap family.  Operands are the surface $s, the coordinates
// $idx, $x, $y, and one, two or four data registers of class `rc`; `opc` is
// the complete mnemonic.  The emitted coordinate vector has four slots and
// the last one repeats $y.
// NOTE(review): the repeated $y is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB2DArrayTrapV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
              []>;
class SustB2DArrayTrapV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB2DArrayTrapV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_2D_ARRAY_B8_TRAP
  : SustB2DArrayTrapV1<"sust.b.a2d.b8.trap", Int16Regs>;
def SUST_B_2D_ARRAY_B16_TRAP
  : SustB2DArrayTrapV1<"sust.b.a2d.b16.trap", Int16Regs>;
def SUST_B_2D_ARRAY_B32_TRAP
  : SustB2DArrayTrapV1<"sust.b.a2d.b32.trap", Int32Regs>;
def SUST_B_2D_ARRAY_B64_TRAP
  : SustB2DArrayTrapV1<"sust.b.a2d.b64.trap", Int64Regs>;

// Two-element vector stores.
def SUST_B_2D_ARRAY_V2B8_TRAP
  : SustB2DArrayTrapV2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_TRAP
  : SustB2DArrayTrapV2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_TRAP
  : SustB2DArrayTrapV2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_TRAP
  : SustB2DArrayTrapV2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

// Four-element vector stores.
def SUST_B_2D_ARRAY_V4B8_TRAP
  : SustB2DArrayTrapV4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_TRAP
  : SustB2DArrayTrapV4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_TRAP
  : SustB2DArrayTrapV4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4981
4982
// sust.b.3d.*.trap family.  Operands are the surface $s, the coordinates
// $x, $y, $z, and one, two or four data registers of class `rc`; `opc` is the
// complete mnemonic.  The emitted coordinate vector has four slots and the
// last one repeats $z.
// NOTE(review): the repeated $z is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB3DTrapV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
              []>;
class SustB3DTrapV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB3DTrapV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_3D_B8_TRAP  : SustB3DTrapV1<"sust.b.3d.b8.trap", Int16Regs>;
def SUST_B_3D_B16_TRAP : SustB3DTrapV1<"sust.b.3d.b16.trap", Int16Regs>;
def SUST_B_3D_B32_TRAP : SustB3DTrapV1<"sust.b.3d.b32.trap", Int32Regs>;
def SUST_B_3D_B64_TRAP : SustB3DTrapV1<"sust.b.3d.b64.trap", Int64Regs>;

// Two-element vector stores.
def SUST_B_3D_V2B8_TRAP  : SustB3DTrapV2<"sust.b.3d.v2.b8.trap", Int16Regs>;
def SUST_B_3D_V2B16_TRAP : SustB3DTrapV2<"sust.b.3d.v2.b16.trap", Int16Regs>;
def SUST_B_3D_V2B32_TRAP : SustB3DTrapV2<"sust.b.3d.v2.b32.trap", Int32Regs>;
def SUST_B_3D_V2B64_TRAP : SustB3DTrapV2<"sust.b.3d.v2.b64.trap", Int64Regs>;

// Four-element vector stores.
def SUST_B_3D_V4B8_TRAP  : SustB3DTrapV4<"sust.b.3d.v4.b8.trap", Int16Regs>;
def SUST_B_3D_V4B16_TRAP : SustB3DTrapV4<"sust.b.3d.v4.b16.trap", Int16Regs>;
def SUST_B_3D_V4B32_TRAP : SustB3DTrapV4<"sust.b.3d.v4.b32.trap", Int32Regs>;
5056
5057
5058 // .zero variant
// sust.b.1d.*.zero family.  Every record takes the surface operand $s and a
// single coordinate $x; the helper classes differ only in the number of data
// registers that follow.  `opc` is the complete mnemonic, `rc` the register
// class of each data operand.  The pattern list is empty for all of them.
class SustB1DZeroV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r\\};"),
              []>;
class SustB1DZeroV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
              []>;
class SustB1DZeroV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_1D_B8_ZERO  : SustB1DZeroV1<"sust.b.1d.b8.zero", Int16Regs>;
def SUST_B_1D_B16_ZERO : SustB1DZeroV1<"sust.b.1d.b16.zero", Int16Regs>;
def SUST_B_1D_B32_ZERO : SustB1DZeroV1<"sust.b.1d.b32.zero", Int32Regs>;
def SUST_B_1D_B64_ZERO : SustB1DZeroV1<"sust.b.1d.b64.zero", Int64Regs>;

// Two-element vector stores.
def SUST_B_1D_V2B8_ZERO  : SustB1DZeroV2<"sust.b.1d.v2.b8.zero", Int16Regs>;
def SUST_B_1D_V2B16_ZERO : SustB1DZeroV2<"sust.b.1d.v2.b16.zero", Int16Regs>;
def SUST_B_1D_V2B32_ZERO : SustB1DZeroV2<"sust.b.1d.v2.b32.zero", Int32Regs>;
def SUST_B_1D_V2B64_ZERO : SustB1DZeroV2<"sust.b.1d.v2.b64.zero", Int64Regs>;

// Four-element vector stores.
def SUST_B_1D_V4B8_ZERO  : SustB1DZeroV4<"sust.b.1d.v4.b8.zero", Int16Regs>;
def SUST_B_1D_V4B16_ZERO : SustB1DZeroV4<"sust.b.1d.v4.b16.zero", Int16Regs>;
def SUST_B_1D_V4B32_ZERO : SustB1DZeroV4<"sust.b.1d.v4.b32.zero", Int32Regs>;
5117
5118
// sust.b.a1d.*.zero family.  Every record takes the surface operand $s and
// the coordinates {$idx, $x}; the helper classes differ only in the number of
// data registers that follow.  `opc` is the complete mnemonic, `rc` the
// register class of each data operand.  The pattern list is empty for all.
class SustB1DArrayZeroV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
              []>;
class SustB1DArrayZeroV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
              []>;
class SustB1DArrayZeroV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$idx, $x\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_1D_ARRAY_B8_ZERO
  : SustB1DArrayZeroV1<"sust.b.a1d.b8.zero", Int16Regs>;
def SUST_B_1D_ARRAY_B16_ZERO
  : SustB1DArrayZeroV1<"sust.b.a1d.b16.zero", Int16Regs>;
def SUST_B_1D_ARRAY_B32_ZERO
  : SustB1DArrayZeroV1<"sust.b.a1d.b32.zero", Int32Regs>;
def SUST_B_1D_ARRAY_B64_ZERO
  : SustB1DArrayZeroV1<"sust.b.a1d.b64.zero", Int64Regs>;

// Two-element vector stores.
def SUST_B_1D_ARRAY_V2B8_ZERO
  : SustB1DArrayZeroV2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_ZERO
  : SustB1DArrayZeroV2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_ZERO
  : SustB1DArrayZeroV2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
def SUST_B_1D_ARRAY_V2B64_ZERO
  : SustB1DArrayZeroV2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

// Four-element vector stores.
def SUST_B_1D_ARRAY_V4B8_ZERO
  : SustB1DArrayZeroV4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
def SUST_B_1D_ARRAY_V4B16_ZERO
  : SustB1DArrayZeroV4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
def SUST_B_1D_ARRAY_V4B32_ZERO
  : SustB1DArrayZeroV4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
5184
5185
// sust.b.2d.*.zero family.  Every record takes the surface operand $s and
// the coordinate pair {$x, $y}; the helper classes differ only in the number
// of data registers that follow.  `opc` is the complete mnemonic, `rc` the
// register class of each data operand.  The pattern list is empty for all.
class SustB2DZeroV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
              []>;
class SustB2DZeroV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
              []>;
class SustB2DZeroV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x, $y\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_2D_B8_ZERO  : SustB2DZeroV1<"sust.b.2d.b8.zero", Int16Regs>;
def SUST_B_2D_B16_ZERO : SustB2DZeroV1<"sust.b.2d.b16.zero", Int16Regs>;
def SUST_B_2D_B32_ZERO : SustB2DZeroV1<"sust.b.2d.b32.zero", Int32Regs>;
def SUST_B_2D_B64_ZERO : SustB2DZeroV1<"sust.b.2d.b64.zero", Int64Regs>;

// Two-element vector stores.
def SUST_B_2D_V2B8_ZERO  : SustB2DZeroV2<"sust.b.2d.v2.b8.zero", Int16Regs>;
def SUST_B_2D_V2B16_ZERO : SustB2DZeroV2<"sust.b.2d.v2.b16.zero", Int16Regs>;
def SUST_B_2D_V2B32_ZERO : SustB2DZeroV2<"sust.b.2d.v2.b32.zero", Int32Regs>;
def SUST_B_2D_V2B64_ZERO : SustB2DZeroV2<"sust.b.2d.v2.b64.zero", Int64Regs>;

// Four-element vector stores.
def SUST_B_2D_V4B8_ZERO  : SustB2DZeroV4<"sust.b.2d.v4.b8.zero", Int16Regs>;
def SUST_B_2D_V4B16_ZERO : SustB2DZeroV4<"sust.b.2d.v4.b16.zero", Int16Regs>;
def SUST_B_2D_V4B32_ZERO : SustB2DZeroV4<"sust.b.2d.v4.b32.zero", Int32Regs>;
5251
5252
// sust.b.a2d.*.zero family.  Operands are the surface $s, the coordinates
// $idx, $x, $y, and one, two or four data registers of class `rc`; `opc` is
// the complete mnemonic.  The emitted coordinate vector has four slots and
// the last one repeats $y.
// NOTE(review): the repeated $y is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB2DArrayZeroV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
              []>;
class SustB2DArrayZeroV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB2DArrayZeroV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$idx, $x, $y, $y\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_2D_ARRAY_B8_ZERO
  : SustB2DArrayZeroV1<"sust.b.a2d.b8.zero", Int16Regs>;
def SUST_B_2D_ARRAY_B16_ZERO
  : SustB2DArrayZeroV1<"sust.b.a2d.b16.zero", Int16Regs>;
def SUST_B_2D_ARRAY_B32_ZERO
  : SustB2DArrayZeroV1<"sust.b.a2d.b32.zero", Int32Regs>;
def SUST_B_2D_ARRAY_B64_ZERO
  : SustB2DArrayZeroV1<"sust.b.a2d.b64.zero", Int64Regs>;

// Two-element vector stores.
def SUST_B_2D_ARRAY_V2B8_ZERO
  : SustB2DArrayZeroV2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
def SUST_B_2D_ARRAY_V2B16_ZERO
  : SustB2DArrayZeroV2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
def SUST_B_2D_ARRAY_V2B32_ZERO
  : SustB2DArrayZeroV2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
def SUST_B_2D_ARRAY_V2B64_ZERO
  : SustB2DArrayZeroV2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

// Four-element vector stores.
def SUST_B_2D_ARRAY_V4B8_ZERO
  : SustB2DArrayZeroV4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
def SUST_B_2D_ARRAY_V4B16_ZERO
  : SustB2DArrayZeroV4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
def SUST_B_2D_ARRAY_V4B32_ZERO
  : SustB2DArrayZeroV4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
5326
5327
// sust.b.3d.*.zero family.  Operands are the surface $s, the coordinates
// $x, $y, $z, and one, two or four data registers of class `rc`; `opc` is the
// complete mnemonic.  The emitted coordinate vector has four slots and the
// last one repeats $z.
// NOTE(review): the repeated $z is presumably filler for a coordinate element
// that PTX ignores -- confirm against the PTX ISA description of sust.
// The pattern list is empty for all of these records.
class SustB3DZeroV1<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
              []>;
class SustB3DZeroV2<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g\\};"),
              []>;
class SustB3DZeroV4<string opc, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat(opc, " \t[$s, \\{$x, $y, $z, $z\\}], "
                              "\\{$r, $g, $b, $a\\};"),
              []>;

// Scalar stores (b8 and b16 data both use Int16Regs).
def SUST_B_3D_B8_ZERO  : SustB3DZeroV1<"sust.b.3d.b8.zero", Int16Regs>;
def SUST_B_3D_B16_ZERO : SustB3DZeroV1<"sust.b.3d.b16.zero", Int16Regs>;
def SUST_B_3D_B32_ZERO : SustB3DZeroV1<"sust.b.3d.b32.zero", Int32Regs>;
def SUST_B_3D_B64_ZERO : SustB3DZeroV1<"sust.b.3d.b64.zero", Int64Regs>;

// Two-element vector stores.
def SUST_B_3D_V2B8_ZERO  : SustB3DZeroV2<"sust.b.3d.v2.b8.zero", Int16Regs>;
def SUST_B_3D_V2B16_ZERO : SustB3DZeroV2<"sust.b.3d.v2.b16.zero", Int16Regs>;
def SUST_B_3D_V2B32_ZERO : SustB3DZeroV2<"sust.b.3d.v2.b32.zero", Int32Regs>;
def SUST_B_3D_V2B64_ZERO : SustB3DZeroV2<"sust.b.3d.v2.b64.zero", Int64Regs>;

// Four-element vector stores.
def SUST_B_3D_V4B8_ZERO  : SustB3DZeroV4<"sust.b.3d.v4.b8.zero", Int16Regs>;
def SUST_B_3D_V4B16_ZERO : SustB3DZeroV4<"sust.b.3d.v4.b16.zero", Int16Regs>;
def SUST_B_3D_V4B32_ZERO : SustB3DZeroV4<"sust.b.3d.v4.b32.zero", Int32Regs>;
5401
5402
5403
5404 // Formatted surface stores (sust.p.*)
5405
// "sust.p.1d.*.trap" records: b8/b16/b32 scalars, then v2 and v4 vectors.
// Inputs are the 64-bit $s operand (presumably the surface handle -- TODO
// confirm), one 32-bit coordinate $x, and the value register(s) $r[, $g[, $b,
// $a]]. There are no outputs and the selection pattern list is empty ([]).
// 8-bit and 16-bit element values both use Int16Regs.
5406 def SUST_P_1D_B8_TRAP
5407 : NVPTXInst<(outs),
5408 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5409 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5410 []>;
5411 def SUST_P_1D_B16_TRAP
5412 : NVPTXInst<(outs),
5413 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5414 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5415 []>;
5416 def SUST_P_1D_B32_TRAP
5417 : NVPTXInst<(outs),
5418 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5419 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5420 []>;
// Two-element vector stores.
5421 def SUST_P_1D_V2B8_TRAP
5422 : NVPTXInst<(outs),
5423 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5424 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5425 []>;
5426 def SUST_P_1D_V2B16_TRAP
5427 : NVPTXInst<(outs),
5428 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5429 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5430 []>;
5431 def SUST_P_1D_V2B32_TRAP
5432 : NVPTXInst<(outs),
5433 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5434 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5435 []>;
// Four-element vector stores.
5436 def SUST_P_1D_V4B8_TRAP
5437 : NVPTXInst<(outs),
5438 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5439 Int16Regs:$b, Int16Regs:$a),
5440 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5441 []>;
5442 def SUST_P_1D_V4B16_TRAP
5443 : NVPTXInst<(outs),
5444 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5445 Int16Regs:$b, Int16Regs:$a),
5446 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5447 []>;
5448 def SUST_P_1D_V4B32_TRAP
5449 : NVPTXInst<(outs),
5450 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5451 Int32Regs:$b, Int32Regs:$a),
5452 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5453 []>;
5454
5455
// "sust.p.a1d.*.trap" records (1D array geometry): b8/b16/b32 scalars, then
// v2 and v4 vectors. Compared with the plain 1D records these take an extra
// 32-bit $idx operand, which is emitted first in the coordinate vector
// ({$idx, $x}); presumably it selects the array layer -- TODO confirm.
// No outputs; the selection pattern list is empty ([]) on every record.
5456 def SUST_P_1D_ARRAY_B8_TRAP
5457 : NVPTXInst<(outs),
5458 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5459 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5460 []>;
5461 def SUST_P_1D_ARRAY_B16_TRAP
5462 : NVPTXInst<(outs),
5463 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5464 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5465 []>;
5466 def SUST_P_1D_ARRAY_B32_TRAP
5467 : NVPTXInst<(outs),
5468 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5469 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5470 []>;
// Two-element vector stores.
5471 def SUST_P_1D_ARRAY_V2B8_TRAP
5472 : NVPTXInst<(outs),
5473 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5474 Int16Regs:$g),
5475 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5476 []>;
5477 def SUST_P_1D_ARRAY_V2B16_TRAP
5478 : NVPTXInst<(outs),
5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5480 Int16Regs:$g),
5481 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5482 []>;
5483 def SUST_P_1D_ARRAY_V2B32_TRAP
5484 : NVPTXInst<(outs),
5485 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5486 Int32Regs:$g),
5487 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5488 []>;
// Four-element vector stores.
5489 def SUST_P_1D_ARRAY_V4B8_TRAP
5490 : NVPTXInst<(outs),
5491 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5492 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5493 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5494 "\\{$r, $g, $b, $a\\};",
5495 []>;
5496 def SUST_P_1D_ARRAY_V4B16_TRAP
5497 : NVPTXInst<(outs),
5498 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5499 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5500 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5501 "\\{$r, $g, $b, $a\\};",
5502 []>;
5503 def SUST_P_1D_ARRAY_V4B32_TRAP
5504 : NVPTXInst<(outs),
5505 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5506 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5507 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5508 "\\{$r, $g, $b, $a\\};",
5509 []>;
5510
5511
// "sust.p.2d.*.trap" records: b8/b16/b32 scalars, then v2 and v4 vectors.
// Inputs are the 64-bit $s operand (presumably the surface handle -- TODO
// confirm), two 32-bit coordinates ($x, $y) emitted as {$x, $y}, and the value
// register(s). No outputs; the selection pattern list is empty ([]).
5512 def SUST_P_2D_B8_TRAP
5513 : NVPTXInst<(outs),
5514 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5515 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5516 []>;
5517 def SUST_P_2D_B16_TRAP
5518 : NVPTXInst<(outs),
5519 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5520 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5521 []>;
5522 def SUST_P_2D_B32_TRAP
5523 : NVPTXInst<(outs),
5524 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5525 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5526 []>;
// Two-element vector stores.
5527 def SUST_P_2D_V2B8_TRAP
5528 : NVPTXInst<(outs),
5529 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5530 Int16Regs:$g),
5531 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5532 []>;
5533 def SUST_P_2D_V2B16_TRAP
5534 : NVPTXInst<(outs),
5535 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5536 Int16Regs:$g),
5537 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5538 []>;
5539 def SUST_P_2D_V2B32_TRAP
5540 : NVPTXInst<(outs),
5541 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5542 Int32Regs:$g),
5543 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5544 []>;
// Four-element vector stores.
5545 def SUST_P_2D_V4B8_TRAP
5546 : NVPTXInst<(outs),
5547 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5548 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5549 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5550 "\\{$r, $g, $b, $a\\};",
5551 []>;
5552 def SUST_P_2D_V4B16_TRAP
5553 : NVPTXInst<(outs),
5554 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5555 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5556 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5557 "\\{$r, $g, $b, $a\\};",
5558 []>;
5559 def SUST_P_2D_V4B32_TRAP
5560 : NVPTXInst<(outs),
5561 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5562 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5563 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5564 "\\{$r, $g, $b, $a\\};",
5565 []>;
5566
5567
// "sust.p.a2d.*.trap" records (2D array geometry): b8/b16/b32 scalars, then
// v2 and v4 vectors. Inputs are the 64-bit $s operand, a 32-bit $idx operand
// (presumably the array layer -- TODO confirm), two 32-bit coordinates and the
// value register(s). The asm string emits a four-element coordinate vector
// {$idx, $x, $y, $y}, i.e. $y is repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element vector for a2d geometry
// and ignores the last element -- confirm against the PTX ISA.
// No outputs; the selection pattern list is empty ([]) on every record.
5568 def SUST_P_2D_ARRAY_B8_TRAP
5569 : NVPTXInst<(outs),
5570 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5571 Int16Regs:$r),
5572 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5573 []>;
5574 def SUST_P_2D_ARRAY_B16_TRAP
5575 : NVPTXInst<(outs),
5576 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5577 Int16Regs:$r),
5578 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5579 []>;
5580 def SUST_P_2D_ARRAY_B32_TRAP
5581 : NVPTXInst<(outs),
5582 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5583 Int32Regs:$r),
5584 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5585 []>;
// Two-element vector stores.
5586 def SUST_P_2D_ARRAY_V2B8_TRAP
5587 : NVPTXInst<(outs),
5588 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5589 Int16Regs:$r, Int16Regs:$g),
5590 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5591 "\\{$r, $g\\};",
5592 []>;
5593 def SUST_P_2D_ARRAY_V2B16_TRAP
5594 : NVPTXInst<(outs),
5595 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5596 Int16Regs:$r, Int16Regs:$g),
5597 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5598 "\\{$r, $g\\};",
5599 []>;
5600 def SUST_P_2D_ARRAY_V2B32_TRAP
5601 : NVPTXInst<(outs),
5602 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5603 Int32Regs:$r, Int32Regs:$g),
5604 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5605 "\\{$r, $g\\};",
5606 []>;
// Four-element vector stores.
5607 def SUST_P_2D_ARRAY_V4B8_TRAP
5608 : NVPTXInst<(outs),
5609 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5610 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5611 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5612 "\\{$r, $g, $b, $a\\};",
5613 []>;
5614 def SUST_P_2D_ARRAY_V4B16_TRAP
5615 : NVPTXInst<(outs),
5616 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5617 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5618 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5619 "\\{$r, $g, $b, $a\\};",
5620 []>;
5621 def SUST_P_2D_ARRAY_V4B32_TRAP
5622 : NVPTXInst<(outs),
5623 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5624 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5625 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5626 "\\{$r, $g, $b, $a\\};",
5627 []>;
5628
5629
// "sust.p.3d.*.trap" records: b8/b16/b32 scalars, then v2 and v4 vectors.
// Inputs are the 64-bit $s operand (presumably the surface handle -- TODO
// confirm), three 32-bit coordinates ($x, $y, $z) and the value register(s).
// The asm string emits a four-element coordinate vector {$x, $y, $z, $z},
// i.e. $z is repeated in the last slot.
// NOTE(review): presumably PTX requires a 4-element vector for 3d geometry and
// ignores the last element -- confirm against the PTX ISA.
// No outputs; the selection pattern list is empty ([]) on every record.
5630 def SUST_P_3D_B8_TRAP
5631 : NVPTXInst<(outs),
5632 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5633 Int16Regs:$r),
5634 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5635 []>;
5636 def SUST_P_3D_B16_TRAP
5637 : NVPTXInst<(outs),
5638 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5639 Int16Regs:$r),
5640 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5641 []>;
5642 def SUST_P_3D_B32_TRAP
5643 : NVPTXInst<(outs),
5644 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5645 Int32Regs:$r),
5646 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5647 []>;
// Two-element vector stores.
5648 def SUST_P_3D_V2B8_TRAP
5649 : NVPTXInst<(outs),
5650 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5651 Int16Regs:$r, Int16Regs:$g),
5652 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5653 "\\{$r, $g\\};",
5654 []>;
5655 def SUST_P_3D_V2B16_TRAP
5656 : NVPTXInst<(outs),
5657 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5658 Int16Regs:$r, Int16Regs:$g),
5659 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5660 "\\{$r, $g\\};",
5661 []>;
5662 def SUST_P_3D_V2B32_TRAP
5663 : NVPTXInst<(outs),
5664 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5665 Int32Regs:$r, Int32Regs:$g),
5666 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5667 "\\{$r, $g\\};",
5668 []>;
// Four-element vector stores.
5669 def SUST_P_3D_V4B8_TRAP
5670 : NVPTXInst<(outs),
5671 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5672 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5673 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5674 "\\{$r, $g, $b, $a\\};",
5675 []>;
5676 def SUST_P_3D_V4B16_TRAP
5677 : NVPTXInst<(outs),
5678 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5679 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5680 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5681 "\\{$r, $g, $b, $a\\};",
5682 []>;
5683 def SUST_P_3D_V4B32_TRAP
5684 : NVPTXInst<(outs),
5685 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5686 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5687 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5688 "\\{$r, $g, $b, $a\\};",
5689 []>;
5690 }
5691
5692 // Surface store instruction patterns
5693 // These patterns are kept separate from the instruction definitions because
5694 // TableGen reports type errors when they are written inline (cause unknown).
5695
5696 // .clamp variant
// 1D sust.b ".clamp" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_1d_<type>_clamp node onto the SUST_B_1D_*_CLAMP instruction
// of the same element type, forwarding the operands unchanged and in the same
// order: $s, the coordinate $x, then the value register(s).
// i8 and i16 element values both use Int16Regs.
5697 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5698 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5699 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5700
5701 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5702 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5703 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5704
5705 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5706 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5707 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5708
5709 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5710 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5711 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5712
// Two-element vector forms ($r, $g).
5713 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5714 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5715 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5716 Int16Regs:$r, Int16Regs:$g)>;
5717
5718 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5719 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5720 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5721 Int16Regs:$r, Int16Regs:$g)>;
5722
5723 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5724 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5725 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5726 Int32Regs:$r, Int32Regs:$g)>;
5727
5728 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5729 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5730 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
5731 Int64Regs:$r, Int64Regs:$g)>;
5732
// Four-element vector forms ($r, $g, $b, $a).
5733 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5734 Int64Regs:$s, Int32Regs:$x,
5735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5736 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
5737 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5738
5739 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5740 Int64Regs:$s, Int32Regs:$x,
5741 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5742 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
5743 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5744
5745 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5746 Int64Regs:$s, Int32Regs:$x,
5747 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5748 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
5749 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5750
5751
5752
// 1D-array sust.b ".clamp" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_1d_array_<type>_clamp node onto the matching
// SUST_B_1D_ARRAY_*_CLAMP instruction, forwarding the operands unchanged:
// $s, the 32-bit $l operand (presumably the array layer -- TODO confirm), the
// coordinate $x, then the value register(s).
5753 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5754 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5755 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5756 Int16Regs:$r)>;
5757
5758 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5759 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5760 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5761 Int16Regs:$r)>;
5762
5763 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5764 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5765 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5766 Int32Regs:$r)>;
5767
5768 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5769 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5770 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5771 Int64Regs:$r)>;
5772
// Two-element vector forms ($r, $g).
5773 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5774 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5775 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5776 Int16Regs:$r, Int16Regs:$g)>;
5777
5778 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5779 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5780 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5781 Int16Regs:$r, Int16Regs:$g)>;
5782
5783 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5784 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5785 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5786 Int32Regs:$r, Int32Regs:$g)>;
5787
5788 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5789 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5790 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5791 Int64Regs:$r, Int64Regs:$g)>;
5792
// Four-element vector forms ($r, $g, $b, $a).
5793 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5794 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5795 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5796 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5797 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5798
5799 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5800 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5801 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5802 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5803 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5804
5805 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5806 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5807 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5808 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5809 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5810
5811
5812
// 2D sust.b ".clamp" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_2d_<type>_clamp node onto the matching SUST_B_2D_*_CLAMP
// instruction, forwarding the operands unchanged: $s, the coordinates $x and
// $y, then the value register(s). i8 and i16 values both use Int16Regs.
5813 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5814 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5815 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5816 Int16Regs:$r)>;
5817
5818 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5819 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5820 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5821 Int16Regs:$r)>;
5822
5823 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5824 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5825 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5826 Int32Regs:$r)>;
5827
5828 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5829 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5830 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5831 Int64Regs:$r)>;
5832
// Two-element vector forms ($r, $g).
5833 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5834 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5835 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5836 Int16Regs:$r, Int16Regs:$g)>;
5837
5838 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5839 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5840 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5841 Int16Regs:$r, Int16Regs:$g)>;
5842
5843 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5844 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5845 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5846 Int32Regs:$r, Int32Regs:$g)>;
5847
5848 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5849 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5850 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5851 Int64Regs:$r, Int64Regs:$g)>;
5852
// Four-element vector forms ($r, $g, $b, $a).
5853 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5854 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5856 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5857 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5858
5859 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5860 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5861 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5862 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5863 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5864
5865 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5866 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5867 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5868 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5869 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5870
5871
5872
// 2D-array sust.b ".clamp" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_2d_array_<type>_clamp node onto the matching
// SUST_B_2D_ARRAY_*_CLAMP instruction, forwarding the operands unchanged:
// $s, the 32-bit $l operand (presumably the array layer -- TODO confirm), the
// coordinates $x and $y, then the value register(s).
5873 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5875 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
5876 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5877 Int16Regs:$r)>;
5878
5879 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5880 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5881 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
5882 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5883 Int16Regs:$r)>;
5884
5885 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5886 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5887 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
5888 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5889 Int32Regs:$r)>;
5890
5891 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5892 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5893 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
5894 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5895 Int64Regs:$r)>;
5896
// Two-element vector forms ($r, $g).
5897 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5898 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5899 Int16Regs:$r, Int16Regs:$g),
5900 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
5901 Int32Regs:$x, Int32Regs:$y,
5902 Int16Regs:$r, Int16Regs:$g)>;
5903
5904 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5905 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5906 Int16Regs:$r, Int16Regs:$g),
5907 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
5908 Int32Regs:$x, Int32Regs:$y,
5909 Int16Regs:$r, Int16Regs:$g)>;
5910
5911 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5912 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5913 Int32Regs:$g),
5914 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5915 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5916
5917 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5918 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5919 Int64Regs:$g),
5920 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
5921 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5922
// Four-element vector forms ($r, $g, $b, $a).
5923 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5924 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5925 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5926 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
5927 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5928 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5929
5930 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5931 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5932 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5933 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
5934 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5935 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5936
5937 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5938 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5939 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5940 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
5941 Int32Regs:$x, Int32Regs:$y,
5942 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5943
5944
5945
// 3D sust.b ".clamp" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_3d_<type>_clamp node onto the matching SUST_B_3D_*_CLAMP
// instruction, forwarding the operands unchanged: $s, the coordinates $x, $y
// and $z, then the value register(s). i8 and i16 values both use Int16Regs.
5946 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5947 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5948 Int16Regs:$r),
5949 (SUST_B_3D_B8_CLAMP Int64Regs:$s,
5950 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5951 Int16Regs:$r)>;
5952
5953 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5954 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5955 Int16Regs:$r),
5956 (SUST_B_3D_B16_CLAMP Int64Regs:$s,
5957 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5958 Int16Regs:$r)>;
5959
5960 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5961 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5962 Int32Regs:$r),
5963 (SUST_B_3D_B32_CLAMP Int64Regs:$s,
5964 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5965 Int32Regs:$r)>;
5966
5967 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5968 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5969 Int64Regs:$r),
5970 (SUST_B_3D_B64_CLAMP Int64Regs:$s,
5971 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5972 Int64Regs:$r)>;
5973
// Two-element vector forms ($r, $g).
5974 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5975 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5976 Int16Regs:$r, Int16Regs:$g),
5977 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
5978 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5979 Int16Regs:$r, Int16Regs:$g)>;
5980
5981 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5982 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5983 Int16Regs:$r, Int16Regs:$g),
5984 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
5985 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5986 Int16Regs:$r, Int16Regs:$g)>;
5987
5988 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5989 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5990 Int32Regs:$r, Int32Regs:$g),
5991 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
5992 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5993 Int32Regs:$r, Int32Regs:$g)>;
5994
5995 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5996 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5997 Int64Regs:$r, Int64Regs:$g),
5998 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
5999 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6000 Int64Regs:$r, Int64Regs:$g)>;
6001
// Four-element vector forms ($r, $g, $b, $a).
6002 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
6003 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6004 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6005 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
6006 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6007 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6008
6009 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
6010 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6011 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6012 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
6013 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6014 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6015
6016 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
6017 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6018 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6019 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
6020 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6021 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6022
6023
6024 // .trap variant
// 1D sust.b ".trap" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_1d_<type>_trap node onto the SUST_B_1D_*_TRAP instruction of
// the same element type, forwarding the operands unchanged and in the same
// order: $s, the coordinate $x, then the value register(s).
// i8 and i16 element values both use Int16Regs.
6025 def : Pat<(int_nvvm_sust_b_1d_i8_trap
6026 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6027 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6028
6029 def : Pat<(int_nvvm_sust_b_1d_i16_trap
6030 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6031 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6032
6033 def : Pat<(int_nvvm_sust_b_1d_i32_trap
6034 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6035 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6036
6037 def : Pat<(int_nvvm_sust_b_1d_i64_trap
6038 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6039 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6040
// Two-element vector forms ($r, $g).
6041 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
6042 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6043 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6044 Int16Regs:$r, Int16Regs:$g)>;
6045
6046 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
6047 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6048 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6049 Int16Regs:$r, Int16Regs:$g)>;
6050
6051 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
6052 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6053 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6054 Int32Regs:$r, Int32Regs:$g)>;
6055
6056 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
6057 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6058 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
6059 Int64Regs:$r, Int64Regs:$g)>;
6060
// Four-element vector forms ($r, $g, $b, $a).
6061 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
6062 Int64Regs:$s, Int32Regs:$x,
6063 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6064 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6065 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6066
6067 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
6068 Int64Regs:$s, Int32Regs:$x,
6069 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6070 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6071 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6072
6073 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
6074 Int64Regs:$s, Int32Regs:$x,
6075 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6076 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6077 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6078
6079
6080
// 1D-array sust.b ".trap" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_1d_array_<type>_trap node onto the matching
// SUST_B_1D_ARRAY_*_TRAP instruction, forwarding the operands unchanged:
// $s, the 32-bit $l operand (presumably the array layer -- TODO confirm), the
// coordinate $x, then the value register(s).
6081 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
6082 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6083 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6084 Int16Regs:$r)>;
6085
6086 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
6087 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6088 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6089 Int16Regs:$r)>;
6090
6091 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
6092 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6093 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6094 Int32Regs:$r)>;
6095
6096 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
6097 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6098 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6099 Int64Regs:$r)>;
6100
// Two-element vector forms ($r, $g).
6101 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
6102 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6103 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6104 Int16Regs:$r, Int16Regs:$g)>;
6105
6106 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
6107 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6108 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6109 Int16Regs:$r, Int16Regs:$g)>;
6110
6111 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
6112 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6113 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6114 Int32Regs:$r, Int32Regs:$g)>;
6115
6116 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
6117 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6118 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6119 Int64Regs:$r, Int64Regs:$g)>;
6120
// Four-element vector forms ($r, $g, $b, $a).
6121 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
6122 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6123 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6124 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6125 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6126
6127 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
6128 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6129 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6130 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6131 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6132
6133 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
6134 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6135 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6136 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6137 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6138
6139
6140
// 2D sust.b ".trap" patterns. Each anonymous Pat maps an
// int_nvvm_sust_b_2d_<type>_trap node onto the matching SUST_B_2D_*_TRAP
// instruction, forwarding the operands unchanged: $s, the coordinates $x and
// $y, then the value register(s). i8 and i16 values both use Int16Regs.
6141 def : Pat<(int_nvvm_sust_b_2d_i8_trap
6142 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6143 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6144 Int16Regs:$r)>;
6145
6146 def : Pat<(int_nvvm_sust_b_2d_i16_trap
6147 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6148 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6149 Int16Regs:$r)>;
6150
6151 def : Pat<(int_nvvm_sust_b_2d_i32_trap
6152 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6153 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6154 Int32Regs:$r)>;
6155
6156 def : Pat<(int_nvvm_sust_b_2d_i64_trap
6157 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6158 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6159 Int64Regs:$r)>;
6160
// Two-element vector forms ($r, $g).
6161 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
6162 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6163 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6164 Int16Regs:$r, Int16Regs:$g)>;
6165
6166 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
6167 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6168 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6169 Int16Regs:$r, Int16Regs:$g)>;
6170
6171 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
6172 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6173 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6174 Int32Regs:$r, Int32Regs:$g)>;
6175
6176 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6178 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6179 Int64Regs:$r, Int64Regs:$g)>;
6180
// Four-element vector forms ($r, $g, $b, $a).
6181 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
6182 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6183 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6184 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6185 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6186
6187 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
6188 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6189 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6190 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6191 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6192
6193 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
6194 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6195 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6196 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6197 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6198
6199
6200
// Selection patterns: int_nvvm_sust_b_2d_array_*_trap ->
// SUST_B_2D_ARRAY_*_TRAP.
// Operand order on both sides is $s (Int64Regs), $l, $x, $y (all Int32Regs),
// then the value component(s) $r [, $g [, $b, $a]]; nothing is reordered.
// i8 and i16 values are both matched in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the instruction definitions (not in this chunk).
6201 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
6202 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6203 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
6204 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6205 Int16Regs:$r)>;
6206
6207 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
6208 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6209 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
6210 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6211 Int16Regs:$r)>;
6212
6213 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
6214 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6215 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
6216 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6217 Int32Regs:$r)>;
6218
6219 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
6220 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6221 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
6222 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6223 Int64Regs:$r)>;
6224
6225 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
6226 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6227 Int16Regs:$r, Int16Regs:$g),
6228 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6229 Int32Regs:$x, Int32Regs:$y,
6230 Int16Regs:$r, Int16Regs:$g)>;
6231
6232 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
6233 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6234 Int16Regs:$r, Int16Regs:$g),
6235 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6236 Int32Regs:$x, Int32Regs:$y,
6237 Int16Regs:$r, Int16Regs:$g)>;
6238
6239 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
6240 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6241 Int32Regs:$g),
6242 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6243 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6244
6245 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
6246 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6247 Int64Regs:$g),
6248 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
6249 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6250
6251 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
6252 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6253 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6254 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6255 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6256 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6257
6258 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
6259 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6260 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6261 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6262 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6263 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6264
6265 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
6266 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6267 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6268 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6269 Int32Regs:$x, Int32Regs:$y,
6270 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6271
6272
6273
// Selection patterns: int_nvvm_sust_b_3d_*_trap -> SUST_B_3D_*_TRAP.
// Operand order on both sides is $s (Int64Regs), the coordinates $x, $y, $z
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs and
// i64 uses Int64Regs.  No v4i64 form is defined here.
// NOTE(review): $s is presumably the surface handle -- confirm against the
// SUST_B_3D_* instruction definitions, which are outside this chunk.
6274 def : Pat<(int_nvvm_sust_b_3d_i8_trap
6275 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6276 Int16Regs:$r),
6277 (SUST_B_3D_B8_TRAP Int64Regs:$s,
6278 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6279 Int16Regs:$r)>;
6280
6281 def : Pat<(int_nvvm_sust_b_3d_i16_trap
6282 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6283 Int16Regs:$r),
6284 (SUST_B_3D_B16_TRAP Int64Regs:$s,
6285 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6286 Int16Regs:$r)>;
6287
6288 def : Pat<(int_nvvm_sust_b_3d_i32_trap
6289 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6290 Int32Regs:$r),
6291 (SUST_B_3D_B32_TRAP Int64Regs:$s,
6292 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6293 Int32Regs:$r)>;
6294
6295 def : Pat<(int_nvvm_sust_b_3d_i64_trap
6296 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6297 Int64Regs:$r),
6298 (SUST_B_3D_B64_TRAP Int64Regs:$s,
6299 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6300 Int64Regs:$r)>;
6301
6302 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
6303 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6304 Int16Regs:$r, Int16Regs:$g),
6305 (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
6306 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6307 Int16Regs:$r, Int16Regs:$g)>;
6308
6309 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
6310 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6311 Int16Regs:$r, Int16Regs:$g),
6312 (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
6313 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6314 Int16Regs:$r, Int16Regs:$g)>;
6315
6316 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
6317 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6318 Int32Regs:$r, Int32Regs:$g),
6319 (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
6320 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6321 Int32Regs:$r, Int32Regs:$g)>;
6322
6323 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
6324 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6325 Int64Regs:$r, Int64Regs:$g),
6326 (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
6327 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6328 Int64Regs:$r, Int64Regs:$g)>;
6329
6330 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6331 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6332 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6333 (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6334 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6335 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6336
6337 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6338 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6339 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6340 (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6341 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6342 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6343
6344 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6345 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6346 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6347 (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6348 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6349 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6350
6351
6352 // .zero variant
// Selection patterns: int_nvvm_sust_b_1d_*_zero -> SUST_B_1D_*_ZERO.
// Operand order on both sides is $s (Int64Regs), the coordinate $x
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs and
// i64 uses Int64Regs.  No v4i64 form is defined here.
// NOTE(review): the `_zero` suffix presumably selects the PTX `.zero`
// out-of-range mode -- confirm in the SUST_B_1D_*_ZERO asm strings, which
// are outside this chunk.
6353 def : Pat<(int_nvvm_sust_b_1d_i8_zero
6354 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6355 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6356
6357 def : Pat<(int_nvvm_sust_b_1d_i16_zero
6358 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6359 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6360
6361 def : Pat<(int_nvvm_sust_b_1d_i32_zero
6362 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6363 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6364
6365 def : Pat<(int_nvvm_sust_b_1d_i64_zero
6366 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6367 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6368
6369 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6370 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6371 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6372 Int16Regs:$r, Int16Regs:$g)>;
6373
6374 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6375 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6376 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6377 Int16Regs:$r, Int16Regs:$g)>;
6378
6379 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6380 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6381 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6382 Int32Regs:$r, Int32Regs:$g)>;
6383
6384 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6385 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6386 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6387 Int64Regs:$r, Int64Regs:$g)>;
6388
6389 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6390 Int64Regs:$s, Int32Regs:$x,
6391 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6392 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6393 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6394
6395 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6396 Int64Regs:$s, Int32Regs:$x,
6397 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6398 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6399 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6400
6401 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6402 Int64Regs:$s, Int32Regs:$x,
6403 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6404 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6405 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6406
6407
6408
// Selection patterns: int_nvvm_sust_b_1d_array_*_zero ->
// SUST_B_1D_ARRAY_*_ZERO.
// Operand order on both sides is $s (Int64Regs), $l, $x (Int32Regs), then
// the value component(s) $r [, $g [, $b, $a]]; nothing is reordered.
// i8 and i16 values are both matched in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definitions (not in this chunk).
6409 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6410 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6411 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6412 Int16Regs:$r)>;
6413
6414 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6415 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6416 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6417 Int16Regs:$r)>;
6418
6419 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6420 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6421 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6422 Int32Regs:$r)>;
6423
6424 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6425 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6426 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6427 Int64Regs:$r)>;
6428
6429 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6430 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6431 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6432 Int16Regs:$r, Int16Regs:$g)>;
6433
6434 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6435 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6436 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6437 Int16Regs:$r, Int16Regs:$g)>;
6438
6439 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6440 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6441 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6442 Int32Regs:$r, Int32Regs:$g)>;
6443
6444 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6445 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6446 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6447 Int64Regs:$r, Int64Regs:$g)>;
6448
6449 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6450 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6451 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6452 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6453 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6454
6455 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6456 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6457 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6458 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6459 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6460
6461 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6462 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6463 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6464 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6465 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6466
6467
6468
// Selection patterns: int_nvvm_sust_b_2d_*_zero -> SUST_B_2D_*_ZERO.
// Operand order on both sides is $s (Int64Regs), the coordinates $x, $y
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs and
// i64 uses Int64Regs.  No v4i64 form is defined here.
6469 def : Pat<(int_nvvm_sust_b_2d_i8_zero
6470 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6471 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6472 Int16Regs:$r)>;
6473
6474 def : Pat<(int_nvvm_sust_b_2d_i16_zero
6475 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6476 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6477 Int16Regs:$r)>;
6478
6479 def : Pat<(int_nvvm_sust_b_2d_i32_zero
6480 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6481 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6482 Int32Regs:$r)>;
6483
6484 def : Pat<(int_nvvm_sust_b_2d_i64_zero
6485 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6486 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6487 Int64Regs:$r)>;
6488
6489 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6490 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6491 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6492 Int16Regs:$r, Int16Regs:$g)>;
6493
6494 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6495 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6496 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6497 Int16Regs:$r, Int16Regs:$g)>;
6498
6499 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6500 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6501 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6502 Int32Regs:$r, Int32Regs:$g)>;
6503
6504 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6505 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6506 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6507 Int64Regs:$r, Int64Regs:$g)>;
6508
6509 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6510 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6511 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6512 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6513 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6514
6515 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6516 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6517 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6518 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6519 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6520
6521 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6522 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6523 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6524 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6525 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6526
6527
6528
// Selection patterns: int_nvvm_sust_b_2d_array_*_zero ->
// SUST_B_2D_ARRAY_*_ZERO.
// Operand order on both sides is $s (Int64Regs), $l, $x, $y (all Int32Regs),
// then the value component(s) $r [, $g [, $b, $a]]; nothing is reordered.
// i8 and i16 values are both matched in Int16Regs.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definitions (not in this chunk).
6529 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6530 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6531 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6532 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6533 Int16Regs:$r)>;
6534
6535 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6536 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6537 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6538 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6539 Int16Regs:$r)>;
6540
6541 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6542 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6543 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6544 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6545 Int32Regs:$r)>;
6546
6547 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6548 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6549 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6550 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6551 Int64Regs:$r)>;
6552
6553 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6554 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6555 Int16Regs:$r, Int16Regs:$g),
6556 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6557 Int32Regs:$x, Int32Regs:$y,
6558 Int16Regs:$r, Int16Regs:$g)>;
6559
6560 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6561 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6562 Int16Regs:$r, Int16Regs:$g),
6563 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6564 Int32Regs:$x, Int32Regs:$y,
6565 Int16Regs:$r, Int16Regs:$g)>;
6566
6567 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6568 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6569 Int32Regs:$g),
6570 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6571 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6572
6573 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6574 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6575 Int64Regs:$g),
6576 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6577 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6578
6579 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6580 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6581 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6582 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6583 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6584 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6585
6586 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6587 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6588 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6589 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6590 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6591 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6592
6593 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6594 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6595 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6596 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6597 Int32Regs:$x, Int32Regs:$y,
6598 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6599
6600
6601
// Selection patterns: int_nvvm_sust_b_3d_*_zero -> SUST_B_3D_*_ZERO.
// Operand order on both sides is $s (Int64Regs), the coordinates $x, $y, $z
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs and
// i64 uses Int64Regs.  No v4i64 form is defined here.
6602 def : Pat<(int_nvvm_sust_b_3d_i8_zero
6603 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6604 Int16Regs:$r),
6605 (SUST_B_3D_B8_ZERO Int64Regs:$s,
6606 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6607 Int16Regs:$r)>;
6608
6609 def : Pat<(int_nvvm_sust_b_3d_i16_zero
6610 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6611 Int16Regs:$r),
6612 (SUST_B_3D_B16_ZERO Int64Regs:$s,
6613 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6614 Int16Regs:$r)>;
6615
6616 def : Pat<(int_nvvm_sust_b_3d_i32_zero
6617 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6618 Int32Regs:$r),
6619 (SUST_B_3D_B32_ZERO Int64Regs:$s,
6620 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6621 Int32Regs:$r)>;
6622
6623 def : Pat<(int_nvvm_sust_b_3d_i64_zero
6624 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6625 Int64Regs:$r),
6626 (SUST_B_3D_B64_ZERO Int64Regs:$s,
6627 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6628 Int64Regs:$r)>;
6629
6630 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6631 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632 Int16Regs:$r, Int16Regs:$g),
6633 (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6634 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635 Int16Regs:$r, Int16Regs:$g)>;
6636
6637 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6638 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639 Int16Regs:$r, Int16Regs:$g),
6640 (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6641 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642 Int16Regs:$r, Int16Regs:$g)>;
6643
6644 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6645 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646 Int32Regs:$r, Int32Regs:$g),
6647 (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6648 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649 Int32Regs:$r, Int32Regs:$g)>;
6650
6651 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6652 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6653 Int64Regs:$r, Int64Regs:$g),
6654 (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6655 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6656 Int64Regs:$r, Int64Regs:$g)>;
6657
6658 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6659 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6660 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6661 (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6662 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6663 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6664
6665 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6666 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6667 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6668 (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6669 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6670 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6671
6672 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6673 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6674 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6675 (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6676 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6677 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6678
6679
6680
6681
// Selection patterns: int_nvvm_sust_p_1d_*_trap -> SUST_P_1D_*_TRAP.
// Operand order on both sides is $s (Int64Regs), the coordinate $x
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs.
// Unlike the sust_b groups, no i64 / v2i64 forms are defined for sust_p here.
// NOTE(review): `sust_p` presumably maps to the PTX formatted store
// (sust.p) -- confirm in the SUST_P_* asm strings, outside this chunk.
6682 def : Pat<(int_nvvm_sust_p_1d_i8_trap
6683 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6684 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6685
6686 def : Pat<(int_nvvm_sust_p_1d_i16_trap
6687 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6688 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6689
6690 def : Pat<(int_nvvm_sust_p_1d_i32_trap
6691 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6692 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6693
6694 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6695 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6696 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6697 Int16Regs:$r, Int16Regs:$g)>;
6698
6699 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
6700 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6701 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
6702 Int16Regs:$r, Int16Regs:$g)>;
6703
6704 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
6705 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6706 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
6707 Int32Regs:$r, Int32Regs:$g)>;
6708
6709 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
6710 Int64Regs:$s, Int32Regs:$x,
6711 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6712 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
6713 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6714
6715 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
6716 Int64Regs:$s, Int32Regs:$x,
6717 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6718 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
6719 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6720
6721 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
6722 Int64Regs:$s, Int32Regs:$x,
6723 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6724 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
6725 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6726
6727
6728
// Selection patterns: int_nvvm_sust_p_1d_array_*_trap ->
// SUST_P_1D_ARRAY_*_TRAP.
// Operand order on both sides is $s (Int64Regs), $l, $x (Int32Regs), then
// the value component(s) $r [, $g [, $b, $a]]; nothing is reordered.
// i8 and i16 values are both matched in Int16Regs; no 64-bit value forms
// are defined for sust_p here.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definitions (not in this chunk).
6729 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
6730 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6731 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6732 Int16Regs:$r)>;
6733
6734 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
6735 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6736 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6737 Int16Regs:$r)>;
6738
6739 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
6740 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6741 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6742 Int32Regs:$r)>;
6743
6744 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
6745 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6746 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6747 Int16Regs:$r, Int16Regs:$g)>;
6748
6749 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
6750 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6751 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6752 Int16Regs:$r, Int16Regs:$g)>;
6753
6754 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
6755 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6756 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757 Int32Regs:$r, Int32Regs:$g)>;
6758
6759 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
6760 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6761 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6762 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6763 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6764
6765 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
6766 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6767 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6768 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6769 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6770
6771 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
6772 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6773 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6774 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6775 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6776
6777
6778
// Selection patterns: int_nvvm_sust_p_2d_*_trap -> SUST_P_2D_*_TRAP.
// Operand order on both sides is $s (Int64Regs), the coordinates $x, $y
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs.
// No i64 / v2i64 forms are defined for sust_p here.
6779 def : Pat<(int_nvvm_sust_p_2d_i8_trap
6780 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6781 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6782 Int16Regs:$r)>;
6783
6784 def : Pat<(int_nvvm_sust_p_2d_i16_trap
6785 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6786 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6787 Int16Regs:$r)>;
6788
6789 def : Pat<(int_nvvm_sust_p_2d_i32_trap
6790 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6791 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6792 Int32Regs:$r)>;
6793
6794 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
6795 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6796 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6797 Int16Regs:$r, Int16Regs:$g)>;
6798
6799 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
6800 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6801 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6802 Int16Regs:$r, Int16Regs:$g)>;
6803
6804 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
6805 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6806 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6807 Int32Regs:$r, Int32Regs:$g)>;
6808
6809 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
6810 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6811 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6812 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6813 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814
6815 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
6816 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6818 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6819 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6820
6821 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
6822 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6823 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6824 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6826
6827
6828
// Selection patterns: int_nvvm_sust_p_2d_array_*_trap ->
// SUST_P_2D_ARRAY_*_TRAP.
// Operand order on both sides is $s (Int64Regs), $l, $x, $y (all Int32Regs),
// then the value component(s) $r [, $g [, $b, $a]]; nothing is reordered.
// i8 and i16 values are both matched in Int16Regs; no 64-bit value forms
// are defined for sust_p here.
// NOTE(review): $l is presumably the array layer index -- confirm against
// the instruction definitions (not in this chunk).
6829 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
6830 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
6832 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6833 Int16Regs:$r)>;
6834
6835 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
6836 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6837 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
6838 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6839 Int16Regs:$r)>;
6840
6841 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
6842 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6843 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
6844 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6845 Int32Regs:$r)>;
6846
6847 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
6848 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6849 Int16Regs:$r, Int16Regs:$g),
6850 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
6851 Int32Regs:$x, Int32Regs:$y,
6852 Int16Regs:$r, Int16Regs:$g)>;
6853
6854 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
6855 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6856 Int16Regs:$r, Int16Regs:$g),
6857 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
6858 Int32Regs:$x, Int32Regs:$y,
6859 Int16Regs:$r, Int16Regs:$g)>;
6860
6861 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
6862 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6863 Int32Regs:$g),
6864 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
6865 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6866
6867 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
6868 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6869 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6870 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
6871 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6872 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6873
6874 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
6875 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6876 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6877 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
6878 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6879 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6880
6881 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
6882 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6883 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6884 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
6885 Int32Regs:$x, Int32Regs:$y,
6886 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6887
6888
6889
// Selection patterns: int_nvvm_sust_p_3d_*_trap -> SUST_P_3D_*_TRAP.
// Operand order on both sides is $s (Int64Regs), the coordinates $x, $y, $z
// (Int32Regs), then the value component(s) $r [, $g [, $b, $a]].
// i8 and i16 values are both matched in Int16Regs; i32 uses Int32Regs.
// No i64 / v2i64 forms are defined for sust_p here.
6890 def : Pat<(int_nvvm_sust_p_3d_i8_trap
6891 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6892 Int16Regs:$r),
6893 (SUST_P_3D_B8_TRAP Int64Regs:$s,
6894 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6895 Int16Regs:$r)>;
6896
6897 def : Pat<(int_nvvm_sust_p_3d_i16_trap
6898 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6899 Int16Regs:$r),
6900 (SUST_P_3D_B16_TRAP Int64Regs:$s,
6901 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6902 Int16Regs:$r)>;
6903
6904 def : Pat<(int_nvvm_sust_p_3d_i32_trap
6905 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6906 Int32Regs:$r),
6907 (SUST_P_3D_B32_TRAP Int64Regs:$s,
6908 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6909 Int32Regs:$r)>;
6910
6911 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
6912 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6913 Int16Regs:$r, Int16Regs:$g),
6914 (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
6915 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6916 Int16Regs:$r, Int16Regs:$g)>;
6917
6918 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
6919 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6920 Int16Regs:$r, Int16Regs:$g),
6921 (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
6922 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6923 Int16Regs:$r, Int16Regs:$g)>;
6924
6925 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
6926 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6927 Int32Regs:$r, Int32Regs:$g),
6928 (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
6929 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6930 Int32Regs:$r, Int32Regs:$g)>;
6931
6932 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
6933 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6934 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6935 (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
6936 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6937 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6938
6939 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6940 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6941 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6942 (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
6943 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6944 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6945
6946 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6947 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6948 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6949 (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
6950 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6951 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6952
6953
6954
6955 //===-- Old PTX Back-end Intrinsics ---------------------------------------===//
6956
6957 // These intrinsics are handled to retain compatibility with the old backend.
6958
6959 // PTX Special Purpose Register Accessor Intrinsics
6960
// Template for an instruction that reads a special register into a 64-bit
// register.
//   regname - register name without the leading '%' (the '%' is added here).
//   intop   - operand-less intrinsic that this instruction is selected for.
// The emitted assembly is "mov.u64\t$d, %<regname>;".
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d),
              (ins),
              !strconcat("mov.u64\t$d, %", !strconcat(regname, ";")),
              [(set Int64Regs:$d, (intop))]>;
6965
// Template for an instruction that reads a special register into a 32-bit
// register.
//   regname - register name without the leading '%' (the '%' is added here).
//   intop   - operand-less intrinsic that this instruction is selected for.
// The emitted assembly is "mov.u32\t$d, %<regname>;".
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d),
              (ins),
              !strconcat("mov.u32\t$d, %", !strconcat(regname, ";")),
              [(set Int32Regs:$d, (intop))]>;
6970
// TODO: Add vector versions of the special-register reads.
6972
// 32-bit reads of %tid.{x,y,z,w}, one instruction per component, each bound to
// the matching int_ptx_read_tid_* intrinsic.
def PTX_READ_TID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
def PTX_READ_TID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
def PTX_READ_TID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
def PTX_READ_TID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
6981
// 32-bit reads of %ntid.{x,y,z,w}, one instruction per component, each bound
// to the matching int_ptx_read_ntid_* intrinsic.
def PTX_READ_NTID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
def PTX_READ_NTID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
6990
// 32-bit reads of %laneid, %warpid and %nwarpid, each bound to the matching
// int_ptx_read_* intrinsic.
def PTX_READ_LANEID
  : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
def PTX_READ_WARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
def PTX_READ_NWARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
6997
// 32-bit reads of %ctaid.{x,y,z,w}, one instruction per component, each bound
// to the matching int_ptx_read_ctaid_* intrinsic.
def PTX_READ_CTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
7006
// 32-bit reads of %nctaid.{x,y,z,w}, one instruction per component, each bound
// to the matching int_ptx_read_nctaid_* intrinsic.
def PTX_READ_NCTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
7015
// 32-bit reads of %smid, %nsmid and %gridid, each bound to the matching
// int_ptx_read_* intrinsic.
def PTX_READ_SMID
  : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
def PTX_READ_NSMID
  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
def PTX_READ_GRIDID
  : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
7022
// 32-bit reads of the %lanemask_{eq,le,lt,ge,gt} registers, each bound to the
// matching int_ptx_read_lanemask_* intrinsic.
def PTX_READ_LANEMASK_EQ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq",
                                                         int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le",
                                                         int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt",
                                                         int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge",
                                                         int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt",
                                                         int_ptx_read_lanemask_gt>;
7033
// Reads of the clock registers: %clock goes through the 32-bit template
// (mov.u32) and %clock64 through the 64-bit template (mov.u64).
def PTX_READ_CLOCK   : PTX_READ_SPECIAL_REGISTER_R32<"clock",
                                                     int_ptx_read_clock>;
def PTX_READ_CLOCK64 : PTX_READ_SPECIAL_REGISTER_R64<"clock64",
                                                     int_ptx_read_clock64>;
7038
// 32-bit reads of %pm0 through %pm3, each bound to the matching
// int_ptx_read_pm* intrinsic.
def PTX_READ_PM0
  : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1
  : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2
  : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3
  : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
7043
7044 // PTX Parallel Synchronization and Communication Intrinsics
7045
// Selected for int_ptx_bar_sync when its operand is an immediate; the
// immediate $i is printed directly as the operand of bar.sync. The instruction
// defines no outputs.
def PTX_BAR_SYNC
  : NVPTXInst<(outs),
              (ins i32imm:$i),
              "bar.sync\t$i;",
              [(int_ptx_bar_sync imm:$i)]>;