pub mod llvm;
mod simd;

use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp, FunctionType};
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::common::{IntPredicate, span_invalid_monomorphization_error};
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods};
use rustc_middle::bug;
use rustc_middle::ty::{self, Instance, Ty};
use rustc_middle::ty::layout::LayoutOf;
use rustc_span::{Span, Symbol, symbol::kw, sym};
use rustc_target::abi::HasDataLayout;
use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
use rustc_target::spec::PanicStrategy;

use crate::abi::GccType;
use crate::builder::Builder;
use crate::common::{SignType, TypeReflection};
use crate::context::CodegenCx;
use crate::type_of::LayoutGccExt;
use crate::intrinsic::simd::generic_simd_intrinsic;

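/// Maps a Rust intrinsic symbol to the GCC builtin (or libm function) it lowers to directly;
/// returns `None` for intrinsics that need dedicated codegen in `codegen_intrinsic_call`.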
fn get_simple_intrinsic<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, name: Symbol) -> Option<Function<'gcc>> {
    let gcc_name = match name {
        sym::sqrtf32 => "sqrtf",
        sym::sqrtf64 => "sqrt",
        sym::powif32 => "__builtin_powif",
        sym::powif64 => "__builtin_powi",
        sym::sinf32 => "sinf",
        sym::sinf64 => "sin",
        sym::cosf32 => "cosf",
        sym::cosf64 => "cos",
        sym::powf32 => "powf",
        sym::powf64 => "pow",
        sym::expf32 => "expf",
        sym::expf64 => "exp",
        sym::exp2f32 => "exp2f",
        sym::exp2f64 => "exp2",
        sym::logf32 => "logf",
        sym::logf64 => "log",
        sym::log10f32 => "log10f",
        sym::log10f64 => "log10",
        sym::log2f32 => "log2f",
        sym::log2f64 => "log2",
        sym::fmaf32 => "fmaf",
        sym::fmaf64 => "fma",
        sym::fabsf32 => "fabsf",
        sym::fabsf64 => "fabs",
        sym::minnumf32 => "fminf",
        sym::minnumf64 => "fmin",
        sym::maxnumf32 => "fmaxf",
        sym::maxnumf64 => "fmax",
        sym::copysignf32 => "copysignf",
        sym::copysignf64 => "copysign",
        sym::floorf32 => "floorf",
        sym::floorf64 => "floor",
        sym::ceilf32 => "ceilf",
        sym::ceilf64 => "ceil",
        sym::truncf32 => "truncf",
        sym::truncf64 => "trunc",
        sym::rintf32 => "rintf",
        sym::rintf64 => "rint",
        sym::nearbyintf32 => "nearbyintf",
        sym::nearbyintf64 => "nearbyint",
        sym::roundf32 => "roundf",
        sym::roundf64 => "round",
        sym::abort => "abort",
        _ => return None,
    };
    Some(cx.context.get_builtin_function(&gcc_name))
}

impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
    fn codegen_intrinsic_call(&mut self, instance: Instance<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, args: &[OperandRef<'tcx, RValue<'gcc>>], llresult: RValue<'gcc>, span: Span) {
        let tcx = self.tcx;
        let callee_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());

        let (def_id, substs) = match *callee_ty.kind() {
            ty::FnDef(def_id, substs) => (def_id, substs),
            _ => bug!("expected fn item type, found {}", callee_ty),
        };

        let sig = callee_ty.fn_sig(tcx);
        let sig = tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), sig);
        let arg_tys = sig.inputs();
        let ret_ty = sig.output();
        let name = tcx.item_name(def_id);
        let name_str = name.as_str();

        let llret_ty = self.layout_of(ret_ty).gcc_type(self, true);
        let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);

        let simple = get_simple_intrinsic(self, name);
        let llval =
            match name {
                _ if simple.is_some() => {
                    // FIXME(antoyo): remove this cast when the API supports functions.
                    let func = unsafe { std::mem::transmute(simple.expect("simple")) };
                    self.call(self.type_void(), func, &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(), None)
                },
                sym::likely => {
                    self.expect(args[0].immediate(), true)
                }
                sym::unlikely => {
                    self.expect(args[0].immediate(), false)
                }
                kw::Try => {
                    try_intrinsic(
                        self,
                        args[0].immediate(),
                        args[1].immediate(),
                        args[2].immediate(),
                        llresult,
                    );
                    return;
                }
                sym::breakpoint => {
                    unimplemented!();
                }
                sym::va_copy => {
                    unimplemented!();
                }
                sym::va_arg => {
                    unimplemented!();
                }

                sym::volatile_load | sym::unaligned_volatile_load => {
                    let tp_ty = substs.type_at(0);
                    let mut ptr = args[0].immediate();
                    if let PassMode::Cast(ty, _) = &fn_abi.ret.mode {
                        ptr = self.pointercast(ptr, self.type_ptr_to(ty.gcc_type(self)));
                    }
                    let load = self.volatile_load(ptr.get_type(), ptr);
                    // TODO(antoyo): set alignment.
                    self.to_immediate(load, self.layout_of(tp_ty))
                }
                sym::volatile_store => {
                    let dst = args[0].deref(self.cx());
                    args[1].val.volatile_store(self, dst);
                    return;
                }
                sym::unaligned_volatile_store => {
                    let dst = args[0].deref(self.cx());
                    args[1].val.unaligned_volatile_store(self, dst);
                    return;
                }
                sym::prefetch_read_data
                | sym::prefetch_write_data
                | sym::prefetch_read_instruction
                | sym::prefetch_write_instruction => {
                    unimplemented!();
                }
                sym::ctlz
                | sym::ctlz_nonzero
                | sym::cttz
                | sym::cttz_nonzero
                | sym::ctpop
                | sym::bswap
                | sym::bitreverse
                | sym::rotate_left
                | sym::rotate_right
                | sym::saturating_add
                | sym::saturating_sub => {
                    let ty = arg_tys[0];
                    match int_type_width_signed(ty, self) {
                        Some((width, signed)) => match name {
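                            // GCC's count-leading/trailing-zeroes builtins are undefined for a
                            // zero input, so the `ctlz`/`cttz` arm below branches on zero first
                            // and returns the bit width in that case.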
                            sym::ctlz | sym::cttz => {
                                let func = self.current_func.borrow().expect("func");
                                let then_block = func.new_block("then");
                                let else_block = func.new_block("else");
                                let after_block = func.new_block("after");

                                let arg = args[0].immediate();
                                let result = func.new_local(None, arg.get_type(), "zeros");
                                let zero = self.cx.gcc_zero(arg.get_type());
                                let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero);
                                self.llbb().end_with_conditional(None, cond, then_block, else_block);

                                let zero_result = self.cx.gcc_uint(arg.get_type(), width);
                                then_block.add_assignment(None, result, zero_result);
                                then_block.end_with_jump(None, after_block);

                                // NOTE: since jumps were added in a place
                                // count_leading_zeroes() does not expect, the current block
                                // in the state needs to be updated.
                                self.switch_to_block(else_block);

                                let zeros =
                                    match name {
                                        sym::ctlz => self.count_leading_zeroes(width, arg),
                                        sym::cttz => self.count_trailing_zeroes(width, arg),
                                        _ => unreachable!(),
                                    };
                                self.llbb().add_assignment(None, result, zeros);
                                self.llbb().end_with_jump(None, after_block);

                                // NOTE: since jumps were added in a place rustc does not
                                // expect, the current block in the state needs to be updated.
                                self.switch_to_block(after_block);

                                result.to_rvalue()
                            }
                            sym::ctlz_nonzero => {
                                self.count_leading_zeroes(width, args[0].immediate())
                            },
                            sym::cttz_nonzero => {
                                self.count_trailing_zeroes(width, args[0].immediate())
                            }
                            sym::ctpop => self.pop_count(args[0].immediate()),
                            sym::bswap => {
                                if width == 8 {
                                    args[0].immediate() // byte swapping a u8/i8 is just a no-op
                                }
                                else {
                                    self.gcc_bswap(args[0].immediate(), width)
                                }
                            },
                            sym::bitreverse => self.bit_reverse(width, args[0].immediate()),
                            sym::rotate_left | sym::rotate_right => {
                                // TODO(antoyo): implement using algorithm from:
                                // https://blog.regehr.org/archives/1063
                                // for other platforms.
                                let is_left = name == sym::rotate_left;
                                let val = args[0].immediate();
                                let raw_shift = args[1].immediate();
                                if is_left {
                                    self.rotate_left(val, raw_shift, width)
                                }
                                else {
                                    self.rotate_right(val, raw_shift, width)
                                }
                            },
                            sym::saturating_add => {
                                self.saturating_add(args[0].immediate(), args[1].immediate(), signed, width)
                            },
                            sym::saturating_sub => {
                                self.saturating_sub(args[0].immediate(), args[1].immediate(), signed, width)
                            },
                            _ => bug!(),
                        },
                        None => {
                            span_invalid_monomorphization_error(
                                tcx.sess,
                                span,
                                &format!(
                                    "invalid monomorphization of `{}` intrinsic: \
                                    expected basic integer type, found `{}`",
                                    name, ty
                                ),
                            );
                            return;
                        }
                    }
                }

                sym::raw_eq => {
                    use rustc_target::abi::Abi::*;
                    let tp_ty = substs.type_at(0);
                    let layout = self.layout_of(tp_ty).layout;
                    let _use_integer_compare = match layout.abi() {
                        Scalar(_) | ScalarPair(_, _) => true,
                        Uninhabited | Vector { .. } => false,
                        Aggregate { .. } => {
                            // For rusty ABIs, small aggregates are actually passed
                            // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
                            // so we re-use that same threshold here.
                            layout.size() <= self.data_layout().pointer_size * 2
                        }
                    };

                    let a = args[0].immediate();
                    let b = args[1].immediate();
                    if layout.size().bytes() == 0 {
                        self.const_bool(true)
                    }
                    /*else if use_integer_compare {
                        let integer_ty = self.type_ix(layout.size.bits()); // FIXME(antoyo): LLVM creates an integer of 96 bits for [i32; 3], but gcc doesn't support this, so it creates an integer of 128 bits.
                        let ptr_ty = self.type_ptr_to(integer_ty);
                        let a_ptr = self.bitcast(a, ptr_ty);
                        let a_val = self.load(integer_ty, a_ptr, layout.align.abi);
                        let b_ptr = self.bitcast(b, ptr_ty);
                        let b_val = self.load(integer_ty, b_ptr, layout.align.abi);
                        self.icmp(IntPredicate::IntEQ, a_val, b_val)
                    }*/
                    else {
                        let void_ptr_type = self.context.new_type::<*const ()>();
                        let a_ptr = self.bitcast(a, void_ptr_type);
                        let b_ptr = self.bitcast(b, void_ptr_type);
                        let n = self.context.new_cast(None, self.const_usize(layout.size().bytes()), self.sizet_type);
                        let builtin = self.context.get_builtin_function("memcmp");
                        let cmp = self.context.new_call(None, builtin, &[a_ptr, b_ptr, n]);
                        self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0))
                    }
                }

                sym::black_box => {
                    args[0].val.store(self, result);

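                    // An empty volatile inline asm that takes the result's address as an input
                    // and clobbers memory forces GCC to assume the value is read and possibly
                    // modified, which provides the optimization barrier `black_box` requires.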
                    let block = self.llbb();
                    let extended_asm = block.add_extended_asm(None, "");
                    extended_asm.add_input_operand(None, "r", result.llval);
                    extended_asm.add_clobber("memory");
                    extended_asm.set_volatile_flag(true);

                    // We have copied the value to `result` already.
                    return;
                }

                sym::ptr_mask => {
                    let usize_type = self.context.new_type::<usize>();
                    let void_ptr_type = self.context.new_type::<*const ()>();

                    let ptr = args[0].immediate();
                    let mask = args[1].immediate();

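                    // Reinterpret the pointer as an integer address, apply the mask, and cast
                    // the result back to a pointer.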
                    let addr = self.bitcast(ptr, usize_type);
                    let masked = self.and(addr, mask);
                    self.bitcast(masked, void_ptr_type)
                },

                _ if name_str.starts_with("simd_") => {
                    match generic_simd_intrinsic(self, name, callee_ty, args, ret_ty, llret_ty, span) {
                        Ok(llval) => llval,
                        Err(()) => return,
                    }
                }

                _ => bug!("unknown intrinsic '{}'", name),
            };

        if !fn_abi.ret.is_ignore() {
            if let PassMode::Cast(ty, _) = &fn_abi.ret.mode {
                let ptr_llty = self.type_ptr_to(ty.gcc_type(self));
                let ptr = self.pointercast(result.llval, ptr_llty);
                self.store(llval, ptr, result.align);
            }
            else {
                OperandRef::from_immediate_or_packed_pair(self, llval, result.layout)
                    .val
                    .store(self, result);
            }
        }
    }

    fn abort(&mut self) {
        let func = self.context.get_builtin_function("abort");
        let func: RValue<'gcc> = unsafe { std::mem::transmute(func) };
        self.call(self.type_void(), func, &[], None);
    }

    fn assume(&mut self, value: Self::Value) {
        // TODO(antoyo): switch to assume when it exists.
        // Or use something like this:
        // #define __assume(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
        self.expect(value, true);
    }

    fn expect(&mut self, cond: Self::Value, _expected: bool) -> Self::Value {
        // TODO(antoyo)
        cond
    }

    fn type_test(&mut self, _pointer: Self::Value, _typeid: Self::Value) -> Self::Value {
        // Unsupported.
        self.context.new_rvalue_from_int(self.int_type, 0)
    }

    fn type_checked_load(
        &mut self,
        _llvtable: Self::Value,
        _vtable_byte_offset: u64,
        _typeid: Self::Value,
    ) -> Self::Value {
        // Unsupported.
        self.context.new_rvalue_from_int(self.int_type, 0)
    }

    fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
        unimplemented!();
    }

    fn va_end(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> {
        unimplemented!();
    }
}

impl<'a, 'gcc, 'tcx> ArgAbiMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
    fn store_fn_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, idx: &mut usize, dst: PlaceRef<'tcx, Self::Value>) {
        arg_abi.store_fn_arg(self, idx, dst)
    }

    fn store_arg(&mut self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
        arg_abi.store(self, val, dst)
    }

    fn arg_memory_ty(&self, arg_abi: &ArgAbi<'tcx, Ty<'tcx>>) -> Type<'gcc> {
        arg_abi.memory_ty(self)
    }
}

pub trait ArgAbiExt<'gcc, 'tcx> {
    fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
    fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>);
    fn store_fn_arg(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>);
}

impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
    /// Gets the GCC type for a place of the original Rust type of
    /// this argument/return, i.e., the result of `type_of::type_of`.
    fn memory_ty(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
        self.layout.gcc_type(cx, true)
    }

    /// Stores a direct/indirect value described by this ArgAbi into a
    /// place for the original Rust type of this argument/return.
    /// Can be used both for storing formal arguments into Rust variables
    /// and for storing the results of call/invoke instructions into their
    /// destinations.
    fn store(&self, bx: &mut Builder<'_, 'gcc, 'tcx>, val: RValue<'gcc>, dst: PlaceRef<'tcx, RValue<'gcc>>) {
        if self.is_ignore() {
            return;
        }
        if self.is_sized_indirect() {
            OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
        }
        else if self.is_unsized_indirect() {
            bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
        }
        else if let PassMode::Cast(ref cast, _) = self.mode {
            // FIXME(eddyb): Figure out when the simpler Store is safe, clang
            // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
            let can_store_through_cast_ptr = false;
            if can_store_through_cast_ptr {
                let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx));
                let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty);
                bx.store(val, cast_dst, self.layout.align.abi);
            }
            else {
                // The actual return type is a struct, but the ABI
                // adaptation code has cast it into some scalar type. The
                // code that follows is the only reliable way I have
                // found to do a transform like i64 -> {i32,i32}.
                // Basically we dump the data onto the stack then memcpy it.
                //
                // Other approaches I tried:
                // - Casting rust ret pointer to the foreign type and using Store
                //   is (a) unsafe if size of foreign type > size of rust type and
                //   (b) runs afoul of strict aliasing rules, yielding invalid
                //   assembly under -O (specifically, the store gets removed).
                // - Truncating foreign type to correct integral type and then
                //   bitcasting to the struct type yields invalid cast errors.

                // We instead thus allocate some scratch space...
                let scratch_size = cast.size(bx);
                let scratch_align = cast.align(bx);
                let llscratch = bx.alloca(cast.gcc_type(bx), scratch_align);
                bx.lifetime_start(llscratch, scratch_size);

                // ... where we first store the value...
                bx.store(val, llscratch, scratch_align);

                // ... and then memcpy it to the intended destination.
                bx.memcpy(
                    dst.llval,
                    self.layout.align.abi,
                    llscratch,
                    scratch_align,
                    bx.const_usize(self.layout.size.bytes()),
                    MemFlags::empty(),
                );

                bx.lifetime_end(llscratch, scratch_size);
            }
        }
        else {
            OperandValue::Immediate(val).store(bx, dst);
        }
    }

    fn store_fn_arg<'a>(&self, bx: &mut Builder<'a, 'gcc, 'tcx>, idx: &mut usize, dst: PlaceRef<'tcx, RValue<'gcc>>) {
        let mut next = || {
            let val = bx.current_func().get_param(*idx as i32);
            *idx += 1;
            val.to_rvalue()
        };
        match self.mode {
            PassMode::Ignore => {},
            PassMode::Pair(..) => {
                OperandValue::Pair(next(), next()).store(bx, dst);
            },
            PassMode::Indirect { extra_attrs: Some(_), .. } => {
                OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst);
            },
            PassMode::Direct(_) | PassMode::Indirect { extra_attrs: None, .. } | PassMode::Cast(..) => {
                let next_arg = next();
                self.store(bx, next_arg, dst);
            },
        }
    }
}

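/// Returns the bit width and signedness of `ty` when it is a basic integer type,
/// or `None` otherwise; `isize` and `usize` report the target's pointer width.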
fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -> Option<(u64, bool)> {
    match ty.kind() {
        ty::Int(t) => Some((
            match t {
                rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width),
                rustc_middle::ty::IntTy::I8 => 8,
                rustc_middle::ty::IntTy::I16 => 16,
                rustc_middle::ty::IntTy::I32 => 32,
                rustc_middle::ty::IntTy::I64 => 64,
                rustc_middle::ty::IntTy::I128 => 128,
            },
            true,
        )),
        ty::Uint(t) => Some((
            match t {
                rustc_middle::ty::UintTy::Usize => u64::from(cx.tcx.sess.target.pointer_width),
                rustc_middle::ty::UintTy::U8 => 8,
                rustc_middle::ty::UintTy::U16 => 16,
                rustc_middle::ty::UintTy::U32 => 32,
                rustc_middle::ty::UintTy::U64 => 64,
                rustc_middle::ty::UintTy::U128 => 128,
            },
            false,
        )),
        _ => None,
    }
}

impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
    fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
        let result_type = value.get_type();
        let typ = result_type.to_unsigned(self.cx);

        let value =
            if result_type.is_signed(self.cx) {
                self.gcc_int_cast(value, typ)
            }
            else {
                value
            };

        let context = &self.cx.context;
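        // Each arm below reverses the bits using only shifts, masks, and ors, swapping bit
        // groups at every power-of-two granularity; the 64-bit arm uses a shift-xor variant of
        // the same idea, and 128-bit values are split into two 64-bit halves that are reversed
        // independently and swapped.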
        let result =
            match width {
                8 => {
                    // First step.
                    let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
                    let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
                    let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
                    let step1 = self.or(left, right);

                    // Second step.
                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
                    let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
                    let step2 = self.or(left, right);

                    // Third step.
                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
                    let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
                    let step3 = self.or(left, right);

                    step3
                },
                16 => {
                    // First step.
                    let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
                    let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
                    let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
                    let step1 = self.or(left, right);

                    // Second step.
                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
                    let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
                    let step2 = self.or(left, right);

                    // Third step.
                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
                    let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
                    let step3 = self.or(left, right);

                    // Fourth step.
                    let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
                    let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
                    let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
                    let step4 = self.or(left, right);

                    step4
                },
                32 => {
                    // TODO(antoyo): Refactor with other implementations.
                    // First step.
                    let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
                    let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
                    let step1 = self.or(left, right);

                    // Second step.
                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
                    let step2 = self.or(left, right);

                    // Third step.
                    let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
                    let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
                    let step3 = self.or(left, right);

                    // Fourth step.
                    let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
                    let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
                    let step4 = self.or(left, right);

                    // Fifth step.
                    let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
                    let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
                    let step5 = self.or(left, right);

                    step5
                },
                64 => {
                    // First step.
                    let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
                    let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
                    let step1 = self.or(left, right);

                    // Second step.
                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
                    let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
                    let step2 = self.or(left, right);

                    // Third step.
                    let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
                    let left = self.xor(step2, left);
                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));

                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
                    let left = self.or(temp, left);
                    let step3 = self.xor(left, step2);

                    // Fourth step.
                    let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
                    let left = self.xor(step3, left);
                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));

                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
                    let left = self.or(temp, left);
                    let step4 = self.xor(left, step3);

                    // Fifth step.
                    let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
                    let left = self.xor(step4, left);
                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));

                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
                    let left = self.or(temp, left);
                    let step5 = self.xor(left, step4);

                    step5
                },
                128 => {
                    // TODO(antoyo): find a more efficient implementation?
                    let sixty_four = self.gcc_int(typ, 64);
                    let right_shift = self.gcc_lshr(value, sixty_four);
                    let high = self.gcc_int_cast(right_shift, self.u64_type);
                    let low = self.gcc_int_cast(value, self.u64_type);

                    let reversed_high = self.bit_reverse(64, high);
                    let reversed_low = self.bit_reverse(64, low);

                    let new_low = self.gcc_int_cast(reversed_high, typ);
                    let new_high = self.shl(self.gcc_int_cast(reversed_low, typ), sixty_four);

                    self.gcc_or(new_low, new_high)
                },
                _ => {
                    panic!("cannot bit reverse with width = {}", width);
                },
            };

        self.gcc_int_cast(result, result_type)
    }

    fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        // TODO(antoyo): use width?
        let arg_type = arg.get_type();
        let count_leading_zeroes =
            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
            // instead of using is_uint().
            if arg_type.is_uint(&self.cx) {
                "__builtin_clz"
            }
            else if arg_type.is_ulong(&self.cx) {
                "__builtin_clzl"
            }
            else if arg_type.is_ulonglong(&self.cx) {
                "__builtin_clzll"
            }
            else if width == 128 {
                // Algorithm from: https://stackoverflow.com/a/28433850/389119
                let array_type = self.context.new_array_type(None, arg_type, 3);
                let result = self.current_func()
                    .new_local(None, array_type, "count_leading_zeroes_results");

                let sixty_four = self.const_uint(arg_type, 64);
                let shift = self.lshr(arg, sixty_four);
                let high = self.gcc_int_cast(shift, self.u64_type);
                let low = self.gcc_int_cast(arg, self.u64_type);

                let zero = self.context.new_rvalue_zero(self.usize_type);
                let one = self.context.new_rvalue_one(self.usize_type);
                let two = self.context.new_rvalue_from_long(self.usize_type, 2);

                let clzll = self.context.get_builtin_function("__builtin_clzll");

                let first_elem = self.context.new_array_access(None, result, zero);
                let first_value = self.gcc_int_cast(self.context.new_call(None, clzll, &[high]), arg_type);
                self.llbb()
                    .add_assignment(None, first_elem, first_value);

                let second_elem = self.context.new_array_access(None, result, one);
                let cast = self.gcc_int_cast(self.context.new_call(None, clzll, &[low]), arg_type);
                let second_value = self.add(cast, sixty_four);
                self.llbb()
                    .add_assignment(None, second_elem, second_value);

                let third_elem = self.context.new_array_access(None, result, two);
                let third_value = self.const_uint(arg_type, 128);
                self.llbb()
                    .add_assignment(None, third_elem, third_value);

                let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
                let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
                let not_low_and_not_high = not_low & not_high;
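                // `index` selects which precomputed result applies: 0 when the high half is
                // non-zero (clz(high)), 1 when only the low half is non-zero (clz(low) + 64),
                // and 2 when both halves are zero (the full width, 128).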
                let index = not_high + not_low_and_not_high;
                // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
                // gcc.
                // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
                // compilation stage.
                let index = self.context.new_cast(None, index, self.i32_type);

                let res = self.context.new_array_access(None, result, index);

                return self.gcc_int_cast(res.to_rvalue(), arg_type);
            }
            else {
                let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll");
                let arg = self.context.new_cast(None, arg, self.ulonglong_type);
                let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64;
                let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8);
                let res = self.context.new_call(None, count_leading_zeroes, &[arg]) - diff;
                return self.context.new_cast(None, res, arg_type);
            };
        let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes);
        let res = self.context.new_call(None, count_leading_zeroes, &[arg]);
        self.context.new_cast(None, res, arg_type)
    }

    fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
        let result_type = arg.get_type();
        let arg =
            if result_type.is_signed(self.cx) {
                let new_type = result_type.to_unsigned(self.cx);
                self.gcc_int_cast(arg, new_type)
            }
            else {
                arg
            };
        let arg_type = arg.get_type();
        let (count_trailing_zeroes, expected_type) =
            // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
            // instead of using is_uint().
            if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
                // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero.
                ("__builtin_ctz", self.cx.uint_type)
            }
            else if arg_type.is_ulong(&self.cx) {
                ("__builtin_ctzl", self.cx.ulong_type)
            }
            else if arg_type.is_ulonglong(&self.cx) {
                ("__builtin_ctzll", self.cx.ulonglong_type)
            }
            else if arg_type.is_u128(&self.cx) {
                // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
                let array_type = self.context.new_array_type(None, arg_type, 3);
                let result = self.current_func()
                    .new_local(None, array_type, "count_trailing_zeroes_results");

                let sixty_four = self.gcc_int(arg_type, 64);
                let shift = self.gcc_lshr(arg, sixty_four);
                let high = self.gcc_int_cast(shift, self.u64_type);
                let low = self.gcc_int_cast(arg, self.u64_type);

                let zero = self.context.new_rvalue_zero(self.usize_type);
                let one = self.context.new_rvalue_one(self.usize_type);
                let two = self.context.new_rvalue_from_long(self.usize_type, 2);

                let ctzll = self.context.get_builtin_function("__builtin_ctzll");

                let first_elem = self.context.new_array_access(None, result, zero);
                let first_value = self.gcc_int_cast(self.context.new_call(None, ctzll, &[low]), arg_type);
                self.llbb()
                    .add_assignment(None, first_elem, first_value);

                let second_elem = self.context.new_array_access(None, result, one);
                let second_value = self.gcc_add(self.gcc_int_cast(self.context.new_call(None, ctzll, &[high]), arg_type), sixty_four);
                self.llbb()
                    .add_assignment(None, second_elem, second_value);

                let third_elem = self.context.new_array_access(None, result, two);
                let third_value = self.gcc_int(arg_type, 128);
                self.llbb()
                    .add_assignment(None, third_elem, third_value);

                let not_low = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, low);
                let not_high = self.context.new_unary_op(None, UnaryOp::LogicalNegate, self.u64_type, high);
                let not_low_and_not_high = not_low & not_high;
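                // `index` selects which precomputed result applies: 0 when the low half is
                // non-zero (ctz(low)), 1 when only the high half is non-zero (ctz(high) + 64),
                // and 2 when both halves are zero (the full width, 128).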
                let index = not_low + not_low_and_not_high;
                // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
                // gcc.
                // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
                // compilation stage.
                let index = self.context.new_cast(None, index, self.i32_type);

                let res = self.context.new_array_access(None, result, index);

                return self.gcc_int_cast(res.to_rvalue(), result_type);
            }
            else {
                let count_trailing_zeroes = self.context.get_builtin_function("__builtin_ctzll");
                let arg_size = arg_type.get_size();
                let casted_arg = self.context.new_cast(None, arg, self.ulonglong_type);
                let byte_diff = self.ulonglong_type.get_size() as i64 - arg_size as i64;
                let diff = self.context.new_rvalue_from_long(self.int_type, byte_diff * 8);
                let mask = self.context.new_rvalue_from_long(arg_type, -1); // To get the value with all bits set.
                let masked = mask & self.context.new_unary_op(None, UnaryOp::BitwiseNegate, arg_type, arg);
                let cond = self.context.new_comparison(None, ComparisonOp::Equals, masked, mask);
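                // `cond` is 1 only when `arg` is zero (`~arg` has all bits set); only in that
                // case is the width difference subtracted, so a zero input reports the
                // argument's own bit width rather than the 64-bit count.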
                let diff = diff * self.context.new_cast(None, cond, self.int_type);
                let res = self.context.new_call(None, count_trailing_zeroes, &[casted_arg]) - diff;
                return self.context.new_cast(None, res, result_type);
            };
        let count_trailing_zeroes = self.context.get_builtin_function(count_trailing_zeroes);
        let arg =
            if arg_type != expected_type {
                self.context.new_cast(None, arg, expected_type)
            }
            else {
                arg
            };
        let res = self.context.new_call(None, count_trailing_zeroes, &[arg]);
        self.context.new_cast(None, res, result_type)
    }

    fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> {
        // TODO(antoyo): use the optimized version with fewer operations.
        let result_type = value.get_type();
        let value_type = result_type.to_unsigned(self.cx);

        let value =
            if result_type.is_signed(self.cx) {
                self.gcc_int_cast(value, value_type)
            }
            else {
                value
            };

        if value_type.is_u128(&self.cx) {
            // TODO(antoyo): implement in the normal algorithm below to have a more efficient
            // implementation (that does not require a call to __popcountdi2).
            let popcount = self.context.get_builtin_function("__builtin_popcountll");
            let sixty_four = self.gcc_int(value_type, 64);
            let right_shift = self.gcc_lshr(value, sixty_four);
            let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type);
            let high = self.context.new_call(None, popcount, &[high]);
            let low = self.gcc_int_cast(value, self.cx.ulonglong_type);
            let low = self.context.new_call(None, popcount, &[low]);
            let res = high + low;
            return self.gcc_int_cast(res, result_type);
        }

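        // What follows is the classic SWAR popcount: at each step, bits are summed pairwise
        // into progressively wider fields (pairs, nibbles, bytes, ...), masking out every other
        // field so the partial sums cannot overflow into their neighbours.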
        // First step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x5555555555555555);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 1);
        let right = shifted & mask;
        let value = left + right;

        // Second step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x3333333333333333);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 2);
        let right = shifted & mask;
        let value = left + right;

        // Third step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x0F0F0F0F0F0F0F0F);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 4);
        let right = shifted & mask;
        let value = left + right;

        if value_type.is_u8(&self.cx) {
            return self.context.new_cast(None, value, result_type);
        }

        // Fourth step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x00FF00FF00FF00FF);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 8);
        let right = shifted & mask;
        let value = left + right;

        if value_type.is_u16(&self.cx) {
            return self.context.new_cast(None, value, result_type);
        }

        // Fifth step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x0000FFFF0000FFFF);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 16);
        let right = shifted & mask;
        let value = left + right;

        if value_type.is_u32(&self.cx) {
            return self.context.new_cast(None, value, result_type);
        }

        // Sixth step.
        let mask = self.context.new_rvalue_from_long(value_type, 0x00000000FFFFFFFF);
        let left = value & mask;
        let shifted = value >> self.context.new_rvalue_from_int(value_type, 32);
        let right = shifted & mask;
        let value = left + right;

        self.context.new_cast(None, value, result_type)
    }

    // Algorithm from: https://blog.regehr.org/archives/1063
    fn rotate_left(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
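        // Reduce the shift modulo the width, then combine two partial shifts; `-shift & (width - 1)`
        // computes the complementary shift amount without ever shifting by the full width, which
        // would be undefined behaviour.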
        let max = self.const_uint(shift.get_type(), width);
        let shift = self.urem(shift, max);
        let lhs = self.shl(value, shift);
        let result_neg = self.neg(shift);
        let result_and =
            self.and(
                result_neg,
                self.const_uint(shift.get_type(), width - 1),
            );
        let rhs = self.lshr(value, result_and);
        self.or(lhs, rhs)
    }

    // Algorithm from: https://blog.regehr.org/archives/1063
    fn rotate_right(&mut self, value: RValue<'gcc>, shift: RValue<'gcc>, width: u64) -> RValue<'gcc> {
        let max = self.const_uint(shift.get_type(), width);
        let shift = self.urem(shift, max);
        let lhs = self.lshr(value, shift);
        let result_neg = self.neg(shift);
        let result_and =
            self.and(
                result_neg,
                self.const_uint(shift.get_type(), width - 1),
            );
        let rhs = self.shl(value, result_and);
        self.or(lhs, rhs)
    }

    fn saturating_add(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
        let result_type = lhs.get_type();
        if signed {
            // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
            let func = self.current_func.borrow().expect("func");
            let res = func.new_local(None, result_type, "saturating_sum");
            let supports_native_type = self.is_native_int_type(result_type);
            let overflow =
                if supports_native_type {
                    let func_name =
                        match width {
                            8 => "__builtin_add_overflow",
                            16 => "__builtin_add_overflow",
                            32 => "__builtin_sadd_overflow",
                            64 => "__builtin_saddll_overflow",
                            128 => "__builtin_add_overflow",
                            _ => unreachable!(),
                        };
                    let overflow_func = self.context.get_builtin_function(func_name);
                    self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
                }
                else {
                    let func_name =
                        match width {
                            128 => "__rust_i128_addo",
                            _ => unreachable!(),
                        };
                    let param_a = self.context.new_parameter(None, result_type, "a");
                    let param_b = self.context.new_parameter(None, result_type, "b");
                    let result_field = self.context.new_field(None, result_type, "result");
                    let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
                    let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
                    let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
                    let result = self.context.new_call(None, func, &[lhs, rhs]);
                    let overflow = result.access_field(None, overflow_field);
                    let int_result = result.access_field(None, result_field);
                    self.llbb().add_assignment(None, res, int_result);
                    overflow
                };

            let then_block = func.new_block("then");
            let after_block = func.new_block("after");

            // Return `result_type`'s maximum or minimum value on overflow.
            // NOTE: convert the type to unsigned to have an unsigned shift.
            let unsigned_type = result_type.to_unsigned(&self.cx);
            let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
            let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
            let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
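            // `shifted` is the sign bit of `lhs` (0 or 1), so `shifted + int_max` is INT_MAX on
            // positive overflow and wraps to INT_MIN on negative overflow.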
            then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
            then_block.end_with_jump(None, after_block);

            self.llbb().end_with_conditional(None, overflow, then_block, after_block);

            // NOTE: since jumps were added in a place rustc does not
            // expect, the current block in the state needs to be updated.
            self.switch_to_block(after_block);

            res.to_rvalue()
        }
        else {
            // Algorithm from: http://locklessinc.com/articles/sat_arithmetic/
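            // On unsigned overflow the wrapped sum is less than `lhs`, so `cond` is 1; negating
            // it yields an all-ones mask and the `or` saturates the result to the maximum value.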
            let res = self.gcc_add(lhs, rhs);
            let cond = self.gcc_icmp(IntPredicate::IntULT, res, lhs);
            let value = self.gcc_neg(self.gcc_int_cast(cond, result_type));
            self.gcc_or(res, value)
        }
    }

    // Algorithm from: https://locklessinc.com/articles/sat_arithmetic/
    fn saturating_sub(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>, signed: bool, width: u64) -> RValue<'gcc> {
        let result_type = lhs.get_type();
        if signed {
            // Based on algorithm from: https://stackoverflow.com/a/56531252/389119
            let func = self.current_func.borrow().expect("func");
            let res = func.new_local(None, result_type, "saturating_diff");
            let supports_native_type = self.is_native_int_type(result_type);
            let overflow =
                if supports_native_type {
                    let func_name =
                        match width {
                            8 => "__builtin_sub_overflow",
                            16 => "__builtin_sub_overflow",
                            32 => "__builtin_ssub_overflow",
                            64 => "__builtin_ssubll_overflow",
                            128 => "__builtin_sub_overflow",
                            _ => unreachable!(),
                        };
                    let overflow_func = self.context.get_builtin_function(func_name);
                    self.overflow_call(overflow_func, &[lhs, rhs, res.get_address(None)], None)
                }
                else {
                    let func_name =
                        match width {
                            128 => "__rust_i128_subo",
                            _ => unreachable!(),
                        };
                    let param_a = self.context.new_parameter(None, result_type, "a");
                    let param_b = self.context.new_parameter(None, result_type, "b");
                    let result_field = self.context.new_field(None, result_type, "result");
                    let overflow_field = self.context.new_field(None, self.bool_type, "overflow");
                    let return_type = self.context.new_struct_type(None, "result_overflow", &[result_field, overflow_field]);
                    let func = self.context.new_function(None, FunctionType::Extern, return_type.as_type(), &[param_a, param_b], func_name, false);
                    let result = self.context.new_call(None, func, &[lhs, rhs]);
                    let overflow = result.access_field(None, overflow_field);
                    let int_result = result.access_field(None, result_field);
                    self.llbb().add_assignment(None, res, int_result);
                    overflow
                };

            let then_block = func.new_block("then");
            let after_block = func.new_block("after");

            // Return `result_type`'s maximum or minimum value on overflow.
            // NOTE: convert the type to unsigned to have an unsigned shift.
            let unsigned_type = result_type.to_unsigned(&self.cx);
            let shifted = self.gcc_lshr(self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1));
            let uint_max = self.gcc_not(self.gcc_int(unsigned_type, 0));
            let int_max = self.gcc_lshr(uint_max, self.gcc_int(unsigned_type, 1));
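            // As in saturating_add: `shifted + int_max` is INT_MAX when `lhs` is non-negative
            // and INT_MIN when it is negative, matching the direction of the overflow.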
            then_block.add_assignment(None, res, self.gcc_int_cast(self.gcc_add(shifted, int_max), result_type));
            then_block.end_with_jump(None, after_block);

            self.llbb().end_with_conditional(None, overflow, then_block, after_block);

            // NOTE: since jumps were added in a place rustc does not
            // expect, the current block in the state needs to be updated.
            self.switch_to_block(after_block);

            res.to_rvalue()
        }
        else {
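            // If the subtraction borrowed, res > lhs (unsigned), the mask is zero and the `and`
            // clamps the result to 0; otherwise the mask is all ones and `res` passes through.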
            let res = self.gcc_sub(lhs, rhs);
            let comparison = self.gcc_icmp(IntPredicate::IntULE, res, lhs);
            let value = self.gcc_neg(self.gcc_int_cast(comparison, result_type));
            self.gcc_and(res, value)
        }
    }
}

fn try_intrinsic<'gcc, 'tcx>(bx: &mut Builder<'_, 'gcc, 'tcx>, try_func: RValue<'gcc>, data: RValue<'gcc>, _catch_func: RValue<'gcc>, dest: RValue<'gcc>) {
    // NOTE: the `|| true` here is to use the panic=abort strategy with panic=unwind too.
    if bx.sess().panic_strategy() == PanicStrategy::Abort || true {
        // TODO(bjorn3): Properly implement unwinding and remove the `|| true` once this is done.
        bx.call(bx.type_void(), try_func, &[data], None);
        // Return 0 unconditionally from the intrinsic call;
        // we can never unwind.
        let ret_align = bx.tcx.data_layout.i32_align.abi;
        bx.store(bx.const_i32(0), dest, ret_align);
    }
    else if wants_msvc_seh(bx.sess()) {
        unimplemented!();
    }
    else {
        unimplemented!();
    }
}