use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::fmt;
+use std::hash;
use std::iter;
use std::ops::{Deref, Range};
use std::ptr;
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
/// module provides higher-level access.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
+// hashed (see the `Hash` impl below for more details), so the `Hash` impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct Allocation<Tag = AllocId, Extra = ()> {
/// Extra state for the machine.
pub extra: Extra,
}
+/// This is the maximum size we will hash at a time when interning an `Allocation` and its
+/// `InitMask`. Note that we hash that many bytes twice: at the start and at the end of a buffer.
+/// Used when these two structures are large: we only partially hash the larger fields in that
+/// situation. See the comment at the top of their respective `Hash` impl for more details.
+const MAX_BYTES_TO_HASH: usize = 64;
+
+/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning.
+/// Otherwise, it will be partially hashed as two slices of `MAX_BYTES_TO_HASH` bytes each, which
+/// requires the buffer to hold at least `2 * MAX_BYTES_TO_HASH` bytes.
+const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;
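+
+// As an illustration (example numbers, not from this file): with these constants, hashing a
+// 200-byte buffer feeds only its length, `bytes[..64]`, and `bytes[136..]` into the hasher;
+// the middle 72 bytes never influence the hash.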
+
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive, especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the actual bytes of an allocation. We can partially hash some fields when
+// they're large.
+impl hash::Hash for Allocation {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ // Partially hash the `bytes` buffer when it is large. To limit collisions with common
+ // prefixes and suffixes, we hash the length and some slices of the buffer.
+ let byte_count = self.bytes.len();
+ if byte_count > MAX_HASHED_BUFFER_LEN {
+ // Hash the buffer's length.
+ byte_count.hash(state);
+
+ // And its head and tail.
+ self.bytes[..MAX_BYTES_TO_HASH].hash(state);
+ self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
+ } else {
+ self.bytes.hash(state);
+ }
+
+ // Hash the other fields as usual.
+ self.relocations.hash(state);
+ self.init_mask.hash(state);
+ self.align.hash(state);
+ self.mutability.hash(state);
+ self.extra.hash(state);
+ }
+}
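+
+// A minimal standalone sketch of the same head/tail scheme (hypothetical helper, not part of
+// this module), handy for checking the idea in isolation:
+//
+//     use std::hash::{Hash, Hasher};
+//
+//     fn hash_partial<H: Hasher>(bytes: &[u8], state: &mut H) {
+//         const MAX: usize = 64; // stand-in for `MAX_BYTES_TO_HASH`
+//         if bytes.len() > 2 * MAX {
+//             bytes.len().hash(state); // the length guards against prefix/suffix collisions
+//             bytes[..MAX].hash(state); // head
+//             bytes[bytes.len() - MAX..].hash(state); // tail
+//         } else {
+//             bytes.hash(state); // small buffers are hashed in full
+//         }
+//     }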
+
/// Try to create an Allocation of `size` bytes, failing if there is not enough memory
/// available to the compiler to do so.
- pub fn uninit(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'static, Self> {
+ pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
// This results in an error that can happen non-deterministically, since the memory
// available to the compiler can change between runs. Normally queries are always
impl Allocation {
/// Converts `Tag` and adds the `Extra` fields.
- pub fn convert_tag_add_extra<Tag, Extra>(
+ pub fn convert_tag_add_extra<Tag, Extra, Err>(
self,
cx: &impl HasDataLayout,
extra: Extra,
- mut tagger: impl FnMut(Pointer<AllocId>) -> Pointer<Tag>,
- ) -> Allocation<Tag, Extra> {
+ mut tagger: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Tag>, Err>,
+ ) -> Result<Allocation<Tag, Extra>, Err> {
// Compute new pointer tags, which also adjusts the bytes.
let mut bytes = self.bytes;
let mut new_relocations = Vec::with_capacity(self.relocations.0.len());
let ptr_bytes = &mut bytes[idx..idx + ptr_size];
let bits = read_target_uint(endian, ptr_bytes).unwrap();
let (ptr_tag, ptr_offset) =
- tagger(Pointer::new(alloc_id, Size::from_bytes(bits))).into_parts();
+ tagger(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts();
write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
new_relocations.push((offset, ptr_tag));
}
// Create allocation.
- Allocation {
+ Ok(Allocation {
bytes,
relocations: Relocations::from_presorted(new_relocations),
init_mask: self.init_mask,
align: self.align,
mutability: self.mutability,
extra,
- }
+ })
}
}
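
// Hypothetical call shape (`cx`, `my_extra`, `MyTag`, `MyExtra`, and `MyError` are invented
// stand-ins for machine-specific types): a tagger that cannot fail now wraps its result in
// `Ok`, and the caller propagates the error instead of receiving the allocation directly:
//
//     let tagged: Allocation<MyTag, MyExtra> =
//         alloc.convert_tag_add_extra(&cx, my_extra, |ptr| {
//             let (alloc_id, offset) = ptr.into_parts();
//             Ok::<_, MyError>(Pointer::new(MyTag::new(alloc_id), offset))
//         })?;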
/// Byte accessors.
impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
- /// The last argument controls whether we error out when there are uninitialized
- /// or pointer bytes. You should never call this, call `get_bytes` or
- /// `get_bytes_with_uninit_and_ptr` instead,
+ /// This is the entirely abstraction-violating way to just grab the raw bytes without
+ /// caring about relocations. It just deduplicates some code between `read_scalar`
+ /// and `get_bytes_internal`.
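+ /// Note that this performs no checks of its own: an out-of-bounds `range` will panic on the
+ /// slice index rather than return an `AllocError`, so callers must validate bounds first.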
+ fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
+ &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
+ }
+
+ /// The last argument controls whether we error out when there are uninitialized or pointer
+ /// bytes. However, we *always* error when there are relocations overlapping the edges of the
+ /// range.
+ ///
+ /// You should never call this; call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead.
///
/// This function also guarantees that the resulting pointer will remain stable
/// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
self.check_relocation_edges(cx, range)?;
}
- Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
+ Ok(self.get_bytes_even_more_internal(range))
}
/// Checks that these bytes are initialized and not pointer bytes, and then returns them
/// Reading and writing.
impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a
- /// relocation. If `allow_uninit_and_ptr` is `false`, also enforces that the memory in the
- /// given range contains neither relocations nor uninitialized bytes.
+ /// relocation. If `allow_uninit`/`allow_ptr` is `false`, also enforces that the memory in the
+ /// given range contains no uninitialized bytes/relocations.
pub fn check_bytes(
&self,
cx: &impl HasDataLayout,
range: AllocRange,
- allow_uninit_and_ptr: bool,
+ allow_uninit: bool,
+ allow_ptr: bool,
) -> AllocResult {
// Check bounds and relocations on the edges.
self.get_bytes_with_uninit_and_ptr(cx, range)?;
// Check uninit and ptr.
- if !allow_uninit_and_ptr {
+ if !allow_uninit {
self.check_init(range)?;
+ }
+ if !allow_ptr {
self.check_relocations(cx, range)?;
}
Ok(())
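
// Call sites migrate mechanically; a sketch (the old single flag covered both checks):
//
//     // before: alloc.check_bytes(cx, range, /* allow_uninit_and_ptr */ false)?;
//     alloc.check_bytes(cx, range, /* allow_uninit */ false, /* allow_ptr */ false)?;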
/// Reads a *non-ZST* scalar.
///
+ /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
+ /// supports that) provenance is entirely ignored.
+ ///
/// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
/// for ZSTness anyway due to integer pointers being valid for ZSTs.
///
&self,
cx: &impl HasDataLayout,
range: AllocRange,
+ read_provenance: bool,
) -> AllocResult<ScalarMaybeUninit<Tag>> {
- // `get_bytes_with_uninit_and_ptr` tests relocation edges.
- // We deliberately error when loading data that partially has provenance, or partially
- // initialized data (that's the check below), into a scalar. The LLVM semantics of this are
- // unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
- // further discussion.
- let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
- // Uninit check happens *after* we established that the alignment is correct.
- // We must not return `Ok()` for unaligned pointers!
+ if read_provenance {
+ assert_eq!(range.size, cx.data_layout().pointer_size);
+ }
+
+ // First and foremost, if anything is uninit, bail.
if self.is_init(range).is_err() {
// This inflates uninitialized bytes to the entire scalar, even if only a few
// bytes are uninitialized.
return Ok(ScalarMaybeUninit::Uninit);
}
- // Now we do the actual reading.
- let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
- // See if we got a pointer.
- if range.size != cx.data_layout().pointer_size {
- // Not a pointer.
- // *Now*, we better make sure that the inside is free of relocations too.
- self.check_relocations(cx, range)?;
- } else {
- // Maybe a pointer.
- if let Some(&prov) = self.relocations.get(&range.start) {
- let ptr = Pointer::new(prov, Size::from_bytes(bits));
- return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
- }
+
+ // If we are doing a pointer read, and there is a relocation exactly where we
+ // are reading, then we can put data and relocation back together and return that.
+ if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
+ // We already checked init and relocations, so we can use this function.
+ let bytes = self.get_bytes_even_more_internal(range);
+ let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+ let ptr = Pointer::new(prov, Size::from_bytes(bits));
+ return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
+ }
+
+ // If we are *not* reading a pointer, and we can just ignore relocations,
+ // then do exactly that.
+ if !read_provenance && Tag::OFFSET_IS_ADDR {
+ // We just strip provenance.
+ let bytes = self.get_bytes_even_more_internal(range);
+ let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+ return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
}
- // We don't. Just return the bits.
+
+ // It's complicated. Better make sure there is no provenance anywhere.
+ // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
+ // `read_provenance` is true and we ideally would distinguish the following two cases:
+ // - The entire `range` is covered by 2 relocations for the same provenance.
+ // Then we should return a pointer with that provenance.
+ // - The range has inhomogeneous provenance. Then we should return just the
+ // underlying bits.
+ let bytes = self.get_bytes(cx, range)?;
+ let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
}
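
// The two call shapes this enables (sketch; `alloc_range` as used elsewhere in this module):
// an integer read can ignore provenance, while a pointer read must span exactly
// `pointer_size` bytes, per the assert at the top of `read_scalar`:
//
//     let int = alloc.read_scalar(cx, alloc_range(offset, size), /* read_provenance */ false)?;
//     let ptr = alloc.read_scalar(
//         cx,
//         alloc_range(offset, cx.data_layout().pointer_size),
//         /* read_provenance */ true,
//     )?;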
let start = range.start;
let end = range.end();
- // We need to handle clearing the relocations from parts of a pointer. See
- // <https://github.com/rust-lang/rust/issues/87184> for details.
+ // We need to handle clearing the relocations from parts of a pointer.
+ // FIXME: Miri should preserve partial relocations; see
+ // https://github.com/rust-lang/miri/issues/2181.
if first < start {
if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
return Err(AllocError::PartialPointerOverwrite(first));
/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
/// is initialized. If it is `false` the byte is uninitialized.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `InitMask` fields can be partially
+// hashed (see the `Hash` impl below for more details), so the `Hash` impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct InitMask {
blocks: Vec<Block>,
len: Size,
}
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive, especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the allocation's init mask. We can partially hash some fields when they're
+// large.
+impl hash::Hash for InitMask {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ const MAX_BLOCKS_TO_HASH: usize = MAX_BYTES_TO_HASH / std::mem::size_of::<Block>();
+ const MAX_BLOCKS_LEN: usize = MAX_HASHED_BUFFER_LEN / std::mem::size_of::<Block>();
+
+ // Partially hash the `blocks` buffer when it is large. To limit collisions with common
+ // prefixes and suffixes, we hash the length and some slices of the buffer.
+ let block_count = self.blocks.len();
+ if block_count > MAX_BLOCKS_LEN {
+ // Hash the buffer's length.
+ block_count.hash(state);
+
+ // And its head and tail.
+ self.blocks[..MAX_BLOCKS_TO_HASH].hash(state);
+ self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state);
+ } else {
+ self.blocks.hash(state);
+ }
+
+ // Hash the other fields as usual.
+ self.len.hash(state);
+ }
+}
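+
+// Assuming `Block` is a `u64` (matching `BLOCK_SIZE` below), `MAX_BLOCKS_TO_HASH` works out to
+// 64 / 8 = 8 blocks and `MAX_BLOCKS_LEN` to 16 blocks, so only init masks tracking more than
+// 16 * 64 = 1024 bytes of allocation take the partial-hashing path.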
+
impl InitMask {
pub const BLOCK_SIZE: u64 = 64;
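
// A sketch of the indexing this block size implies (hypothetical helper; byte `i` of the
// allocation is tracked by bit `i % BLOCK_SIZE` of block `i / BLOCK_SIZE`):
//
//     fn bit_index(i: u64) -> (usize, u64) {
//         ((i / InitMask::BLOCK_SIZE) as usize, i % InitMask::BLOCK_SIZE)
//     }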