New upstream version 1.63.0+dfsg1

[rustc.git] / compiler / rustc_middle / src / mir / interpret / allocation.rs
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs

index 5564852f305399e6f328b0311b3fc58131499ece..10c4ea63a68b2677158ef47e1015d66329f3bb11 100644 (file)
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -3,6 +3,7 @@
  use std::borrow::Cow;
  use std::convert::{TryFrom, TryInto};
  use std::fmt;
+use std::hash;
  use std::iter;
  use std::ops::{Deref, Range};
  use std::ptr;
@@ -25,7 +26,9 @@ use crate::ty;
  /// Its public API is rather low-level, working directly with allocation offsets and a custom error
  /// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
  /// module provides higher-level access.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
+// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
  #[derive(HashStable)]
  pub struct Allocation<Tag = AllocId, Extra = ()> {
      /// The actual bytes of the allocation.
@@ -49,6 +52,46 @@ pub struct Allocation<Tag = AllocId, Extra = ()> {
      pub extra: Extra,
  }
  
+/// This is the maximum size we will hash at a time, when interning an `Allocation` and its
+/// `InitMask`. Note, we hash that amount of bytes twice: at the start, and at the end of a buffer.
+/// Used when these two structures are large: we only partially hash the larger fields in that
+/// situation. See the comment at the top of their respective `Hash` impl for more details.
+const MAX_BYTES_TO_HASH: usize = 64;
+
+/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning.
+/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH`
+/// bytes.
+const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;
+
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the actual bytes of allocation. We can partially hash some fields when they're
+// large.
+impl hash::Hash for Allocation {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        // Partially hash the `bytes` buffer when it is large. To limit collisions with common
+        // prefixes and suffixes, we hash the length and some slices of the buffer.
+        let byte_count = self.bytes.len();
+        if byte_count > MAX_HASHED_BUFFER_LEN {
+            // Hash the buffer's length.
+            byte_count.hash(state);
+
+            // And its head and tail.
+            self.bytes[..MAX_BYTES_TO_HASH].hash(state);
+            self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
+        } else {
+            self.bytes.hash(state);
+        }
+
+        // Hash the other fields as usual.
+        self.relocations.hash(state);
+        self.init_mask.hash(state);
+        self.align.hash(state);
+        self.mutability.hash(state);
+        self.extra.hash(state);
+    }
+}
+
  /// Interned types generally have an `Outer` type and an `Inner` type, where
  /// `Outer` is a newtype around `Interned<Inner>`, and all the operations are
  /// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an
@@ -171,7 +214,7 @@ impl<Tag> Allocation<Tag> {
  
      /// Try to create an Allocation of `size` bytes, failing if there is not enough memory
      /// available to the compiler to do so.
-    pub fn uninit(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'static, Self> {
+    pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
          let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
              // This results in an error that can happen non-deterministically, since the memory
              // available to the compiler can change between runs. Normally queries are always
@@ -201,12 +244,12 @@ impl<Tag> Allocation<Tag> {
  
  impl Allocation {
      /// Convert Tag and add Extra fields
-    pub fn convert_tag_add_extra<Tag, Extra>(
+    pub fn convert_tag_add_extra<Tag, Extra, Err>(
          self,
          cx: &impl HasDataLayout,
          extra: Extra,
-        mut tagger: impl FnMut(Pointer<AllocId>) -> Pointer<Tag>,
-    ) -> Allocation<Tag, Extra> {
+        mut tagger: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Tag>, Err>,
+    ) -> Result<Allocation<Tag, Extra>, Err> {
          // Compute new pointer tags, which also adjusts the bytes.
          let mut bytes = self.bytes;
          let mut new_relocations = Vec::with_capacity(self.relocations.0.len());
@@ -217,19 +260,19 @@ impl Allocation {
              let ptr_bytes = &mut bytes[idx..idx + ptr_size];
              let bits = read_target_uint(endian, ptr_bytes).unwrap();
              let (ptr_tag, ptr_offset) =
-                tagger(Pointer::new(alloc_id, Size::from_bytes(bits))).into_parts();
+                tagger(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts();
              write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
              new_relocations.push((offset, ptr_tag));
          }
          // Create allocation.
-        Allocation {
+        Ok(Allocation {
              bytes,
              relocations: Relocations::from_presorted(new_relocations),
              init_mask: self.init_mask,
              align: self.align,
              mutability: self.mutability,
              extra,
-        }
+        })
      }
  }
  
@@ -264,9 +307,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
  
  /// Byte accessors.
  impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
-    /// The last argument controls whether we error out when there are uninitialized
-    /// or pointer bytes. You should never call this, call `get_bytes` or
-    /// `get_bytes_with_uninit_and_ptr` instead,
+    /// This is the entirely abstraction-violating way to just grab the raw bytes without
+    /// caring about relocations. It just deduplicates some code between `read_scalar`
+    /// and `get_bytes_internal`.
+    fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
+        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
+    }
+
+    /// The last argument controls whether we error out when there are uninitialized or pointer
+    /// bytes. However, we *always* error when there are relocations overlapping the edges of the
+    /// range.
+    ///
+    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
      ///
      /// This function also guarantees that the resulting pointer will remain stable
      /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
@@ -287,7 +339,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
              self.check_relocation_edges(cx, range)?;
          }
  
-        Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
+        Ok(self.get_bytes_even_more_internal(range))
      }
  
      /// Checks that these bytes are initialized and not pointer bytes, and then return them
@@ -350,19 +402,22 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
  /// Reading and writing.
  impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
      /// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a
-    /// relocation. If `allow_uninit_and_ptr` is `false`, also enforces that the memory in the
-    /// given range contains neither relocations nor uninitialized bytes.
+    /// relocation. If `allow_uninit`/`allow_ptr` is `false`, also enforces that the memory in the
+    /// given range contains no uninitialized bytes/relocations.
      pub fn check_bytes(
          &self,
          cx: &impl HasDataLayout,
          range: AllocRange,
-        allow_uninit_and_ptr: bool,
+        allow_uninit: bool,
+        allow_ptr: bool,
      ) -> AllocResult {
          // Check bounds and relocations on the edges.
          self.get_bytes_with_uninit_and_ptr(cx, range)?;
          // Check uninit and ptr.
-        if !allow_uninit_and_ptr {
+        if !allow_uninit {
              self.check_init(range)?;
+        }
+        if !allow_ptr {
              self.check_relocations(cx, range)?;
          }
          Ok(())
@@ -370,6 +425,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
  
      /// Reads a *non-ZST* scalar.
      ///
+    /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
+    /// supports that) provenance is entirely ignored.
+    ///
      /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
      /// for ZSTness anyway due to integer pointers being valid for ZSTs.
      ///
@@ -379,35 +437,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
          &self,
          cx: &impl HasDataLayout,
          range: AllocRange,
+        read_provenance: bool,
      ) -> AllocResult<ScalarMaybeUninit<Tag>> {
-        // `get_bytes_with_uninit_and_ptr` tests relocation edges.
-        // We deliberately error when loading data that partially has provenance, or partially
-        // initialized data (that's the check below), into a scalar. The LLVM semantics of this are
-        // unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
-        // further discussion.
-        let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
-        // Uninit check happens *after* we established that the alignment is correct.
-        // We must not return `Ok()` for unaligned pointers!
+        if read_provenance {
+            assert_eq!(range.size, cx.data_layout().pointer_size);
+        }
+
+        // First and foremost, if anything is uninit, bail.
          if self.is_init(range).is_err() {
              // This inflates uninitialized bytes to the entire scalar, even if only a few
              // bytes are uninitialized.
              return Ok(ScalarMaybeUninit::Uninit);
          }
-        // Now we do the actual reading.
-        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-        // See if we got a pointer.
-        if range.size != cx.data_layout().pointer_size {
-            // Not a pointer.
-            // *Now*, we better make sure that the inside is free of relocations too.
-            self.check_relocations(cx, range)?;
-        } else {
-            // Maybe a pointer.
-            if let Some(&prov) = self.relocations.get(&range.start) {
-                let ptr = Pointer::new(prov, Size::from_bytes(bits));
-                return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
-            }
+
+        // If we are doing a pointer read, and there is a relocation exactly where we
+        // are reading, then we can put data and relocation back together and return that.
+        if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
+            // We already checked init and relocations, so we can use this function.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            let ptr = Pointer::new(prov, Size::from_bytes(bits));
+            return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
+        }
+
+        // If we are *not* reading a pointer, and we can just ignore relocations,
+        // then do exactly that.
+        if !read_provenance && Tag::OFFSET_IS_ADDR {
+            // We just strip provenance.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
          }
-        // We don't. Just return the bits.
+
+        // It's complicated. Better make sure there is no provenance anywhere.
+        // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
+        // `read_pointer` is true and we ideally would distinguish the following two cases:
+        // - The entire `range` is covered by 2 relocations for the same provenance.
+        //   Then we should return a pointer with that provenance.
+        // - The range has inhomogeneous provenance. Then we should return just the
+        //   underlying bits.
+        let bytes = self.get_bytes(cx, range)?;
+        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
          Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
      }
  
@@ -510,8 +580,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
          let start = range.start;
          let end = range.end();
  
-        // We need to handle clearing the relocations from parts of a pointer. See
-        // <https://github.com/rust-lang/rust/issues/87184> for details.
+        // We need to handle clearing the relocations from parts of a pointer.
+        // FIXME: Miri should preserve partial relocations; see
+        // https://github.com/rust-lang/miri/issues/2181.
          if first < start {
              if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
                  return Err(AllocError::PartialPointerOverwrite(first));
@@ -637,13 +708,43 @@ type Block = u64;
  
  /// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
  /// is initialized. If it is `false` the byte is uninitialized.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `InitMask` fields can be partially
+// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
  #[derive(HashStable)]
  pub struct InitMask {
      blocks: Vec<Block>,
      len: Size,
  }
  
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the allocation's init mask. We can partially hash some fields when they're
+// large.
+impl hash::Hash for InitMask {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        const MAX_BLOCKS_TO_HASH: usize = MAX_BYTES_TO_HASH / std::mem::size_of::<Block>();
+        const MAX_BLOCKS_LEN: usize = MAX_HASHED_BUFFER_LEN / std::mem::size_of::<Block>();
+
+        // Partially hash the `blocks` buffer when it is large. To limit collisions with common
+        // prefixes and suffixes, we hash the length and some slices of the buffer.
+        let block_count = self.blocks.len();
+        if block_count > MAX_BLOCKS_LEN {
+            // Hash the buffer's length.
+            block_count.hash(state);
+
+            // And its head and tail.
+            self.blocks[..MAX_BLOCKS_TO_HASH].hash(state);
+            self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state);
+        } else {
+            self.blocks.hash(state);
+        }
+
+        // Hash the other fields as usual.
+        self.len.hash(state);
+    }
+}
+
  impl InitMask {
      pub const BLOCK_SIZE: u64 = 64;