]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_span/src/span_encoding.rs
New upstream version 1.75.0+dfsg1
[rustc.git] / compiler / rustc_span / src / span_encoding.rs
CommitLineData
9c376795 1use crate::def_id::{DefIndex, LocalDefId};
dfeec247 2use crate::hygiene::SyntaxContext;
c295e0f8 3use crate::SPAN_TRACK;
9fa01778 4use crate::{BytePos, SpanData};
ea8adc8c 5
3dfed10e 6use rustc_data_structures::fx::FxIndexSet;
ea8adc8c
XL
7
8/// A compressed span.
48663c56 9///
781aab86
FG
10/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
11/// takes up 8 bytes, with less space for the length, parent and context. The
12/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
13/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
48663c56
XL
14/// stored in a separate interner table, and the `Span` will index into that
15/// table. Interning is rare enough that the cost is low, but common enough
16/// that the code is exercised regularly.
17///
18/// An earlier version of this code used only 4 bytes for `Span`, but that was
19/// slower because only 80--90% of spans could be stored inline (even less in
781aab86
FG
20/// very large crates) and so the interner was used a lot more. That version of
21/// the code also predated the storage of parents.
22///
23/// There are four different span forms.
48663c56 24///
781aab86
FG
25/// Inline-context format (requires non-huge length, non-huge context, and no parent):
26/// - `span.lo_or_index == span_data.lo`
27/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
28/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
2b03887a 29///
781aab86
FG
30/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
31/// - `span.lo_or_index == span_data.lo`
32/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
33/// (must be `<= MAX_LEN`)
34/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
35/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
48663c56 36///
781aab86
FG
37/// Partially-interned format (requires non-huge context):
38/// - `span.lo_or_index == index` (indexes into the interner table)
39/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
40/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
9c376795 41///
781aab86
FG
42/// Fully-interned format (all cases not covered above):
43/// - `span.lo_or_index == index` (indexes into the interner table)
44/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
45/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
48663c56 46///
781aab86
FG
47/// The partially-interned form requires looking in the interning table for
48/// lo and length, but the context is stored inline as well as interned.
49/// This is useful because context lookups are often done in isolation, and
50/// inline lookups are quicker.
48663c56
XL
51///
52/// Notes about the choice of field sizes:
781aab86
FG
53/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
54/// values never cause interning. The number of bits needed for `lo`
48663c56 55/// depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
781aab86
FG
56/// Having no compression on this field means there is no performance cliff
57/// if a crate exceeds a particular size.
58/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
59/// bits in `SpanData`, which means that large `len` values will cause
60/// interning. The number of bits needed for `len` does not depend on the
61/// crate size. The most common numbers of bits for `len` are from 0 to 7,
62/// with a peak usually at 3 or 4, and then it drops off quickly from 8
63/// onwards. 15 bits is enough for 99.99%+ of cases, but larger values
64/// (sometimes 20+ bits) might occur dozens of times in a typical crate.
65/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
66/// `SpanData`, which means intering will happen if `ctxt` is large, if
67/// `parent` is large, or if both values are non-zero. The number of bits
68/// needed for `ctxt` values depend partly on the crate size and partly on
69/// the form of the code. No crates in `rustc-perf` need more than 15 bits
70/// for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
71/// bits. The number of bits needed for `parent` hasn't been measured,
72/// because `parent` isn't currently used by default.
48663c56 73///
c295e0f8
XL
74/// In order to reliably use parented spans in incremental compilation,
75/// the dependency to the parent definition's span. This is performed
76/// using the callback `SPAN_TRACK` to access the query engine.
77///
48663c56 78#[derive(Clone, Copy, Eq, PartialEq, Hash)]
5e7ed085 79#[rustc_pass_by_value]
48663c56 80pub struct Span {
781aab86
FG
81 lo_or_index: u32,
82 len_with_tag_or_marker: u16,
83 ctxt_or_parent_or_marker: u16,
0531ce1d
XL
84}
85
781aab86
FG
86// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
87// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
88const MAX_LEN: u32 = 0b0111_1111_1111_1110;
89const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
90const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
91const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
92const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
48663c56 93
781aab86
FG
94/// The dummy span has zero position, length, and context, and no parent.
95pub const DUMMY_SP: Span =
96 Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
ea8adc8c
XL
97
98impl Span {
99 #[inline]
c295e0f8
XL
100 pub fn new(
101 mut lo: BytePos,
102 mut hi: BytePos,
103 ctxt: SyntaxContext,
104 parent: Option<LocalDefId>,
105 ) -> Self {
48663c56
XL
106 if lo > hi {
107 std::mem::swap(&mut lo, &mut hi);
108 }
109
781aab86 110 let (lo2, len, ctxt2) = (lo.0, hi.0 - lo.0, ctxt.as_u32());
9c376795 111
781aab86
FG
112 if len <= MAX_LEN {
113 if ctxt2 <= MAX_CTXT && parent.is_none() {
114 // Inline-context format.
9c376795 115 return Span {
781aab86
FG
116 lo_or_index: lo2,
117 len_with_tag_or_marker: len as u16,
118 ctxt_or_parent_or_marker: ctxt2 as u16,
119 };
120 } else if ctxt2 == SyntaxContext::root().as_u32()
121 && let Some(parent) = parent
122 && let parent2 = parent.local_def_index.as_u32()
123 && parent2 <= MAX_CTXT
124 {
125 // Inline-parent format.
126 return Span {
127 lo_or_index: lo2,
128 len_with_tag_or_marker: PARENT_TAG | len as u16,
ed00b5ec 129 ctxt_or_parent_or_marker: parent2 as u16,
9c376795
FG
130 };
131 }
48663c56 132 }
9c376795 133
781aab86 134 // Partially-interned or fully-interned format.
9c376795
FG
135 let index =
136 with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }));
781aab86
FG
137 let ctxt_or_parent_or_marker = if ctxt2 <= MAX_CTXT {
138 ctxt2 as u16 // partially-interned
139 } else {
140 CTXT_INTERNED_MARKER // fully-interned
141 };
142 Span {
143 lo_or_index: index,
144 len_with_tag_or_marker: BASE_LEN_INTERNED_MARKER,
145 ctxt_or_parent_or_marker,
146 }
ea8adc8c
XL
147 }
148
149 #[inline]
150 pub fn data(self) -> SpanData {
c295e0f8
XL
151 let data = self.data_untracked();
152 if let Some(parent) = data.parent {
153 (*SPAN_TRACK)(parent);
154 }
155 data
156 }
157
158 /// Internal function to translate between an encoded span and the expanded representation.
159 /// This function must not be used outside the incremental engine.
160 #[inline]
161 pub fn data_untracked(self) -> SpanData {
781aab86
FG
162 if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
163 if self.len_with_tag_or_marker & PARENT_TAG == 0 {
164 // Inline-context format.
165 let len = self.len_with_tag_or_marker as u32;
166 debug_assert!(len <= MAX_LEN);
9c376795 167 SpanData {
781aab86
FG
168 lo: BytePos(self.lo_or_index),
169 hi: BytePos(self.lo_or_index + len),
170 ctxt: SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32),
9c376795
FG
171 parent: None,
172 }
173 } else {
781aab86
FG
174 // Inline-parent format.
175 let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
176 debug_assert!(len <= MAX_LEN);
177 let parent = LocalDefId {
178 local_def_index: DefIndex::from_u32(self.ctxt_or_parent_or_marker as u32),
179 };
9c376795 180 SpanData {
781aab86
FG
181 lo: BytePos(self.lo_or_index),
182 hi: BytePos(self.lo_or_index + len),
9c376795
FG
183 ctxt: SyntaxContext::root(),
184 parent: Some(parent),
185 }
48663c56
XL
186 }
187 } else {
781aab86
FG
188 // Fully-interned or partially-interned format. In either case,
189 // the interned value contains all the data, so we don't need to
190 // distinguish them.
191 let index = self.lo_or_index;
17df50a5 192 with_span_interner(|interner| interner.spans[index as usize])
48663c56 193 }
ea8adc8c 194 }
2b03887a 195
781aab86
FG
196 /// Returns `true` if this is a dummy span with any hygienic context.
197 #[inline]
198 pub fn is_dummy(self) -> bool {
199 if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
200 // Inline-context or inline-parent format.
201 let lo = self.lo_or_index;
202 let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
203 debug_assert!(len <= MAX_LEN);
204 lo == 0 && len == 0
205 } else {
206 // Fully-interned or partially-interned format.
207 let index = self.lo_or_index;
208 let data = with_span_interner(|interner| interner.spans[index as usize]);
209 data.lo == BytePos(0) && data.hi == BytePos(0)
210 }
211 }
212
2b03887a 213 /// This function is used as a fast path when decoding the full `SpanData` is not necessary.
781aab86 214 /// It's a cut-down version of `data_untracked`.
ed00b5ec 215 #[cfg_attr(not(test), rustc_diagnostic_item = "SpanCtxt")]
2b03887a
FG
216 #[inline]
217 pub fn ctxt(self) -> SyntaxContext {
781aab86
FG
218 if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
219 if self.len_with_tag_or_marker & PARENT_TAG == 0 {
220 // Inline-context format.
221 SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
9c376795 222 } else {
781aab86
FG
223 // Inline-parent format. We know that the SyntaxContext is root.
224 SyntaxContext::root()
9c376795 225 }
2b03887a 226 } else {
781aab86
FG
227 if self.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
228 // Partially-interned format. This path avoids looking up the
229 // interned value, and is the whole point of the
230 // partially-interned format.
231 SyntaxContext::from_u32(self.ctxt_or_parent_or_marker as u32)
232 } else {
233 // Fully-interned format.
234 let index = self.lo_or_index;
235 with_span_interner(|interner| interner.spans[index as usize].ctxt)
236 }
2b03887a
FG
237 }
238 }
ea8adc8c
XL
239}
240
ea8adc8c 241#[derive(Default)]
0531ce1d 242pub struct SpanInterner {
3dfed10e 243 spans: FxIndexSet<SpanData>,
ea8adc8c
XL
244}
245
246impl SpanInterner {
247 fn intern(&mut self, span_data: &SpanData) -> u32 {
3dfed10e
XL
248 let (index, _) = self.spans.insert_full(*span_data);
249 index as u32
ea8adc8c 250 }
ea8adc8c
XL
251}
252
0531ce1d 253// If an interner exists, return it. Otherwise, prepare a fresh one.
ea8adc8c
XL
254#[inline]
255fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
487cf647 256 crate::with_session_globals(|session_globals| f(&mut session_globals.span_interner.lock()))
ea8adc8c 257}