]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | //! Code related to parsing literals. |
2 | ||
487cf647 | 3 | use crate::ast::{self, LitKind, MetaItemLit}; |
60c5eb7d | 4 | use crate::token::{self, Token}; |
487cf647 | 5 | use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; |
dfeec247 XL |
6 | use rustc_span::symbol::{kw, sym, Symbol}; |
7 | use rustc_span::Span; | |
48663c56 XL |
8 | use std::ascii; |
9 | ||
/// Reasons why a literal token could not be converted into a semantic literal.
#[derive(Debug)]
pub enum LitError {
    /// Unescaping the literal failed, or an integer literal contains a digit
    /// that is not valid for its base. Such problems are expected to have
    /// been reported by the lexer already.
    LexerError,
    /// A suffix was present on a literal kind that may not carry one.
    InvalidSuffix,
    /// An integer literal has a suffix that is not a known integer type.
    InvalidIntSuffix,
    /// A float literal has a suffix other than `f32` or `f64`.
    InvalidFloatSuffix,
    /// A float literal was written in a non-decimal base; the payload is that base.
    NonDecimalFloat(u32),
    /// An integer literal's value does not fit into a `u128`.
    IntTooLarge,
}
19 | ||
48663c56 | 20 | impl LitKind { |
dc9dc135 | 21 | /// Converts literal token into a semantic literal. |
f2b60f7d | 22 | pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> { |
dc9dc135 XL |
23 | let token::Lit { kind, symbol, suffix } = lit; |
24 | if suffix.is_some() && !kind.may_have_suffix() { | |
25 | return Err(LitError::InvalidSuffix); | |
48663c56 XL |
26 | } |
27 | ||
dc9dc135 XL |
28 | Ok(match kind { |
29 | token::Bool => { | |
e1599b0c | 30 | assert!(symbol.is_bool_lit()); |
dc9dc135 | 31 | LitKind::Bool(symbol == kw::True) |
48663c56 | 32 | } |
dfeec247 | 33 | token::Byte => { |
a2a8927a | 34 | return unescape_byte(symbol.as_str()) |
dfeec247 XL |
35 | .map(LitKind::Byte) |
36 | .map_err(|_| LitError::LexerError); | |
37 | } | |
38 | token::Char => { | |
a2a8927a | 39 | return unescape_char(symbol.as_str()) |
dfeec247 XL |
40 | .map(LitKind::Char) |
41 | .map_err(|_| LitError::LexerError); | |
42 | } | |
48663c56 XL |
43 | |
44 | // There are some valid suffixes for integer and float literals, | |
45 | // so all the handling is done internally. | |
dc9dc135 XL |
46 | token::Integer => return integer_lit(symbol, suffix), |
47 | token::Float => return float_lit(symbol, suffix), | |
48663c56 | 48 | |
dc9dc135 | 49 | token::Str => { |
48663c56 | 50 | // If there are no characters requiring special treatment we can |
dc9dc135 | 51 | // reuse the symbol from the token. Otherwise, we must generate a |
48663c56 | 52 | // new symbol because the string in the LitKind is different to the |
dc9dc135 XL |
53 | // string in the token. |
54 | let s = symbol.as_str(); | |
487cf647 | 55 | let symbol = if s.contains(['\\', '\r']) { |
5e7ed085 FG |
56 | let mut buf = String::with_capacity(s.len()); |
57 | let mut error = Ok(()); | |
58 | // Force-inlining here is aggressive but the closure is | |
59 | // called on every char in the string, so it can be | |
60 | // hot in programs with many long strings. | |
61 | unescape_literal( | |
487cf647 | 62 | s, |
5e7ed085 FG |
63 | Mode::Str, |
64 | &mut #[inline(always)] | |
65 | |_, unescaped_char| match unescaped_char { | |
66 | Ok(c) => buf.push(c), | |
67 | Err(err) => { | |
68 | if err.is_fatal() { | |
69 | error = Err(LitError::LexerError); | |
94222f64 | 70 | } |
f9f354fc | 71 | } |
5e7ed085 FG |
72 | }, |
73 | ); | |
74 | error?; | |
75 | Symbol::intern(&buf) | |
76 | } else { | |
77 | symbol | |
78 | }; | |
dc9dc135 | 79 | LitKind::Str(symbol, ast::StrStyle::Cooked) |
48663c56 | 80 | } |
dc9dc135 | 81 | token::StrRaw(n) => { |
48663c56 | 82 | // Ditto. |
dc9dc135 | 83 | let s = symbol.as_str(); |
f9f354fc XL |
84 | let symbol = |
85 | if s.contains('\r') { | |
86 | let mut buf = String::with_capacity(s.len()); | |
87 | let mut error = Ok(()); | |
487cf647 | 88 | unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| { |
f9f354fc XL |
89 | match unescaped_char { |
90 | Ok(c) => buf.push(c), | |
94222f64 XL |
91 | Err(err) => { |
92 | if err.is_fatal() { | |
93 | error = Err(LitError::LexerError); | |
94 | } | |
95 | } | |
f9f354fc XL |
96 | } |
97 | }); | |
98 | error?; | |
f9f354fc XL |
99 | Symbol::intern(&buf) |
100 | } else { | |
101 | symbol | |
102 | }; | |
dc9dc135 | 103 | LitKind::Str(symbol, ast::StrStyle::Raw(n)) |
48663c56 | 104 | } |
dc9dc135 XL |
105 | token::ByteStr => { |
106 | let s = symbol.as_str(); | |
48663c56 | 107 | let mut buf = Vec::with_capacity(s.len()); |
dc9dc135 | 108 | let mut error = Ok(()); |
487cf647 FG |
109 | unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { |
110 | Ok(c) => buf.push(byte_from_char(c)), | |
111 | Err(err) => { | |
112 | if err.is_fatal() { | |
113 | error = Err(LitError::LexerError); | |
94222f64 | 114 | } |
f9f354fc | 115 | } |
48663c56 | 116 | }); |
dc9dc135 | 117 | error?; |
29967ef6 | 118 | LitKind::ByteStr(buf.into()) |
48663c56 | 119 | } |
dc9dc135 XL |
120 | token::ByteStrRaw(_) => { |
121 | let s = symbol.as_str(); | |
122 | let bytes = if s.contains('\r') { | |
123 | let mut buf = Vec::with_capacity(s.len()); | |
124 | let mut error = Ok(()); | |
487cf647 FG |
125 | unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { |
126 | Ok(c) => buf.push(byte_from_char(c)), | |
127 | Err(err) => { | |
128 | if err.is_fatal() { | |
129 | error = Err(LitError::LexerError); | |
94222f64 | 130 | } |
f9f354fc | 131 | } |
dc9dc135 XL |
132 | }); |
133 | error?; | |
dc9dc135 XL |
134 | buf |
135 | } else { | |
136 | symbol.to_string().into_bytes() | |
137 | }; | |
138 | ||
29967ef6 | 139 | LitKind::ByteStr(bytes.into()) |
dfeec247 | 140 | } |
f2b60f7d | 141 | token::Err => LitKind::Err, |
48663c56 XL |
142 | }) |
143 | } | |
144 | ||
145 | /// Attempts to recover a token from semantic literal. | |
146 | /// This function is used when the original token doesn't exist (e.g. the literal is created | |
147 | /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). | |
f2b60f7d | 148 | pub fn to_token_lit(&self) -> token::Lit { |
dc9dc135 XL |
149 | let (kind, symbol, suffix) = match *self { |
150 | LitKind::Str(symbol, ast::StrStyle::Cooked) => { | |
151 | // Don't re-intern unless the escaped string is different. | |
60c5eb7d | 152 | let s = symbol.as_str(); |
dc9dc135 | 153 | let escaped = s.escape_default().to_string(); |
60c5eb7d | 154 | let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) }; |
dc9dc135 | 155 | (token::Str, symbol, None) |
48663c56 | 156 | } |
dfeec247 | 157 | LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), |
48663c56 | 158 | LitKind::ByteStr(ref bytes) => { |
f2b60f7d | 159 | let string = bytes.escape_ascii().to_string(); |
dc9dc135 | 160 | (token::ByteStr, Symbol::intern(&string), None) |
48663c56 XL |
161 | } |
162 | LitKind::Byte(byte) => { | |
163 | let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect(); | |
dc9dc135 | 164 | (token::Byte, Symbol::intern(&string), None) |
48663c56 XL |
165 | } |
166 | LitKind::Char(ch) => { | |
167 | let string: String = ch.escape_default().map(Into::<char>::into).collect(); | |
dc9dc135 | 168 | (token::Char, Symbol::intern(&string), None) |
48663c56 XL |
169 | } |
170 | LitKind::Int(n, ty) => { | |
171 | let suffix = match ty { | |
60c5eb7d XL |
172 | ast::LitIntType::Unsigned(ty) => Some(ty.name()), |
173 | ast::LitIntType::Signed(ty) => Some(ty.name()), | |
48663c56 XL |
174 | ast::LitIntType::Unsuffixed => None, |
175 | }; | |
dc9dc135 | 176 | (token::Integer, sym::integer(n), suffix) |
48663c56 XL |
177 | } |
178 | LitKind::Float(symbol, ty) => { | |
60c5eb7d XL |
179 | let suffix = match ty { |
180 | ast::LitFloatType::Suffixed(ty) => Some(ty.name()), | |
181 | ast::LitFloatType::Unsuffixed => None, | |
182 | }; | |
183 | (token::Float, symbol, suffix) | |
48663c56 | 184 | } |
48663c56 | 185 | LitKind::Bool(value) => { |
dc9dc135 XL |
186 | let symbol = if value { kw::True } else { kw::False }; |
187 | (token::Bool, symbol, None) | |
48663c56 | 188 | } |
f2b60f7d FG |
189 | // This only shows up in places like `-Zunpretty=hir` output, so we |
190 | // don't bother to produce something useful. | |
191 | LitKind::Err => (token::Err, Symbol::intern("<bad-literal>"), None), | |
dc9dc135 XL |
192 | }; |
193 | ||
194 | token::Lit::new(kind, symbol, suffix) | |
48663c56 XL |
195 | } |
196 | } | |
197 | ||
487cf647 FG |
198 | impl MetaItemLit { |
199 | /// Converts token literal into a meta item literal. | |
200 | pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError> { | |
201 | Ok(MetaItemLit { token_lit, kind: LitKind::from_token_lit(token_lit)?, span }) | |
48663c56 XL |
202 | } |
203 | ||
487cf647 FG |
204 | /// Converts an arbitrary token into meta item literal. |
205 | pub fn from_token(token: &Token) -> Option<MetaItemLit> { | |
206 | token::Lit::from_token(token) | |
207 | .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok()) | |
48663c56 | 208 | } |
48663c56 XL |
209 | } |
210 | ||
dc9dc135 XL |
211 | fn strip_underscores(symbol: Symbol) -> Symbol { |
212 | // Do not allocate a new string unless necessary. | |
213 | let s = symbol.as_str(); | |
214 | if s.contains('_') { | |
215 | let mut s = s.to_string(); | |
216 | s.retain(|c| c != '_'); | |
217 | return Symbol::intern(&s); | |
218 | } | |
219 | symbol | |
220 | } | |
48663c56 | 221 | |
dfeec247 XL |
222 | fn filtered_float_lit( |
223 | symbol: Symbol, | |
224 | suffix: Option<Symbol>, | |
225 | base: u32, | |
226 | ) -> Result<LitKind, LitError> { | |
dc9dc135 XL |
227 | debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base); |
228 | if base != 10 { | |
229 | return Err(LitError::NonDecimalFloat(base)); | |
230 | } | |
231 | Ok(match suffix { | |
dfeec247 XL |
232 | Some(suf) => LitKind::Float( |
233 | symbol, | |
234 | ast::LitFloatType::Suffixed(match suf { | |
235 | sym::f32 => ast::FloatTy::F32, | |
236 | sym::f64 => ast::FloatTy::F64, | |
237 | _ => return Err(LitError::InvalidFloatSuffix), | |
238 | }), | |
239 | ), | |
240 | None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed), | |
48663c56 XL |
241 | }) |
242 | } | |
48663c56 | 243 | |
dc9dc135 XL |
244 | fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { |
245 | debug!("float_lit: {:?}, {:?}", symbol, suffix); | |
246 | filtered_float_lit(strip_underscores(symbol), suffix, 10) | |
48663c56 XL |
247 | } |
248 | ||
dc9dc135 XL |
249 | fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { |
250 | debug!("integer_lit: {:?}, {:?}", symbol, suffix); | |
251 | let symbol = strip_underscores(symbol); | |
252 | let s = symbol.as_str(); | |
48663c56 | 253 | |
e74abb32 XL |
254 | let base = match s.as_bytes() { |
255 | [b'0', b'x', ..] => 16, | |
256 | [b'0', b'o', ..] => 8, | |
257 | [b'0', b'b', ..] => 2, | |
258 | _ => 10, | |
259 | }; | |
48663c56 | 260 | |
dc9dc135 XL |
261 | let ty = match suffix { |
262 | Some(suf) => match suf { | |
263 | sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize), | |
dfeec247 | 264 | sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8), |
dc9dc135 XL |
265 | sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16), |
266 | sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32), | |
267 | sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64), | |
268 | sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128), | |
269 | sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize), | |
dfeec247 | 270 | sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8), |
dc9dc135 XL |
271 | sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16), |
272 | sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32), | |
273 | sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64), | |
274 | sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128), | |
275 | // `1f64` and `2f32` etc. are valid float literals, and | |
276 | // `fxxx` looks more like an invalid float literal than invalid integer literal. | |
277 | _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base), | |
278 | _ => return Err(LitError::InvalidIntSuffix), | |
dfeec247 XL |
279 | }, |
280 | _ => ast::LitIntType::Unsuffixed, | |
dc9dc135 | 281 | }; |
48663c56 | 282 | |
dfeec247 | 283 | let s = &s[if base != 10 { 2 } else { 0 }..]; |
dc9dc135 XL |
284 | u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| { |
285 | // Small bases are lexed as if they were base 10, e.g, the string | |
286 | // might be `0b10201`. This will cause the conversion above to fail, | |
287 | // but these kinds of errors are already reported by the lexer. | |
288 | let from_lexer = | |
289 | base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); | |
290 | if from_lexer { LitError::LexerError } else { LitError::IntTooLarge } | |
48663c56 XL |
291 | }) |
292 | } |