1 //! Code related to parsing literals.
3 use crate::ast
::{self, Lit, LitKind}
;
4 use crate::parse
::token
::{self, Token}
;
5 use crate::symbol
::{kw, sym, Symbol}
;
6 use crate::tokenstream
::TokenTree
;
9 use rustc_data_structures
::sync
::Lrc
;
11 use rustc_lexer
::unescape
::{unescape_char, unescape_byte}
;
12 use rustc_lexer
::unescape
::{unescape_str, unescape_byte_str}
;
13 use rustc_lexer
::unescape
::{unescape_raw_str, unescape_raw_byte_str}
;
28 /// Converts literal token into a semantic literal.
///
/// Fails with `LitError::InvalidSuffix` when a suffix is attached to a kind for which
/// `may_have_suffix()` is false, and with `LitError::LexerError` when unescaping a byte
/// or char literal reports an error.
// NOTE(review): the embedded numbering has gaps throughout this function (for example
// 33-36, 63-69 and 82-89), so the match header, several arm headers and the tails of the
// unescape closures are not visible here. The comments below describe only the statements
// that are visible; confirm the rest against the full source.
29 fn from_lit_token(lit
: token
::Lit
) -> Result
<LitKind
, LitError
> {
30 let token
::Lit { kind, symbol, suffix }
= lit
;
// Reject a suffix on any literal kind that cannot carry one.
31 if suffix
.is_some() && !kind
.may_have_suffix() {
32 return Err(LitError
::InvalidSuffix
);
// Boolean literal: the symbol is asserted to be a bool literal, then compared to `kw::True`.
37 assert
!(symbol
.is_bool_lit());
38 LitKind
::Bool(symbol
== kw
::True
)
// Byte and char literals are unescaped in one step; any unescape error becomes `LexerError`.
40 token
::Byte
=> return unescape_byte(&symbol
.as_str())
41 .map(LitKind
::Byte
).map_err(|_
| LitError
::LexerError
),
42 token
::Char
=> return unescape_char(&symbol
.as_str())
43 .map(LitKind
::Char
).map_err(|_
| LitError
::LexerError
),
45 // There are some valid suffixes for integer and float literals,
46 // so all the handling is done internally.
47 token
::Integer
=> return integer_lit(symbol
, suffix
),
48 token
::Float
=> return float_lit(symbol
, suffix
),
51 // If there are no characters requiring special treatment we can
52 // reuse the symbol from the token. Otherwise, we must generate a
53 // new symbol because the string in the LitKind is different to the
54 // string in the token.
55 let s
= symbol
.as_str();
56 let symbol
= if s
.contains(&['
\\'
, '
\r'
][..]) {
// Unescape into a new buffer, recording in `error` whether any character was rejected.
57 let mut buf
= String
::with_capacity(s
.len());
58 let mut error
= Ok(());
59 unescape_str(&s
, &mut |_
, unescaped_char
| {
60 match unescaped_char
{
62 Err(_
) => error
= Err(LitError
::LexerError
),
70 LitKind
::Str(symbol
, ast
::StrStyle
::Cooked
)
// Raw string: only a symbol containing a carriage return goes through `unescape_raw_str`.
74 let s
= symbol
.as_str();
75 let symbol
= if s
.contains('
\r'
) {
76 let mut buf
= String
::with_capacity(s
.len());
77 let mut error
= Ok(());
78 unescape_raw_str(&s
, &mut |_
, unescaped_char
| {
79 match unescaped_char
{
81 Err(_
) => error
= Err(LitError
::LexerError
),
90 LitKind
::Str(symbol
, ast
::StrStyle
::Raw(n
))
// Byte string: the symbol text is always unescaped into a freshly allocated byte buffer.
93 let s
= symbol
.as_str();
94 let mut buf
= Vec
::with_capacity(s
.len());
95 let mut error
= Ok(());
96 unescape_byte_str(&s
, &mut |_
, unescaped_byte
| {
97 match unescaped_byte
{
99 Err(_
) => error
= Err(LitError
::LexerError
),
104 LitKind
::ByteStr(Lrc
::new(buf
))
// Raw byte string: unescaped only when the text contains a carriage return.
106 token
::ByteStrRaw(_
) => {
107 let s
= symbol
.as_str();
108 let bytes
= if s
.contains('
\r'
) {
109 let mut buf
= Vec
::with_capacity(s
.len());
110 let mut error
= Ok(());
111 unescape_raw_byte_str(&s
, &mut |_
, unescaped_byte
| {
112 match unescaped_byte
{
113 Ok(c
) => buf
.push(c
),
114 Err(_
) => error
= Err(LitError
::LexerError
),
// NOTE(review): presumably the branch taken when no carriage return is present, where the
// bytes of the symbol text are used as-is; the branch header is not visible, so confirm.
121 symbol
.to_string().into_bytes()
124 LitKind
::ByteStr(Lrc
::new(bytes
))
// An error literal keeps its symbol unchanged.
126 token
::Err
=> LitKind
::Err(symbol
),
130 /// Attempts to recover a token from semantic literal.
131 /// This function is used when the original token doesn't exist (e.g. the literal is created
132 /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
133 pub fn to_lit_token(&self) -> token
::Lit
{
134 let (kind
, symbol
, suffix
) = match *self {
135 LitKind
::Str(symbol
, ast
::StrStyle
::Cooked
) => {
136 // Don't re-intern unless the escaped string is different.
137 let s
: &str = &symbol
.as_str();
138 let escaped
= s
.escape_default().to_string();
139 let symbol
= if escaped
== *s { symbol }
else { Symbol::intern(&escaped) }
;
140 (token
::Str
, symbol
, None
)
142 LitKind
::Str(symbol
, ast
::StrStyle
::Raw(n
)) => {
143 (token
::StrRaw(n
), symbol
, None
)
145 LitKind
::ByteStr(ref bytes
) => {
146 let string
= bytes
.iter().cloned().flat_map(ascii
::escape_default
)
147 .map(Into
::<char>::into
).collect
::<String
>();
148 (token
::ByteStr
, Symbol
::intern(&string
), None
)
150 LitKind
::Byte(byte
) => {
151 let string
: String
= ascii
::escape_default(byte
).map(Into
::<char>::into
).collect();
152 (token
::Byte
, Symbol
::intern(&string
), None
)
154 LitKind
::Char(ch
) => {
155 let string
: String
= ch
.escape_default().map(Into
::<char>::into
).collect();
156 (token
::Char
, Symbol
::intern(&string
), None
)
158 LitKind
::Int(n
, ty
) => {
159 let suffix
= match ty
{
160 ast
::LitIntType
::Unsigned(ty
) => Some(ty
.to_symbol()),
161 ast
::LitIntType
::Signed(ty
) => Some(ty
.to_symbol()),
162 ast
::LitIntType
::Unsuffixed
=> None
,
164 (token
::Integer
, sym
::integer(n
), suffix
)
166 LitKind
::Float(symbol
, ty
) => {
167 (token
::Float
, symbol
, Some(ty
.to_symbol()))
169 LitKind
::FloatUnsuffixed(symbol
) => {
170 (token
::Float
, symbol
, None
)
172 LitKind
::Bool(value
) => {
173 let symbol
= if value { kw::True }
else { kw::False }
;
174 (token
::Bool
, symbol
, None
)
176 LitKind
::Err(symbol
) => {
177 (token
::Err
, symbol
, None
)
181 token
::Lit
::new(kind
, symbol
, suffix
)
186 /// Converts literal token into an AST literal.
187 crate fn from_lit_token(token
: token
::Lit
, span
: Span
) -> Result
<Lit
, LitError
> {
188 Ok(Lit { token, kind: LitKind::from_lit_token(token)?, span }
)
191 /// Converts arbitrary token into an AST literal.
///
/// Returns `LitError::NotLiteral` for token kinds not handled below and for interpolated
/// tokens that do not wrap a literal expression.
192 crate fn from_token(token
: &Token
) -> Result
<Lit
, LitError
> {
193 let lit
= match token
.kind
{
// An identifier for which `is_bool_lit()` holds becomes a `Bool` literal token.
194 token
::Ident(name
, false) if name
.is_bool_lit() =>
195 token
::Lit
::new(token
::Bool
, name
, None
),
196 token
::Literal(lit
) =>
// NOTE(review): the body of the `Literal` arm (embedded line 197) is missing from this
// listing; presumably it yields `lit` itself. Confirm against the full source.
// An interpolated `NtExpr` or `NtLiteral` whose expression kind is a literal returns a
// clone of that literal directly, without going through the conversion at the end.
198 token
::Interpolated(ref nt
) => {
199 if let token
::NtExpr(expr
) | token
::NtLiteral(expr
) = &**nt
{
200 if let ast
::ExprKind
::Lit(lit
) = &expr
.kind
{
201 return Ok(lit
.clone());
204 return Err(LitError
::NotLiteral
);
206 _
=> return Err(LitError
::NotLiteral
)
// The selected literal token is converted together with the span of the source token.
209 Lit
::from_lit_token(lit
, token
.span
)
212 /// Attempts to recover an AST literal from semantic literal.
213 /// This function is used when the original token doesn't exist (e.g. the literal is created
214 /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
215 pub fn from_lit_kind(kind
: LitKind
, span
: Span
) -> Lit
{
216 Lit { token: kind.to_lit_token(), kind, span }
219 /// Losslessly convert an AST literal into a token tree.
220 crate fn token_tree(&self) -> TokenTree
{
221 let token
= match self.token
.kind
{
222 token
::Bool
=> token
::Ident(self.token
.symbol
, false),
223 _
=> token
::Literal(self.token
),
225 TokenTree
::token(token
, self.span
)
// Produces a symbol whose text is the text of `symbol` with every `_` character removed.
// NOTE(review): embedded lines 232 and 236-238 are missing from this listing, so the guard
// implied by the comment below and the value returned when nothing is stripped are not
// visible; presumably the input symbol is returned untouched in that case. Confirm.
229 fn strip_underscores(symbol
: Symbol
) -> Symbol
{
230 // Do not allocate a new string unless necessary.
231 let s
= symbol
.as_str();
// Copy the text into an owned string, drop the underscores in place, and intern the result.
233 let mut s
= s
.to_string();
234 s
.retain(|c
| c
!= '_'
);
235 return Symbol
::intern(&s
);
// Builds a float `LitKind` from `symbol` and an optional type suffix.
// An `f32` or `f64` suffix yields `LitKind::Float` with the matching `FloatTy`, any other
// suffix yields `LitError::InvalidFloatSuffix`, and no suffix yields
// `LitKind::FloatUnsuffixed`.
240 fn filtered_float_lit(symbol
: Symbol
, suffix
: Option
<Symbol
>, base
: u32)
241 -> Result
<LitKind
, LitError
> {
242 debug
!("filtered_float_lit: {:?}, {:?}, {:?}", symbol
, suffix
, base
);
// NOTE(review): the condition guarding this early return (embedded line 243) is missing
// from the listing; presumably it fires when `base` is not 10. Confirm.
244 return Err(LitError
::NonDecimalFloat(base
));
// NOTE(review): the header of the match on `suffix` (embedded line 246) is also missing.
247 Some(suf
) => match suf
{
248 sym
::f32 => LitKind
::Float(symbol
, ast
::FloatTy
::F32
),
249 sym
::f64 => LitKind
::Float(symbol
, ast
::FloatTy
::F64
),
250 _
=> return Err(LitError
::InvalidFloatSuffix
),
252 None
=> LitKind
::FloatUnsuffixed(symbol
)
256 fn float_lit(symbol
: Symbol
, suffix
: Option
<Symbol
>) -> Result
<LitKind
, LitError
> {
257 debug
!("float_lit: {:?}, {:?}", symbol
, suffix
);
258 filtered_float_lit(strip_underscores(symbol
), suffix
, 10)
// Converts the text and optional suffix of an integer literal token into a `LitKind`.
// Underscores are stripped, the base is read from the prefix, the suffix is mapped to a
// `LitIntType`, and the digits are parsed as a `u128`. A suffix starting with `f` hands the
// literal to `filtered_float_lit`; any other unknown suffix is `LitError::InvalidIntSuffix`.
261 fn integer_lit(symbol
: Symbol
, suffix
: Option
<Symbol
>) -> Result
<LitKind
, LitError
> {
262 debug
!("integer_lit: {:?}, {:?}", symbol
, suffix
);
263 let symbol
= strip_underscores(symbol
);
264 let s
= symbol
.as_str();
// The two leading bytes select the base: `0x` is 16, `0o` is 8 and `0b` is 2.
// NOTE(review): the fallback arm (embedded lines 270-271) is missing from this listing;
// presumably it selects base 10. Confirm.
266 let base
= match s
.as_bytes() {
267 [b'
0'
, b'x'
, ..] => 16,
268 [b'
0'
, b'o'
, ..] => 8,
269 [b'
0'
, b'b'
, ..] => 2,
// Map the suffix symbol onto the matching signed or unsigned integer type.
273 let ty
= match suffix
{
274 Some(suf
) => match suf
{
275 sym
::isize => ast
::LitIntType
::Signed(ast
::IntTy
::Isize
),
276 sym
::i8 => ast
::LitIntType
::Signed(ast
::IntTy
::I8
),
277 sym
::i16 => ast
::LitIntType
::Signed(ast
::IntTy
::I16
),
278 sym
::i32 => ast
::LitIntType
::Signed(ast
::IntTy
::I32
),
279 sym
::i64 => ast
::LitIntType
::Signed(ast
::IntTy
::I64
),
280 sym
::i128
=> ast
::LitIntType
::Signed(ast
::IntTy
::I128
),
281 sym
::usize => ast
::LitIntType
::Unsigned(ast
::UintTy
::Usize
),
282 sym
::u8 => ast
::LitIntType
::Unsigned(ast
::UintTy
::U8
),
283 sym
::u16 => ast
::LitIntType
::Unsigned(ast
::UintTy
::U16
),
284 sym
::u32 => ast
::LitIntType
::Unsigned(ast
::UintTy
::U32
),
285 sym
::u64 => ast
::LitIntType
::Unsigned(ast
::UintTy
::U64
),
286 sym
::u128
=> ast
::LitIntType
::Unsigned(ast
::UintTy
::U128
),
287 // `1f64` and `2f32` etc. are valid float literals, and
288 // `fxxx` looks more like an invalid float literal than invalid integer literal.
289 _
if suf
.as_str().starts_with('f'
) => return filtered_float_lit(symbol
, suffix
, base
),
290 _
=> return Err(LitError
::InvalidIntSuffix
),
292 _
=> ast
::LitIntType
::Unsuffixed
// Skip the two-character base prefix for every base other than 10 before parsing.
295 let s
= &s
[if base
!= 10 { 2 }
else { 0 }
..];
296 u128
::from_str_radix(s
, base
).map(|i
| LitKind
::Int(i
, ty
)).map_err(|_
| {
297 // Small bases are lexed as if they were base 10, e.g, the string
298 // might be `0b10201`. This will cause the conversion above to fail,
299 // but these kinds of errors are already reported by the lexer.
// NOTE(review): the binding on embedded line 300 is missing from this listing; presumably
// the expression below initialises `from_lexer`. It is true when the base is below 10 and
// some character is a decimal digit that is not valid in that base.
301 base
< 10 && s
.chars().any(|c
| c
.to_digit(10).map_or(false, |d
| d
>= base
));
302 if from_lexer { LitError::LexerError }
else { LitError::IntTooLarge }