]>
Commit | Line | Data |
---|---|---|
9c376795 FG |
1 | use crate::{ |
2 | lints::{ | |
3 | HiddenUnicodeCodepointsDiag, HiddenUnicodeCodepointsDiagLabels, | |
4 | HiddenUnicodeCodepointsDiagSub, | |
5 | }, | |
6 | EarlyContext, EarlyLintPass, LintContext, | |
7 | }; | |
3c0e092e | 8 | use ast::util::unicode::{contains_text_flow_control_chars, TEXT_FLOW_CONTROL_CHARS}; |
c295e0f8 | 9 | use rustc_ast as ast; |
31ef2f64 | 10 | use rustc_session::{declare_lint, declare_lint_pass}; |
c295e0f8 XL |
11 | use rustc_span::{BytePos, Span, Symbol}; |
12 | ||
13 | declare_lint! { | |
14 | /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the | |
15 | /// visual representation of text on screen in a way that does not correspond to their | |
16 | /// in-memory representation. | |
17 | /// | |
18 | /// ### Explanation | |
19 | /// | |
20 | /// The Unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, | |
21 | /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change | |
22 | /// its direction on software that supports these codepoints. This makes the text "abc" display | |
23 | /// as "cba" on screen. By leveraging software that supports these, people can write specially | |
24 | /// crafted literals that make the surrounding code seem like it's performing one action, when | |
25 | /// in reality it is performing another. Because of this, we proactively lint against their | |
26 | /// presence to avoid surprises. | |
27 | /// | |
28 | /// ### Example | |
29 | /// | |
30 | /// ```rust,compile_fail | |
31 | /// #![deny(text_direction_codepoint_in_literal)] | |
32 | /// fn main() { | |
33 | /// println!("{:?}", ''); | |
34 | /// } | |
35 | /// ``` | |
36 | /// | |
37 | /// {{produces}} | |
38 | /// | |
39 | pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, | |
40 | Deny, | |
41 | "detect special Unicode codepoints that affect the visual representation of text on screen, \ | |
42 | changing the direction in which text flows", | |
43 | } | |
44 | ||
45 | declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); | |
46 | ||
c295e0f8 XL |
47 | impl HiddenUnicodeCodepoints { |
48 | fn lint_text_direction_codepoint( | |
49 | &self, | |
50 | cx: &EarlyContext<'_>, | |
51 | text: Symbol, | |
52 | span: Span, | |
53 | padding: u32, | |
54 | point_at_inner_spans: bool, | |
55 | label: &str, | |
56 | ) { | |
57 | // Obtain the `Span`s for each of the forbidden chars. | |
58 | let spans: Vec<_> = text | |
59 | .as_str() | |
60 | .char_indices() | |
61 | .filter_map(|(i, c)| { | |
3c0e092e | 62 | TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| { |
c295e0f8 XL |
63 | let lo = span.lo() + BytePos(i as u32 + padding); |
64 | (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) | |
65 | }) | |
66 | }) | |
67 | .collect(); | |
68 | ||
9c376795 FG |
69 | let count = spans.len(); |
70 | let labels = point_at_inner_spans | |
71 | .then_some(HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() }); | |
72 | let sub = if point_at_inner_spans && !spans.is_empty() { | |
73 | HiddenUnicodeCodepointsDiagSub::Escape { spans } | |
74 | } else { | |
75 | HiddenUnicodeCodepointsDiagSub::NoEscape { spans } | |
76 | }; | |
77 | ||
c0240ec0 | 78 | cx.emit_span_lint( |
2b03887a FG |
79 | TEXT_DIRECTION_CODEPOINT_IN_LITERAL, |
80 | span, | |
9c376795 | 81 | HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub }, |
2b03887a | 82 | ); |
c295e0f8 XL |
83 | } |
84 | } | |
85 | impl EarlyLintPass for HiddenUnicodeCodepoints { | |
86 | fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { | |
87 | if let ast::AttrKind::DocComment(_, comment) = attr.kind { | |
a2a8927a | 88 | if contains_text_flow_control_chars(comment.as_str()) { |
c295e0f8 XL |
89 | self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); |
90 | } | |
91 | } | |
92 | } | |
93 | ||
9c376795 | 94 | #[inline] |
c295e0f8 XL |
95 | fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { |
96 | // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` | |
487cf647 FG |
97 | match &expr.kind { |
98 | ast::ExprKind::Lit(token_lit) => { | |
f2b60f7d | 99 | let text = token_lit.symbol; |
a2a8927a | 100 | if !contains_text_flow_control_chars(text.as_str()) { |
c295e0f8 XL |
101 | return; |
102 | } | |
487cf647 | 103 | let padding = match token_lit.kind { |
c295e0f8 | 104 | // account for `"` or `'` |
487cf647 | 105 | ast::token::LitKind::Str | ast::token::LitKind::Char => 1, |
c295e0f8 | 106 | // account for `r###"` |
487cf647 | 107 | ast::token::LitKind::StrRaw(n) => n as u32 + 2, |
c295e0f8 XL |
108 | _ => return, |
109 | }; | |
487cf647 | 110 | self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal"); |
c295e0f8 | 111 | } |
487cf647 | 112 | _ => {} |
c295e0f8 | 113 | }; |
c295e0f8 XL |
114 | } |
115 | } |