]>
Commit | Line | Data |
---|---|---|
ea8adc8c XL |
1 | use rustc::lint::*; |
2 | use rustc::hir::*; | |
3 | use syntax::ast::{LitKind, NodeId}; | |
4 | use syntax::codemap::Span; | |
5 | use unicode_normalization::UnicodeNormalization; | |
abe05a73 | 6 | use utils::{is_allowed, snippet, span_help_and_lint}; |
ea8adc8c XL |
7 | |
8 | /// **What it does:** Checks for the Unicode zero-width space (U+200B) in string literals. | |
9 | /// | |
10 | /// **Why is this bad?** Having an invisible character in the code makes for all | |
11 | /// sorts of April fools, but otherwise is very much frowned upon. | |
12 | /// | |
13 | /// **Known problems:** None. | |
14 | /// | |
15 | /// **Example:** You don't see it, but there may be a zero-width space | |
16 | /// somewhere in this text. | |
17 | declare_lint! { | |
18 | pub ZERO_WIDTH_SPACE, | |
19 | Deny, | |
20 | "using a zero-width space in a string literal, which is confusing" | |
21 | } | |
22 | ||
23 | /// **What it does:** Checks for non-ASCII characters in string literals. | |
24 | /// | |
25 | /// **Why is this bad?** Yeah, we know, the 90's called and wanted their charset | |
26 | /// back. Even so, there still are editors and other programs out there that | |
27 | /// don't work well with Unicode. So if the code is meant to be used | |
28 | /// internationally, on multiple operating systems, or has other portability | |
29 | /// requirements, activating this lint could be useful. | |
30 | /// | |
31 | /// **Known problems:** None. | |
32 | /// | |
33 | /// **Example:** | |
34 | /// ```rust | |
35 | /// let x = "Hä?"; | |
36 | /// ``` | |
37 | declare_lint! { | |
38 | pub NON_ASCII_LITERAL, | |
39 | Allow, | |
40 | "using any literal non-ASCII chars in a string literal instead of \ | |
41 | using the `\\u` escape" | |
42 | } | |
43 | ||
44 | /// **What it does:** Checks for string literals that contain Unicode in a form | |
45 | /// that is not equal to its | |
46 | /// [NFC-recomposition](http://www.unicode.org/reports/tr15/#Norm_Forms). | |
47 | /// | |
48 | /// **Why is this bad?** If such a string is compared to another, the results | |
49 | /// may be surprising. | |
50 | /// | |
51 | /// **Known problems:** None. | |
52 | /// | |
53 | /// **Example:** You may not see it, but “à” and “à” aren't the same string. The | |
54 | /// former when escaped is actually `"a\u{300}"` while the latter is `"\u{e0}"`. | |
55 | declare_lint! { | |
56 | pub UNICODE_NOT_NFC, | |
57 | Allow, | |
58 | "using a unicode literal not in NFC normal form (see \ | |
59 | [unicode tr15](http://www.unicode.org/reports/tr15/) for further information)" | |
60 | } | |
61 | ||
62 | ||
/// Lint pass covering the `ZERO_WIDTH_SPACE`, `NON_ASCII_LITERAL` and
/// `UNICODE_NOT_NFC` lints.
///
/// The pass carries no state, which is why it is a unit struct.
#[derive(Clone, Copy)]
pub struct Unicode;
65 | ||
66 | impl LintPass for Unicode { | |
67 | fn get_lints(&self) -> LintArray { | |
68 | lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC) | |
69 | } | |
70 | } | |
71 | ||
72 | impl<'a, 'tcx> LateLintPass<'a, 'tcx> for Unicode { | |
73 | fn check_expr(&mut self, cx: &LateContext<'a, 'tcx>, expr: &'tcx Expr) { | |
74 | if let ExprLit(ref lit) = expr.node { | |
75 | if let LitKind::Str(_, _) = lit.node { | |
76 | check_str(cx, lit.span, expr.id) | |
77 | } | |
78 | } | |
79 | } | |
80 | } | |
81 | ||
/// Builds a string from `s` in which every non-ASCII character is replaced by
/// its `\u{...}` escape sequence.
///
/// Characters up to and including U+007F are copied through unchanged; all
/// others are expanded with `char::escape_unicode`.
///
/// Returns the escaped text as a newly allocated `String`.
fn escape<T: Iterator<Item = char>>(s: T) -> String {
    // Every input character produces at least one output character, so the
    // iterator's lower size bound is a safe initial capacity.
    let mut result = String::with_capacity(s.size_hint().0);
    for c in s {
        if c as u32 > 0x7F {
            // Append the whole escape sequence at once instead of pushing
            // its characters one by one.
            result.extend(c.escape_unicode());
        } else {
            result.push(c);
        }
    }
    result
}
95 | ||
96 | fn check_str(cx: &LateContext, span: Span, id: NodeId) { | |
97 | let string = snippet(cx, span, ""); | |
98 | if string.contains('\u{200B}') { | |
99 | span_help_and_lint( | |
100 | cx, | |
101 | ZERO_WIDTH_SPACE, | |
102 | span, | |
103 | "zero-width space detected", | |
104 | &format!( | |
105 | "Consider replacing the string with:\n\"{}\"", | |
106 | string.replace("\u{200B}", "\\u{200B}") | |
107 | ), | |
108 | ); | |
109 | } | |
110 | if string.chars().any(|c| c as u32 > 0x7F) { | |
111 | span_help_and_lint( | |
112 | cx, | |
113 | NON_ASCII_LITERAL, | |
114 | span, | |
115 | "literal non-ASCII character detected", | |
116 | &format!( | |
117 | "Consider replacing the string with:\n\"{}\"", | |
118 | if is_allowed(cx, UNICODE_NOT_NFC, id) { | |
119 | escape(string.chars()) | |
120 | } else { | |
121 | escape(string.nfc()) | |
122 | } | |
123 | ), | |
124 | ); | |
125 | } | |
126 | if is_allowed(cx, NON_ASCII_LITERAL, id) && string.chars().zip(string.nfc()).any(|(a, b)| a != b) { | |
127 | span_help_and_lint( | |
128 | cx, | |
129 | UNICODE_NOT_NFC, | |
130 | span, | |
131 | "non-nfc unicode sequence detected", | |
132 | &format!("Consider replacing the string with:\n\"{}\"", string.nfc().collect::<String>()), | |
133 | ); | |
134 | } | |
135 | } |