]>
Commit | Line | Data |
---|---|---|
136023e0 XL |
1 | use clippy_utils::diagnostics::span_lint; |
2 | use rustc_ast::ast; | |
3 | use rustc_data_structures::fx::FxHashSet; | |
4 | use rustc_lint::{EarlyContext, EarlyLintPass, Level}; | |
5 | use rustc_session::{declare_tool_lint, impl_lint_pass}; | |
6 | use unicode_script::{Script, UnicodeScript}; | |
7 | ||
8 | declare_clippy_lint! { | |
94222f64 XL |
9 | /// ### What it does |
10 | /// Checks for usage of Unicode scripts other than those explicitly allowed | |
136023e0 XL |
11 | /// by the lint config. |
12 | /// | |
13 | /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`. | |
14 | /// It also ignores the `Common` script type. | |
15 | /// While configuring, be sure to use official script name [aliases] from | |
16 | /// [the list of supported scripts][supported_scripts]. | |
17 | /// | |
18 | /// See also: [`non_ascii_idents`]. | |
19 | /// | |
20 | /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases | |
21 | /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html | |
22 | /// | |
94222f64 XL |
23 | /// ### Why is this bad? |
24 | /// It may not be desirable to have many different scripts for | |
136023e0 XL |
25 | /// identifiers in the codebase. |
26 | /// | |
27 | /// Note that if you only want to allow plain English, you might want to use | |
28 | /// the built-in [`non_ascii_idents`] lint instead. | |
29 | /// | |
30 | /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents | |
31 | /// | |
94222f64 | 32 | /// ### Example
136023e0 XL |
33 | /// ```rust |
34 | /// // Assuming that `clippy.toml` contains the following line: | |
35 | /// // allowed-locales = ["Latin", "Cyrillic"] | |
36 | /// let counter = 10; // OK, Latin is allowed. | |
37 | /// let счётчик = 10; // OK, Cyrillic is allowed. | |
38 | /// let zähler = 10; // OK, it's still Latin. | |
39 | /// let カウンタ = 10; // Will trigger the lint. | |
40 | /// ``` | |
41 | pub DISALLOWED_SCRIPT_IDENTS, | |
42 | restriction, | |
43 | "usage of non-allowed Unicode scripts" | |
44 | } | |
45 | ||
46 | #[derive(Clone, Debug)] | |
47 | pub struct DisallowedScriptIdents { | |
48 | whitelist: FxHashSet<Script>, | |
49 | } | |
50 | ||
51 | impl DisallowedScriptIdents { | |
52 | pub fn new(whitelist: &[String]) -> Self { | |
53 | let whitelist = whitelist | |
54 | .iter() | |
55 | .map(String::as_str) | |
56 | .filter_map(Script::from_full_name) | |
57 | .collect(); | |
58 | Self { whitelist } | |
59 | } | |
60 | } | |
61 | ||
62 | impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]); | |
63 | ||
64 | impl EarlyLintPass for DisallowedScriptIdents { | |
65 | fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { | |
66 | // Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint: | |
67 | // https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs | |
68 | ||
69 | let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow; | |
70 | if !check_disallowed_script_idents { | |
71 | return; | |
72 | } | |
73 | ||
74 | let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); | |
75 | // Sort by `Span` so that error messages make sense with respect to the | |
76 | // order of identifier locations in the code. | |
77 | let mut symbols: Vec<_> = symbols.iter().collect(); | |
78 | symbols.sort_unstable_by_key(|k| k.1); | |
79 | ||
80 | for (symbol, &span) in &symbols { | |
81 | // Note: `symbol.as_str()` is an expensive operation, thus should not be called | |
82 | // more than once for a single symbol. | |
83 | let symbol_str = symbol.as_str(); | |
84 | if symbol_str.is_ascii() { | |
85 | continue; | |
86 | } | |
87 | ||
88 | for c in symbol_str.chars() { | |
89 | // We want to iterate through all the scripts associated with this character | |
90 | // and check whether at least of one scripts is in the whitelist. | |
91 | let forbidden_script = c | |
92 | .script_extension() | |
93 | .iter() | |
94 | .find(|script| !self.whitelist.contains(script)); | |
95 | if let Some(script) = forbidden_script { | |
96 | span_lint( | |
97 | cx, | |
98 | DISALLOWED_SCRIPT_IDENTS, | |
99 | span, | |
100 | &format!( | |
101 | "identifier `{}` has a Unicode script that is not allowed by configuration: {}", | |
102 | symbol_str, | |
103 | script.full_name() | |
104 | ), | |
105 | ); | |
106 | // We don't want to spawn warning multiple times over a single identifier. | |
107 | break; | |
108 | } | |
109 | } | |
110 | } | |
111 | } | |
112 | } |