1 use clippy_utils
::diagnostics
::span_lint
;
3 use rustc_data_structures
::fx
::FxHashSet
;
4 use rustc_lint
::{EarlyContext, EarlyLintPass, Level}
;
5 use rustc_session
::{declare_tool_lint, impl_lint_pass}
;
6 use unicode_script
::{Script, UnicodeScript}
;
10 /// Checks for usage of unicode scripts other than those explicitly allowed
11 /// by the lint config.
13 /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`.
14 /// It also ignores the `Common` script type.
15 /// While configuring, be sure to use official script name [aliases] from
16 /// [the list of supported scripts][supported_scripts].
18 /// See also: [`non_ascii_idents`].
20 /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases
21 /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html
23 /// ### Why is this bad?
24 /// It may not be desirable to have many different scripts for
25 /// identifiers in the codebase.
27 /// Note that if you only want to allow plain English, you might want to use
28 /// the built-in [`non_ascii_idents`] lint instead.
30 /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents
34 /// // Assuming that `clippy.toml` contains the following line:
35 /// // allowed-scripts = ["Latin", "Cyrillic"]
36 /// let counter = 10; // OK, Latin is allowed.
37 /// let счётчик = 10; // OK, Cyrillic is allowed.
38 /// let zähler = 10; // OK, it's still Latin.
39 /// let カウンタ = 10; // Will trigger the lint.
41 pub DISALLOWED_SCRIPT_IDENTS
,
43 "usage of non-allowed Unicode scripts"
46 #[derive(Clone, Debug)]
47 pub struct DisallowedScriptIdents
{
48 whitelist
: FxHashSet
<Script
>,
51 impl DisallowedScriptIdents
{
52 pub fn new(whitelist
: &[String
]) -> Self {
53 let whitelist
= whitelist
56 .filter_map(Script
::from_full_name
)
62 impl_lint_pass
!(DisallowedScriptIdents
=> [DISALLOWED_SCRIPT_IDENTS
]);
64 impl EarlyLintPass
for DisallowedScriptIdents
{
65 fn check_crate(&mut self, cx
: &EarlyContext
<'_
>, _
: &ast
::Crate
) {
66 // Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint:
67 // https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs
69 let check_disallowed_script_idents
= cx
.builder
.lint_level(DISALLOWED_SCRIPT_IDENTS
).0 != Level
::Allow
;
70 if !check_disallowed_script_idents
{
74 let symbols
= cx
.sess
.parse_sess
.symbol_gallery
.symbols
.lock();
75 // Sort by `Span` so that error messages make sense with respect to the
76 // order of identifier locations in the code.
77 let mut symbols
: Vec
<_
> = symbols
.iter().collect();
78 symbols
.sort_unstable_by_key(|k
| k
.1);
80 for (symbol
, &span
) in &symbols
{
81 // Note: `symbol.as_str()` is an expensive operation, thus should not be called
82 // more than once for a single symbol.
83 let symbol_str
= symbol
.as_str();
84 if symbol_str
.is_ascii() {
88 for c
in symbol_str
.chars() {
89 // We want to iterate through all the scripts associated with this character
90 // and check whether at least one of the scripts is in the whitelist.
91 let forbidden_script
= c
94 .find(|script
| !self.whitelist
.contains(script
));
95 if let Some(script
) = forbidden_script
{
98 DISALLOWED_SCRIPT_IDENTS
,
101 "identifier `{}` has a Unicode script that is not allowed by configuration: {}",
106 // We don't want to emit the warning multiple times for a single identifier.