]>
Commit | Line | Data |
---|---|---|
eb39fafa DC |
1 | /** |
2 | * @author Toru Nagashima <https://github.com/mysticatea> | |
3 | */ | |
4 | "use strict"; | |
5 | ||
f2a92ac6 DC |
6 | const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils"); |
7 | const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); | |
eb39fafa | 8 | const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); |
8f9d1d4d | 9 | const astUtils = require("./utils/ast-utils.js"); |
f2a92ac6 | 10 | const { isValidWithUnicodeFlag } = require("./utils/regular-expressions"); |
eb39fafa DC |
11 | |
12 | //------------------------------------------------------------------------------ | |
13 | // Helpers | |
14 | //------------------------------------------------------------------------------ | |
15 | ||
/**
 * Iterate the character sequences contained in a list of character class elements.
 *
 * A "sequence" is a run of adjacent code points as they appear in the class.
 * CharacterClassRange syntax can steal a part of a character sequence
 * (in `[ab-cd]` the range owns `b` and `c`), so this function undoes the range:
 * the range minimum closes the current sequence and the range maximum opens
 * the next one.
 *
 * Any other non-character element (an escape/property set, and the element
 * types regexpp emits for `v`-flag classes: nested classes, `\q{...}` string
 * disjunctions and set expressions) cannot be part of a multi-code-point
 * character, so it terminates the current sequence without contributing to it.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
 * @returns {IterableIterator<number[]>} The list of character sequences. Empty sequences are never yielded
 *      by the set/class branches or at the end of iteration.
 */
function *iterateCharacterSequence(nodes) {
    let seq = [];

    for (const node of nodes) {
        switch (node.type) {
            case "Character":
                seq.push(node.value);
                break;

            case "CharacterClassRange":

                // The range minimum ends the current sequence; the maximum starts a new one.
                seq.push(node.min.value);
                yield seq;
                seq = [node.max.value];
                break;

            case "CharacterSet":
            case "CharacterClass":
            case "ClassStringDisjunction":
            case "ExpressionCharacterClass":

                // These elements separate the characters around them.
                if (seq.length > 0) {
                    yield seq;
                    seq = [];
                }
                break;

            // no default
        }
    }

    // Flush whatever is left after the last element.
    if (seq.length > 0) {
        yield seq;
    }
}
54 | ||
/**
 * Detectors for problematic multi-code-point sequences.
 *
 * Each property name doubles as the `messageId` reported for that problem.
 * Every detector receives one character sequence (an array of code points)
 * and returns `true` when at least one adjacent pair/triple in the sequence
 * matches. All of them skip index 0 because they need a preceding code point.
 */
const hasCharacterSequence = {

    /**
     * @param {number[]} chars Code points of one sequence.
     * @returns {boolean} `true` if some code point and its predecessor satisfy `isSurrogatePair`.
     */
    surrogatePairWithoutUFlag(chars) {
        return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
    },

    /**
     * @param {number[]} chars Code points of one sequence.
     * @returns {boolean} `true` if a combining character directly follows a non-combining one.
     */
    combiningClass(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isCombiningCharacter(c) &&
            !isCombiningCharacter(chars[i - 1])
        ));
    },

    /**
     * @param {number[]} chars Code points of one sequence.
     * @returns {boolean} `true` if an emoji modifier directly follows a code point that is not one.
     */
    emojiModifier(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isEmojiModifier(c) &&
            !isEmojiModifier(chars[i - 1])
        ));
    },

    /**
     * @param {number[]} chars Code points of one sequence.
     * @returns {boolean} `true` if two regional indicator symbols are adjacent.
     */
    regionalIndicatorSymbol(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isRegionalIndicatorSymbol(c) &&
            isRegionalIndicatorSymbol(chars[i - 1])
        ));
    },

    /**
     * @param {number[]} chars Code points of one sequence.
     * @returns {boolean} `true` if a ZERO WIDTH JOINER (U+200D) sits strictly inside the
     *      sequence with a non-joiner on both sides.
     */
    zwj(chars) {
        const lastIndex = chars.length - 1;

        return chars.some((c, i) => (
            i !== 0 &&
            i !== lastIndex &&
            c === 0x200d &&
            chars[i - 1] !== 0x200d &&
            chars[i + 1] !== 0x200d
        ));
    }
};

// Detector names, in declaration order; used to run every detector on every sequence.
const kinds = Object.keys(hasCharacterSequence);
98 | ||
99 | //------------------------------------------------------------------------------ | |
100 | // Rule Definition | |
101 | //------------------------------------------------------------------------------ | |
102 | ||
34eeec05 | 103 | /** @type {import('../shared/types').Rule} */ |
eb39fafa DC |
104 | module.exports = { |
105 | meta: { | |
106 | type: "problem", | |
107 | ||
108 | docs: { | |
8f9d1d4d | 109 | description: "Disallow characters which are made with multiple code points in character class syntax", |
eb39fafa | 110 | recommended: true, |
f2a92ac6 | 111 | url: "https://eslint.org/docs/latest/rules/no-misleading-character-class" |
eb39fafa DC |
112 | }, |
113 | ||
8f9d1d4d DC |
114 | hasSuggestions: true, |
115 | ||
eb39fafa DC |
116 | schema: [], |
117 | ||
118 | messages: { | |
119 | surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.", | |
120 | combiningClass: "Unexpected combined character in character class.", | |
121 | emojiModifier: "Unexpected modified Emoji in character class.", | |
122 | regionalIndicatorSymbol: "Unexpected national flag in character class.", | |
8f9d1d4d DC |
123 | zwj: "Unexpected joined character sequence in character class.", |
124 | suggestUnicodeFlag: "Add unicode 'u' flag to regex." | |
eb39fafa DC |
125 | } |
126 | }, | |
127 | create(context) { | |
f2a92ac6 | 128 | const sourceCode = context.sourceCode; |
eb39fafa DC |
129 | const parser = new RegExpParser(); |
130 | ||
131 | /** | |
132 | * Verify a given regular expression. | |
133 | * @param {Node} node The node to report. | |
134 | * @param {string} pattern The regular expression pattern to verify. | |
135 | * @param {string} flags The flags of the regular expression. | |
8f9d1d4d | 136 | * @param {Function} unicodeFixer Fixer for missing "u" flag. |
eb39fafa DC |
137 | * @returns {void} |
138 | */ | |
8f9d1d4d | 139 | function verify(node, pattern, flags, unicodeFixer) { |
eb39fafa DC |
140 | let patternNode; |
141 | ||
142 | try { | |
143 | patternNode = parser.parsePattern( | |
144 | pattern, | |
145 | 0, | |
146 | pattern.length, | |
147 | flags.includes("u") | |
148 | ); | |
d3726936 | 149 | } catch { |
eb39fafa DC |
150 | |
151 | // Ignore regular expressions with syntax errors | |
152 | return; | |
153 | } | |
154 | ||
8f9d1d4d DC |
155 | const foundKinds = new Set(); |
156 | ||
eb39fafa DC |
157 | visitRegExpAST(patternNode, { |
158 | onCharacterClassEnter(ccNode) { | |
159 | for (const chars of iterateCharacterSequence(ccNode.elements)) { | |
160 | for (const kind of kinds) { | |
8f9d1d4d DC |
161 | if (hasCharacterSequence[kind](chars)) { |
162 | foundKinds.add(kind); | |
163 | } | |
eb39fafa DC |
164 | } |
165 | } | |
166 | } | |
167 | }); | |
168 | ||
8f9d1d4d DC |
169 | for (const kind of foundKinds) { |
170 | let suggest; | |
171 | ||
172 | if (kind === "surrogatePairWithoutUFlag") { | |
173 | suggest = [{ | |
174 | messageId: "suggestUnicodeFlag", | |
175 | fix: unicodeFixer | |
176 | }]; | |
eb39fafa | 177 | } |
8f9d1d4d DC |
178 | |
179 | context.report({ | |
180 | node, | |
181 | messageId: kind, | |
182 | suggest | |
183 | }); | |
eb39fafa DC |
184 | } |
185 | } | |
186 | ||
187 | return { | |
188 | "Literal[regex]"(node) { | |
8f9d1d4d | 189 | verify(node, node.regex.pattern, node.regex.flags, fixer => { |
f2a92ac6 | 190 | if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) { |
8f9d1d4d DC |
191 | return null; |
192 | } | |
193 | ||
194 | return fixer.insertTextAfter(node, "u"); | |
195 | }); | |
eb39fafa | 196 | }, |
f2a92ac6 DC |
197 | "Program"(node) { |
198 | const scope = sourceCode.getScope(node); | |
eb39fafa DC |
199 | const tracker = new ReferenceTracker(scope); |
200 | ||
201 | /* | |
202 | * Iterate calls of RegExp. | |
203 | * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`, | |
204 | * `const {RegExp: a} = window; new a()`, etc... | |
205 | */ | |
f2a92ac6 | 206 | for (const { node: refNode } of tracker.iterateGlobalReferences({ |
eb39fafa DC |
207 | RegExp: { [CALL]: true, [CONSTRUCT]: true } |
208 | })) { | |
f2a92ac6 | 209 | const [patternNode, flagsNode] = refNode.arguments; |
eb39fafa DC |
210 | const pattern = getStringIfConstant(patternNode, scope); |
211 | const flags = getStringIfConstant(flagsNode, scope); | |
212 | ||
213 | if (typeof pattern === "string") { | |
f2a92ac6 | 214 | verify(refNode, pattern, flags || "", fixer => { |
8f9d1d4d | 215 | |
f2a92ac6 | 216 | if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) { |
8f9d1d4d DC |
217 | return null; |
218 | } | |
219 | ||
f2a92ac6 DC |
220 | if (refNode.arguments.length === 1) { |
221 | const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis | |
8f9d1d4d DC |
222 | |
223 | return fixer.insertTextAfter( | |
224 | penultimateToken, | |
225 | astUtils.isCommaToken(penultimateToken) | |
226 | ? ' "u",' | |
227 | : ', "u"' | |
228 | ); | |
229 | } | |
230 | ||
231 | if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") { | |
232 | const range = [flagsNode.range[0], flagsNode.range[1] - 1]; | |
233 | ||
234 | return fixer.insertTextAfterRange(range, "u"); | |
235 | } | |
236 | ||
237 | return null; | |
238 | }); | |
eb39fafa DC |
239 | } |
240 | } | |
241 | } | |
242 | }; | |
243 | } | |
244 | }; |