]>
Commit | Line | Data |
---|---|---|
eb39fafa DC |
1 | /** |
2 | * @author Toru Nagashima <https://github.com/mysticatea> | |
3 | */ | |
4 | "use strict"; | |
5 | ||
6 | const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils"); | |
8f9d1d4d | 7 | const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp"); |
eb39fafa | 8 | const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); |
8f9d1d4d | 9 | const astUtils = require("./utils/ast-utils.js"); |
eb39fafa DC |
10 | |
11 | //------------------------------------------------------------------------------ | |
12 | // Helpers | |
13 | //------------------------------------------------------------------------------ | |
14 | ||
8f9d1d4d DC |
/*
 * Upper bound for the `ecmaVersion` option handed to regexpp's `RegExpValidator`
 * when checking whether a pattern would still be valid with the `u` flag.
 * NOTE(review): presumably the newest ECMAScript year the bundled regexpp
 * understands - confirm when upgrading regexpp.
 */
const REGEXPP_LATEST_ECMA_VERSION = 2022;
16 | ||
eb39fafa DC |
/**
 * Splits the elements of a character class into runs of adjacent code points.
 *
 * A `CharacterClassRange` (e.g. `a-z`) swallows the characters on both of its
 * ends, so its `min` is appended to the run that precedes it and its `max`
 * opens the run that follows it. A `CharacterSet` (e.g. `\d`) ends the current
 * run without contributing a code point. Any other element type is skipped.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The elements of one character class.
 * @returns {IterableIterator<number[]>} The runs of adjacent code points, in source order.
 */
function *iterateCharacterSequence(nodes) {
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element.value);
        } else if (type === "CharacterClassRange") {

            // The lower bound closes the current run, the upper bound starts the next one.
            pending.push(element.min.value);
            yield pending;
            pending = [element.max.value];
        } else if (type === "CharacterSet" && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    // Flush whatever is left after the last element.
    if (pending.length > 0) {
        yield pending;
    }
}
55 | ||
/**
 * Checks whether any element of a sequence, together with the element right before it,
 * satisfies a predicate. The first element is never tested because it has no predecessor.
 * @param {number[]} chars The code point sequence to scan.
 * @param {function(number, number): boolean} test Receives `(previous, current)`.
 * @returns {boolean} `true` if at least one adjacent pair satisfies `test`.
 */
function hasAdjacentPair(chars, test) {
    return chars.some((current, index) => index !== 0 && test(chars[index - 1], current));
}

/**
 * Detectors for the kinds of multi-code-point sequences this rule reports.
 * Each key doubles as the message ID used for the report, and the key order
 * determines the order in which the kinds are checked.
 */
const hasCharacterSequence = {

    // A high surrogate immediately followed by a low surrogate.
    surrogatePairWithoutUFlag(chars) {
        return hasAdjacentPair(chars, (previous, current) => isSurrogatePair(previous, current));
    },

    // A combining character that follows a non-combining one.
    combiningClass(chars) {
        return hasAdjacentPair(
            chars,
            (previous, current) => isCombiningCharacter(current) && !isCombiningCharacter(previous)
        );
    },

    // An emoji modifier that follows something which is not an emoji modifier.
    emojiModifier(chars) {
        return hasAdjacentPair(
            chars,
            (previous, current) => isEmojiModifier(current) && !isEmojiModifier(previous)
        );
    },

    // Two regional indicator symbols in a row.
    regionalIndicatorSymbol(chars) {
        return hasAdjacentPair(
            chars,
            (previous, current) => isRegionalIndicatorSymbol(current) && isRegionalIndicatorSymbol(previous)
        );
    },

    // A U+200D that is neither first nor last and has no U+200D on either side.
    zwj(chars) {
        const ZWJ = 0x200d;

        for (let index = 1; index < chars.length - 1; index++) {
            if (chars[index] === ZWJ && chars[index - 1] !== ZWJ && chars[index + 1] !== ZWJ) {
                return true;
            }
        }

        return false;
    }
};

// Names of all detectors, in declaration order.
const kinds = Object.keys(hasCharacterSequence);
99 | ||
100 | //------------------------------------------------------------------------------ | |
101 | // Rule Definition | |
102 | //------------------------------------------------------------------------------ | |
103 | ||
34eeec05 | 104 | /** @type {import('../shared/types').Rule} */ |
eb39fafa DC |
105 | module.exports = { |
106 | meta: { | |
107 | type: "problem", | |
108 | ||
109 | docs: { | |
8f9d1d4d | 110 | description: "Disallow characters which are made with multiple code points in character class syntax", |
eb39fafa DC |
111 | recommended: true, |
112 | url: "https://eslint.org/docs/rules/no-misleading-character-class" | |
113 | }, | |
114 | ||
8f9d1d4d DC |
115 | hasSuggestions: true, |
116 | ||
eb39fafa DC |
117 | schema: [], |
118 | ||
119 | messages: { | |
120 | surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.", | |
121 | combiningClass: "Unexpected combined character in character class.", | |
122 | emojiModifier: "Unexpected modified Emoji in character class.", | |
123 | regionalIndicatorSymbol: "Unexpected national flag in character class.", | |
8f9d1d4d DC |
124 | zwj: "Unexpected joined character sequence in character class.", |
125 | suggestUnicodeFlag: "Add unicode 'u' flag to regex." | |
eb39fafa DC |
126 | } |
127 | }, | |
128 | create(context) { | |
8f9d1d4d | 129 | const sourceCode = context.getSourceCode(); |
eb39fafa DC |
130 | const parser = new RegExpParser(); |
131 | ||
132 | /** | |
133 | * Verify a given regular expression. | |
134 | * @param {Node} node The node to report. | |
135 | * @param {string} pattern The regular expression pattern to verify. | |
136 | * @param {string} flags The flags of the regular expression. | |
8f9d1d4d | 137 | * @param {Function} unicodeFixer Fixer for missing "u" flag. |
eb39fafa DC |
138 | * @returns {void} |
139 | */ | |
8f9d1d4d | 140 | function verify(node, pattern, flags, unicodeFixer) { |
eb39fafa DC |
141 | let patternNode; |
142 | ||
143 | try { | |
144 | patternNode = parser.parsePattern( | |
145 | pattern, | |
146 | 0, | |
147 | pattern.length, | |
148 | flags.includes("u") | |
149 | ); | |
d3726936 | 150 | } catch { |
eb39fafa DC |
151 | |
152 | // Ignore regular expressions with syntax errors | |
153 | return; | |
154 | } | |
155 | ||
8f9d1d4d DC |
156 | const foundKinds = new Set(); |
157 | ||
eb39fafa DC |
158 | visitRegExpAST(patternNode, { |
159 | onCharacterClassEnter(ccNode) { | |
160 | for (const chars of iterateCharacterSequence(ccNode.elements)) { | |
161 | for (const kind of kinds) { | |
8f9d1d4d DC |
162 | if (hasCharacterSequence[kind](chars)) { |
163 | foundKinds.add(kind); | |
164 | } | |
eb39fafa DC |
165 | } |
166 | } | |
167 | } | |
168 | }); | |
169 | ||
8f9d1d4d DC |
170 | for (const kind of foundKinds) { |
171 | let suggest; | |
172 | ||
173 | if (kind === "surrogatePairWithoutUFlag") { | |
174 | suggest = [{ | |
175 | messageId: "suggestUnicodeFlag", | |
176 | fix: unicodeFixer | |
177 | }]; | |
eb39fafa | 178 | } |
8f9d1d4d DC |
179 | |
180 | context.report({ | |
181 | node, | |
182 | messageId: kind, | |
183 | suggest | |
184 | }); | |
eb39fafa DC |
185 | } |
186 | } | |
187 | ||
8f9d1d4d DC |
188 | /** |
189 | * Checks if the given regular expression pattern would be valid with the `u` flag. | |
190 | * @param {string} pattern The regular expression pattern to verify. | |
191 | * @returns {boolean} `true` if the pattern would be valid with the `u` flag. | |
192 | * `false` if the pattern would be invalid with the `u` flag or the configured | |
193 | * ecmaVersion doesn't support the `u` flag. | |
194 | */ | |
195 | function isValidWithUnicodeFlag(pattern) { | |
196 | const { ecmaVersion } = context.parserOptions; | |
197 | ||
198 | // ecmaVersion is unknown or it doesn't support the 'u' flag | |
199 | if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) { | |
200 | return false; | |
201 | } | |
202 | ||
203 | const validator = new RegExpValidator({ | |
204 | ecmaVersion: Math.min(ecmaVersion + 2009, REGEXPP_LATEST_ECMA_VERSION) | |
205 | }); | |
206 | ||
207 | try { | |
208 | validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true); | |
209 | } catch { | |
210 | return false; | |
211 | } | |
212 | ||
213 | return true; | |
214 | } | |
215 | ||
eb39fafa DC |
216 | return { |
217 | "Literal[regex]"(node) { | |
8f9d1d4d DC |
218 | verify(node, node.regex.pattern, node.regex.flags, fixer => { |
219 | if (!isValidWithUnicodeFlag(node.regex.pattern)) { | |
220 | return null; | |
221 | } | |
222 | ||
223 | return fixer.insertTextAfter(node, "u"); | |
224 | }); | |
eb39fafa DC |
225 | }, |
226 | "Program"() { | |
227 | const scope = context.getScope(); | |
228 | const tracker = new ReferenceTracker(scope); | |
229 | ||
230 | /* | |
231 | * Iterate calls of RegExp. | |
232 | * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`, | |
233 | * `const {RegExp: a} = window; new a()`, etc... | |
234 | */ | |
235 | for (const { node } of tracker.iterateGlobalReferences({ | |
236 | RegExp: { [CALL]: true, [CONSTRUCT]: true } | |
237 | })) { | |
238 | const [patternNode, flagsNode] = node.arguments; | |
239 | const pattern = getStringIfConstant(patternNode, scope); | |
240 | const flags = getStringIfConstant(flagsNode, scope); | |
241 | ||
242 | if (typeof pattern === "string") { | |
8f9d1d4d DC |
243 | verify(node, pattern, flags || "", fixer => { |
244 | ||
245 | if (!isValidWithUnicodeFlag(pattern)) { | |
246 | return null; | |
247 | } | |
248 | ||
249 | if (node.arguments.length === 1) { | |
250 | const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis | |
251 | ||
252 | return fixer.insertTextAfter( | |
253 | penultimateToken, | |
254 | astUtils.isCommaToken(penultimateToken) | |
255 | ? ' "u",' | |
256 | : ', "u"' | |
257 | ); | |
258 | } | |
259 | ||
260 | if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") { | |
261 | const range = [flagsNode.range[0], flagsNode.range[1] - 1]; | |
262 | ||
263 | return fixer.insertTextAfterRange(range, "u"); | |
264 | } | |
265 | ||
266 | return null; | |
267 | }); | |
eb39fafa DC |
268 | } |
269 | } | |
270 | } | |
271 | }; | |
272 | } | |
273 | }; |