]> git.proxmox.com Git - pve-eslint.git/blob - eslint/lib/rules/no-misleading-character-class.js
667d066e81c7eabe0528338789124084a8b65301
[pve-eslint.git] / eslint / lib / rules / no-misleading-character-class.js
1 /**
2 * @author Toru Nagashima <https://github.com/mysticatea>
3 */
4 "use strict";
5
6 const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
7 const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
8 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
9 const astUtils = require("./utils/ast-utils.js");
10
11 //------------------------------------------------------------------------------
12 // Helpers
13 //------------------------------------------------------------------------------
14
/**
 * Upper bound for the `ecmaVersion` handed to `RegExpValidator`: the
 * year-based version derived from the parser options is clamped to this value.
 * @type {number}
 */
const REGEXPP_LATEST_ECMA_VERSION = 2022;
16
/**
 * Yields the runs of consecutive character values found in the elements of a
 * character class.
 *
 * A `CharacterClassRange` splits what is textually one run in two: its `min`
 * closes the run that precedes the range and its `max` opens the next one.
 * A `CharacterSet` ends the current run without contributing a value.
 * Elements of any other type are skipped. Empty runs are never yielded at a
 * `CharacterSet` or at the end of the list.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The character class elements to walk.
 * @returns {IterableIterator<number[]>} The character value runs, in source order.
 */
function *iterateCharacterSequence(nodes) {
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element.value);
        } else if (type === "CharacterClassRange") {

            // The lower bound belongs to the run being built; the upper bound starts a new one.
            pending.push(element.min.value);
            yield pending;
            pending = [element.max.value];
        } else if (type === "CharacterSet" && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    if (pending.length > 0) {
        yield pending;
    }
}
55
/**
 * Tells whether any two neighbouring entries of a list satisfy a predicate.
 * @param {number[]} chars The character values to scan.
 * @param {function(number, number): boolean} predicate Called with `(previous, current)` for each adjacent pair, left to right.
 * @returns {boolean} `true` as soon as one pair matches, otherwise `false`.
 */
function hasAdjacentPair(chars, predicate) {
    for (let index = 1; index < chars.length; index++) {
        if (predicate(chars[index - 1], chars[index])) {
            return true;
        }
    }

    return false;
}

/*
 * Detectors keyed by the message id that is reported when they match.
 * Each one receives a run of character values and answers whether the run
 * contains the corresponding multi-code-point sequence.
 * The key order is significant: it is the order in which kinds are checked.
 */
const hasCharacterSequence = {

    // A value followed by another one that `isSurrogatePair` accepts as its pair.
    surrogatePairWithoutUFlag(chars) {
        return hasAdjacentPair(chars, (previous, current) => isSurrogatePair(previous, current));
    },

    // A combining character directly after a non-combining one.
    combiningClass(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isCombiningCharacter(current) && !isCombiningCharacter(previous)
        ));
    },

    // An emoji modifier directly after something that is not an emoji modifier.
    emojiModifier(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isEmojiModifier(current) && !isEmojiModifier(previous)
        ));
    },

    // Two regional indicator symbols in a row.
    regionalIndicatorSymbol(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isRegionalIndicatorSymbol(current) && isRegionalIndicatorSymbol(previous)
        ));
    },

    // A U+200D that is neither first nor last and has no U+200D on either side.
    zwj(chars) {
        const ZWJ = 0x200d;

        for (let index = 1; index < chars.length - 1; index++) {
            if (
                chars[index] === ZWJ &&
                chars[index - 1] !== ZWJ &&
                chars[index + 1] !== ZWJ
            ) {
                return true;
            }
        }

        return false;
    }
};

// Message ids of every detector, in declaration order.
const kinds = Object.keys(hasCharacterSequence);
99
100 //------------------------------------------------------------------------------
101 // Rule Definition
102 //------------------------------------------------------------------------------
103
104 /** @type {import('../shared/types').Rule} */
105 module.exports = {
106 meta: {
107 type: "problem",
108
109 docs: {
110 description: "Disallow characters which are made with multiple code points in character class syntax",
111 recommended: true,
112 url: "https://eslint.org/docs/rules/no-misleading-character-class"
113 },
114
115 hasSuggestions: true,
116
117 schema: [],
118
119 messages: {
120 surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
121 combiningClass: "Unexpected combined character in character class.",
122 emojiModifier: "Unexpected modified Emoji in character class.",
123 regionalIndicatorSymbol: "Unexpected national flag in character class.",
124 zwj: "Unexpected joined character sequence in character class.",
125 suggestUnicodeFlag: "Add unicode 'u' flag to regex."
126 }
127 },
128 create(context) {
129 const sourceCode = context.getSourceCode();
130 const parser = new RegExpParser();
131
132 /**
133 * Verify a given regular expression.
134 * @param {Node} node The node to report.
135 * @param {string} pattern The regular expression pattern to verify.
136 * @param {string} flags The flags of the regular expression.
137 * @param {Function} unicodeFixer Fixer for missing "u" flag.
138 * @returns {void}
139 */
140 function verify(node, pattern, flags, unicodeFixer) {
141 let patternNode;
142
143 try {
144 patternNode = parser.parsePattern(
145 pattern,
146 0,
147 pattern.length,
148 flags.includes("u")
149 );
150 } catch {
151
152 // Ignore regular expressions with syntax errors
153 return;
154 }
155
156 const foundKinds = new Set();
157
158 visitRegExpAST(patternNode, {
159 onCharacterClassEnter(ccNode) {
160 for (const chars of iterateCharacterSequence(ccNode.elements)) {
161 for (const kind of kinds) {
162 if (hasCharacterSequence[kind](chars)) {
163 foundKinds.add(kind);
164 }
165 }
166 }
167 }
168 });
169
170 for (const kind of foundKinds) {
171 let suggest;
172
173 if (kind === "surrogatePairWithoutUFlag") {
174 suggest = [{
175 messageId: "suggestUnicodeFlag",
176 fix: unicodeFixer
177 }];
178 }
179
180 context.report({
181 node,
182 messageId: kind,
183 suggest
184 });
185 }
186 }
187
188 /**
189 * Checks if the given regular expression pattern would be valid with the `u` flag.
190 * @param {string} pattern The regular expression pattern to verify.
191 * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
192 * `false` if the pattern would be invalid with the `u` flag or the configured
193 * ecmaVersion doesn't support the `u` flag.
194 */
195 function isValidWithUnicodeFlag(pattern) {
196 const { ecmaVersion } = context.parserOptions;
197
198 // ecmaVersion is unknown or it doesn't support the 'u' flag
199 if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) {
200 return false;
201 }
202
203 const validator = new RegExpValidator({
204 ecmaVersion: Math.min(ecmaVersion + 2009, REGEXPP_LATEST_ECMA_VERSION)
205 });
206
207 try {
208 validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
209 } catch {
210 return false;
211 }
212
213 return true;
214 }
215
216 return {
217 "Literal[regex]"(node) {
218 verify(node, node.regex.pattern, node.regex.flags, fixer => {
219 if (!isValidWithUnicodeFlag(node.regex.pattern)) {
220 return null;
221 }
222
223 return fixer.insertTextAfter(node, "u");
224 });
225 },
226 "Program"() {
227 const scope = context.getScope();
228 const tracker = new ReferenceTracker(scope);
229
230 /*
231 * Iterate calls of RegExp.
232 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
233 * `const {RegExp: a} = window; new a()`, etc...
234 */
235 for (const { node } of tracker.iterateGlobalReferences({
236 RegExp: { [CALL]: true, [CONSTRUCT]: true }
237 })) {
238 const [patternNode, flagsNode] = node.arguments;
239 const pattern = getStringIfConstant(patternNode, scope);
240 const flags = getStringIfConstant(flagsNode, scope);
241
242 if (typeof pattern === "string") {
243 verify(node, pattern, flags || "", fixer => {
244
245 if (!isValidWithUnicodeFlag(pattern)) {
246 return null;
247 }
248
249 if (node.arguments.length === 1) {
250 const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis
251
252 return fixer.insertTextAfter(
253 penultimateToken,
254 astUtils.isCommaToken(penultimateToken)
255 ? ' "u",'
256 : ', "u"'
257 );
258 }
259
260 if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
261 const range = [flagsNode.range[0], flagsNode.range[1] - 1];
262
263 return fixer.insertTextAfterRange(range, "u");
264 }
265
266 return null;
267 });
268 }
269 }
270 }
271 };
272 }
273 };