]> git.proxmox.com Git - pve-eslint.git/blame - eslint/lib/rules/no-misleading-character-class.js
import 8.41.0 source
[pve-eslint.git] / eslint / lib / rules / no-misleading-character-class.js
CommitLineData
eb39fafa
DC
1/**
2 * @author Toru Nagashima <https://github.com/mysticatea>
3 */
4"use strict";
5
f2a92ac6
DC
6const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
7const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
eb39fafa 8const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
8f9d1d4d 9const astUtils = require("./utils/ast-utils.js");
f2a92ac6 10const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
eb39fafa
DC
11
//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------
15
/**
 * Iterates over the character sequences contained in the given nodes.
 *
 * A sequence is a run of code points taken from consecutive `Character`
 * elements. `CharacterClassRange` syntax can steal a part of a character
 * sequence (the `min` of a range may really be the tail of the preceding
 * sequence, and the `max` may be the head of the following one), so this
 * function undoes the range syntax and restores the sequences:
 * `min` closes the current sequence and `max` opens the next one.
 * A `CharacterSet` element terminates the current sequence.
 * Any other element type is skipped without affecting the current sequence.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
 * @returns {IterableIterator<number[]>} The list of character sequences.
 */
function *iterateCharacterSequence(nodes) {
    let seq = [];

    for (const node of nodes) {
        switch (node.type) {
            case "Character":
                seq.push(node.value);
                break;

            case "CharacterClassRange":

                // The lower bound belongs to the sequence in progress ...
                seq.push(node.min.value);
                yield seq;

                // ... and the upper bound starts a fresh one.
                seq = [node.max.value];
                break;

            case "CharacterSet":

                // A set breaks the run; only emit it when it is non-empty.
                if (seq.length > 0) {
                    yield seq;
                    seq = [];
                }
                break;

            // no default
        }
    }

    // Flush whatever is left after the last element.
    if (seq.length > 0) {
        yield seq;
    }
}
54
/**
 * Detectors for each kind of misleading multi-code-point sequence.
 *
 * Every method receives one character sequence (an array of code points) and
 * returns `true` when the sequence contains the pattern the method is named
 * after. The property names double as the report `messageId`s.
 */
const hasCharacterSequence = {

    // A code point that forms a surrogate pair with the one right before it.
    surrogatePairWithoutUFlag(chars) {
        return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
    },

    // A combining character that directly follows a non-combining character.
    combiningClass(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isCombiningCharacter(c) &&
            !isCombiningCharacter(chars[i - 1])
        ));
    },

    // An emoji modifier that directly follows something that is not an emoji modifier.
    emojiModifier(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isEmojiModifier(c) &&
            !isEmojiModifier(chars[i - 1])
        ));
    },

    // Two adjacent regional indicator symbols.
    regionalIndicatorSymbol(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isRegionalIndicatorSymbol(c) &&
            isRegionalIndicatorSymbol(chars[i - 1])
        ));
    },

    // A U+200D (zero width joiner) that sits strictly inside the sequence
    // with a non-U+200D code point on both sides.
    zwj(chars) {
        const lastIndex = chars.length - 1;

        return chars.some((c, i) => (
            i !== 0 &&
            i !== lastIndex &&
            c === 0x200d &&
            chars[i - 1] !== 0x200d &&
            chars[i + 1] !== 0x200d
        ));
    }
};

// Names of all detectors, in declaration order.
const kinds = Object.keys(hasCharacterSequence);
98
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
102
34eeec05 103/** @type {import('../shared/types').Rule} */
eb39fafa
DC
104module.exports = {
105 meta: {
106 type: "problem",
107
108 docs: {
8f9d1d4d 109 description: "Disallow characters which are made with multiple code points in character class syntax",
eb39fafa 110 recommended: true,
f2a92ac6 111 url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
eb39fafa
DC
112 },
113
8f9d1d4d
DC
114 hasSuggestions: true,
115
eb39fafa
DC
116 schema: [],
117
118 messages: {
119 surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
120 combiningClass: "Unexpected combined character in character class.",
121 emojiModifier: "Unexpected modified Emoji in character class.",
122 regionalIndicatorSymbol: "Unexpected national flag in character class.",
8f9d1d4d
DC
123 zwj: "Unexpected joined character sequence in character class.",
124 suggestUnicodeFlag: "Add unicode 'u' flag to regex."
eb39fafa
DC
125 }
126 },
127 create(context) {
f2a92ac6 128 const sourceCode = context.sourceCode;
eb39fafa
DC
129 const parser = new RegExpParser();
130
131 /**
132 * Verify a given regular expression.
133 * @param {Node} node The node to report.
134 * @param {string} pattern The regular expression pattern to verify.
135 * @param {string} flags The flags of the regular expression.
8f9d1d4d 136 * @param {Function} unicodeFixer Fixer for missing "u" flag.
eb39fafa
DC
137 * @returns {void}
138 */
8f9d1d4d 139 function verify(node, pattern, flags, unicodeFixer) {
eb39fafa
DC
140 let patternNode;
141
142 try {
143 patternNode = parser.parsePattern(
144 pattern,
145 0,
146 pattern.length,
147 flags.includes("u")
148 );
d3726936 149 } catch {
eb39fafa
DC
150
151 // Ignore regular expressions with syntax errors
152 return;
153 }
154
8f9d1d4d
DC
155 const foundKinds = new Set();
156
eb39fafa
DC
157 visitRegExpAST(patternNode, {
158 onCharacterClassEnter(ccNode) {
159 for (const chars of iterateCharacterSequence(ccNode.elements)) {
160 for (const kind of kinds) {
8f9d1d4d
DC
161 if (hasCharacterSequence[kind](chars)) {
162 foundKinds.add(kind);
163 }
eb39fafa
DC
164 }
165 }
166 }
167 });
168
8f9d1d4d
DC
169 for (const kind of foundKinds) {
170 let suggest;
171
172 if (kind === "surrogatePairWithoutUFlag") {
173 suggest = [{
174 messageId: "suggestUnicodeFlag",
175 fix: unicodeFixer
176 }];
eb39fafa 177 }
8f9d1d4d
DC
178
179 context.report({
180 node,
181 messageId: kind,
182 suggest
183 });
eb39fafa
DC
184 }
185 }
186
187 return {
188 "Literal[regex]"(node) {
8f9d1d4d 189 verify(node, node.regex.pattern, node.regex.flags, fixer => {
f2a92ac6 190 if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
8f9d1d4d
DC
191 return null;
192 }
193
194 return fixer.insertTextAfter(node, "u");
195 });
eb39fafa 196 },
f2a92ac6
DC
197 "Program"(node) {
198 const scope = sourceCode.getScope(node);
eb39fafa
DC
199 const tracker = new ReferenceTracker(scope);
200
201 /*
202 * Iterate calls of RegExp.
203 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
204 * `const {RegExp: a} = window; new a()`, etc...
205 */
f2a92ac6 206 for (const { node: refNode } of tracker.iterateGlobalReferences({
eb39fafa
DC
207 RegExp: { [CALL]: true, [CONSTRUCT]: true }
208 })) {
f2a92ac6 209 const [patternNode, flagsNode] = refNode.arguments;
eb39fafa
DC
210 const pattern = getStringIfConstant(patternNode, scope);
211 const flags = getStringIfConstant(flagsNode, scope);
212
213 if (typeof pattern === "string") {
f2a92ac6 214 verify(refNode, pattern, flags || "", fixer => {
8f9d1d4d 215
f2a92ac6 216 if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
8f9d1d4d
DC
217 return null;
218 }
219
f2a92ac6
DC
220 if (refNode.arguments.length === 1) {
221 const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
8f9d1d4d
DC
222
223 return fixer.insertTextAfter(
224 penultimateToken,
225 astUtils.isCommaToken(penultimateToken)
226 ? ' "u",'
227 : ', "u"'
228 );
229 }
230
231 if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
232 const range = [flagsNode.range[0], flagsNode.range[1] - 1];
233
234 return fixer.insertTextAfterRange(range, "u");
235 }
236
237 return null;
238 });
eb39fafa
DC
239 }
240 }
241 }
242 };
243 }
244};