]> git.proxmox.com Git - pve-eslint.git/blob - eslint/lib/rules/no-misleading-character-class.js
70e31e604f4abd556a53f96e7d5018fa0ffcb838
[pve-eslint.git] / eslint / lib / rules / no-misleading-character-class.js
1 /**
2 * @author Toru Nagashima <https://github.com/mysticatea>
3 */
4 "use strict";
5
6 const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
7 const { RegExpParser, visitRegExpAST } = require("regexpp");
8 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
9
10 //------------------------------------------------------------------------------
11 // Helpers
12 //------------------------------------------------------------------------------
13
/**
 * Yield the runs of adjacent character values found among the elements of a character class.
 *
 * A `CharacterClassRange` (e.g. `a-b`) would otherwise hide part of a sequence, so it is
 * taken apart again: its `min` value closes the current run and its `max` value opens the
 * next one. A `CharacterSet` ends the current run without contributing to it. Any other
 * element type is skipped and does not interrupt the run.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The character class elements to walk.
 * @returns {IterableIterator<number[]>} The runs, each as a list of character values.
 */
function *iterateCharacterSequence(nodes) {
    let pending = [];

    for (const element of nodes) {
        const kind = element.type;

        if (kind === "Character") {
            pending.push(element.value);
        } else if (kind === "CharacterClassRange") {

            // The lower bound still belongs to the run that precedes the range.
            pending.push(element.min.value);
            yield pending;

            // The upper bound starts a new run.
            pending = [element.max.value];
        } else if (kind === "CharacterSet" && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    // Flush whatever is left after the last element.
    if (pending.length > 0) {
        yield pending;
    }
}
52
/**
 * Tell whether any two neighbouring entries of a list satisfy a predicate.
 * @param {number[]} chars The character values to scan.
 * @param {function(number, number): boolean} matches Called with `(previous, current)` for each adjacent pair.
 * @returns {boolean} `true` as soon as one adjacent pair matches, otherwise `false`.
 */
function hasAdjacentPair(chars, matches) {
    for (let i = 1; i < chars.length; i++) {
        if (matches(chars[i - 1], chars[i])) {
            return true;
        }
    }
    return false;
}

/*
 * Detectors for each kind of misleading sequence. Every function receives one
 * run of character values and returns whether that run contains the pattern.
 * The property names double as the rule's message IDs, and their order is the
 * order in which problems are reported.
 */
const hasCharacterSequence = {
    surrogatePairWithoutUFlag(chars) {
        return hasAdjacentPair(chars, (prev, cur) => isSurrogatePair(prev, cur));
    },

    combiningClass(chars) {

        // A combining character that follows a non-combining one.
        return hasAdjacentPair(chars, (prev, cur) => isCombiningCharacter(cur) && !isCombiningCharacter(prev));
    },

    emojiModifier(chars) {

        // An emoji modifier that follows something which is not itself a modifier.
        return hasAdjacentPair(chars, (prev, cur) => isEmojiModifier(cur) && !isEmojiModifier(prev));
    },

    regionalIndicatorSymbol(chars) {

        // Two regional indicator symbols in a row.
        return hasAdjacentPair(chars, (prev, cur) => isRegionalIndicatorSymbol(cur) && isRegionalIndicatorSymbol(prev));
    },

    zwj(chars) {
        const ZWJ = 0x200d;

        // Only interior positions qualify: the joiner needs a non-joiner on both sides.
        for (let i = 1; i < chars.length - 1; i++) {
            if (chars[i] === ZWJ && chars[i - 1] !== ZWJ && chars[i + 1] !== ZWJ) {
                return true;
            }
        }
        return false;
    }
};

const kinds = Object.keys(hasCharacterSequence);
96
97 //------------------------------------------------------------------------------
98 // Rule Definition
99 //------------------------------------------------------------------------------
100
101 module.exports = {
102 meta: {
103 type: "problem",
104
105 docs: {
106 description: "disallow characters which are made with multiple code points in character class syntax",
107 recommended: true,
108 url: "https://eslint.org/docs/rules/no-misleading-character-class"
109 },
110
111 schema: [],
112
113 messages: {
114 surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
115 combiningClass: "Unexpected combined character in character class.",
116 emojiModifier: "Unexpected modified Emoji in character class.",
117 regionalIndicatorSymbol: "Unexpected national flag in character class.",
118 zwj: "Unexpected joined character sequence in character class."
119 }
120 },
121 create(context) {
122 const parser = new RegExpParser();
123
124 /**
125 * Verify a given regular expression.
126 * @param {Node} node The node to report.
127 * @param {string} pattern The regular expression pattern to verify.
128 * @param {string} flags The flags of the regular expression.
129 * @returns {void}
130 */
131 function verify(node, pattern, flags) {
132 const has = {
133 surrogatePairWithoutUFlag: false,
134 combiningClass: false,
135 variationSelector: false,
136 emojiModifier: false,
137 regionalIndicatorSymbol: false,
138 zwj: false
139 };
140 let patternNode;
141
142 try {
143 patternNode = parser.parsePattern(
144 pattern,
145 0,
146 pattern.length,
147 flags.includes("u")
148 );
149 } catch {
150
151 // Ignore regular expressions with syntax errors
152 return;
153 }
154
155 visitRegExpAST(patternNode, {
156 onCharacterClassEnter(ccNode) {
157 for (const chars of iterateCharacterSequence(ccNode.elements)) {
158 for (const kind of kinds) {
159 has[kind] = has[kind] || hasCharacterSequence[kind](chars);
160 }
161 }
162 }
163 });
164
165 for (const kind of kinds) {
166 if (has[kind]) {
167 context.report({ node, messageId: kind });
168 }
169 }
170 }
171
172 return {
173 "Literal[regex]"(node) {
174 verify(node, node.regex.pattern, node.regex.flags);
175 },
176 "Program"() {
177 const scope = context.getScope();
178 const tracker = new ReferenceTracker(scope);
179
180 /*
181 * Iterate calls of RegExp.
182 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
183 * `const {RegExp: a} = window; new a()`, etc...
184 */
185 for (const { node } of tracker.iterateGlobalReferences({
186 RegExp: { [CALL]: true, [CONSTRUCT]: true }
187 })) {
188 const [patternNode, flagsNode] = node.arguments;
189 const pattern = getStringIfConstant(patternNode, scope);
190 const flags = getStringIfConstant(flagsNode, scope);
191
192 if (typeof pattern === "string") {
193 verify(node, pattern, flags || "");
194 }
195 }
196 }
197 };
198 }
199 };