2 * @author Toru Nagashima <https://github.com/mysticatea>
6 const { CALL
, CONSTRUCT
, ReferenceTracker
, getStringIfConstant
} = require("eslint-utils");
7 const { RegExpValidator
, RegExpParser
, visitRegExpAST
} = require("regexpp");
8 const { isCombiningCharacter
, isEmojiModifier
, isRegionalIndicatorSymbol
, isSurrogatePair
} = require("./utils/unicode");
9 const astUtils
= require("./utils/ast-utils.js");
11 //------------------------------------------------------------------------------
13 //------------------------------------------------------------------------------
// Upper bound for the `ecmaVersion` option handed to regexpp's RegExpValidator
// in isValidWithUnicodeFlag(); a configured version above this is clamped to it.
// NOTE(review): presumably the newest edition the bundled regexpp understands —
// confirm when upgrading regexpp.
const REGEXPP_LATEST_ECMA_VERSION = 2022;
/**
 * Iterate over the character sequences found in the elements of a character class.
 *
 * A `CharacterClassRange` (e.g. `a-z`) can swallow a code point that belongs to
 * the sequence written next to it, so every range is split apart again: its
 * lower bound closes the sequence collected so far and its upper bound opens
 * the next one. A `CharacterSet` (e.g. `\d`) ends the current sequence without
 * contributing a code point. Empty sequences are never yielded.
 * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
 * @returns {IterableIterator<number[]>} The list of character sequences.
 */
function *iterateCharacterSequence(nodes) {
    let seq = [];

    for (const node of nodes) {
        switch (node.type) {
            case "Character":
                seq.push(node.value);
                break;

            case "CharacterClassRange":

                // The range's minimum ends the running sequence ...
                seq.push(node.min.value);
                yield seq;

                // ... and its maximum starts the following one.
                seq = [node.max.value];
                break;

            case "CharacterSet":
                if (seq.length > 0) {
                    yield seq;
                    seq = [];
                }
                break;

            // no default
        }
    }

    // Flush whatever is left after the last element.
    if (seq.length > 0) {
        yield seq;
    }
}
/**
 * Detectors for each kind of multi-code-point sequence this rule reports.
 *
 * Every method takes one sequence of code points (`number[]`) and returns
 * `true` when the sequence contains the pattern named by the method. Each
 * detector that looks at the previous element skips index 0, and `zwj` also
 * skips the last index, so no neighbour is ever read out of bounds.
 */
const hasCharacterSequence = {

    // A pair of adjacent elements that `isSurrogatePair` accepts.
    surrogatePairWithoutUFlag(chars) {
        return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
    },

    // A combining character directly after an element that is not one.
    combiningClass(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isCombiningCharacter(c) &&
            !isCombiningCharacter(chars[i - 1])
        ));
    },

    // An emoji modifier directly after an element that is not one.
    emojiModifier(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isEmojiModifier(c) &&
            !isEmojiModifier(chars[i - 1])
        ));
    },

    // Two adjacent regional indicator symbols.
    regionalIndicatorSymbol(chars) {
        return chars.some((c, i) => (
            i !== 0 &&
            isRegionalIndicatorSymbol(c) &&
            isRegionalIndicatorSymbol(chars[i - 1])
        ));
    },

    // U+200D (ZERO WIDTH JOINER) with a non-joiner element on both sides.
    zwj(chars) {
        const lastIndex = chars.length - 1;

        return chars.some((c, i) => (
            i !== 0 &&
            i !== lastIndex &&
            c === 0x200d &&
            chars[i - 1] !== 0x200d &&
            chars[i + 1] !== 0x200d
        ));
    }
};
// Names of all detectors defined on hasCharacterSequence; verify() tries each
// of them against every character sequence it finds.
const kinds = Object.keys(hasCharacterSequence);
100 //------------------------------------------------------------------------------
102 //------------------------------------------------------------------------------
104 /** @type {import('../shared/types').Rule} */
110 description
: "Disallow characters which are made with multiple code points in character class syntax",
112 url
: "https://eslint.org/docs/rules/no-misleading-character-class"
115 hasSuggestions
: true,
120 surrogatePairWithoutUFlag
: "Unexpected surrogate pair in character class. Use 'u' flag.",
121 combiningClass
: "Unexpected combined character in character class.",
122 emojiModifier
: "Unexpected modified Emoji in character class.",
123 regionalIndicatorSymbol
: "Unexpected national flag in character class.",
124 zwj
: "Unexpected joined character sequence in character class.",
125 suggestUnicodeFlag
: "Add unicode 'u' flag to regex."
129 const sourceCode
= context
.getSourceCode();
130 const parser
= new RegExpParser();
/**
 * Verify a given regular expression.
 *
 * The pattern is parsed, every character class in it is split into character
 * sequences, and each sequence is run through all detectors. Each kind of
 * problem is reported at most once per regular expression. Only the
 * "surrogatePairWithoutUFlag" report carries a suggestion (adding the `u` flag).
 * @param {Node} node The node to report.
 * @param {string} pattern The regular expression pattern to verify.
 * @param {string} flags The flags of the regular expression.
 * @param {Function} unicodeFixer Fixer for missing "u" flag.
 * @returns {void}
 */
function verify(node, pattern, flags, unicodeFixer) {
    let patternNode;

    try {
        patternNode = parser.parsePattern(
            pattern,
            0,
            pattern.length,
            flags.includes("u")
        );
    } catch {

        // Ignore regular expressions with syntax errors
        return;
    }

    // A Set, so that a kind found in several character classes is reported once.
    const foundKinds = new Set();

    visitRegExpAST(patternNode, {
        onCharacterClassEnter(ccNode) {
            for (const chars of iterateCharacterSequence(ccNode.elements)) {
                for (const kind of kinds) {
                    if (hasCharacterSequence[kind](chars)) {
                        foundKinds.add(kind);
                    }
                }
            }
        }
    });

    for (const kind of foundKinds) {
        let suggest;

        if (kind === "surrogatePairWithoutUFlag") {
            suggest = [{
                messageId: "suggestUnicodeFlag",
                fix: unicodeFixer
            }];
        }

        context.report({
            node,
            messageId: kind,
            suggest
        });
    }
}
/**
 * Checks if the given regular expression pattern would be valid with the `u` flag.
 *
 * The configured `ecmaVersion` is accepted both as an edition number (6, 7, ...)
 * and as a year (2015, 2016, ...). Either form is converted to a year and
 * capped at REGEXPP_LATEST_ECMA_VERSION before it is given to the validator.
 * @param {string} pattern The regular expression pattern to verify.
 * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
 * `false` if the pattern would be invalid with the `u` flag or the configured
 * ecmaVersion doesn't support the `u` flag.
 */
function isValidWithUnicodeFlag(pattern) {
    const { ecmaVersion } = context.parserOptions;

    // ecmaVersion is unknown or it doesn't support the 'u' flag
    if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) {
        return false;
    }

    // Edition numbers are 2009 behind their year (ES6 === ES2015); years are used as they are.
    const ecmaYear = ecmaVersion >= 2015 ? ecmaVersion : ecmaVersion + 2009;

    const validator = new RegExpValidator({
        ecmaVersion: Math.min(ecmaYear, REGEXPP_LATEST_ECMA_VERSION)
    });

    try {
        validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
    } catch {

        // The validator throws when the pattern is not valid with the `u` flag.
        return false;
    }

    return true;
}
217 "Literal[regex]"(node
) {
218 verify(node
, node
.regex
.pattern
, node
.regex
.flags
, fixer
=> {
219 if (!isValidWithUnicodeFlag(node
.regex
.pattern
)) {
223 return fixer
.insertTextAfter(node
, "u");
227 const scope
= context
.getScope();
228 const tracker
= new ReferenceTracker(scope
);
231 * Iterate calls of RegExp.
232 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
233 * `const {RegExp: a} = window; new a()`, etc...
235 for (const { node
} of tracker
.iterateGlobalReferences({
236 RegExp
: { [CALL
]: true, [CONSTRUCT
]: true }
238 const [patternNode
, flagsNode
] = node
.arguments
;
239 const pattern
= getStringIfConstant(patternNode
, scope
);
240 const flags
= getStringIfConstant(flagsNode
, scope
);
242 if (typeof pattern
=== "string") {
243 verify(node
, pattern
, flags
|| "", fixer
=> {
245 if (!isValidWithUnicodeFlag(pattern
)) {
249 if (node
.arguments
.length
=== 1) {
250 const penultimateToken
= sourceCode
.getLastToken(node
, { skip
: 1 }); // skip closing parenthesis
252 return fixer
.insertTextAfter(
254 astUtils
.isCommaToken(penultimateToken
)
260 if ((flagsNode
.type
=== "Literal" && typeof flagsNode
.value
=== "string") || flagsNode
.type
=== "TemplateLiteral") {
261 const range
= [flagsNode
.range
[0], flagsNode
.range
[1] - 1];
263 return fixer
.insertTextAfterRange(range
, "u");