eslint/lib/rules/no-misleading-character-class.js

   1 /**
   2  * @author Toru Nagashima <https://github.com/mysticatea>
   3  */
   4 "use strict";
   5
   6 const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
   7 const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
   8 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
   9 const astUtils = require("./utils/ast-utils.js");
  10
  11 //------------------------------------------------------------------------------
  12 // Helpers
  13 //------------------------------------------------------------------------------
  14
  15 const REGEXPP_LATEST_ECMA_VERSION = 2022; // upper bound for the ecmaVersion handed to RegExpValidator in isValidWithUnicodeFlag
  16
  17 /**
  18  * Iterate character sequences of a given nodes.
  19  *
  20  * CharacterClassRange syntax can steal a part of character sequence,
  21  * so this function reverts CharacterClassRange syntax and restore the sequence.
  22  * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
  23  * @returns {IterableIterator<number[]>} The list of character sequences.
  24  */
  25 function *iterateCharacterSequence(nodes) {
  26     let seq = [];
  27
  28     for (const node of nodes) {
  29         switch (node.type) {
  30             case "Character":
  31                 seq.push(node.value);
  32                 break;
  33
  34             case "CharacterClassRange":
  35                 seq.push(node.min.value);
  36                 yield seq;
  37                 seq = [node.max.value];
  38                 break;
  39
  40             case "CharacterSet":
  41                 if (seq.length > 0) {
  42                     yield seq;
  43                     seq = [];
  44                 }
  45                 break;
  46
  47             // no default
  48         }
  49     }
  50
  51     if (seq.length > 0) {
  52         yield seq;
  53     }
  54 }
  55
  56 const hasCharacterSequence = {
  57     surrogatePairWithoutUFlag(chars) {
  58         return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
  59     },
  60
  61     combiningClass(chars) {
  62         return chars.some((c, i) => (
  63             i !== 0 &&
  64             isCombiningCharacter(c) &&
  65             !isCombiningCharacter(chars[i - 1])
  66         ));
  67     },
  68
  69     emojiModifier(chars) {
  70         return chars.some((c, i) => (
  71             i !== 0 &&
  72             isEmojiModifier(c) &&
  73             !isEmojiModifier(chars[i - 1])
  74         ));
  75     },
  76
  77     regionalIndicatorSymbol(chars) {
  78         return chars.some((c, i) => (
  79             i !== 0 &&
  80             isRegionalIndicatorSymbol(c) &&
  81             isRegionalIndicatorSymbol(chars[i - 1])
  82         ));
  83     },
  84
  85     zwj(chars) {
  86         const lastIndex = chars.length - 1;
  87
  88         return chars.some((c, i) => (
  89             i !== 0 &&
  90             i !== lastIndex &&
  91             c === 0x200d &&
  92             chars[i - 1] !== 0x200d &&
  93             chars[i + 1] !== 0x200d
  94         ));
  95     }
  96 };
  97
  98 const kinds = Object.keys(hasCharacterSequence);
  99
 100 //------------------------------------------------------------------------------
 101 // Rule Definition
 102 //------------------------------------------------------------------------------
 103
 104 /** @type {import('../shared/types').Rule} */
 105 module.exports = {
 106     meta: {
 107         type: "problem",
 108
 109         docs: {
 110             description: "Disallow characters which are made with multiple code points in character class syntax",
 111             recommended: true,
 112             url: "https://eslint.org/docs/rules/no-misleading-character-class"
 113         },
 114
 115         hasSuggestions: true,
 116
 117         schema: [],
 118
 119         messages: {
 120             surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
 121             combiningClass: "Unexpected combined character in character class.",
 122             emojiModifier: "Unexpected modified Emoji in character class.",
 123             regionalIndicatorSymbol: "Unexpected national flag in character class.",
 124             zwj: "Unexpected joined character sequence in character class.",
 125             suggestUnicodeFlag: "Add unicode 'u' flag to regex."
 126         }
 127     },
 128     create(context) {
 129         const sourceCode = context.getSourceCode();
 130         const parser = new RegExpParser();
 131
 132         /**
 133          * Verify a given regular expression.
 134          * @param {Node} node The node to report.
 135          * @param {string} pattern The regular expression pattern to verify.
 136          * @param {string} flags The flags of the regular expression.
 137          * @param {Function} unicodeFixer Fixer for missing "u" flag.
 138          * @returns {void}
 139          */
 140         function verify(node, pattern, flags, unicodeFixer) {
 141             let patternNode;
 142
 143             try {
 144                 patternNode = parser.parsePattern(
 145                     pattern,
 146                     0,
 147                     pattern.length,
 148                     flags.includes("u")
 149                 );
 150             } catch {
 151
 152                 // Ignore regular expressions with syntax errors
 153                 return;
 154             }
 155
 156             const foundKinds = new Set();
 157
 158             visitRegExpAST(patternNode, {
 159                 onCharacterClassEnter(ccNode) {
 160                     for (const chars of iterateCharacterSequence(ccNode.elements)) {
 161                         for (const kind of kinds) {
 162                             if (hasCharacterSequence[kind](chars)) {
 163                                 foundKinds.add(kind);
 164                             }
 165                         }
 166                     }
 167                 }
 168             });
 169
 170             for (const kind of foundKinds) {
 171                 let suggest;
 172
 173                 if (kind === "surrogatePairWithoutUFlag") {
 174                     suggest = [{
 175                         messageId: "suggestUnicodeFlag",
 176                         fix: unicodeFixer
 177                     }];
 178                 }
 179
 180                 context.report({
 181                     node,
 182                     messageId: kind,
 183                     suggest
 184                 });
 185             }
 186         }
 187
 188         /**
 189          * Checks if the given regular expression pattern would be valid with the `u` flag.
 190          * @param {string} pattern The regular expression pattern to verify.
 191          * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
 192          * `false` if the pattern would be invalid with the `u` flag or the configured
 193          * ecmaVersion doesn't support the `u` flag.
 194          */
 195         function isValidWithUnicodeFlag(pattern) {
 196             const { ecmaVersion } = context.parserOptions;
 197
 198             // ecmaVersion is unknown or it doesn't support the 'u' flag
 199             if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) {
 200                 return false;
 201             }
 202
 203             const validator = new RegExpValidator({
 204                 ecmaVersion: Math.min(ecmaVersion + 2009, REGEXPP_LATEST_ECMA_VERSION)
 205             });
 206
 207             try {
 208                 validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
 209             } catch {
 210                 return false;
 211             }
 212
 213             return true;
 214         }
 215
 216         return {
 217             "Literal[regex]"(node) {
 218                 verify(node, node.regex.pattern, node.regex.flags, fixer => {
 219                     if (!isValidWithUnicodeFlag(node.regex.pattern)) {
 220                         return null;
 221                     }
 222
 223                     return fixer.insertTextAfter(node, "u");
 224                 });
 225             },
 226             "Program"() {
 227                 const scope = context.getScope();
 228                 const tracker = new ReferenceTracker(scope);
 229
 230                 /*
 231                  * Iterate calls of RegExp.
 232                  * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
 233                  *       `const {RegExp: a} = window; new a()`, etc...
 234                  */
 235                 for (const { node } of tracker.iterateGlobalReferences({
 236                     RegExp: { [CALL]: true, [CONSTRUCT]: true }
 237                 })) {
 238                     const [patternNode, flagsNode] = node.arguments;
 239                     const pattern = getStringIfConstant(patternNode, scope);
 240                     const flags = getStringIfConstant(flagsNode, scope);
 241
 242                     if (typeof pattern === "string") {
 243                         verify(node, pattern, flags || "", fixer => {
 244
 245                             if (!isValidWithUnicodeFlag(pattern)) {
 246                                 return null;
 247                             }
 248
 249                             if (node.arguments.length === 1) {
 250                                 const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis
 251
 252                                 return fixer.insertTextAfter(
 253                                     penultimateToken,
 254                                     astUtils.isCommaToken(penultimateToken)
 255                                         ? ' "u",'
 256                                         : ', "u"'
 257                                 );
 258                             }
 259
 260                             if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
 261                                 const range = [flagsNode.range[0], flagsNode.range[1] - 1];
 262
 263                                 return fixer.insertTextAfterRange(range, "u");
 264                             }
 265
 266                             return null;
 267                         });
 268                     }
 269                 }
 270             }
 271         };
 272     }
 273 };