You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

301 lines
9.9 KiB
JavaScript

/**
* @author Toru Nagashima <https://github.com/mysticatea>
*/
"use strict";
const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------
/**
* @typedef {import('@eslint-community/regexpp').AST.Character} Character
* @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
*/
/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` takes the characters on either side of its hyphen
 * as its `min` and `max`, so the range is undone here: `min` is appended to
 * the run in progress (which is then emitted) and `max` starts the next run.
 * Character sets, nested classes, `\q{...}` and `[A--B]` elements end the
 * run in progress without contributing a character.
 * @param {CharacterClassElement[]} nodes The elements of a character class, in order.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters.
 */
function *iterateCharacterSequence(nodes) {

    /** @type {Character[]} */
    let pending = [];

    for (const element of nodes) {
        const { type } = element;

        if (type === "Character") {
            pending.push(element);
        } else if (type === "CharacterClassRange") {

            // The lower bound belongs to the run that precedes the hyphen ...
            pending.push(element.min);
            yield pending;

            // ... and the upper bound opens the run that follows it.
            pending = [element.max];
        } else if (
            type === "CharacterSet" ||
            type === "CharacterClass" || // [[]] nesting character class
            type === "ClassStringDisjunction" || // \q{...}
            type === "ExpressionCharacterClass" // [A--B]
        ) {
            if (pending.length > 0) {
                yield pending;
                pending = [];
            }
        }

        // Any other element type is ignored.
    }

    if (pending.length > 0) {
        yield pending;
    }
}
/**
 * Tells whether a character node was written as a `\u{...}` escape.
 * @param {Character} char The character node whose raw source text is inspected.
 * @returns {boolean} `true` when the raw text is exactly `\u{` + hexadecimal digits + `}`.
 */
function isUnicodeCodePointEscape(char) {
    const { raw } = char;
    const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;

    return codePointEscapePattern.test(raw);
}
/**
 * Checks whether any character, together with the character right before it,
 * satisfies the given predicate. The first character has no predecessor and
 * is never tested.
 * @param {Character[]} chars The character sequence to scan.
 * @param {(previous: Character, current: Character) => boolean} matchesPair Predicate applied to each neighbouring pair.
 * @returns {boolean} `true` if at least one neighbouring pair matches.
 */
function hasAdjacentPair(chars, matchesPair) {
    return chars.some((current, index) => index > 0 && matchesPair(chars[index - 1], current));
}

/**
 * Detectors keyed by problem kind; each returns `true` if the given character
 * sequence contains that kind of problem. The key order is the order in which
 * the kinds are checked.
 * @type {Record<string, (chars: Character[]) => boolean>}
 */
const hasCharacterSequence = {

    // A surrogate pair where neither half is written as `\u{...}`.
    surrogatePairWithoutUFlag(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isSurrogatePair(previous.value, current.value) &&
            !(isUnicodeCodePointEscape(previous) || isUnicodeCodePointEscape(current))
        ));
    },

    // A surrogate pair where at least one half is written as `\u{...}`.
    surrogatePair(chars) {
        return hasAdjacentPair(chars, (previous, current) => {
            if (!isSurrogatePair(previous.value, current.value)) {
                return false;
            }
            return isUnicodeCodePointEscape(previous) || isUnicodeCodePointEscape(current);
        });
    },

    // A combining character that directly follows a non-combining character.
    combiningClass(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isCombiningCharacter(current.value) &&
            !isCombiningCharacter(previous.value)
        ));
    },

    // An emoji modifier that directly follows something that is not an emoji modifier.
    emojiModifier(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isEmojiModifier(current.value) &&
            !isEmojiModifier(previous.value)
        ));
    },

    // Two regional indicator symbols next to each other.
    regionalIndicatorSymbol(chars) {
        return hasAdjacentPair(chars, (previous, current) => (
            isRegionalIndicatorSymbol(current.value) &&
            isRegionalIndicatorSymbol(previous.value)
        ));
    },

    // U+200D strictly inside the sequence, with a non-U+200D character on both sides.
    zwj(chars) {
        const ZERO_WIDTH_JOINER = 0x200d;

        // Only interior positions can have a neighbour on both sides.
        for (let index = 1; index < chars.length - 1; index++) {
            if (
                chars[index].value === ZERO_WIDTH_JOINER &&
                chars[index - 1].value !== ZERO_WIDTH_JOINER &&
                chars[index + 1].value !== ZERO_WIDTH_JOINER
            ) {
                return true;
            }
        }
        return false;
    }
};

const kinds = Object.keys(hasCharacterSequence);
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
/** @type {import('../shared/types').Rule} */
module.exports = {
meta: {
type: "problem",
docs: {
description: "Disallow characters which are made with multiple code points in character class syntax",
recommended: true,
url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
},
hasSuggestions: true,
schema: [],
messages: {
surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
surrogatePair: "Unexpected surrogate pair in character class.",
combiningClass: "Unexpected combined character in character class.",
emojiModifier: "Unexpected modified Emoji in character class.",
regionalIndicatorSymbol: "Unexpected national flag in character class.",
zwj: "Unexpected joined character sequence in character class.",
suggestUnicodeFlag: "Add unicode 'u' flag to regex."
}
},
create(context) {
const sourceCode = context.sourceCode;
const parser = new RegExpParser();
/**
* Verify a given regular expression.
* @param {Node} node The node to report.
* @param {string} pattern The regular expression pattern to verify.
* @param {string} flags The flags of the regular expression.
* @param {Function} unicodeFixer Fixer for missing "u" flag.
* @returns {void}
*/
function verify(node, pattern, flags, unicodeFixer) {
let patternNode;
try {
patternNode = parser.parsePattern(
pattern,
0,
pattern.length,
{
unicode: flags.includes("u"),
unicodeSets: flags.includes("v")
}
);
} catch {
// Ignore regular expressions with syntax errors
return;
}
const foundKinds = new Set();
visitRegExpAST(patternNode, {
onCharacterClassEnter(ccNode) {
for (const chars of iterateCharacterSequence(ccNode.elements)) {
for (const kind of kinds) {
if (hasCharacterSequence[kind](chars)) {
foundKinds.add(kind);
}
}
}
}
});
for (const kind of foundKinds) {
let suggest;
if (kind === "surrogatePairWithoutUFlag") {
suggest = [{
messageId: "suggestUnicodeFlag",
fix: unicodeFixer
}];
}
context.report({
node,
messageId: kind,
suggest
});
}
}
return {
"Literal[regex]"(node) {
verify(node, node.regex.pattern, node.regex.flags, fixer => {
if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
return null;
}
return fixer.insertTextAfter(node, "u");
});
},
"Program"(node) {
const scope = sourceCode.getScope(node);
const tracker = new ReferenceTracker(scope);
/*
* Iterate calls of RegExp.
* E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
* `const {RegExp: a} = window; new a()`, etc...
*/
for (const { node: refNode } of tracker.iterateGlobalReferences({
RegExp: { [CALL]: true, [CONSTRUCT]: true }
})) {
const [patternNode, flagsNode] = refNode.arguments;
const pattern = getStringIfConstant(patternNode, scope);
const flags = getStringIfConstant(flagsNode, scope);
if (typeof pattern === "string") {
verify(refNode, pattern, flags || "", fixer => {
if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
return null;
}
if (refNode.arguments.length === 1) {
const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 }); // skip closing parenthesis
return fixer.insertTextAfter(
penultimateToken,
astUtils.isCommaToken(penultimateToken)
? ' "u",'
: ', "u"'
);
}
if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
const range = [flagsNode.range[0], flagsNode.range[1] - 1];
return fixer.insertTextAfterRange(range, "u");
}
return null;
});
}
}
}
};
}
};