232 lines
6.6 KiB
JavaScript
232 lines
6.6 KiB
JavaScript
|
'use strict';
|
||
|
|
||
|
const EOF = 0;
|
||
|
|
||
|
// https://drafts.csswg.org/css-syntax-3/
|
||
|
// § 4.2. Definitions
|
||
|
|
||
|
/**
 * digit
 * A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is within 0x30..0x39 inclusive.
 */
function isDigit(code) {
    // Both bounds are inclusive; NaN fails both comparisons and yields false.
    return 0x0030 <= code && code <= 0x0039;
}
|
||
|
|
||
|
/**
 * hex digit
 * A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and
 * U+0046 LATIN CAPITAL LETTER F (F), or a code point between
 * U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is one of 0-9, A-F or a-f.
 */
function isHexDigit(code) {
    // 0 .. 9
    if (isDigit(code)) {
        return true;
    }

    // A .. F
    if (0x0041 <= code && code <= 0x0046) {
        return true;
    }

    // a .. f
    return 0x0061 <= code && code <= 0x0066;
}
|
||
|
|
||
|
/**
 * uppercase letter
 * A code point between U+0041 LATIN CAPITAL LETTER A (A) and
 * U+005A LATIN CAPITAL LETTER Z (Z).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is within 0x41..0x5A inclusive.
 */
function isUppercaseLetter(code) {
    const atLeastA = code >= 0x0041;
    const atMostZ = code <= 0x005A;

    return atLeastA && atMostZ;
}
|
||
|
|
||
|
/**
 * lowercase letter
 * A code point between U+0061 LATIN SMALL LETTER A (a) and
 * U+007A LATIN SMALL LETTER Z (z).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is within 0x61..0x7A inclusive.
 */
function isLowercaseLetter(code) {
    const atLeastA = code >= 0x0061;
    const atMostZ = code <= 0x007A;

    return atLeastA && atMostZ;
}
|
||
|
|
||
|
/**
 * letter
 * An uppercase letter or a lowercase letter.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is an ASCII letter (A-Z or a-z).
 */
function isLetter(code) {
    if (isUppercaseLetter(code)) {
        return true;
    }

    return isLowercaseLetter(code);
}
|
||
|
|
||
|
/**
 * non-ASCII code point
 * A code point with a value equal to or greater than U+0080 <control>.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is 0x80 or above.
 */
function isNonAscii(code) {
    const firstNonAscii = 0x0080;

    return code >= firstNonAscii;
}
|
||
|
|
||
|
/**
 * name-start code point
 * A letter, a non-ASCII code point, or U+005F LOW LINE (_).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` may begin a name.
 */
function isNameStart(code) {
    if (isLetter(code)) {
        return true;
    }

    if (isNonAscii(code)) {
        return true;
    }

    // U+005F LOW LINE (_)
    return code === 0x005F;
}
|
||
|
|
||
|
/**
 * name code point
 * A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` may appear inside a name.
 */
function isName(code) {
    if (isNameStart(code)) {
        return true;
    }

    if (isDigit(code)) {
        return true;
    }

    // U+002D HYPHEN-MINUS (-)
    return code === 0x002D;
}
|
||
|
|
||
|
/**
 * non-printable code point
 * A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION,
 * or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE,
 * or U+007F DELETE.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is one of the code points listed above.
 */
function isNonPrintable(code) {
    // Single code points: LINE TABULATION and DELETE
    if (code === 0x000B || code === 0x007F) {
        return true;
    }

    // Ranges: NULL .. BACKSPACE and SHIFT OUT .. INFORMATION SEPARATOR ONE
    const inLowControls = 0x0000 <= code && code <= 0x0008;
    const inHighControls = 0x000E <= code && code <= 0x001F;

    return inLowControls || inHighControls;
}
|
||
|
|
||
|
/**
 * newline
 * U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not
 * included in the spec definition, as they are converted to U+000A LINE FEED
 * during preprocessing.
 *
 * TODO: no preprocessing is done here, so U+000D CARRIAGE RETURN and
 * U+000C FORM FEED are checked for as well.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is LF, CR or FF.
 */
function isNewline(code) {
    switch (code) {
        case 0x000A: // LINE FEED
        case 0x000D: // CARRIAGE RETURN
        case 0x000C: // FORM FEED
            return true;

        default:
            return false;
    }
}
|
||
|
|
||
|
/**
 * whitespace
 * A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
 *
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` if `code` is a newline (as defined by isNewline), a space or a tab.
 */
function isWhiteSpace(code) {
    if (isNewline(code)) {
        return true;
    }

    // U+0020 SPACE or U+0009 CHARACTER TABULATION
    return code === 0x0020 || code === 0x0009;
}
|
||
|
|
||
|
/**
 * § 4.3.8. Check if two code points are a valid escape
 *
 * The pair is a valid escape when the first code point is
 * U+005C REVERSE SOLIDUS (\) and the second one is neither a newline nor EOF
 * (the module-level `EOF` constant).
 *
 * @param {number} first First code point.
 * @param {number} second Second code point.
 * @returns {boolean} `true` if the two code points form a valid escape.
 */
function isValidEscape(first, second) {
    // Anything that does not begin with a backslash is not an escape.
    const startsWithBackslash = first === 0x005C;

    // A backslash followed by a newline or by EOF is not a valid escape either.
    return startsWithBackslash && !isNewline(second) && second !== EOF;
}
|
||
|
|
||
|
/**
 * § 4.3.9. Check if three code points would start an identifier
 *
 * @param {number} first First code point.
 * @param {number} second Second code point.
 * @param {number} third Third code point; only consulted when `first` is a
 *     hyphen and `second` is neither a name-start code point nor a hyphen.
 * @returns {boolean} `true` if the code points would start an identifier.
 */
function isIdentifierStart(first, second, third) {
    // U+002D HYPHEN-MINUS: what follows the hyphen decides.
    if (first === 0x002D) {
        // A name-start code point or a second hyphen is enough ...
        if (isNameStart(second) || second === 0x002D) {
            return true;
        }

        // ... otherwise the second and third code points must be a valid escape.
        return isValidEscape(second, third);
    }

    // A name-start code point always starts an identifier.
    if (isNameStart(first)) {
        return true;
    }

    // U+005C REVERSE SOLIDUS (\): true only when the first and second code
    // points are a valid escape. Anything else: false.
    return first === 0x005C && isValidEscape(first, second);
}
|
||
|
|
||
|
/**
 * § 4.3.10. Check if three code points would start a number
 *
 * Unlike the spec algorithm, which answers true/false, this function returns
 * a number: 0 stands for "false", any non-zero result stands for "true".
 *
 * @param {number} first First code point.
 * @param {number} second Second code point.
 * @param {number} third Third code point.
 * @returns {number}
 *     1 - `first` is a digit;
 *     2 - a sign or a full stop followed by a digit;
 *     3 - a sign followed by a full stop and a digit;
 *     0 - the code points do not start a number.
 */
function isNumberStart(first, second, third) {
    switch (first) {
        // U+002B PLUS SIGN (+)
        // U+002D HYPHEN-MINUS (-)
        case 0x002B:
        case 0x002D:
            // sign + digit
            if (isDigit(second)) {
                return 2;
            }

            // sign + U+002E FULL STOP (.) + digit
            if (second === 0x002E && isDigit(third)) {
                return 3;
            }

            return 0;

        // U+002E FULL STOP (.)
        case 0x002E:
            if (isDigit(second)) {
                return 2;
            }

            return 0;

        // digit or anything else
        default:
            if (isDigit(first)) {
                return 1;
            }

            return 0;
    }
}
|
||
|
|
||
|
//
|
||
|
// Misc
|
||
|
//
|
||
|
|
||
|
/**
 * Detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
 *
 * @param {number} code Code unit to test.
 * @returns {number} 1 if `code` is 0xFEFF (UTF-16BE mark) or 0xFFFE (UTF-16LE mark),
 *     otherwise 0.
 */
function isBOM(code) {
    const isBigEndianMark = code === 0xFEFF; // UTF-16BE
    const isLittleEndianMark = code === 0xFFFE; // UTF-16LE

    return isBigEndianMark || isLittleEndianMark ? 1 : 0;
}
|
||
|
|
||
|
// Fast code category
// Only ASCII code points have a special meaning, which is why the lookup table
// below covers the codes 0..127 only.
const EofCategory = 0x80;
const WhiteSpaceCategory = 0x82;
const DigitCategory = 0x83;
const NameStartCategory = 0x84;
const NonPrintableCategory = 0x85;

// CATEGORY[code] holds the category of an ASCII code point. The predicates are
// tried in a fixed order and the first match wins; a code point that matches
// none of them is stored as itself.
// Note: isNonPrintable(0) is true, so index 0 receives NonPrintableCategory and
// the EofCategory fallback at the end is never actually reached.
const CATEGORY = Array.from({ length: 0x80 }, (_, code) => {
    if (isWhiteSpace(code)) {
        return WhiteSpaceCategory;
    }

    if (isDigit(code)) {
        return DigitCategory;
    }

    if (isNameStart(code)) {
        return NameStartCategory;
    }

    if (isNonPrintable(code)) {
        return NonPrintableCategory;
    }

    return code !== 0 ? code : EofCategory;
});
|
||
|
|
||
|
/**
 * Look up the category of a code point.
 *
 * @param {number} code Code point to classify.
 * @returns {number} The CATEGORY table entry when `code` is below 0x80;
 *     NameStartCategory for every other code point.
 */
function charCodeCategory(code) {
    if (code < 0x80) {
        return CATEGORY[code];
    }

    return NameStartCategory;
}
|
||
|
|
||
|
// Public API of the module (CommonJS exports).

// Category constants
exports.DigitCategory = DigitCategory;
exports.EofCategory = EofCategory;
exports.NameStartCategory = NameStartCategory;
exports.NonPrintableCategory = NonPrintableCategory;
exports.WhiteSpaceCategory = WhiteSpaceCategory;

// Category lookup and code point checks
exports.charCodeCategory = charCodeCategory;
exports.isBOM = isBOM;
exports.isDigit = isDigit;
exports.isHexDigit = isHexDigit;
exports.isIdentifierStart = isIdentifierStart;
exports.isLetter = isLetter;
exports.isLowercaseLetter = isLowercaseLetter;
exports.isName = isName;
exports.isNameStart = isNameStart;
exports.isNewline = isNewline;
exports.isNonAscii = isNonAscii;
exports.isNonPrintable = isNonPrintable;
exports.isNumberStart = isNumberStart;
exports.isUppercaseLetter = isUppercaseLetter;
exports.isValidEscape = isValidEscape;
exports.isWhiteSpace = isWhiteSpace;
|