import { adoptBuffer } from './adopt-buffer.js';
import { cmpStr } from './utils.js';
import tokenNames from './names.js';
import {
    WhiteSpace,
    Comment,
    Delim,
    EOF,
    Function as FunctionToken,
    LeftParenthesis,
    RightParenthesis,
    LeftSquareBracket,
    RightSquareBracket,
    LeftCurlyBracket,
    RightCurlyBracket
} from './types.js';

// Every `offsetAndType` entry packs two values into one 32-bit integer:
// the token type in the bits above TYPE_SHIFT and the token END offset in the
// low 24 bits. An end offset above OFFSET_MASK would spill into the type bits.
const OFFSET_MASK = 0x00FFFFFF;
const TYPE_SHIFT = 24;

// Opening token type -> the token type that closes it
const balancePair = new Map([
    [FunctionToken, RightParenthesis],
    [LeftParenthesis, RightParenthesis],
    [LeftSquareBracket, RightSquareBracket],
    [LeftCurlyBracket, RightCurlyBracket]
]);

/**
 * Cursor over the tokens of a source string.
 *
 * Tokens are produced once by the `tokenize` callback and stored in two
 * parallel buffers indexed by token number:
 *   - `offsetAndType[i]` – packed type and end offset of token `i`
 *     (a token starts where the previous one ends; token 0 starts at
 *     `firstCharOffset`);
 *   - `balance[i]` – bracket matching data: an opening token holds the index
 *     of its closing token, a closing token holds the index of its opening
 *     token, any other token holds the index of the token that closes the
 *     enclosing block. `source.length` is stored when there is no such token
 *     (top-level tokens, unclosed blocks).
 *
 * The current position is exposed through `tokenIndex`, `tokenType`,
 * `tokenStart`, `tokenEnd` and `eof`.
 */
export class TokenStream {
    /**
     * @param {string} source Text to tokenize
     * @param {function(string, function(number, number, number): void): void} tokenize
     *     Tokenizer; calls the passed callback as `(type, start, end)` per token
     */
    constructor(source, tokenize) {
        this.setSource(source, tokenize);
    }

    /**
     * Move the cursor before the first token (`tokenIndex === -1`).
     */
    reset() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    }

    /**
     * Tokenize a new source, rebuild both buffers and position the cursor on
     * the first token.
     *
     * @param {string} [source=''] Text to tokenize; falsy values become ''
     * @param {function} [tokenize] Tokenizer, see the constructor
     */
    setSource(source = '', tokenize = () => {}) {
        source = String(source || '');

        const sourceLength = source.length;
        const offsetAndType = adoptBuffer(this.offsetAndType, sourceLength + 1); // +1 because of eof-token
        const balance = adoptBuffer(this.balance, sourceLength + 1);
        let tokenCount = 0;
        let balanceCloseType = 0; // token type that closes the innermost open block, 0 when none
        let balanceStart = 0;     // packed (close type, index) of the innermost opening token
        let firstCharOffset = -1;

        // capture buffers: they are detached from the instance while tokenizing
        this.offsetAndType = null;
        this.balance = null;

        tokenize(source, (type, start, end) => {
            switch (type) {
                case balanceCloseType: {
                    // closing token of the innermost open block
                    let balancePrev = balanceStart & OFFSET_MASK;

                    // the opening token kept the state of the outer block, restore it
                    balanceStart = balance[balancePrev];
                    balanceCloseType = balanceStart >> TYPE_SHIFT;

                    // link the pair to each other
                    balance[tokenCount] = balancePrev;
                    balance[balancePrev++] = tokenCount;

                    // tokens of this block that are not linked yet point to the closing token
                    for (; balancePrev < tokenCount; balancePrev++) {
                        if (balance[balancePrev] === sourceLength) {
                            balance[balancePrev] = tokenCount;
                        }
                    }
                    break;
                }

                case LeftParenthesis:
                case FunctionToken:
                case LeftSquareBracket:
                case LeftCurlyBracket:
                    // stash the outer block state in the opening token slot
                    // until the block is closed
                    balance[tokenCount] = balanceStart;
                    balanceCloseType = balancePair.get(type);
                    balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                    break;

                default:
                    balance[tokenCount] = sourceLength;
            }

            offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | end;

            if (firstCharOffset === -1) {
                firstCharOffset = start;
            }
        });

        // finalize buffers
        offsetAndType[tokenCount] = (EOF << TYPE_SHIFT) | sourceLength;
        // The EOF slot is read by isBalanceEdge() once the cursor reaches the end,
        // so it must not keep whatever the adopted buffer contained before.
        balance[tokenCount] = sourceLength;
        balance[sourceLength] = sourceLength; // prevents false positive balance match with any token

        // opening tokens that were never closed still hold stashed state, reset them
        while (balanceStart !== 0) {
            const balancePrev = balanceStart & OFFSET_MASK;

            balanceStart = balance[balancePrev];
            balance[balancePrev] = sourceLength;
        }

        this.source = source;
        this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
        this.tokenCount = tokenCount;
        this.offsetAndType = offsetAndType;
        this.balance = balance;

        this.reset();
        this.next();
    }

    /**
     * Type of the token `offset` positions away from the current one.
     *
     * @param {number} offset Relative token offset
     * @returns {number} Token type, or EOF past the last token
     */
    lookupType(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return EOF;
    }

    /**
     * Start offset (in the source) of the token `offset` positions away from
     * the current one.
     *
     * @param {number} offset Relative token offset
     * @returns {number} Start offset, or the source length past the last token
     */
    lookupOffset(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            // The first token has no predecessor to take an end offset from;
            // reading index -1 would yield 0 rather than firstCharOffset.
            return offset > 0
                ? this.offsetAndType[offset - 1] & OFFSET_MASK
                : this.firstCharOffset;
        }

        return this.source.length;
    }

    /**
     * Compare the text of the token `offset` positions away from the current
     * one with `referenceStr` using `cmpStr`.
     *
     * @param {number} offset Relative token offset
     * @param {string} referenceStr String to compare with
     * @returns {boolean} Result of `cmpStr`, or false past the last token
     */
    lookupValue(offset, referenceStr) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                // same first-token rule as in lookupOffset()
                offset > 0
                    ? this.offsetAndType[offset - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    }

    /**
     * Start offset (in the source) of a token given its absolute index.
     *
     * @param {number} tokenIndex Absolute token index
     * @returns {number} Start offset; the offset stored in the EOF slot for
     *     indexes past the last token
     */
    getTokenStart(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    }

    /**
     * Source text between `start` and the beginning of the current token.
     *
     * @param {number} start Offset in the source
     * @returns {string}
     */
    substrToCursor(start) {
        return this.source.substring(start, this.tokenStart);
    }

    /**
     * Check whether the balance entry of the current token is lower than `pos`.
     * A closing token stores the index of its opening token, so this is true
     * when the current token closes a block opened before token index `pos`.
     *
     * @param {number} pos Token index
     * @returns {boolean}
     */
    isBalanceEdge(pos) {
        return this.balance[this.tokenIndex] < pos;
    }

    /**
     * Check whether a token is a Delim token whose first character has the
     * given code.
     *
     * @param {number} code Character code
     * @param {number} [offset] Relative token offset; the current token is
     *     tested when omitted or 0
     * @returns {boolean}
     */
    isDelim(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    }

    /**
     * Move the cursor by `tokenCount` tokens. Moving to or past the end puts
     * the stream into the EOF state.
     *
     * @param {number} tokenCount Number of tokens to move by
     */
    skip(tokenCount) {
        const target = this.tokenIndex + tokenCount;

        if (target < this.tokenCount) {
            const packed = this.offsetAndType[target];

            this.tokenIndex = target;
            // The first token starts at firstCharOffset; there is no previous
            // entry to read an end offset from.
            this.tokenStart = target > 0
                ? this.offsetAndType[target - 1] & OFFSET_MASK
                : this.firstCharOffset;
            this.tokenType = packed >> TYPE_SHIFT;
            this.tokenEnd = packed & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.next(); // switches the stream to the EOF state
        }
    }

    /**
     * Move the cursor to the next token, or into the EOF state when there are
     * no more tokens.
     */
    next() {
        let next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.tokenEnd; // tokens are contiguous
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.eof = true;
            this.tokenIndex = this.tokenCount;
            this.tokenType = EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    }

    /**
     * Skip whitespace and comment tokens starting from the current one.
     */
    skipSC() {
        while (this.tokenType === WhiteSpace || this.tokenType === Comment) {
            this.next();
        }
    }

    /**
     * Scan tokens from `startToken` and move the cursor to where the scan
     * stops. Balanced blocks are jumped over as a whole.
     *
     * @param {number} startToken Absolute index of the token to start from
     * @param {function(number): number} stopConsume Receives the code of the
     *     first character of each visited token; return 1 to stop before the
     *     token, 2 to stop right after it, anything else to continue
     */
    skipUntilBalanced(startToken, stopConsume) {
        let cursor = startToken;
        let balanceEnd;
        let offset;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            offset = cursor > 0
                ? this.offsetAndType[cursor - 1] & OFFSET_MASK
                : this.firstCharOffset;

            // check stop condition
            switch (stopConsume(this.source.charCodeAt(offset))) {
                case 1: // just stop
                    break loop;

                case 2: // stop & included
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        this.skip(cursor - this.tokenIndex);
    }

    /**
     * Call `fn(type, start, end, index)` for every token in order. The cursor
     * is not moved.
     *
     * @param {function(number, number, number, number): void} fn
     */
    forEachToken(fn) {
        for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            const start = offset;
            const item = this.offsetAndType[i];
            const end = item & OFFSET_MASK;
            const type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    }

    /**
     * Build a plain description of every token.
     *
     * @returns {Array<{idx: number, type: string, chunk: string, balance: number}>}
     *     `type` is looked up in `tokenNames`, `chunk` is the token text
     */
    dump() {
        const tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: tokenNames[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
}