|
- 'use strict';
- var CssSyntaxError = require('./error');
- var constants = require('./const');
- var TYPE = constants.TYPE;
- var NAME = constants.NAME;
- var SYMBOL_TYPE = constants.SYMBOL_TYPE;
- var utils = require('./utils');
- var firstCharOffset = utils.firstCharOffset;
- var cmpStr = utils.cmpStr;
- var isNumber = utils.isNumber;
- var findWhiteSpaceStart = utils.findWhiteSpaceStart;
- var findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
- var findCommentEnd = utils.findCommentEnd;
- var findStringEnd = utils.findStringEnd;
- var findNumberEnd = utils.findNumberEnd;
- var findIdentifierEnd = utils.findIdentifierEnd;
- var findUrlRawEnd = utils.findUrlRawEnd;
// Token type reported by the lookup helpers when there is no token to look at.
var NULL = 0;

// Token types assigned to multi-character tokens.
var WHITESPACE = TYPE.WhiteSpace;
var IDENTIFIER = TYPE.Identifier;
var NUMBER = TYPE.Number;
var STRING = TYPE.String;
var COMMENT = TYPE.Comment;
var PUNCTUATOR = TYPE.Punctuator;
var CDO = TYPE.CDO;
var CDC = TYPE.CDC;
var ATRULE = TYPE.Atrule;
var FUNCTION = TYPE.Function;
var URL = TYPE.Url;
var RAW = TYPE.Raw;

// Char codes that end a line when line/column tables are computed.
var N = 10; // \n
var F = 12; // \f
var R = 13; // \r

// Single-character punctuators. The tokenizer compares these both with char
// codes and with token types, since a plain punctuator token uses its char
// code as its type.
var STAR = TYPE.Asterisk;
var SLASH = TYPE.Solidus;
var FULLSTOP = TYPE.FullStop;
var PLUSSIGN = TYPE.PlusSign;
var HYPHENMINUS = TYPE.HyphenMinus;
var GREATERTHANSIGN = TYPE.GreaterThanSign;
var LESSTHANSIGN = TYPE.LessThanSign;
var EXCLAMATIONMARK = TYPE.ExclamationMark;
var COMMERCIALAT = TYPE.CommercialAt;
var QUOTATIONMARK = TYPE.QuotationMark;
var APOSTROPHE = TYPE.Apostrophe;
var LEFTPARENTHESIS = TYPE.LeftParenthesis;
var RIGHTPARENTHESIS = TYPE.RightParenthesis;
var LEFTCURLYBRACKET = TYPE.LeftCurlyBracket;
var RIGHTCURLYBRACKET = TYPE.RightCurlyBracket;
var LEFTSQUAREBRACKET = TYPE.LeftSquareBracket;
var RIGHTSQUAREBRACKET = TYPE.RightSquareBracket;

// Smallest size used when the line/column buffers are (re)allocated.
var MIN_BUFFER_SIZE = 16 * 1024;

// A token is packed into one integer: the type sits above TYPE_SHIFT bits,
// the token's end offset occupies the TYPE_SHIFT bits below it.
var TYPE_SHIFT = 24;
var OFFSET_MASK = (1 << TYPE_SHIFT) - 1;

// fallback on Array when TypedArray is not supported
var SafeUint32Array = Array;
if (typeof Uint32Array !== 'undefined') {
    SafeUint32Array = Uint32Array;
}
/**
 * Fills the tokenizer's offset -> line and offset -> column lookup tables for
 * `source` and marks them as computed.
 *
 * The tables are indexed by source offset and have one extra entry for the
 * position right after the last character. Existing buffers on the tokenizer
 * are reused when they are large enough; otherwise new ones are allocated.
 *
 * @param {Object} tokenizer  reads `lines`, `columns`, `startLine`,
 *                            `startColumn`; writes `lines`, `columns` and
 *                            `linesAnsColumnsComputed`
 * @param {string} source     text the offsets refer to
 */
function computeLinesAndColumns(tokenizer, source) {
    var sourceLength = source.length;
    var start = firstCharOffset(source);
    var lines = tokenizer.lines;
    var line = tokenizer.startLine;
    var columns = tokenizer.columns;
    var column = tokenizer.startColumn;
    var i;

    if (lines === null || lines.length < sourceLength + 1) {
        lines = new SafeUint32Array(Math.max(sourceLength + 1024, MIN_BUFFER_SIZE));
        columns = new SafeUint32Array(lines.length);
    }

    // Offsets skipped by firstCharOffset() are not visited by the main loop.
    // Because the buffers are reused between sources, leaving them alone would
    // expose values of a previous source (or zeros), so they get the start
    // position instead.
    for (i = 0; i < start; i++) {
        lines[i] = line;
        columns[i] = column;
    }

    for (i = start; i < sourceLength; i++) {
        var code = source.charCodeAt(i);

        lines[i] = line;
        columns[i] = column++;

        if (code === N || code === R || code === F) {
            // \r\n counts as a single line break; the \n still gets its own
            // entry, located on the line being terminated
            if (code === R && i + 1 < sourceLength && source.charCodeAt(i + 1) === N) {
                i++;
                lines[i] = line;
                columns[i] = column;
            }

            line++;
            column = 1;
        }
    }

    // position right after the last character
    lines[i] = line;
    columns[i] = column;

    tokenizer.linesAnsColumnsComputed = true;
    tokenizer.lines = lines;
    tokenizer.columns = columns;
}
/**
 * Splits `source` into tokens, starting at `startPos`, and stores the result
 * on the tokenizer.
 *
 * Results written to the tokenizer:
 *  - `offsetAndType[i]` packs token i as `(type << TYPE_SHIFT) | endOffset`;
 *    the entry after the last token holds the final offset only.
 *  - `balance[i]` links bracket pairs: an opening `(`, `[` or `{` (including
 *    the one that ends a function/url token) holds the index of its closing
 *    token and vice versa; tokens between them that had no link of their own
 *    hold the index of the closing token; everything else, including openers
 *    that are never closed, holds `source.length`.
 *  - `tokenCount` is the number of tokens.
 *
 * Existing buffers are reused when they are large enough.
 *
 * @param {Object} tokenizer  reads/writes `offsetAndType` and `balance`,
 *                            writes `tokenCount`
 * @param {string} source     text to tokenize
 * @param {number} startPos   offset of the first character to tokenize
 */
function tokenLayout(tokenizer, source, startPos) {
    var length = source.length;
    var tokens = tokenizer.offsetAndType;
    var balance = tokenizer.balance;
    var count = 0;        // tokens emitted so far
    var prevType = 0;     // type of the most recently emitted token
    var pos = startPos;   // current source offset
    var mark = 0;         // start of the last identifier (reused as a scratch offset for url())
    var closeCode = 0;    // char code that closes the innermost open bracket
    var openRef = 0;      // (closeCode << TYPE_SHIFT) | index of the innermost open bracket
    var idx = 0;
    var code;
    var type;

    if (tokens === null || tokens.length < length + 1) {
        tokens = new SafeUint32Array(length + 1024);
        balance = new SafeUint32Array(length + 1024);
    }

    while (pos < length) {
        code = source.charCodeAt(pos);
        type = code < 0x80 ? SYMBOL_TYPE[code] : IDENTIFIER;

        balance[count] = length;

        if (type === WHITESPACE) {
            pos = findWhiteSpaceEnd(source, pos + 1);
        } else if (type === PUNCTUATOR) {
            // bracket bookkeeping
            if (code === closeCode) {
                // closes the innermost open bracket: link both ends and
                // restore the enclosing bracket, which the opener's slot
                // was holding
                idx = openRef & OFFSET_MASK;
                openRef = balance[idx];
                closeCode = openRef >> TYPE_SHIFT;
                balance[count] = idx;
                balance[idx++] = count;

                // tokens inside the pair without a link point to the closer
                while (idx < count) {
                    if (balance[idx] === length) {
                        balance[idx] = count;
                    }
                    idx++;
                }
            } else if (code === LEFTSQUAREBRACKET ||
                       code === LEFTCURLYBRACKET ||
                       code === LEFTPARENTHESIS) {
                // opens a new pair: park the enclosing reference in this slot
                balance[count] = openRef;
                closeCode = code === LEFTSQUAREBRACKET
                    ? RIGHTSQUAREBRACKET
                    : code === LEFTCURLYBRACKET
                        ? RIGHTCURLYBRACKET
                        : RIGHTPARENTHESIS;
                openRef = (closeCode << TYPE_SHIFT) | count;
            }

            // multi-character tokens that start with a punctuator; each of
            // them replaces the previously emitted token (count--)
            if (code === STAR && prevType === SLASH) {
                // /*
                type = COMMENT;
                pos = findCommentEnd(source, pos + 1);
                count--;
            } else if (code === FULLSTOP &&
                       (prevType === PLUSSIGN || prevType === HYPHENMINUS) &&
                       pos + 1 < length &&
                       isNumber(source.charCodeAt(pos + 1))) {
                // edge case for -.123 and +.123
                type = NUMBER;
                pos = findNumberEnd(source, pos + 2, false);
                count--;
            } else if (code === EXCLAMATIONMARK &&
                       prevType === LESSTHANSIGN &&
                       pos + 2 < length &&
                       source.charCodeAt(pos + 1) === HYPHENMINUS &&
                       source.charCodeAt(pos + 2) === HYPHENMINUS) {
                // <!--
                type = CDO;
                pos = pos + 3;
                count--;
            } else if (code === HYPHENMINUS &&
                       prevType === HYPHENMINUS &&
                       pos + 1 < length &&
                       source.charCodeAt(pos + 1) === GREATERTHANSIGN) {
                // -->
                type = CDC;
                pos = pos + 2;
                count--;
            } else if (code === LEFTPARENTHESIS && prevType === IDENTIFIER) {
                // ident( -- the parenthesis is folded into the identifier
                // token, so the balance slot and the open reference move one
                // token back
                pos = pos + 1;
                count--;
                balance[count] = balance[count + 1];
                openRef--;

                // 4 char length identifier and equal to `url(` (case insensitive)
                if (pos - mark === 4 && cmpStr(source, mark, pos, 'url(')) {
                    // url() may contain almost any character sequence, so look
                    // at the first non-whitespace char to decide how to go on
                    mark = findWhiteSpaceEnd(source, pos);
                    code = source.charCodeAt(mark);

                    if (code === LEFTPARENTHESIS ||
                        code === RIGHTPARENTHESIS ||
                        code === QUOTATIONMARK ||
                        code === APOSTROPHE) {
                        type = URL;
                    } else {
                        // url(
                        tokens[count++] = (URL << TYPE_SHIFT) | pos;
                        balance[count] = length;

                        // ws*
                        if (mark !== pos) {
                            tokens[count++] = (WHITESPACE << TYPE_SHIFT) | mark;
                            balance[count] = length;
                        }

                        // raw
                        type = RAW;
                        pos = findUrlRawEnd(source, mark);
                    }
                } else {
                    type = FUNCTION;
                }
            } else {
                // plain punctuator: its char code is its token type
                type = code;
                pos = pos + 1;
            }
        } else if (type === NUMBER) {
            pos = findNumberEnd(source, pos + 1, prevType !== FULLSTOP);

            // merge number with a preceding dot, dash or plus
            if (prevType === FULLSTOP ||
                prevType === HYPHENMINUS ||
                prevType === PLUSSIGN) {
                count--;
            }
        } else if (type === STRING) {
            pos = findStringEnd(source, pos + 1, code);
        } else {
            mark = pos;
            pos = findIdentifierEnd(source, pos);

            // merge identifier with a preceding dash
            if (prevType === HYPHENMINUS) {
                count--;
                // look at the token before the dash, for case @-prefix-ident
                prevType = count === 0 ? 0 : tokens[count - 1] >> TYPE_SHIFT;
            }

            if (prevType === COMMERCIALAT) {
                // replace the `@` token, the result is an <at-keyword-token>
                count--;
                type = ATRULE;
            }
        }

        tokens[count++] = (type << TYPE_SHIFT) | pos;
        prevType = type;
    }

    // finalize arrays
    tokens[count] = pos;
    balance[count] = length;
    balance[length] = length; // prevents false positive balance match with any token

    // brackets that were never closed are not balanced with anything
    while (openRef !== 0) {
        idx = openRef & OFFSET_MASK;
        openRef = balance[idx];
        balance[idx] = length;
    }

    tokenizer.offsetAndType = tokens;
    tokenizer.tokenCount = count;
    tokenizer.balance = balance;
}
- //
- // tokenizer
- //
/**
 * Token stream over a source string.
 *
 * The source is tokenized as a whole by `setSource()`; the instance then acts
 * as a cursor over the tokens (`currentToken`, `tokenType`, `tokenStart`,
 * `tokenEnd`, `eof`).
 *
 * @param {*} source             see `setSource()`
 * @param {number} [startOffset] see `setSource()`
 * @param {number} [startLine]   see `setSource()`
 * @param {number} [startColumn] see `setSource()`
 */
var Tokenizer = function(source, startOffset, startLine, startColumn) {
    // buffers are created lazily and kept between setSource() calls
    this.offsetAndType = null;
    this.balance = null;
    this.lines = null;
    this.columns = null;

    this.setSource(source, startOffset, startLine, startColumn);
};

Tokenizer.prototype = {
    /**
     * Replaces the source, tokenizes it and moves the cursor to the first token.
     *
     * @param {*} source               converted with String(); falsy values become ''
     * @param {number} [startOffset=0] added to offsets in reported locations
     * @param {number} [startLine=1]   line of the first character
     * @param {number} [startColumn=1] column of the first character
     */
    setSource: function(source, startOffset, startLine, startColumn) {
        var safeSource = String(source || '');
        var start = firstCharOffset(safeSource);

        this.source = safeSource;
        this.firstCharOffset = start;
        this.startOffset = typeof startOffset === 'undefined' ? 0 : startOffset;
        this.startLine = typeof startLine === 'undefined' ? 1 : startLine;
        this.startColumn = typeof startColumn === 'undefined' ? 1 : startColumn;
        this.linesAnsColumnsComputed = false;

        this.eof = false;
        this.currentToken = -1;
        this.tokenType = 0;
        this.tokenStart = start;
        this.tokenEnd = start;

        tokenLayout(this, safeSource, start);
        this.next();
    },

    /**
     * Type of the token `offset` tokens away from the current one.
     * @param {number} offset
     * @returns {number} token type, or NULL past the last token
     */
    lookupType: function(offset) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return NULL;
    },

    /**
     * Type of the first non-whitespace token at or after `offset` tokens
     * away from the current one.
     * @param {number} offset
     * @returns {number} token type, or NULL when only whitespace (or nothing) is left
     */
    lookupNonWSType: function(offset) {
        offset += this.currentToken;

        for (var type; offset < this.tokenCount; offset++) {
            type = this.offsetAndType[offset] >> TYPE_SHIFT;

            if (type !== WHITESPACE) {
                return type;
            }
        }

        return NULL;
    },

    /**
     * Compares the text of the token `offset` tokens away from the current
     * one with `referenceStr` using cmpStr().
     * @param {number} offset
     * @param {string} referenceStr
     * @returns {boolean} result of cmpStr(), or false past the last token
     */
    lookupValue: function(offset, referenceStr) {
        offset += this.currentToken;

        if (offset < this.tokenCount) {
            return cmpStr(
                this.source,
                // a token starts where the previous one ends; the very first
                // token has no predecessor and starts where tokenizing began
                offset === 0
                    ? this.firstCharOffset
                    : this.offsetAndType[offset - 1] & OFFSET_MASK,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    },

    /**
     * Source offset at which token `tokenNum` starts.
     * @param {number} tokenNum absolute token index
     * @returns {number} start offset; the end of the token list for indexes
     *                   past the last token
     */
    getTokenStart: function(tokenNum) {
        if (tokenNum === this.currentToken) {
            return this.tokenStart;
        }

        if (tokenNum > 0) {
            return tokenNum < this.tokenCount
                ? this.offsetAndType[tokenNum - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    },

    /**
     * Start offset of the current token, or of the whitespace token right
     * before it when there is one.
     * @returns {number}
     */
    getOffsetExcludeWS: function() {
        if (this.currentToken > 0) {
            if ((this.offsetAndType[this.currentToken - 1] >> TYPE_SHIFT) === WHITESPACE) {
                return this.currentToken > 1
                    ? this.offsetAndType[this.currentToken - 2] & OFFSET_MASK
                    : this.firstCharOffset;
            }
        }

        return this.tokenStart;
    },

    /**
     * Scans from `startToken` to the first stop token outside any balanced
     * block and reports how far that is from the current token.
     *
     * @param {number} startToken         absolute index to start scanning at
     * @param {number} endTokenType1      stop type; the stop token is excluded
     * @param {number} endTokenType2      stop type; the stop token is included
     *                                    when `includeTokenType2` is truthy
     * @param {boolean} includeTokenType2
     * @returns {number} token count relative to `currentToken` (not to `startToken`)
     */
    getRawLength: function(startToken, endTokenType1, endTokenType2, includeTokenType2) {
        var cursor = startToken;
        var balanceEnd;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // balance end points to offset before start
            if (balanceEnd < startToken) {
                break loop;
            }

            // check token is stop type
            switch (this.offsetAndType[cursor] >> TYPE_SHIFT) {
                case endTokenType1:
                    break loop;

                case endTokenType2:
                    if (includeTokenType2) {
                        cursor++;
                    }
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        return cursor - this.currentToken;
    },

    /**
     * @param {number} pos token index
     * @returns {boolean} true when the balance entry of the current token is
     *                    lower than `pos`
     */
    isBalanceEdge: function(pos) {
        var balanceStart = this.balance[this.currentToken];
        return balanceStart < pos;
    },

    /**
     * @returns {string} source text of the current token
     */
    getTokenValue: function() {
        return this.source.substring(this.tokenStart, this.tokenEnd);
    },

    /**
     * @param {number} start source offset
     * @returns {string} source text from `start` up to the start of the current token
     */
    substrToCursor: function(start) {
        return this.source.substring(start, this.tokenStart);
    },

    /**
     * Moves the cursor past any whitespace tokens at the current position.
     */
    skipWS: function() {
        for (var i = this.currentToken, skipTokenCount = 0; i < this.tokenCount; i++, skipTokenCount++) {
            if ((this.offsetAndType[i] >> TYPE_SHIFT) !== WHITESPACE) {
                break;
            }
        }

        if (skipTokenCount > 0) {
            this.skip(skipTokenCount);
        }
    },

    /**
     * Moves the cursor past any whitespace and comment tokens at the current position.
     */
    skipSC: function() {
        while (this.tokenType === WHITESPACE || this.tokenType === COMMENT) {
            this.next();
        }
    },

    /**
     * Moves the cursor `tokenCount` tokens forward; moving past the last
     * token puts the stream into the end-of-input state.
     * @param {number} tokenCount
     */
    skip: function(tokenCount) {
        var next = this.currentToken + tokenCount;

        if (next < this.tokenCount) {
            this.currentToken = next;
            // the first token has no predecessor to take an end offset from
            this.tokenStart = next === 0
                ? this.firstCharOffset
                : this.offsetAndType[next - 1] & OFFSET_MASK;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.next();
        }
    },

    /**
     * Moves the cursor to the next token, or into the end-of-input state
     * (`eof` set, `tokenType` NULL, offsets at the end of the source).
     */
    next: function() {
        var next = this.currentToken + 1;

        if (next < this.tokenCount) {
            this.currentToken = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.currentToken = this.tokenCount;
            this.eof = true;
            this.tokenType = NULL;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    },

    /**
     * Moves past the current token if it has the given type, otherwise
     * raises an error through `error()`.
     * @param {number} tokenType expected type
     */
    eat: function(tokenType) {
        if (this.tokenType !== tokenType) {
            var offset = this.tokenStart;
            var message = NAME[tokenType] + ' is expected';

            // tweak message and offset
            if (tokenType === IDENTIFIER) {
                // when identifier is expected but there is a function or url
                if (this.tokenType === FUNCTION || this.tokenType === URL) {
                    offset = this.tokenEnd - 1;
                    message += ' but function found';
                }
            } else {
                // when test type is part of another token show error for current position + 1
                // e.g. eat(HYPHENMINUS) will fail on "-foo", but pointing on "-" is odd
                if (this.source.charCodeAt(this.tokenStart) === tokenType) {
                    offset = offset + 1;
                }
            }

            this.error(message, offset);
        }

        this.next();
    },

    /**
     * `skipWS()` followed by `eat(tokenType)`.
     * @param {number} tokenType
     */
    eatNonWS: function(tokenType) {
        this.skipWS();
        this.eat(tokenType);
    },

    /**
     * `eat(tokenType)` that also returns the text of the eaten token.
     * @param {number} tokenType
     * @returns {string}
     */
    consume: function(tokenType) {
        var value = this.getTokenValue();

        this.eat(tokenType);

        return value;
    },

    /**
     * Eats a FUNCTION token.
     * @returns {string} token text without its last character
     */
    consumeFunctionName: function() {
        var name = this.source.substring(this.tokenStart, this.tokenEnd - 1);

        this.eat(FUNCTION);

        return name;
    },

    /**
     * `skipWS()` followed by `consume(tokenType)`.
     * @param {number} tokenType
     * @returns {string}
     */
    consumeNonWS: function(tokenType) {
        this.skipWS();

        return this.consume(tokenType);
    },

    /**
     * Moves past the current token if it is an identifier matching `name`
     * (as decided by cmpStr()), otherwise raises an error through `error()`.
     * @param {string} name
     */
    expectIdentifier: function(name) {
        if (this.tokenType !== IDENTIFIER || cmpStr(this.source, this.tokenStart, this.tokenEnd, name) === false) {
            this.error('Identifier `' + name + '` is expected');
        }

        this.next();
    },

    /**
     * Location of a source offset; line/column tables are computed on first use.
     * @param {number} offset   offset within the source
     * @param {string} [filename]
     * @returns {{source: *, offset: number, line: number, column: number}}
     */
    getLocation: function(offset, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            offset: this.startOffset + offset,
            line: this.lines[offset],
            column: this.columns[offset]
        };
    },

    /**
     * Location range for a pair of source offsets; line/column tables are
     * computed on first use.
     * @param {number} start    offset within the source
     * @param {number} end      offset within the source
     * @param {string} [filename]
     * @returns {{source: *, start: Object, end: Object}}
     */
    getLocationRange: function(start, end, filename) {
        if (!this.linesAnsColumnsComputed) {
            computeLinesAndColumns(this, this.source);
        }

        return {
            source: filename,
            start: {
                offset: this.startOffset + start,
                line: this.lines[start],
                column: this.columns[start]
            },
            end: {
                offset: this.startOffset + end,
                line: this.lines[end],
                column: this.columns[end]
            }
        };
    },

    /**
     * Throws a CssSyntaxError.
     *
     * The reported position is `offset` when it is given and lies inside the
     * source; otherwise, at end of input, the position returned by
     * findWhiteSpaceStart() for the end of the source; otherwise the start
     * of the current token.
     *
     * @param {string} [message='Unexpected input']
     * @param {number} [offset]
     * @throws {CssSyntaxError} always
     */
    error: function(message, offset) {
        var location = typeof offset !== 'undefined' && offset < this.source.length
            ? this.getLocation(offset)
            : this.eof
                ? this.getLocation(findWhiteSpaceStart(this.source, this.source.length - 1))
                : this.getLocation(this.tokenStart);

        throw new CssSyntaxError(
            message || 'Unexpected input',
            this.source,
            location.offset,
            location.line,
            location.column
        );
    },

    /**
     * Debug view of all tokens.
     * @returns {Array<{idx: number, type: string, chunk: string, balance: number}>}
     */
    dump: function() {
        // the first token starts where tokenizing began
        var offset = this.firstCharOffset;

        return Array.prototype.slice.call(this.offsetAndType, 0, this.tokenCount).map(function(item, idx) {
            var start = offset;
            var end = item & OFFSET_MASK;

            offset = end;

            return {
                idx: idx,
                type: NAME[item >> TYPE_SHIFT],
                chunk: this.source.substring(start, end),
                balance: this.balance[idx]
            };
        }, this);
    }
};
// expose the error class as a static member
Tokenizer.CssSyntaxError = CssSyntaxError;

// copy every constant, then every utils helper, onto the constructor as
// static members (in that order)
[constants, utils].forEach(function(dict) {
    var keys = Object.keys(dict);

    for (var i = 0; i < keys.length; i++) {
        Tokenizer[keys[i]] = dict[keys[i]];
    }
});

// warm up tokenizer to eliminate code branches that never execute
// fix soft deoptimizations (insufficient type feedback)
new Tokenizer('\n\r\r\n\f<!---->//""\'\'/*\r\n\f*/1a;.\\31\t\+2{url(a);func();+1.2e3 -.4e-5 .6e+7}').getLocation();

module.exports = Tokenizer;
|