123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- 'use strict';
- var constants = require('./const');
- var PUNCTUATION = constants.PUNCTUATION;
- var STOP_URL_RAW = constants.STOP_URL_RAW;
- var TYPE = constants.TYPE;
- var FULLSTOP = TYPE.FullStop;
- var PLUSSIGN = TYPE.PlusSign;
- var HYPHENMINUS = TYPE.HyphenMinus;
- var PUNCTUATOR = TYPE.Punctuator;
// Character codes (UTF-16 code units) referenced by the helpers in this file
var TAB = 0x09;         // '\t' character tabulation
var N = 0x0A;           // '\n' line feed
var F = 0x0C;           // '\f' form feed
var R = 0x0D;           // '\r' carriage return
var SPACE = 0x20;       // ' '
var BACK_SLASH = 0x5C;  // '\\'
var E = 0x65;           // 'e'.charCodeAt(0)
/**
 * Returns the offset at which the actual content of `source` starts,
 * skipping a leading byte order mark if there is one.
 * See https://en.wikipedia.org/wiki/Byte_order_mark
 *
 * @param {string} source
 * @returns {number} 1 when the first code unit is 0xFEFF or 0xFFFE, otherwise 0
 */
function firstCharOffset(source) {
    var first = source.charCodeAt(0);
    var startsWithBom =
        first === 0xFEFF || // UTF-16BE
        first === 0xFFFE;   // UTF-16LE

    return startsWithBom ? 1 : 0;
}
/**
 * Tests whether a character code is a hexadecimal digit.
 *
 * @param {number} code character code
 * @returns {boolean} true for 0..9, A..F and a..f
 */
function isHex(code) {
    // 0 .. 9
    if (code >= 48 && code <= 57) {
        return true;
    }

    // A .. F
    if (code >= 65 && code <= 70) {
        return true;
    }

    // a .. f
    return code >= 97 && code <= 102;
}
/**
 * Tests whether a character code is a decimal digit.
 *
 * @param {number} code character code
 * @returns {boolean} true for the codes of '0' (48) through '9' (57)
 */
function isNumber(code) {
    var ZERO = 48;
    var NINE = 57;

    return ZERO <= code && code <= NINE;
}
/**
 * Tests whether a character code is whitespace: a space, a tab,
 * or anything `isNewline` accepts.
 *
 * @param {number} code character code
 * @returns {boolean}
 */
function isWhiteSpace(code) {
    switch (code) {
        case SPACE:
        case TAB:
            return true;

        default:
            return isNewline(code);
    }
}
/**
 * Tests whether a character code is a newline character:
 * carriage return (R), line feed (N) or form feed (F).
 *
 * @param {number} code character code
 * @returns {boolean}
 */
function isNewline(code) {
    switch (code) {
        case R:
        case N:
        case F:
            return true;

        default:
            return false;
    }
}
/**
 * Returns how many code units the newline at `offset` occupies.
 *
 * @param {string} source
 * @param {number} offset position of `code` in `source`
 * @param {number} code character code found at `offset`
 * @returns {number} 2 for a CR immediately followed by LF, 1 for any other
 *                   newline character, 0 when `code` is not a newline
 */
function getNewlineLength(source, offset, code) {
    if (!isNewline(code)) {
        return 0;
    }

    var next = offset + 1;
    var isCrLf =
        code === R &&
        next < source.length &&
        source.charCodeAt(next) === N;

    return isCrLf ? 2 : 1;
}
/**
 * Compares the character of `testStr` at `offset` with `referenceCode`,
 * treating A..Z in `testStr` as their lower-case counterparts.
 * `referenceCode` itself is not case-folded.
 *
 * @param {string} testStr
 * @param {number} offset
 * @param {number} referenceCode character code to compare against
 * @returns {boolean}
 */
function cmpChar(testStr, offset, referenceCode) {
    var actual = testStr.charCodeAt(offset);
    var isUpperCase = actual >= 65 && actual <= 90; // A .. Z

    // setting bit 5 turns an upper-case ASCII letter into the lower-case one
    return (isUpperCase ? actual | 32 : actual) === referenceCode;
}
/**
 * Compares the `[start, end)` range of `testStr` with `referenceStr`,
 * treating A..Z in `testStr` as their lower-case counterparts.
 * `referenceStr` is compared as is, so it only ever matches when it
 * contains no upper-case A..Z.
 *
 * @param {string} testStr
 * @param {number} start inclusive start of the range
 * @param {number} end exclusive end of the range
 * @param {string} referenceStr
 * @returns {boolean} false when the range length differs from the reference
 *                    length or the range lies outside of `testStr`
 */
function cmpStr(testStr, start, end, referenceStr) {
    var length = end - start;

    if (length !== referenceStr.length || start < 0 || end > testStr.length) {
        return false;
    }

    for (var k = 0; k < length; k++) {
        var actual = testStr.charCodeAt(start + k);

        // A .. Z -> a .. z
        if (actual >= 65 && actual <= 90) {
            actual |= 32;
        }

        if (actual !== referenceStr.charCodeAt(k)) {
            return false;
        }
    }

    return true;
}
/**
 * Scans backwards from `offset` over whitespace and returns the index of the
 * first character of that whitespace run.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from (towards index 0)
 * @returns {number} index right after the nearest non-whitespace character at
 *                   or before `offset`; 0 when only whitespace precedes
 */
function findWhiteSpaceStart(source, offset) {
    var pos = offset;

    for (; pos >= 0; pos--) {
        if (!isWhiteSpace(source.charCodeAt(pos))) {
            break;
        }
    }

    return pos + 1;
}
/**
 * Scans forwards from `offset` over whitespace.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from
 * @returns {number} index of the first non-whitespace character at or after
 *                   `offset`, or the scan position reached at the end of `source`
 */
function findWhiteSpaceEnd(source, offset) {
    var pos = offset;

    for (; pos < source.length; pos++) {
        if (!isWhiteSpace(source.charCodeAt(pos))) {
            break;
        }
    }

    return pos;
}
/**
 * Finds where the comment being scanned ends.
 *
 * @param {string} source
 * @param {number} offset index to start searching for the closing marker from
 * @returns {number} index right after the first closing marker found at or
 *                   after `offset`, or `source.length` when there is none
 */
function findCommentEnd(source, offset) {
    var closing = source.indexOf('*/', offset);

    return closing === -1 ? source.length : closing + 2;
}
/**
 * Finds the end of a quoted string.
 *
 * A backslash makes the scanner skip the code unit that follows it, so an
 * escaped quote does not terminate the string.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from (normally right after
 *                        the opening quote)
 * @param {number} quote character code of the closing quote to look for
 * @returns {number} index right after the closing quote, or `source.length`
 *                   when the string is not terminated. The result never
 *                   exceeds `source.length`.
 */
function findStringEnd(source, offset, quote) {
    var length = source.length;

    while (offset < length) {
        var code = source.charCodeAt(offset);

        // TODO: bad string
        if (code === BACK_SLASH) {
            // skip the backslash together with the code unit it escapes
            offset += 2;
        } else if (code === quote) {
            return offset + 1;
        } else {
            offset++;
        }
    }

    // a backslash as the very last code unit pushes the cursor one position
    // past the end of the input; keep the result inside the source bounds
    return Math.min(offset, length);
}
/**
 * Scans forwards from `offset` over decimal digits.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from
 * @returns {number} index of the first character at or after `offset` that
 *                   is not a decimal digit, or the scan position reached at
 *                   the end of `source`
 */
function findDecimalNumberEnd(source, offset) {
    var pos = offset;

    for (; pos < source.length; pos++) {
        if (!isNumber(source.charCodeAt(pos))) {
            break;
        }
    }

    return pos;
}
/**
 * Finds the end of a number that starts at `offset`: a run of decimal digits,
 * optionally followed by a fraction and/or an exponent.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from
 * @param {boolean} allowFraction whether a `.\d+` part may be consumed
 * @returns {number} index right after the last consumed character
 */
function findNumberEnd(source, offset, allowFraction) {
    var length = source.length;
    var end = findDecimalNumberEnd(source, offset);

    // fraction: .\d+ (the full stop is consumed only when a digit follows it)
    if (allowFraction &&
        end + 1 < length &&
        source.charCodeAt(end) === FULLSTOP &&
        isNumber(source.charCodeAt(end + 1))) {
        end = findDecimalNumberEnd(source, end + 1);
    }

    // exponent: e[+-]\d+
    // case insensitive check for `e`; at least one more character must follow it
    if (end + 1 >= length || (source.charCodeAt(end) | 32) !== E) {
        return end;
    }

    var next = source.charCodeAt(end + 1);

    // look past an optional sign when there is something behind it
    if ((next === PLUSSIGN || next === HYPHENMINUS) && end + 2 < length) {
        next = source.charCodeAt(end + 2);
    }

    // `end + 2` is either the digit after the sign or the position after the
    // first exponent digit; both are valid points to continue the digit scan
    return isNumber(next) ? findDecimalNumberEnd(source, end + 2) : end;
}
/**
 * Skips an escaped unicode sequence, which may end with a single whitespace:
 * [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
 *
 * @param {string} source
 * @param {number} offset index of the first character after the backslash
 * @returns {number} index of the last character that belongs to the escape
 *                   (a following whitespace included). `offset` is returned
 *                   unchanged when no hex digit starts at `offset`, or when
 *                   `source` ends before a character terminating the hex
 *                   digits is seen.
 */
function findEscapeEnd(source, offset) {
    var length = source.length;
    var digits = 0;

    // consume up to 6 hex digits
    while (digits < 6 &&
           offset + digits < length &&
           isHex(source.charCodeAt(offset + digits))) {
        digits++;
    }

    var terminatorPos = offset + digits;

    if (digits === 0 || terminatorPos >= length) {
        return offset;
    }

    var terminator = source.charCodeAt(terminatorPos);

    // last hex digit, plus the newline (if any) that follows it
    var end = terminatorPos - 1 + getNewlineLength(source, terminatorPos, terminator);

    if (terminator === SPACE || terminator === TAB) {
        end++;
    }

    return end;
}
/**
 * Finds the end of an identifier that starts at `offset`.
 *
 * Scanning stops at the first character with a code below 0x80 that the
 * PUNCTUATION table marks as PUNCTUATOR. Escape sequences introduced by a
 * backslash are skipped via `findEscapeEnd`, so an escaped punctuator does
 * not end the identifier.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from
 * @returns {number} index of the terminating character, or `source.length`
 *                   when the identifier runs to the end of the input. The
 *                   result never exceeds `source.length`.
 */
function findIdentifierEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // moves to the last character of the escape; the loop increment
            // then steps past it
            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && PUNCTUATION[code] === PUNCTUATOR) {
            break;
        }
    }

    // a backslash as the very last code unit pushes the cursor one position
    // past the end of the input; keep the result inside the source bounds
    return Math.min(offset, source.length);
}
/**
 * Finds the end of a raw (unquoted) url that starts at `offset`.
 *
 * Scanning stops at the first character with a code below 0x80 that is
 * flagged with 1 in the STOP_URL_RAW table. Escape sequences introduced by a
 * backslash are skipped via `findEscapeEnd`, so an escaped stop character
 * does not end the url.
 *
 * @param {string} source
 * @param {number} offset index to start scanning from
 * @returns {number} index of the terminating character, or `source.length`
 *                   when the url runs to the end of the input. The result
 *                   never exceeds `source.length`.
 */
function findUrlRawEnd(source, offset) {
    for (; offset < source.length; offset++) {
        var code = source.charCodeAt(offset);

        if (code === BACK_SLASH) {
            // moves to the last character of the escape; the loop increment
            // then steps past it
            offset = findEscapeEnd(source, offset + 1);
        } else if (code < 0x80 && STOP_URL_RAW[code] === 1) {
            break;
        }
    }

    // a backslash as the very last code unit pushes the cursor one position
    // past the end of the input; keep the result inside the source bounds
    return Math.min(offset, source.length);
}
- module.exports = {
- firstCharOffset: firstCharOffset,
- isHex: isHex,
- isNumber: isNumber,
- isWhiteSpace: isWhiteSpace,
- isNewline: isNewline,
- getNewlineLength: getNewlineLength,
- cmpChar: cmpChar,
- cmpStr: cmpStr,
- findWhiteSpaceStart: findWhiteSpaceStart,
- findWhiteSpaceEnd: findWhiteSpaceEnd,
- findCommentEnd: findCommentEnd,
- findStringEnd: findStringEnd,
- findDecimalNumberEnd: findDecimalNumberEnd,
- findNumberEnd: findNumberEnd,
- findEscapeEnd: findEscapeEnd,
- findIdentifierEnd: findIdentifierEnd,
- findUrlRawEnd: findUrlRawEnd
- };
|