// utils.js
  1. 'use strict';
  2. var constants = require('./const');
  3. var PUNCTUATION = constants.PUNCTUATION;
  4. var STOP_URL_RAW = constants.STOP_URL_RAW;
  5. var TYPE = constants.TYPE;
  6. var FULLSTOP = TYPE.FullStop;
  7. var PLUSSIGN = TYPE.PlusSign;
  8. var HYPHENMINUS = TYPE.HyphenMinus;
  9. var PUNCTUATOR = TYPE.Punctuator;
  10. var TAB = 9;
  11. var N = 10;
  12. var F = 12;
  13. var R = 13;
  14. var SPACE = 32;
  15. var BACK_SLASH = 92;
  16. var E = 101; // 'e'.charCodeAt(0)
  17. function firstCharOffset(source) {
  18. // detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
  19. if (source.charCodeAt(0) === 0xFEFF || // UTF-16BE
  20. source.charCodeAt(0) === 0xFFFE) { // UTF-16LE
  21. return 1;
  22. }
  23. return 0;
  24. }
  25. function isHex(code) {
  26. return (code >= 48 && code <= 57) || // 0 .. 9
  27. (code >= 65 && code <= 70) || // A .. F
  28. (code >= 97 && code <= 102); // a .. f
  29. }
  30. function isNumber(code) {
  31. return code >= 48 && code <= 57;
  32. }
  33. function isWhiteSpace(code) {
  34. return code === SPACE || code === TAB || isNewline(code);
  35. }
  36. function isNewline(code) {
  37. return code === R || code === N || code === F;
  38. }
  39. function getNewlineLength(source, offset, code) {
  40. if (isNewline(code)) {
  41. if (code === R && offset + 1 < source.length && source.charCodeAt(offset + 1) === N) {
  42. return 2;
  43. }
  44. return 1;
  45. }
  46. return 0;
  47. }
  48. function cmpChar(testStr, offset, referenceCode) {
  49. var code = testStr.charCodeAt(offset);
  50. // code.toLowerCase() for A..Z
  51. if (code >= 65 && code <= 90) {
  52. code = code | 32;
  53. }
  54. return code === referenceCode;
  55. }
  56. function cmpStr(testStr, start, end, referenceStr) {
  57. if (end - start !== referenceStr.length) {
  58. return false;
  59. }
  60. if (start < 0 || end > testStr.length) {
  61. return false;
  62. }
  63. for (var i = start; i < end; i++) {
  64. var testCode = testStr.charCodeAt(i);
  65. var refCode = referenceStr.charCodeAt(i - start);
  66. // testCode.toLowerCase() for A..Z
  67. if (testCode >= 65 && testCode <= 90) {
  68. testCode = testCode | 32;
  69. }
  70. if (testCode !== refCode) {
  71. return false;
  72. }
  73. }
  74. return true;
  75. }
  76. function findWhiteSpaceStart(source, offset) {
  77. while (offset >= 0 && isWhiteSpace(source.charCodeAt(offset))) {
  78. offset--;
  79. }
  80. return offset + 1;
  81. }
  82. function findWhiteSpaceEnd(source, offset) {
  83. while (offset < source.length && isWhiteSpace(source.charCodeAt(offset))) {
  84. offset++;
  85. }
  86. return offset;
  87. }
  88. function findCommentEnd(source, offset) {
  89. var commentEnd = source.indexOf('*/', offset);
  90. if (commentEnd === -1) {
  91. return source.length;
  92. }
  93. return commentEnd + 2;
  94. }
  95. function findStringEnd(source, offset, quote) {
  96. for (; offset < source.length; offset++) {
  97. var code = source.charCodeAt(offset);
  98. // TODO: bad string
  99. if (code === BACK_SLASH) {
  100. offset++;
  101. } else if (code === quote) {
  102. offset++;
  103. break;
  104. }
  105. }
  106. return offset;
  107. }
  108. function findDecimalNumberEnd(source, offset) {
  109. while (offset < source.length && isNumber(source.charCodeAt(offset))) {
  110. offset++;
  111. }
  112. return offset;
  113. }
  114. function findNumberEnd(source, offset, allowFraction) {
  115. var code;
  116. offset = findDecimalNumberEnd(source, offset);
  117. // fraction: .\d+
  118. if (allowFraction && offset + 1 < source.length && source.charCodeAt(offset) === FULLSTOP) {
  119. code = source.charCodeAt(offset + 1);
  120. if (isNumber(code)) {
  121. offset = findDecimalNumberEnd(source, offset + 1);
  122. }
  123. }
  124. // exponent: e[+-]\d+
  125. if (offset + 1 < source.length) {
  126. if ((source.charCodeAt(offset) | 32) === E) { // case insensitive check for `e`
  127. code = source.charCodeAt(offset + 1);
  128. if (code === PLUSSIGN || code === HYPHENMINUS) {
  129. if (offset + 2 < source.length) {
  130. code = source.charCodeAt(offset + 2);
  131. }
  132. }
  133. if (isNumber(code)) {
  134. offset = findDecimalNumberEnd(source, offset + 2);
  135. }
  136. }
  137. }
  138. return offset;
  139. }
  140. // skip escaped unicode sequence that can ends with space
  141. // [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
  142. function findEscapeEnd(source, offset) {
  143. for (var i = 0; i < 7 && offset + i < source.length; i++) {
  144. var code = source.charCodeAt(offset + i);
  145. if (i !== 6 && isHex(code)) {
  146. continue;
  147. }
  148. if (i > 0) {
  149. offset += i - 1 + getNewlineLength(source, offset + i, code);
  150. if (code === SPACE || code === TAB) {
  151. offset++;
  152. }
  153. }
  154. break;
  155. }
  156. return offset;
  157. }
  158. function findIdentifierEnd(source, offset) {
  159. for (; offset < source.length; offset++) {
  160. var code = source.charCodeAt(offset);
  161. if (code === BACK_SLASH) {
  162. offset = findEscapeEnd(source, offset + 1);
  163. } else if (code < 0x80 && PUNCTUATION[code] === PUNCTUATOR) {
  164. break;
  165. }
  166. }
  167. return offset;
  168. }
  169. function findUrlRawEnd(source, offset) {
  170. for (; offset < source.length; offset++) {
  171. var code = source.charCodeAt(offset);
  172. if (code === BACK_SLASH) {
  173. offset = findEscapeEnd(source, offset + 1);
  174. } else if (code < 0x80 && STOP_URL_RAW[code] === 1) {
  175. break;
  176. }
  177. }
  178. return offset;
  179. }
  180. module.exports = {
  181. firstCharOffset: firstCharOffset,
  182. isHex: isHex,
  183. isNumber: isNumber,
  184. isWhiteSpace: isWhiteSpace,
  185. isNewline: isNewline,
  186. getNewlineLength: getNewlineLength,
  187. cmpChar: cmpChar,
  188. cmpStr: cmpStr,
  189. findWhiteSpaceStart: findWhiteSpaceStart,
  190. findWhiteSpaceEnd: findWhiteSpaceEnd,
  191. findCommentEnd: findCommentEnd,
  192. findStringEnd: findStringEnd,
  193. findDecimalNumberEnd: findDecimalNumberEnd,
  194. findNumberEnd: findNumberEnd,
  195. findEscapeEnd: findEscapeEnd,
  196. findIdentifierEnd: findIdentifierEnd,
  197. findUrlRawEnd: findUrlRawEnd
  198. };