// char-class-to-meta-transform.js

  /**
   * The MIT License (MIT)
   * Copyright (c) 2017-present Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
   */
  5. 'use strict';
  6. /**
  7. * A regexp-tree plugin to replace standard character classes with
  8. * their meta symbols equivalents.
  9. */
  10. module.exports = {
  11. _hasIFlag: false,
  12. _hasUFlag: false,
  13. init: function init(ast) {
  14. this._hasIFlag = ast.flags.includes('i');
  15. this._hasUFlag = ast.flags.includes('u');
  16. },
  17. CharacterClass: function CharacterClass(path) {
  18. // [0-9] -> \d
  19. rewriteNumberRanges(path);
  20. // [a-zA-Z_0-9] -> \w
  21. rewriteWordRanges(path, this._hasIFlag, this._hasUFlag);
  22. // [ \t\r\n\f] -> \s
  23. rewriteWhitespaceRanges(path);
  24. }
  25. };
  /**
   * Rewrites number ranges: [0-9] -> \d
   *
   * Each expression of the class that is a full `0-9` range is swapped for a
   * `\d` meta char node through the child path's `replace()`; all other
   * expressions are left alone.
   *
   * @param {Object} path - path wrapper of a CharacterClass node; must expose
   *   `node.expressions` and `getChild(index)`.
   */
  function rewriteNumberRanges(path) {
    path.node.expressions.forEach((expression, index) => {
      if (!isFullNumberRange(expression)) {
        return;
      }

      path.getChild(index).replace({
        type: 'Char',
        value: '\\d',
        kind: 'meta',
      });
    });
  }
  /**
   * Rewrites word ranges: [a-zA-Z_0-9] -> \w
   *
   * The parts may appear in any order, and other symbols/ranges are kept
   * untouched, e.g. [a-z_\dA-Z$] -> [\w$]. Digits are only recognised as the
   * `\d` meta char, not as a raw `0-9` range.
   *
   * @param {Object} path - path wrapper of a CharacterClass node; must expose
   *   `node.expressions` and `getChild(index)`.
   * @param {boolean} hasIFlag - the regexp has the `i` flag; one of `a-z` /
   *   `A-Z` is then enough to stand for the letters.
   * @param {boolean} hasUFlag - the regexp has the `u` flag; combined with `i`
   *   the class must also list U+017F and U+212A to be rewritten.
   */
  function rewriteWordRanges(path, hasIFlag, hasUFlag) {
    // Only with both flags are U+017F / U+212A looked for and required.
    const requiresFoldedChars = hasIFlag && hasUFlag;

    let numberPath = null;
    let lowerCasePath = null;
    let upperCasePath = null;
    let underscorePath = null;
    let u017fPath = null;
    let u212aPath = null;

    path.node.expressions.forEach((expression, index) => {
      if (isMetaChar(expression, '\\d')) {
        // \d
        numberPath = path.getChild(index);
      } else if (isLowerCaseRange(expression)) {
        // a-z
        lowerCasePath = path.getChild(index);
      } else if (isUpperCaseRange(expression)) {
        // A-Z
        upperCasePath = path.getChild(index);
      } else if (isUnderscore(expression)) {
        // _
        underscorePath = path.getChild(index);
      } else if (requiresFoldedChars && isU017fPath(expression)) {
        u017fPath = path.getChild(index);
      } else if (requiresFoldedChars && isU212aPath(expression)) {
        u212aPath = path.getChild(index);
      }
    });

    // Case-insensitive: either letter range covers both cases.
    const hasLetters = hasIFlag
      ? lowerCasePath || upperCasePath
      : lowerCasePath && upperCasePath;
    const hasFoldedChars = !requiresFoldedChars || (u017fPath && u212aPath);

    // Bail out unless the whole pattern was found.
    if (!(numberPath && hasLetters && underscorePath && hasFoldedChars)) {
      return;
    }

    // Put \w in place of \d.
    numberPath.replace({
      type: 'Char',
      value: '\\w',
      kind: 'meta',
    });

    // Every other matched part is now redundant and is removed.
    [lowerCasePath, upperCasePath, underscorePath, u017fPath, u212aPath]
      .filter(Boolean)
      .forEach((redundantPath) => redundantPath.remove());
  }
  /**
   * Rewrites whitespace ranges: [ \t\r\n\f] -> \s.
   *
   * The space, `\t`, `\n` and `\r` must all be present (in any order); `\f`
   * is optional and is dropped when present. Other expressions of the class
   * are kept untouched.
   *
   * NOTE(review): in ECMAScript `\s` also matches characters that are not
   * listed here (e.g. `\v`, U+00A0, U+2028), so the rewritten class accepts
   * more input than the original one - confirm this widening is intended.
   *
   * @param {Object} path - path wrapper of a CharacterClass node; must expose
   *   `node.expressions` and `getChild(index)`.
   */
  function rewriteWhitespaceRanges(path) {
    let spacePath = null;
    let tPath = null;
    let nPath = null;
    let rPath = null;
    let fPath = null;

    path.node.expressions.forEach((expression, index) => {
      if (isChar(expression, ' ')) {
        // Space
        spacePath = path.getChild(index);
      } else if (isMetaChar(expression, '\\t')) {
        // \t
        tPath = path.getChild(index);
      } else if (isMetaChar(expression, '\\n')) {
        // \n
        nPath = path.getChild(index);
      } else if (isMetaChar(expression, '\\r')) {
        // \r
        rPath = path.getChild(index);
      } else if (isMetaChar(expression, '\\f')) {
        // \f
        fPath = path.getChild(index);
      }
    });

    // Bail out unless the whole pattern was found (\f is optional).
    if (!(spacePath && tPath && nPath && rPath)) {
      return;
    }

    // Put \s in place of \n. A fresh node is used (as the \d and \w rewrites
    // do) instead of overwriting `value` in place, so that no other field of
    // the old `\n` node (e.g. a code point) survives on the `\s` node.
    nPath.replace({
      type: 'Char',
      value: '\\s',
      kind: 'meta',
    });

    // Other paths are removed.
    [spacePath, tPath, rPath, fPath]
      .filter(Boolean)
      .forEach((redundantPath) => redundantPath.remove());
  }
  /**
   * Tells whether a node is the class range `0-9`.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} true for a ClassRange whose `from` value is '0' and
   *   whose `to` value is '9'.
   */
  function isFullNumberRange(node) {
    const { type, from, to } = node;
    return type === 'ClassRange' && from.value === '0' && to.value === '9';
  }
  /**
   * Tells whether a node is a Char with the given value and kind.
   *
   * @param {Object} node - AST node to inspect.
   * @param {string} value - expected `value` of the node.
   * @param {string} [kind='simple'] - expected `kind` of the node; an omitted
   *   or `undefined` argument means 'simple'.
   * @returns {boolean}
   */
  function isChar(node, value, kind = 'simple') {
    return node.type === 'Char' && node.value === value && node.kind === kind;
  }
  /**
   * Tells whether a node is a Char of kind 'meta' with the given value,
   * e.g. `isMetaChar(node, '\\d')`.
   *
   * @param {Object} node - AST node to inspect.
   * @param {string} value - expected `value` of the node.
   * @returns {boolean}
   */
  function isMetaChar(node, value) {
    return isChar(node, value, 'meta');
  }
  /**
   * Tells whether a node is the class range `a-z`.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} true for a ClassRange whose `from` value is 'a' and
   *   whose `to` value is 'z'.
   */
  function isLowerCaseRange(node) {
    const { type, from, to } = node;
    return type === 'ClassRange' && from.value === 'a' && to.value === 'z';
  }
  /**
   * Tells whether a node is the class range `A-Z`.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} true for a ClassRange whose `from` value is 'A' and
   *   whose `to` value is 'Z'.
   */
  function isUpperCaseRange(node) {
    const { type, from, to } = node;
    return type === 'ClassRange' && from.value === 'A' && to.value === 'Z';
  }
  /**
   * Tells whether a node is the plain underscore character.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} true for a Char of kind 'simple' whose value is '_'.
   */
  function isUnderscore(node) {
    const { type, value, kind } = node;
    return type === 'Char' && value === '_' && kind === 'simple';
  }
  /**
   * Tells whether a node is U+017F (LATIN SMALL LETTER LONG S) given as a
   * Char of kind 'unicode'.
   *
   * Despite the name, the argument is an AST node, not a path.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean}
   */
  function isU017fPath(node) {
    const { type, kind, codePoint } = node;
    return type === 'Char' && kind === 'unicode' && codePoint === 0x017f;
  }
  /**
   * Tells whether a node is U+212A (KELVIN SIGN) given as a Char of kind
   * 'unicode'.
   *
   * Despite the name, the argument is an AST node, not a path.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean}
   */
  function isU212aPath(node) {
    const { type, kind, codePoint } = node;
    return type === 'Char' && kind === 'unicode' && codePoint === 0x212a;
  }