block-serialization-default-parser.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. /******/ (function() { // webpackBootstrap
  2. /******/ "use strict";
  // The require scope: a plain object that only carries the runtime helpers
  // below (this bundle contains a single module, so nothing is ever required).
  const __webpack_require__ = {};

  /**
   * webpack/runtime/hasOwnProperty shorthand.
   *
   * @param {Object} obj  Object to inspect.
   * @param {string} prop Property name.
   * @return {boolean} True when `prop` is an own property of `obj`.
   */
  __webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

  /**
   * webpack/runtime/define property getters.
   *
   * Installs every own enumerable entry of `definition` on `exports` as an
   * enumerable getter. Keys that `exports` already owns are left untouched.
   *
   * @param {Object} exports    Target exports object.
   * @param {Object} definition Map of export name to getter function.
   */
  __webpack_require__.d = (exports, definition) => {
    Object.keys(definition).forEach((key) => {
      if (__webpack_require__.o(exports, key)) {
        return;
      }
      Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });
    });
  };

  /**
   * webpack/runtime/make namespace object.
   *
   * Tags `exports` as an ES module: sets `Symbol.toStringTag` to 'Module'
   * where symbols are available and defines `__esModule` as true.
   *
   * @param {Object} exports Target exports object.
   */
  __webpack_require__.r = (exports) => {
    const canTag = typeof Symbol !== 'undefined' && Symbol.toStringTag;
    if (canTag) {
      Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
    }
    Object.defineProperty(exports, '__esModule', { value: true });
  };

  // Exports object of this bundle; `parse` is exposed through a lazy getter so
  // that it may be declared further down.
  const __webpack_exports__ = {};
  __webpack_require__.r(__webpack_exports__);
  __webpack_require__.d(__webpack_exports__, {
    parse: () => parse,
  });
  /**
   * Text of the document currently being parsed; assigned by `parse()`.
   * Note that inside this module the name shadows the browser's global
   * `document`.
   *
   * @type {string}
   */
  let document;
  /**
   * Position in `document` up to which input has already been consumed.
   *
   * @type {number}
   */
  let offset;
  /**
   * Top-level blocks collected so far; this is what `parse()` returns.
   *
   * @type {ParsedBlock[]}
   */
  let output;
  /**
   * Frames of the blocks that have been opened but not yet closed,
   * innermost last.
   *
   * @type {ParsedFrame[]}
   */
  let stack;
  57. /**
  58. * @typedef {Object|null} Attributes
  59. */
  60. /**
  61. * @typedef {Object} ParsedBlock
  62. * @property {string|null} blockName Block name.
  63. * @property {Attributes} attrs Block attributes.
  64. * @property {ParsedBlock[]} innerBlocks Inner blocks.
  65. * @property {string} innerHTML Inner HTML.
  66. * @property {Array<string|null>} innerContent Inner content.
  67. */
  68. /**
  69. * @typedef {Object} ParsedFrame
  70. * @property {ParsedBlock} block Block.
  71. * @property {number} tokenStart Token start.
  72. * @property {number} tokenLength Token length.
  73. * @property {number} prevOffset Previous offset.
  74. * @property {number|null} leadingHtmlStart Leading HTML start.
  75. */
  76. /**
  77. * @typedef {'void-block'|'block-opener'|'block-closer'} TokenType
  78. */
  79. /**
  80. * @typedef {[TokenType, string, Attributes, number, number]} Token
  81. */
  /**
   * Matches block comment delimiters.
   *
   * One global pattern recognises all three delimiter kinds:
   * openers `<!-- wp:name {attrs} -->`, closers `<!-- /wp:name -->` and
   * void blocks `<!-- wp:name {attrs} /-->`.
   *
   * Capture groups:
   *   1. `/` when the delimiter is a closer
   *   2. optional namespace, including its trailing slash (e.g. `core/`)
   *   3. block name
   *   4. optional attributes text: `{...}` plus the whitespace after it
   *   5. internal, exists only to feed the back reference `\5`
   *   6. `/` when the delimiter is a void block
   *
   * Most of the pattern is straightforward, but the attribute part
   * incorporates a trick to make sure we don't choke on specific input.
   *
   * JavaScript has no possessive quantifier or atomic grouping, yet we want
   * one of them to prevent backtracking on the `}`s should the remainder of
   * the pattern fail to match. Both can be emulated with a positive lookahead
   * and a back reference:
   *
   *   (a++)*c === ((?=(a+))\1)*c
   *
   * An example:
   * - /(a+)*c/.test('aaaaaaaaaaaaad') fails after over 49,000 steps
   * - /(a++)*c/.test('aaaaaaaaaaaaad') fails after 85 steps
   * - /(?>a+)*c/.test('aaaaaaaaaaaaad') fails after 126 steps
   *
   * The possessive `++` and the atomic group `(?>)` tell the engine that all
   * those `a`s belong together as a single group, so it won't split the run
   * up when stepping backwards to try and match.
   *
   * With /((?=(a+))\1)*c/ we get the same behavior: inside the lookahead the
   * `a+` is matched without consuming input, the long run of `a`s is
   * remembered, and the back reference then consumes it as a whole unit.
   *
   * @see http://instanceof.me/post/52245507631/regex-emulate-atomic-grouping-with-lookahead
   * @see http://blog.stevenlevithan.com/archives/mimic-atomic-groups
   * @see https://javascript.info/regexp-infinite-backtracking-problem
   *
   * Once browsers reliably support atomic grouping or possessive quantifiers
   * natively this trick should be removed and the pattern simplified.
   *
   * Because the pattern carries the `g` flag it is stateful: `exec()` resumes
   * from `lastIndex`, which therefore has to be reset before a new document.
   *
   * @type {RegExp}
   *
   * @since 3.8.0
   * @since 4.6.1 added optimization to prevent backtracking on attribute parsing
   */
  const tokenizer = /<!--\s+(\/)?wp:([a-z][a-z0-9_-]*\/)?([a-z][a-z0-9_-]*)\s+({(?:(?=([^}]+|}+(?=})|(?!}\s+\/?-->)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g;
  /**
   * Builds the plain object that represents one parsed block.
   *
   * The arguments are stored as given (no copying), so later mutation of the
   * passed arrays is visible through the returned block.
   *
   * @param {string|null}        blockName    Fully qualified block name, or null for freeform content.
   * @param {Attributes}         attrs        Block attributes.
   * @param {ParsedBlock[]}      innerBlocks  Nested blocks.
   * @param {string}             innerHTML    HTML of the block without its inner blocks.
   * @param {Array<string|null>} innerContent HTML chunks, with null marking the position of each inner block.
   * @return {ParsedBlock} The block object.
   */
  function Block(blockName, attrs, innerBlocks, innerHTML, innerContent) {
    const parsedBlock = {
      blockName: blockName,
      attrs: attrs,
      innerBlocks: innerBlocks,
      innerHTML: innerHTML,
      innerContent: innerContent,
    };
    return parsedBlock;
  }
  /**
   * Builds a freeform block: content that is not wrapped in block delimiters.
   *
   * A freeform block has no name (`blockName` is null), empty attributes, no
   * inner blocks, and its whole HTML as the single `innerContent` entry.
   *
   * @param {string} innerHTML Raw HTML of the freeform content.
   * @return {ParsedBlock} The freeform block object.
   */
  function Freeform(innerHTML) {
    const noAttributes = {};
    const noInnerBlocks = [];
    const content = [innerHTML];
    return Block(null, noAttributes, noInnerBlocks, innerHTML, content);
  }
  /**
   * Builds a stack frame that tracks a block which has been opened but not
   * yet closed.
   *
   * @param {ParsedBlock} block            The block being built.
   * @param {number}      tokenStart       Offset at which the opening delimiter starts.
   * @param {number}      tokenLength      Length of the opening delimiter.
   * @param {number}      prevOffset       Offset from which the next chunk of inner HTML starts;
   *                                       any falsy value falls back to the end of the
   *                                       opening delimiter (`tokenStart + tokenLength`).
   * @param {number|null} leadingHtmlStart Offset of freeform HTML preceding the block, or null.
   * @return {ParsedFrame} The frame object.
   */
  function Frame(block, tokenStart, tokenLength, prevOffset, leadingHtmlStart) {
    const frame = { block, tokenStart, tokenLength, prevOffset, leadingHtmlStart };
    if (!frame.prevOffset) {
      frame.prevOffset = tokenStart + tokenLength;
    }
    return frame;
  }
  /**
   * Parser entry point: converts post HTML into a list of block objects.
   *
   * The parser keeps its working state in module-level variables, which this
   * function resets on every call before driving `proceed()` until it reports
   * that there is nothing left to do.
   *
   * @param {string} doc The HTML document to parse.
   *
   * @example
   * Input post:
   * ```html
   * <!-- wp:columns {"columns":3} -->
   * <div class="wp-block-columns has-3-columns"><!-- wp:column -->
   * <div class="wp-block-column"><!-- wp:paragraph -->
   * <p>Left</p>
   * <!-- /wp:paragraph --></div>
   * <!-- /wp:column -->
   *
   * <!-- wp:column -->
   * <div class="wp-block-column"><!-- wp:paragraph -->
   * <p><strong>Middle</strong></p>
   * <!-- /wp:paragraph --></div>
   * <!-- /wp:column -->
   *
   * <!-- wp:column -->
   * <div class="wp-block-column"></div>
   * <!-- /wp:column --></div>
   * <!-- /wp:columns -->
   * ```
   *
   * Parsing code (the `innerContent` arrays are left out for brevity; blocks
   * whose delimiter carries no attributes get an empty object as `attrs`):
   * ```js
   * import { parse } from '@wordpress/block-serialization-default-parser';
   *
   * parse( post );
   * // [
   * //   {
   * //     blockName: "core/columns",
   * //     attrs: {
   * //       columns: 3
   * //     },
   * //     innerBlocks: [
   * //       {
   * //         blockName: "core/column",
   * //         attrs: {},
   * //         innerBlocks: [
   * //           {
   * //             blockName: "core/paragraph",
   * //             attrs: {},
   * //             innerBlocks: [],
   * //             innerHTML: "\n<p>Left</p>\n"
   * //           }
   * //         ],
   * //         innerHTML: '\n<div class="wp-block-column"></div>\n'
   * //       },
   * //       {
   * //         blockName: "core/column",
   * //         attrs: {},
   * //         innerBlocks: [
   * //           {
   * //             blockName: "core/paragraph",
   * //             attrs: {},
   * //             innerBlocks: [],
   * //             innerHTML: "\n<p><strong>Middle</strong></p>\n"
   * //           }
   * //         ],
   * //         innerHTML: '\n<div class="wp-block-column"></div>\n'
   * //       },
   * //       {
   * //         blockName: "core/column",
   * //         attrs: {},
   * //         innerBlocks: [],
   * //         innerHTML: '\n<div class="wp-block-column"></div>\n'
   * //       }
   * //     ],
   * //     innerHTML: '\n<div class="wp-block-columns has-3-columns">\n\n\n\n</div>\n'
   * //   }
   * // ]
   * ```
   * @return {ParsedBlock[]} A block-based representation of the input HTML.
   */
  const parse = (doc) => {
    // Start from a clean slate: fresh containers and the cursor at the start.
    stack = [];
    output = [];
    offset = 0;
    document = doc;

    // The tokenizer is a global (`g`) RegExp, so its scan position has to be
    // rewound as well or matching would resume where the last parse stopped.
    tokenizer.lastIndex = 0;

    // Each call consumes one token; all the work happens as a side effect.
    while (proceed()) {
      // Nothing to do here.
    }

    return output;
  };
  /**
   * Consumes the next token of the input document and updates the parser
   * state (`offset`, `stack`, `output`) accordingly.
   *
   * - No token left: remaining text becomes a freeform block, or, when blocks
   *   are still open, every open block is closed implicitly.
   * - Void block: added to the output (top level) or to the enclosing block.
   * - Opener: a new frame is pushed onto the stack.
   * - Closer: the innermost open block is finished and handed to the output
   *   (top level) or to its parent. The closer's name is not compared with
   *   the opener's.
   *
   * @return {boolean} Returns true when there are more tokens to parse.
   */
  function proceed() {
    // Depth is sampled before reading the token; nothing below relies on it
    // after the stack has been modified.
    const stackDepth = stack.length;
    const next = nextToken();

    if (next === null) {
      // If not in a block then flush the remaining text as freeform output.
      if (stackDepth === 0) {
        addFreeform();
        return false;
      }

      // Otherwise closers are missing. The options would be to treat it all
      // as freeform text or to assume implicit closers; we assume implicit
      // closers and collapse the whole stack piecewise. Every frame, nested
      // or not, is emitted through addBlockFromStack(), i.e. to the output.
      while (stack.length > 0) {
        addBlockFromStack();
      }
      return false;
    }

    const [tokenType, blockName, attrs, startOffset, tokenLength] = next;
    const tokenEnd = startOffset + tokenLength;

    // We may have some HTML soup before the next block.
    const leadingHtmlStart = startOffset > offset ? offset : null;

    switch (tokenType) {
      case 'void-block':
        if (stackDepth === 0) {
          // Easy case: a void block in the top level of the document.
          if (leadingHtmlStart !== null) {
            output.push(Freeform(document.slice(leadingHtmlStart, startOffset)));
          }
          output.push(Block(blockName, attrs, [], '', []));
        } else {
          // Otherwise we found an inner block.
          addInnerBlock(Block(blockName, attrs, [], '', []), startOffset, tokenLength);
        }
        offset = tokenEnd;
        return true;

      case 'block-opener':
        // Track all newly-opened blocks on the stack.
        stack.push(Frame(Block(blockName, attrs, [], '', []), startOffset, tokenLength, tokenEnd, leadingHtmlStart));
        offset = tokenEnd;
        return true;

      case 'block-closer': {
        // A closer without an opener is an error. We could assume an implicit
        // opener or treat this token as the opener; instead we give up and
        // close out the document as freeform text.
        if (stackDepth === 0) {
          addFreeform();
          return false;
        }

        // If we're not nesting then this is easy - close the block.
        if (stackDepth === 1) {
          addBlockFromStack(startOffset);
          offset = tokenEnd;
          return true;
        }

        // Otherwise we're nested: finish the current block and add it as an
        // inner block to its parent. The trailing HTML chunk is recorded even
        // when it is empty.
        const stackTop = /** @type {ParsedFrame} */ (stack.pop());
        const html = document.slice(stackTop.prevOffset, startOffset);
        stackTop.block.innerHTML += html;
        stackTop.block.innerContent.push(html);
        addInnerBlock(stackTop.block, stackTop.tokenStart, stackTop.tokenLength, tokenEnd);
        offset = tokenEnd;
        return true;
      }

      default:
        // Unknown token type: this is an error, flush what is left.
        addFreeform();
        return false;
    }
  }
  /**
   * Parses a JSON string, yielding null instead of throwing on invalid input.
   *
   * Note that JSON coming from the block comment delimiters is constrained by
   * the tokenizer to start with `{`, so in practice the result is an object
   * and cannot be things like `true` or `null`. This function itself does not
   * enforce that.
   *
   * @param {string} input JSON input string to parse.
   * @return {Object|null} Parsed JSON if valid, otherwise null.
   */
  function parseJSON(input) {
    let parsed = null;
    try {
      parsed = JSON.parse(input);
    } catch (error) {
      // Invalid JSON is expected here and deliberately mapped to null.
      parsed = null;
    }
    return parsed;
  }
  /**
   * Finds the next block delimiter in the document.
   *
   * A single RegExp tokenizes every delimiter kind. The only difference
   * between an opener and a closer is the leading `/` before `wp:` (and a
   * closer has no attributes), so both are trapped by the same pattern and
   * told apart here from the captured groups. Scanning resumes from the
   * tokenizer's `lastIndex`.
   *
   * @return {Token|null} The next matched token as
   *                      `[type, blockName, attrs, startOffset, length]`,
   *                      or null when no delimiter is left.
   */
  function nextToken() {
    const found = tokenizer.exec(document);

    // We have no more tokens.
    if (found === null) {
      return null;
    }

    // The fifth group is internal to the pattern and skipped.
    const [whole, closerSlash, namespacePart, namePart, rawAttrs, , voidSlash] = found;
    const start = found.index;
    const size = whole.length;

    // Blocks without an explicit namespace belong to `core/`.
    const blockName = (namespacePart || 'core/') + namePart;

    // Missing attributes yield an empty object, malformed ones yield null.
    const attrs = rawAttrs ? parseJSON(rawAttrs) : {};

    // A closer that is also void or carries attributes is not an allowed
    // state, but it is tolerated since it doesn't hurt anything; we may warn
    // against it or reject it at some point.

    if (voidSlash) {
      return ['void-block', blockName, attrs, start, size];
    }
    if (closerSlash) {
      return ['block-closer', blockName, null, start, size];
    }
    return ['block-opener', blockName, attrs, start, size];
  }
  /**
   * Adds a freeform block to the output, starting at the current `offset`.
   *
   * Nothing is added when the resulting length is zero or negative, so an
   * explicit `rawLength` of 0 means "no content" rather than "not given".
   *
   * @param {number} [rawLength] Number of characters to take; when omitted
   *                             (undefined or null) everything up to the end
   *                             of the document is used.
   */
  function addFreeform(rawLength) {
    // Only a missing argument selects the default; 0 is a legitimate length.
    const length = rawLength == null ? document.length - offset : rawLength;

    if (length <= 0) {
      return;
    }

    output.push(Freeform(document.slice(offset, offset + length)));
  }
  /**
   * Adds a block to the innermost open block on the stack (its parent).
   *
   * The HTML found between the parent's previous offset and the start of the
   * inner block is appended to the parent's `innerHTML` and `innerContent`,
   * followed by a `null` placeholder in `innerContent` that marks where the
   * inner block sits.
   *
   * @param {ParsedBlock} block       The inner block to add.
   * @param {number}      tokenStart  Offset at which the inner block's first delimiter starts.
   * @param {number}      tokenLength Length of that delimiter.
   * @param {number}      [lastOffset] Offset just past the inner block; when omitted,
   *                                   the end of the delimiter is used.
   */
  function addInnerBlock(block, tokenStart, tokenLength, lastOffset) {
    const parent = stack[stack.length - 1];
    parent.block.innerBlocks.push(block);

    // HTML between the previous inner block (or the opener) and this one.
    const html = document.slice(parent.prevOffset, tokenStart);
    if (html) {
      parent.block.innerHTML += html;
      parent.block.innerContent.push(html);
    }

    // Placeholder marking the inner block's position among the HTML chunks.
    parent.block.innerContent.push(null);

    // The parent's next HTML chunk starts after the inner block.
    parent.prevOffset = lastOffset ? lastOffset : tokenStart + tokenLength;
  }
  /**
   * Pops the innermost frame off the stack and adds its block to the output.
   *
   * The block's remaining inner HTML (from the frame's previous offset up to
   * `endOffset`) is appended first. Freeform HTML that preceded the block's
   * opener, if any, is emitted as its own block right before it.
   *
   * @param {number} [endOffset] Offset at which the block's content ends; when
   *                             omitted, the content runs to the end of the
   *                             document (used for implicitly closed blocks).
   */
  function addBlockFromStack(endOffset) {
    const frame = /** @type {ParsedFrame} */ (stack.pop());
    const { block, leadingHtmlStart, prevOffset, tokenStart } = frame;

    const html = endOffset
      ? document.slice(prevOffset, endOffset)
      : document.slice(prevOffset);
    if (html) {
      block.innerHTML += html;
      block.innerContent.push(html);
    }

    // Freeform content in front of the opener goes out before the block.
    if (leadingHtmlStart !== null) {
      output.push(Freeform(document.slice(leadingHtmlStart, tokenStart)));
    }

    output.push(block);
  }
  // Publish the module's exports on the global `wp` namespace, creating the
  // namespace first when no other script has done so yet.
  const wpNamespace = window.wp || {};
  window.wp = wpNamespace;
  wpNamespace.blockSerializationDefaultParser = __webpack_exports__;
  455. /******/ })()
  456. ;