<?php
/**
 * Block Serialization Parser
 *
 * @package WordPress
 */
  7. /**
  8. * Class WP_Block_Parser_Block
  9. *
  10. * Holds the block structure in memory
  11. *
  12. * @since 5.0.0
  13. */
  14. class WP_Block_Parser_Block {
  15. /**
  16. * Name of block
  17. *
  18. * @example "core/paragraph"
  19. *
  20. * @since 5.0.0
  21. * @var string
  22. */
  23. public $blockName;
  24. /**
  25. * Optional set of attributes from block comment delimiters
  26. *
  27. * @example null
  28. * @example array( 'columns' => 3 )
  29. *
  30. * @since 5.0.0
  31. * @var array|null
  32. */
  33. public $attrs;
  34. /**
  35. * List of inner blocks (of this same class)
  36. *
  37. * @since 5.0.0
  38. * @var WP_Block_Parser_Block[]
  39. */
  40. public $innerBlocks;
  41. /**
  42. * Resultant HTML from inside block comment delimiters
  43. * after removing inner blocks
  44. *
  45. * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
  46. *
  47. * @since 5.0.0
  48. * @var string
  49. */
  50. public $innerHTML;
  51. /**
  52. * List of string fragments and null markers where inner blocks were found
  53. *
  54. * @example array(
  55. * 'innerHTML' => 'BeforeInnerAfter',
  56. * 'innerBlocks' => array( block, block ),
  57. * 'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
  58. * )
  59. *
  60. * @since 4.2.0
  61. * @var array
  62. */
  63. public $innerContent;
  64. /**
  65. * Constructor.
  66. *
  67. * Will populate object properties from the provided arguments.
  68. *
  69. * @since 5.0.0
  70. *
  71. * @param string $name Name of block.
  72. * @param array $attrs Optional set of attributes from block comment delimiters.
  73. * @param array $innerBlocks List of inner blocks (of this same class).
  74. * @param string $innerHTML Resultant HTML from inside block comment delimiters after removing inner blocks.
  75. * @param array $innerContent List of string fragments and null markers where inner blocks were found.
  76. */
  77. function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
  78. $this->blockName = $name;
  79. $this->attrs = $attrs;
  80. $this->innerBlocks = $innerBlocks;
  81. $this->innerHTML = $innerHTML;
  82. $this->innerContent = $innerContent;
  83. }
  84. }
  85. /**
  86. * Class WP_Block_Parser_Frame
  87. *
  88. * Holds partial blocks in memory while parsing
  89. *
  90. * @internal
  91. * @since 5.0.0
  92. */
  93. class WP_Block_Parser_Frame {
  94. /**
  95. * Full or partial block
  96. *
  97. * @since 5.0.0
  98. * @var WP_Block_Parser_Block
  99. */
  100. public $block;
  101. /**
  102. * Byte offset into document for start of parse token
  103. *
  104. * @since 5.0.0
  105. * @var int
  106. */
  107. public $token_start;
  108. /**
  109. * Byte length of entire parse token string
  110. *
  111. * @since 5.0.0
  112. * @var int
  113. */
  114. public $token_length;
  115. /**
  116. * Byte offset into document for after parse token ends
  117. * (used during reconstruction of stack into parse production)
  118. *
  119. * @since 5.0.0
  120. * @var int
  121. */
  122. public $prev_offset;
  123. /**
  124. * Byte offset into document where leading HTML before token starts
  125. *
  126. * @since 5.0.0
  127. * @var int
  128. */
  129. public $leading_html_start;
  130. /**
  131. * Constructor
  132. *
  133. * Will populate object properties from the provided arguments.
  134. *
  135. * @since 5.0.0
  136. *
  137. * @param WP_Block_Parser_Block $block Full or partial block.
  138. * @param int $token_start Byte offset into document for start of parse token.
  139. * @param int $token_length Byte length of entire parse token string.
  140. * @param int $prev_offset Byte offset into document for after parse token ends.
  141. * @param int $leading_html_start Byte offset into document where leading HTML before token starts.
  142. */
  143. function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
  144. $this->block = $block;
  145. $this->token_start = $token_start;
  146. $this->token_length = $token_length;
  147. $this->prev_offset = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
  148. $this->leading_html_start = $leading_html_start;
  149. }
  150. }
  151. /**
  152. * Class WP_Block_Parser
  153. *
  154. * Parses a document and constructs a list of parsed block objects
  155. *
  156. * @since 5.0.0
  157. * @since 4.0.0 returns arrays not objects, all attributes are arrays
  158. */
  159. class WP_Block_Parser {
  160. /**
  161. * Input document being parsed
  162. *
  163. * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
  164. *
  165. * @since 5.0.0
  166. * @var string
  167. */
  168. public $document;
  169. /**
  170. * Tracks parsing progress through document
  171. *
  172. * @since 5.0.0
  173. * @var int
  174. */
  175. public $offset;
  176. /**
  177. * List of parsed blocks
  178. *
  179. * @since 5.0.0
  180. * @var WP_Block_Parser_Block[]
  181. */
  182. public $output;
  183. /**
  184. * Stack of partially-parsed structures in memory during parse
  185. *
  186. * @since 5.0.0
  187. * @var WP_Block_Parser_Frame[]
  188. */
  189. public $stack;
  190. /**
  191. * Empty associative array, here due to PHP quirks
  192. *
  193. * @since 4.4.0
  194. * @var array empty associative array
  195. */
  196. public $empty_attrs;
  197. /**
  198. * Parses a document and returns a list of block structures
  199. *
  200. * When encountering an invalid parse will return a best-effort
  201. * parse. In contrast to the specification parser this does not
  202. * return an error on invalid inputs.
  203. *
  204. * @since 5.0.0
  205. *
  206. * @param string $document Input document being parsed.
  207. * @return array[]
  208. */
  209. function parse( $document ) {
  210. $this->document = $document;
  211. $this->offset = 0;
  212. $this->output = array();
  213. $this->stack = array();
  214. $this->empty_attrs = json_decode( '{}', true );
  215. do {
  216. // twiddle our thumbs.
  217. } while ( $this->proceed() );
  218. return $this->output;
  219. }
  220. /**
  221. * Processes the next token from the input document
  222. * and returns whether to proceed eating more tokens
  223. *
  224. * This is the "next step" function that essentially
  225. * takes a token as its input and decides what to do
  226. * with that token before descending deeper into a
  227. * nested block tree or continuing along the document
  228. * or breaking out of a level of nesting.
  229. *
  230. * @internal
  231. * @since 5.0.0
  232. * @return bool
  233. */
  234. function proceed() {
  235. $next_token = $this->next_token();
  236. list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
  237. $stack_depth = count( $this->stack );
  238. // we may have some HTML soup before the next block.
  239. $leading_html_start = $start_offset > $this->offset ? $this->offset : null;
  240. switch ( $token_type ) {
  241. case 'no-more-tokens':
  242. // if not in a block then flush output.
  243. if ( 0 === $stack_depth ) {
  244. $this->add_freeform();
  245. return false;
  246. }
  247. /*
  248. * Otherwise we have a problem
  249. * This is an error
  250. *
  251. * we have options
  252. * - treat it all as freeform text
  253. * - assume an implicit closer (easiest when not nesting)
  254. */
  255. // for the easy case we'll assume an implicit closer.
  256. if ( 1 === $stack_depth ) {
  257. $this->add_block_from_stack();
  258. return false;
  259. }
  260. /*
  261. * for the nested case where it's more difficult we'll
  262. * have to assume that multiple closers are missing
  263. * and so we'll collapse the whole stack piecewise
  264. */
  265. while ( 0 < count( $this->stack ) ) {
  266. $this->add_block_from_stack();
  267. }
  268. return false;
  269. case 'void-block':
  270. /*
  271. * easy case is if we stumbled upon a void block
  272. * in the top-level of the document
  273. */
  274. if ( 0 === $stack_depth ) {
  275. if ( isset( $leading_html_start ) ) {
  276. $this->output[] = (array) $this->freeform(
  277. substr(
  278. $this->document,
  279. $leading_html_start,
  280. $start_offset - $leading_html_start
  281. )
  282. );
  283. }
  284. $this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
  285. $this->offset = $start_offset + $token_length;
  286. return true;
  287. }
  288. // otherwise we found an inner block.
  289. $this->add_inner_block(
  290. new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
  291. $start_offset,
  292. $token_length
  293. );
  294. $this->offset = $start_offset + $token_length;
  295. return true;
  296. case 'block-opener':
  297. // track all newly-opened blocks on the stack.
  298. array_push(
  299. $this->stack,
  300. new WP_Block_Parser_Frame(
  301. new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
  302. $start_offset,
  303. $token_length,
  304. $start_offset + $token_length,
  305. $leading_html_start
  306. )
  307. );
  308. $this->offset = $start_offset + $token_length;
  309. return true;
  310. case 'block-closer':
  311. /*
  312. * if we're missing an opener we're in trouble
  313. * This is an error
  314. */
  315. if ( 0 === $stack_depth ) {
  316. /*
  317. * we have options
  318. * - assume an implicit opener
  319. * - assume _this_ is the opener
  320. * - give up and close out the document
  321. */
  322. $this->add_freeform();
  323. return false;
  324. }
  325. // if we're not nesting then this is easy - close the block.
  326. if ( 1 === $stack_depth ) {
  327. $this->add_block_from_stack( $start_offset );
  328. $this->offset = $start_offset + $token_length;
  329. return true;
  330. }
  331. /*
  332. * otherwise we're nested and we have to close out the current
  333. * block and add it as a new innerBlock to the parent
  334. */
  335. $stack_top = array_pop( $this->stack );
  336. $html = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
  337. $stack_top->block->innerHTML .= $html;
  338. $stack_top->block->innerContent[] = $html;
  339. $stack_top->prev_offset = $start_offset + $token_length;
  340. $this->add_inner_block(
  341. $stack_top->block,
  342. $stack_top->token_start,
  343. $stack_top->token_length,
  344. $start_offset + $token_length
  345. );
  346. $this->offset = $start_offset + $token_length;
  347. return true;
  348. default:
  349. // This is an error.
  350. $this->add_freeform();
  351. return false;
  352. }
  353. }
  354. /**
  355. * Scans the document from where we last left off
  356. * and finds the next valid token to parse if it exists
  357. *
  358. * Returns the type of the find: kind of find, block information, attributes
  359. *
  360. * @internal
  361. * @since 5.0.0
  362. * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
  363. * @return array
  364. */
  365. function next_token() {
  366. $matches = null;
  367. /*
  368. * aye the magic
  369. * we're using a single RegExp to tokenize the block comment delimiters
  370. * we're also using a trick here because the only difference between a
  371. * block opener and a block closer is the leading `/` before `wp:` (and
  372. * a closer has no attributes). we can trap them both and process the
  373. * match back in PHP to see which one it was.
  374. */
  375. $has_match = preg_match(
  376. '/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
  377. $this->document,
  378. $matches,
  379. PREG_OFFSET_CAPTURE,
  380. $this->offset
  381. );
  382. // if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
  383. if ( false === $has_match ) {
  384. return array( 'no-more-tokens', null, null, null, null );
  385. }
  386. // we have no more tokens.
  387. if ( 0 === $has_match ) {
  388. return array( 'no-more-tokens', null, null, null, null );
  389. }
  390. list( $match, $started_at ) = $matches[0];
  391. $length = strlen( $match );
  392. $is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
  393. $is_void = isset( $matches['void'] ) && -1 !== $matches['void'][1];
  394. $namespace = $matches['namespace'];
  395. $namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
  396. $name = $namespace . $matches['name'][0];
  397. $has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];
  398. /*
  399. * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
  400. * are associative arrays. If we use `array()` we get a JSON `[]`
  401. */
  402. $attrs = $has_attrs
  403. ? json_decode( $matches['attrs'][0], /* as-associative */ true )
  404. : $this->empty_attrs;
  405. /*
  406. * This state isn't allowed
  407. * This is an error
  408. */
  409. if ( $is_closer && ( $is_void || $has_attrs ) ) {
  410. // we can ignore them since they don't hurt anything.
  411. }
  412. if ( $is_void ) {
  413. return array( 'void-block', $name, $attrs, $started_at, $length );
  414. }
  415. if ( $is_closer ) {
  416. return array( 'block-closer', $name, null, $started_at, $length );
  417. }
  418. return array( 'block-opener', $name, $attrs, $started_at, $length );
  419. }
  420. /**
  421. * Returns a new block object for freeform HTML
  422. *
  423. * @internal
  424. * @since 3.9.0
  425. *
  426. * @param string $innerHTML HTML content of block.
  427. * @return WP_Block_Parser_Block freeform block object.
  428. */
  429. function freeform( $innerHTML ) {
  430. return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
  431. }
  432. /**
  433. * Pushes a length of text from the input document
  434. * to the output list as a freeform block.
  435. *
  436. * @internal
  437. * @since 5.0.0
  438. * @param null $length how many bytes of document text to output.
  439. */
  440. function add_freeform( $length = null ) {
  441. $length = $length ? $length : strlen( $this->document ) - $this->offset;
  442. if ( 0 === $length ) {
  443. return;
  444. }
  445. $this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
  446. }
  447. /**
  448. * Given a block structure from memory pushes
  449. * a new block to the output list.
  450. *
  451. * @internal
  452. * @since 5.0.0
  453. * @param WP_Block_Parser_Block $block The block to add to the output.
  454. * @param int $token_start Byte offset into the document where the first token for the block starts.
  455. * @param int $token_length Byte length of entire block from start of opening token to end of closing token.
  456. * @param int|null $last_offset Last byte offset into document if continuing form earlier output.
  457. */
  458. function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
  459. $parent = $this->stack[ count( $this->stack ) - 1 ];
  460. $parent->block->innerBlocks[] = (array) $block;
  461. $html = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
  462. if ( ! empty( $html ) ) {
  463. $parent->block->innerHTML .= $html;
  464. $parent->block->innerContent[] = $html;
  465. }
  466. $parent->block->innerContent[] = null;
  467. $parent->prev_offset = $last_offset ? $last_offset : $token_start + $token_length;
  468. }
  469. /**
  470. * Pushes the top block from the parsing stack to the output list.
  471. *
  472. * @internal
  473. * @since 5.0.0
  474. * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
  475. */
  476. function add_block_from_stack( $end_offset = null ) {
  477. $stack_top = array_pop( $this->stack );
  478. $prev_offset = $stack_top->prev_offset;
  479. $html = isset( $end_offset )
  480. ? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
  481. : substr( $this->document, $prev_offset );
  482. if ( ! empty( $html ) ) {
  483. $stack_top->block->innerHTML .= $html;
  484. $stack_top->block->innerContent[] = $html;
  485. }
  486. if ( isset( $stack_top->leading_html_start ) ) {
  487. $this->output[] = (array) $this->freeform(
  488. substr(
  489. $this->document,
  490. $stack_top->leading_html_start,
  491. $stack_top->token_start - $stack_top->leading_html_start
  492. )
  493. );
  494. }
  495. $this->output[] = (array) $stack_top->block;
  496. }
  497. }