<?php
/**
 * Block Serialization Parser
 *
 * @package WordPress
 */
/**
 * Class WP_Block_Parser_Block
 *
 * In-memory representation of a single parsed block.
 *
 * The order in which the properties are declared is the order in which
 * they appear when an instance is cast to an array, so it must not change.
 *
 * @since 5.0.0
 */
class WP_Block_Parser_Block {
	/**
	 * Fully-qualified block name.
	 *
	 * @example "core/paragraph"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $blockName;

	/**
	 * Attributes read from the block comment delimiter, if any.
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 5.0.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * Blocks nested inside this one (instances of this same class).
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks;

	/**
	 * HTML found between the block comment delimiters once the
	 * inner blocks have been taken out.
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * HTML fragments interleaved with null placeholders, one placeholder
	 * for every position at which an inner block was found.
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Copies each argument onto the property of the matching name.
	 *
	 * @since 5.0.0
	 *
	 * @param string $name         Name of block.
	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array  $innerBlocks  List of inner blocks (of this same class).
	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		$this->blockName    = $name;
		$this->attrs        = $attrs;
		$this->innerBlocks  = $innerBlocks;
		$this->innerHTML    = $innerHTML;
		$this->innerContent = $innerContent;
	}
}
/**
 * Class WP_Block_Parser_Frame
 *
 * Bookkeeping record for a block that has been opened but not yet
 * closed while the parser walks the document.
 *
 * @internal
 * @since 5.0.0
 */
class WP_Block_Parser_Frame {
	/**
	 * The block under construction (may still be incomplete).
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset in the document at which the parse token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Byte length of the whole parse token string.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset in the document just past the end of the parse token
	 * (used while rebuilding the stack into the parse output).
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset in the document at which the HTML preceding the token begins.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $leading_html_start;

	/**
	 * Constructor.
	 *
	 * Stores the arguments on the instance. When no previous offset is
	 * given it is derived as the first byte after the token.
	 *
	 * @since 5.0.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		// A null previous offset means "right after this token".
		if ( null === $prev_offset ) {
			$prev_offset = $token_start + $token_length;
		}

		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = $prev_offset;
		$this->leading_html_start = $leading_html_start;
	}
}
/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * All parser state is reset on entry, so one instance can be
	 * reused for several documents.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		do {
			// All of the work happens inside proceed().
		} while ( $this->proceed() );

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True while there may be more tokens, false once parsing is finished.
	 */
	public function proceed() {
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $this->next_token();

		$stack_depth = count( $this->stack );

		// We may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// If not in a block then flush the remaining text as freeform output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise the document ended with blocks still open. This is an
				 * error; we recover by assuming that every missing closer was
				 * implied and collapse the whole stack piecewise. A single open
				 * block is simply the one-iteration case of this loop.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * Easy case is if we stumbled upon a void block
				 * in the top-level of the document.
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// Otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// Track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * If we're missing an opener we're in trouble. This is an error.
				 * Of the available options (assume an implicit opener, assume
				 * _this_ is the opener, give up) we give up and emit the rest of
				 * the document as freeform text.
				 */
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				// If we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * Otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent.
				 * The trailing fragment is appended unconditionally here,
				 * even when it is an empty string.
				 */
				$stack_top = array_pop( $this->stack );
				$html      = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );

				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array Tuple of ( token type, block name, attributes, start offset, token length ).
	 */
	public function next_token() {
		$matches = null;

		/*
		 * Aye the magic.
		 * We're using a single RegExp to tokenize the block comment delimiters.
		 * We're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). We can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * `false` means PCRE failed (probably catastrophic backtracking or
		 * out-of-memory) and `0` means there are no more tokens. Both cases
		 * end tokenization.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * A closer that also carries attributes or a void marker isn't
		 * allowed. This is an error, but the extras don't hurt anything,
		 * so no special handling is done for that state. Note that the void
		 * check below runs before the closer check.
		 */
		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $innerHTML HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $innerHTML ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * Nothing is pushed when there is no text left to output.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output; defaults to the rest of the document.
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * The block is attached to the frame on top of the stack, preceded by
	 * any HTML found between that frame's previous offset and the block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string rather than using empty():
		 * empty( '0' ) is true, which would silently drop a fragment
		 * consisting of the single character "0". The is_string() guard
		 * covers PHP versions in which substr() can return false.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		// Null marks the position of the inner block within the content.
		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any HTML that preceded the block's opening token is pushed first
	 * as a freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		// Without an end offset the block swallows the rest of the document.
		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string rather than using empty():
		 * empty( '0' ) is true, which would silently drop a block whose
		 * remaining content is the single character "0". The is_string()
		 * guard covers PHP versions in which substr() can return false.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
|