<?php /** * Contact Form 7's class used for formatting HTML fragments. */ class WPCF7_HTMLFormatter { // HTML component types. const text = 0; const start_tag = 1; const end_tag = 2; const comment = 3; /** * Tag name reserved for a custom HTML element used as a block placeholder. */ const placeholder_block = 'placeholder:block'; /** * Tag name reserved for a custom HTML element used as an inline placeholder. */ const placeholder_inline = 'placeholder:inline'; /** * The void elements in HTML. * * @link https://developer.mozilla.org/en-US/docs/Glossary/Void_element */ const void_elements = array( 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr', self::placeholder_block, self::placeholder_inline, ); /** * HTML elements that can contain flow content. */ const p_parent_elements = array( 'address', 'article', 'aside', 'blockquote', 'body', 'caption', 'dd', 'details', 'dialog', 'div', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'header', 'li', 'main', 'nav', 'section', 'td', 'th', ); /** * HTML elements that can be neither the parent nor a child of * a paragraph element. */ const p_nonparent_elements = array( 'colgroup', 'dl', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hgroup', 'html', 'legend', 'menu', 'ol', 'pre', 'style', 'summary', 'table', 'tbody', 'template', 'tfoot', 'thead', 'title', 'tr', 'ul', ); /** * HTML elements in the phrasing content category, plus non-phrasing * content elements that can be grandchildren of a paragraph element. */ const p_child_elements = array( 'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'link', 'map', 'mark', 'meta', 'meter', 'noscript', 'object', 'output', 'picture', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'select', 'slot', 'small', 'span', 'strong', 'sub', 'sup', 'textarea', 'time', 'u', 'var', 'video', 'wbr', 'optgroup', 'option', 'rp', 'rt', // non-phrasing grandchildren self::placeholder_inline, ); /** * HTML elements that can contain phrasing content. */ const br_parent_elements = array( 'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', 'bdi', 'bdo', 'blockquote', 'button', 'canvas', 'caption', 'cite', 'code', 'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'div', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'i', 'ins', 'kbd', 'label', 'legend', 'li', 'main', 'map', 'mark', 'meter', 'nav', 'noscript', 'object', 'output', 'p', 'progress', 'q', 'rt', 'ruby', 's', 'samp', 'section', 'slot', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'td', 'th', 'time', 'u', 'var', 'video', ); // Properties. private $options = array(); private $stacked_elements = array(); private $output = ''; /** * Constructor. */ public function __construct( $args = '' ) { $this->options = wp_parse_args( $args, array( 'auto_br' => true, 'auto_indent' => true, ) ); } /** * Separates the given text into chunks of HTML. Each chunk must be an * associative array that includes 'position', 'type', and 'content' keys. * * @param string $input Text to be separated into chunks. * @return iterable Iterable of chunks. */ public function separate_into_chunks( $input ) { $input_bytelength = strlen( $input ); $position = 0; while ( $position < $input_bytelength ) { $next_tag = preg_match( '/(?:<!--.*?-->|<(?:\/?)[a-z].*?>)/is', $input, $matches, PREG_OFFSET_CAPTURE, $position ); if ( ! $next_tag ) { yield array( 'position' => $position, 'type' => self::text, 'content' => substr( $input, $position ), ); break; } $next_tag = $matches[0][0]; $next_tag_position = $matches[0][1]; if ( $position < $next_tag_position ) { yield array( 'position' => $position, 'type' => self::text, 'content' => substr( $input, $position, $next_tag_position - $position ), ); } if ( '<!' === substr( $next_tag, 0, 2 ) ) { $next_tag_type = self::comment; } elseif ( '</' === substr( $next_tag, 0, 2 ) ) { $next_tag_type = self::end_tag; } else { $next_tag_type = self::start_tag; } yield array( 'position' => $next_tag_position, 'type' => $next_tag_type, 'content' => substr( $input, $next_tag_position, strlen( $next_tag ) ), ); $position = $next_tag_position + strlen( $next_tag ); } } /** * Normalizes content in each chunk. This may change the type and position * of the chunk. * * @param iterable $chunks The original chunks. * @return iterable Normalized chunks. */ public function pre_format( $chunks ) { $position = 0; foreach ( $chunks as $chunk ) { $chunk['position'] = $position; // Standardize newline characters to "\n". $chunk['content'] = str_replace( array( "\r\n", "\r" ), "\n", $chunk['content'] ); if ( $chunk['type'] === self::start_tag ) { list( $chunk['content'] ) = self::normalize_start_tag( $chunk['content'] ); // Replace <br /> by a line break. if ( $this->options['auto_br'] and preg_match( '/^<br\s*\/?>$/i', $chunk['content'] ) ) { $chunk['type'] = self::text; $chunk['content'] = "\n"; } } yield $chunk; $position = self::calc_next_position( $chunk ); } } /** * Concatenates neighboring text chunks to create a single chunk. * * @param iterable $chunks The original chunks. * @return iterable Processed chunks. */ public function concatenate_texts( $chunks ) { $position = 0; $text_left = null; foreach ( $chunks as $chunk ) { $chunk['position'] = $position; if ( $chunk['type'] === self::text ) { if ( isset( $text_left ) ) { $text_left['content'] .= $chunk['content']; } else { $text_left = $chunk; } continue; } if ( isset( $text_left ) ) { yield $text_left; $chunk['position'] = self::calc_next_position( $text_left ); $text_left = null; } yield $chunk; $position = self::calc_next_position( $chunk ); } if ( isset( $text_left ) ) { yield $text_left; } } /** * Outputs formatted HTML based on the given chunks. * * @param iterable $chunks The original chunks. * @return string Formatted HTML. */ public function format( $chunks ) { $chunks = $this->pre_format( $chunks ); $chunks = $this->concatenate_texts( $chunks ); $this->output = ''; $this->stacked_elements = array(); foreach ( $chunks as $chunk ) { if ( $chunk['type'] === self::text ) { $this->append_text( $chunk['content'] ); } if ( $chunk['type'] === self::start_tag ) { $this->start_tag( $chunk['content'] ); } if ( $chunk['type'] === self::end_tag ) { $this->end_tag( $chunk['content'] ); } if ( $chunk['type'] === self::comment ) { $this->append_comment( $chunk['content'] ); } } // Close all remaining tags. $this->close_all_tags(); return $this->output; } /** * Appends a text node content to the output property. * * @param string $content Text node content. */ public function append_text( $content ) { if ( $this->is_inside( array( 'pre', 'template' ) ) ) { $this->output .= $content; return; } if ( empty( $this->stacked_elements ) or $this->has_parent( 'p' ) or $this->has_parent( self::p_parent_elements ) ) { // Close <p> if the content starts with multiple line breaks. if ( preg_match( '/^\s*\n\s*\n\s*/', $content ) ) { $this->end_tag( 'p' ); } // Split up the contents into paragraphs, separated by double line breaks. $paragraphs = preg_split( '/\s*\n\s*\n\s*/', $content ); $paragraphs = array_filter( $paragraphs, static function ( $paragraph ) { return '' !== trim( $paragraph ); } ); $paragraphs = array_values( $paragraphs ); if ( $paragraphs ) { if ( $this->is_inside( 'p' ) ) { $paragraph = array_shift( $paragraphs ); $paragraph = self::normalize_paragraph( $paragraph, $this->options['auto_br'] ); $this->output .= $paragraph; } foreach ( $paragraphs as $paragraph ) { $this->start_tag( 'p' ); $paragraph = ltrim( $paragraph ); $paragraph = self::normalize_paragraph( $paragraph, $this->options['auto_br'] ); $this->output .= $paragraph; } } // Close <p> if the content ends with multiple line breaks. if ( preg_match( '/\s*\n\s*\n\s*$/', $content ) ) { $this->end_tag( 'p' ); } // Cases where the content is a single line break. if ( preg_match( '/^\s*\n\s*$/', $content ) ) { $auto_br = $this->options['auto_br'] && $this->is_inside( 'p' ); $content = self::normalize_paragraph( $content, $auto_br ); $this->output .= $content; } } else { $auto_br = $this->options['auto_br'] && $this->has_parent( self::br_parent_elements ); $content = self::normalize_paragraph( $content, $auto_br ); $this->output .= $content; } } /** * Appends a start tag to the output property. * * @param string $tag A start tag. */ public function start_tag( $tag ) { list( $tag, $tag_name ) = self::normalize_start_tag( $tag ); if ( in_array( $tag_name, self::p_child_elements ) ) { if ( ! $this->is_inside( 'p' ) and ! $this->is_inside( self::p_child_elements ) and ! $this->has_parent( self::p_nonparent_elements ) ) { // Open <p> if it does not exist. $this->start_tag( 'p' ); } } elseif ( 'p' === $tag_name or in_array( $tag_name, self::p_parent_elements ) or in_array( $tag_name, self::p_nonparent_elements ) ) { // Close <p> if it exists. $this->end_tag( 'p' ); } if ( 'dd' === $tag_name or 'dt' === $tag_name ) { // Close <dd> and <dt> if closing tag is omitted. $this->end_tag( 'dd' ); $this->end_tag( 'dt' ); } if ( 'li' === $tag_name ) { // Close <li> if closing tag is omitted. $this->end_tag( 'li' ); } if ( 'optgroup' === $tag_name ) { // Close <option> and <optgroup> if closing tag is omitted. $this->end_tag( 'option' ); $this->end_tag( 'optgroup' ); } if ( 'option' === $tag_name ) { // Close <option> if closing tag is omitted. $this->end_tag( 'option' ); } if ( 'rp' === $tag_name or 'rt' === $tag_name ) { // Close <rp> and <rt> if closing tag is omitted. $this->end_tag( 'rp' ); $this->end_tag( 'rt' ); } if ( 'td' === $tag_name or 'th' === $tag_name ) { // Close <td> and <th> if closing tag is omitted. $this->end_tag( 'td' ); $this->end_tag( 'th' ); } if ( 'tr' === $tag_name ) { // Close <tr> if closing tag is omitted. $this->end_tag( 'tr' ); } if ( 'tbody' === $tag_name or 'tfoot' === $tag_name ) { // Close <thead> if closing tag is omitted. $this->end_tag( 'thead' ); } if ( 'tfoot' === $tag_name ) { // Close <tbody> if closing tag is omitted. $this->end_tag( 'tbody' ); } if ( ! in_array( $tag_name, self::void_elements ) ) { array_unshift( $this->stacked_elements, $tag_name ); } if ( ! in_array( $tag_name, self::p_child_elements ) ) { if ( '' !== $this->output ) { $this->output = rtrim( $this->output ) . "\n"; } if ( $this->options['auto_indent'] ) { $this->output .= self::indent( count( $this->stacked_elements ) - 1 ); } } $this->output .= $tag; } /** * Closes an element and its open descendants at a time. * * @param string $tag An end tag. */ public function end_tag( $tag ) { if ( preg_match( '/<\/(.+?)(?:\s|>)/', $tag, $matches ) ) { $tag_name = strtolower( $matches[1] ); } else { $tag_name = strtolower( $tag ); } $stacked_elements = array_values( $this->stacked_elements ); $tag_position = array_search( $tag_name, $stacked_elements ); if ( false === $tag_position ) { return; } // Element groups that make up an indirect nesting structure. // Descendant can contain ancestors. static $nesting_families = array( array( 'ancestors' => array( 'dl', ), 'descendants' => array( 'dd', 'dt', ), ), array( 'ancestors' => array( 'ol', 'ul', 'menu', ), 'descendants' => array( 'li', ), ), array( 'ancestors' => array( 'table', ), 'descendants' => array( 'td', 'th', 'tr', 'thead', 'tbody', 'tfoot', ), ), ); foreach ( $nesting_families as $family ) { $ancestors = (array) $family['ancestors']; $descendants = (array) $family['descendants']; if ( in_array( $tag_name, $descendants ) ) { $intersect = array_intersect( $ancestors, array_slice( $stacked_elements, 0, $tag_position ) ); if ( $intersect ) { // Ancestor appears after descendant. return; } } } while ( $element = array_shift( $this->stacked_elements ) ) { $this->append_end_tag( $element ); if ( $element === $tag_name ) { break; } } } /** * Closes all open tags. */ public function close_all_tags() { while ( $element = array_shift( $this->stacked_elements ) ) { $this->append_end_tag( $element ); } } /** * Appends an end tag to the output property. * * @param string $tag_name Tag name. */ public function append_end_tag( $tag_name ) { if ( ! in_array( $tag_name, self::p_child_elements ) ) { // Remove unnecessary <br />. $this->output = preg_replace( '/\s*<br \/>\s*$/', '', $this->output ); $this->output = rtrim( $this->output ) . "\n"; if ( $this->options['auto_indent'] ) { $this->output .= self::indent( count( $this->stacked_elements ) ); } } $this->output .= sprintf( '</%s>', $tag_name ); // Remove trailing <p></p>. $this->output = preg_replace( '/<p>\s*<\/p>$/', '', $this->output ); } /** * Appends an HTML comment to the output property. * * @param string $tag An HTML comment. */ public function append_comment( $tag ) { $this->output .= $tag; } /** * Returns true if it is currently inside one of HTML elements specified * by tag names. * * @param string|array $tag_names A tag name or an array of tag names. */ public function is_inside( $tag_names ) { $tag_names = (array) $tag_names; foreach ( $this->stacked_elements as $element ) { if ( in_array( $element, $tag_names ) ) { return true; } } return false; } /** * Returns true if the parent node is one of HTML elements specified * by tag names. * * @param string|array $tag_names A tag name or an array of tag names. */ public function has_parent( $tag_names ) { $tag_names = (array) $tag_names; $parent = reset( $this->stacked_elements ); if ( false === $parent ) { return false; } return in_array( $parent, $tag_names ); } /** * Calculates the position of the next chunk based on the position and * length of the current chunk. * * @param array $chunk An associative array of the current chunk. * @return int The position of the next chunk. */ public static function calc_next_position( $chunk ) { return $chunk['position'] + strlen( $chunk['content'] ); } /** * Outputs a set of tabs to indent. * * @param int $level Indentation level. * @return string A series of tabs. */ public static function indent( $level ) { $level = (int) $level; if ( 0 < $level ) { return str_repeat( "\t", $level ); } return ''; } /** * Normalizes a start tag. * * @param string $tag A start tag or a tag name. * @return array An array includes the normalized start tag and tag name. */ public static function normalize_start_tag( $tag ) { if ( preg_match( '/<(.+?)[\s\/>]/', $tag, $matches ) ) { $tag_name = strtolower( $matches[1] ); } else { $tag_name = strtolower( $tag ); $tag = sprintf( '<%s>', $tag_name ); } if ( in_array( $tag_name, self::void_elements ) ) { // Normalize void element. $tag = preg_replace( '/\s*\/?>/', ' />', $tag ); } return array( $tag, $tag_name ); } /** * Normalizes a paragraph of text. * * @param string $paragraph A paragraph of text. * @param bool $auto_br Optional. If true, line breaks will be replaced * by a br element. * @return string The normalized paragraph. */ public static function normalize_paragraph( $paragraph, $auto_br = false ) { if ( $auto_br ) { $paragraph = preg_replace( '/\s*\n\s*/', "<br />\n", $paragraph ); } $paragraph = preg_replace( '/[ ]+/', " ", $paragraph ); return $paragraph; } }