* @copyright 2004-2016 Michel Fortin 
s around * "paragraphs" that are wrapped in non-block-level tags, such as * anchors, phrase emphasis, and spans. The list of tags we're looking * for is hard-coded: * * * List "a" is made of tags which can be both inline or block-level. * These will be treated block-level when the start tag is alone on * its line, otherwise they're not matched here and will be taken as * inline later. * * List "b" is made of tags which are always block-level; */ $block_tags_a_re = 'ins|del'; $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 'script|noscript|style|form|fieldset|iframe|math|svg|'. 'article|section|nav|aside|hgroup|header|footer|'. 'figure'; // Regular expression for the content of a block tag. $nested_tags_level = 4; $attr = ' (?> # optional tag attributes \s # starts with whitespace (?> [^>"/]+ # text outside quotes | /+(?!>) # slash not followed by ">" | "[^"]*" # text inside double quotes (tolerate ">") | \'[^\']*\' # text inside single quotes (tolerate ">") )* )? '; $content = str_repeat(' (?> [^<]+ # content without tag | <\2 # nested opening tag '.$attr.' # attributes (?> /> | >', $nested_tags_level). // end of opening tag '.*?'. // last level nested tag content str_repeat(' \2\s*> # closing nested tag ) | <(?!/\2\s*> # other tags with a different name ) )*', $nested_tags_level); $content2 = str_replace('\2', '\3', $content); /** * First, look for nested blocks, e.g.: *
	 * Process Markdown `<pre><code>` blocks.
	 * @param  string $text
	 * @return string
	 */
	protected function doCodeBlocks($text) {
		// An indented code block is a run of lines that each start with at
		// least one tab-width of spaces, preceded by a blank line (or the
		// start of the text) and followed by a less-indented line (or the
		// end of the text). Every such run is handed to the callback.
		$code_block_re = '{
				(?:\n\n|\A\n?)
				(	            # $1 = the code block -- one or more lines, starting with a space/tab
				  (?>
					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
					.*\n+
				  )+
				)
				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
			}xm';

		return preg_replace_callback(
			$code_block_re,
			array($this, '_doCodeBlocks_callback'),
			$text
		);
	}
	/**
	 * Code block parsing callback
	 * @param  array $matches
	 * @return string
	 */
	protected function _doCodeBlocks_callback($matches) {
		// $matches[1] is the raw indented block; remove one level of indentation.
		$codeblock = $this->outdent($matches[1]);

		// Let a user-supplied function produce the content when one is
		// configured; otherwise escape &, < and > (quotes are left alone).
		if ($this->code_block_content_func) {
			$codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
		} else {
			$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
		}

		// Trim leading newlines and trailing newlines.
		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

		// Wrap the content in the code block markup. Without the wrapper the
		// block would be emitted as bare text and lose its preformatting.
		$codeblock = "<pre><code>$codeblock\n</code></pre>";

		// Hash the finished block and surround it with blank lines.
		return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
	}
	/**
	 * Create a code span markup for $code. Called from handleSpanToken.
	 * @param  string $code
	 * @return string
	 */
	protected function makeCodeSpan($code) {
		// Let a user-supplied function produce the content when one is
		// configured; otherwise trim the code and escape &, < and >.
		if ($this->code_span_content_func) {
			$code = call_user_func($this->code_span_content_func, $code);
		} else {
			$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
		}

		// Emit the actual <code> element; returning the bare content would
		// make code spans indistinguishable from ordinary text.
		return $this->hashPart("<code>$code</code>");
	}
	/**
	 * Define the emphasis operators with their regex matches
	 * @var array
	 */
	protected $em_relist = array(
		''  => '(?:(? '(? '(? '(?:(? '(? '(? '(?:(? '(? '(?em_relist as $em => $em_re) {
			foreach ($this->strong_relist as $strong => $strong_re) {
				// Construct list of allowed token expressions.
				$token_relist = array();
				if (isset($this->em_strong_relist["$em$strong"])) {
					$token_relist[] = $this->em_strong_relist["$em$strong"];
				}
				$token_relist[] = $em_re;
				$token_relist[] = $strong_re;
				
				// Construct master expression from list.
				$token_re = '{(' . implode('|', $token_relist) . ')}';
				$this->em_strong_prepared_relist["$em$strong"] = $token_re;
			}
		}
	}
	/**
	 * Convert Markdown italics (emphasis) and bold (strong) to HTML
	 * @param  string $text
	 * @return string
	 */
	protected function doItalicsAndBold($text) {
		// Both stacks grow at index 0. $token_stack holds the opening markers
		// that have not been closed yet; $text_stack holds the text collected
		// inside each of them. The initial '' entries represent the text that
		// is outside any emphasis.
		$token_stack = array('');
		$text_stack = array('');
		// Currently open emphasis / strong marker ('' when none is open).
		$em = '';
		$strong = '';
		// True while the innermost open marker is a three-char one (*** or ___)
		// that has not yet been split into its em and strong parts.
		$tree_char_em = false;
		
		while (1) {
			// Get prepared regular expression for searching emphasis tokens
			// in current context.
			$token_re = $this->em_strong_prepared_relist["$em$strong"];
			
			// Each loop iteration searches for the next emphasis token:
			// $parts[0] is the text before it, $parts[1] the token itself and
			// $parts[2] the remaining text.
			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
			$text_stack[0] .= $parts[0];
			// Bind by reference so that missing elements (no token found)
			// become null instead of raising an undefined-offset notice.
			$token =& $parts[1];
			$text =& $parts[2];
			
			if (empty($token)) {
				// Reached end of text span: empty stack without emitting
				// any more emphasis. Unclosed markers are put back as
				// literal text in front of what they enclosed.
				while ($token_stack[0]) {
					$text_stack[1] .= array_shift($token_stack);
					$text_stack[0] .= array_shift($text_stack);
				}
				break;
			}
			
			$token_len = strlen($token);
			if ($tree_char_em) {
				// Reached closing marker while inside a three-char emphasis.
				if ($token_len == 3) {
					// Three-char closing marker, close em and strong.
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong><em>$span</em></strong>";
					$text_stack[0] .= $this->hashPart($span);
					$em = '';
					$strong = '';
				} else {
					// Other closing marker: close one em or strong and
					// change current token state to match the other.
					// ($token[0] rather than $token{0}: the curly-brace
					// offset syntax is not accepted by PHP 8.)
					$token_stack[0] = str_repeat($token[0], 3-$token_len);
					$tag = $token_len == 2 ? "strong" : "em";
					$span = $text_stack[0];
					$span = $this->runSpanGamut($span);
					$span = "<$tag>$span</$tag>";
					$text_stack[0] = $this->hashPart($span);
					$$tag = ''; // $$tag stands for $em or $strong
				}
				$tree_char_em = false;
			} else if ($token_len == 3) {
				if ($em) {
					// Reached closing marker for both em and strong.
					// Close the two innermost spans, whichever order they
					// were opened in.
					for ($i = 0; $i < 2; ++$i) {
						$shifted_token = array_shift($token_stack);
						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<$tag>$span</$tag>";
						$text_stack[0] .= $this->hashPart($span);
						$$tag = ''; // $$tag stands for $em or $strong
					}
				} else {
					// Reached opening three-char emphasis marker. Push on token 
					// stack; will be handled by the special condition above.
					$em = $token[0];
					$strong = "$em$em";
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$tree_char_em = true;
				}
			} else if ($token_len == 2) {
				if ($strong) {
					// Unwind any dangling emphasis marker:
					if (strlen($token_stack[0]) == 1) {
						$text_stack[1] .= array_shift($token_stack);
						$text_stack[0] .= array_shift($text_stack);
					}
					// Closing strong marker:
					array_shift($token_stack);
					$span = array_shift($text_stack);
					$span = $this->runSpanGamut($span);
					$span = "<strong>$span</strong>";
					$text_stack[0] .= $this->hashPart($span);
					$strong = '';
				} else {
					// Opening strong marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$strong = $token;
				}
			} else {
				// Here $token_len == 1
				if ($em) {
					if (strlen($token_stack[0]) == 1) {
						// Closing emphasis marker:
						array_shift($token_stack);
						$span = array_shift($text_stack);
						$span = $this->runSpanGamut($span);
						$span = "<em>$span</em>";
						$text_stack[0] .= $this->hashPart($span);
						$em = '';
					} else {
						// The innermost open marker is not an em marker, so
						// this token cannot close anything: keep it as text.
						$text_stack[0] .= $token;
					}
				} else {
					// Opening emphasis marker.
					array_unshift($token_stack, $token);
					array_unshift($text_stack, '');
					$em = $token;
				}
			}
		}
		return $text_stack[0];
	}
	/**
	 * Parse Markdown blockquotes to HTML
	 * @param  string $text
	 * @return string
	 */
	protected function doBlockQuotes($text) {
		// A blockquote is one or more groups, each made of a line starting
		// with ">" followed by any consecutive non-empty lines and trailing
		// blank lines. Each complete match is converted by the callback.
		$blockquote_re = '/
			  (								# Wrap whole match in $1
				(?>
				  ^[ ]*>[ ]?			# ">" at the start of a line
					.+\n					# rest of the first line
				  (.+\n)*					# subsequent consecutive lines
				  \n*						# blanks
				)+
			  )
			/xm';

		return preg_replace_callback(
			$blockquote_re,
			array($this, '_doBlockQuotes_callback'),
			$text
		);
	}
	/**
	 * Blockquote parsing callback
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback($matches) {
		$bq = $matches[1];
		// trim one level of quoting - trim whitespace-only lines
		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
		$bq = $this->runBlockGamut($bq); // recurse

		// Indent every line of the converted content by two spaces.
		$bq = preg_replace('/^/m', "  ", $bq);
		// These leading spaces cause problem with <pre> content, 
		// so we need to fix that. The pattern must target <pre>...</pre>
		// only; anything broader would undo the indentation everywhere.
		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
			array($this, '_doBlockQuotes_callback2'), $bq);

		// Wrap in the blockquote element and hash the finished block.
		return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
	}
	/**
	 * Blockquote parsing callback
	 * @param  array $matches
	 * @return string
	 */
	protected function _doBlockQuotes_callback2($matches) {
		// Drop the two-space indentation from the start of every line of
		// the captured text ($matches[1]).
		return preg_replace('/^  /m', '', $matches[1]);
	}
	/**
	 * Parse paragraphs
	 *
	 * @param  string $text String to process in paragraphs
	 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
	 * @return string
	 */
	protected function formParagraphs($text, $wrap_in_p = true) {
		// Strip leading and trailing lines:
		$text = preg_replace('/\A\n+|\n+\z/', '', $text);

		// Split on runs of two or more newlines, dropping empty pieces.
		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

		// Wrap <p> tags and unhashify HTML blocks
		foreach ($grafs as $key => $value) {
			if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
				// Is a paragraph: anything that is not exactly one block
				// hash key.
				$value = $this->runSpanGamut($value);
				if ($wrap_in_p) {
					// The opening tag replaces any leading spaces.
					$value = preg_replace('/^([ ]*)/', "<p>", $value);
					$value .= "</p>";
				}
				$grafs[$key] = $this->unhash($value);
			} else {
				// Is a block: substitute the stored HTML for its hash key.
				$grafs[$key] = $this->html_hashes[$value];
			}
		}

		return implode("\n\n", $grafs);
	}
	/**
	 * Encode text for a double-quoted HTML attribute. This function
	 * is *not* suitable for attributes enclosed in single quotes.
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAttribute($text) {
		// Encode ampersands and angle brackets first.
		$text = $this->encodeAmpsAndAngles($text);
		// A literal double quote would terminate the attribute value, so it
		// must be emitted as an entity. Single quotes are left untouched,
		// which is why this is only safe for double-quoted attributes.
		$text = str_replace('"', '&quot;', $text);
		return $text;
	}
	/**
	 * Encode text for a double-quoted HTML attribute containing a URL,
	 * applying the URL filter if set. Also generates the textual
	 * representation for the URL (removing mailto: or tel:) storing it in $text.
	 * This function is *not* suitable for attributes enclosed in single quotes.
	 *
	 * @param  string $url
	 * @param  string &$text Passed by reference
	 * @return string        URL
	 */
	protected function encodeURLAttribute($url, &$text = null) {
		// Give the optional URL filter the first pass over the URL.
		if ($this->url_filter_func) {
			$url = call_user_func($this->url_filter_func, $url);
		}

		// mailto: addresses get entity obfuscation; that helper also fills
		// in $text, skipping the 7-character "mailto:" prefix.
		if (preg_match('{^mailto:}i', $url)) {
			return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
		}

		// The scheme is tested on the URL as it was before attribute encoding.
		$is_tel = preg_match('{^tel:}i', $url);
		$encoded = $this->encodeAttribute($url);

		// The textual form drops the 4-character "tel:" prefix; any other
		// URL is shown as is.
		$text = $is_tel ? substr($encoded, 4) : $encoded;

		return $encoded;
	}
	
	/**
	 * Smart processing for ampersands and angle brackets that need to 
	 * be encoded. Valid character entities are left alone unless the
	 * no-entities mode is set.
	 * @param  string $text
	 * @return string
	 */
	protected function encodeAmpsAndAngles($text) {
		if ($this->no_entities) {
			$text = str_replace('&', '&', $text);
		} else {
			// Ampersand-encoding based entirely on Nat Irons's Amputator
			// MT plugin: