Merge pull request #688 from dawnbreak/markdown

⬆️ 🔨 Upgrade Markdownify library.
This commit is contained in:
zotlabs 2017-03-05 18:22:37 +11:00 committed by GitHub
commit 215bd07f0b
20 changed files with 2869 additions and 2427 deletions

View File

@ -29,7 +29,8 @@
"ext-xml" : "*",
"ext-openssl" : "*",
"sabre/dav" : "~3.2",
"michelf/php-markdown" : "^1.7"
"michelf/php-markdown" : "^1.7",
"pixel418/markdownify": "^2.2"
},
"require-dev" : {
"php" : ">=5.6",

58
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "4b24468c1f8babe7c8804fba8ee602f7",
"content-hash": "c0cafbf9fd702be588f6b392b9742cb6",
"packages": [
{
"name": "michelf/php-markdown",
@ -57,6 +57,62 @@
],
"time": "2016-10-29T18:58:20+00:00"
},
{
"name": "pixel418/markdownify",
"version": "v2.2.1",
"source": {
"type": "git",
"url": "https://github.com/Elephant418/Markdownify.git",
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8"
},
"type": "lib",
"autoload": {
"psr-4": {
"Markdownify\\": "src",
"Test\\Markdownify\\": "test"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"LGPL"
],
"authors": [
{
"name": "Peter Kruithof",
"email": "pkruithof@gmail.com",
"homepage": "http://pkruithof.tumblr.com/"
},
{
"name": "Milian Wolff",
"email": "mail@milianw.de",
"homepage": "http://milianw.de"
},
{
"name": "Thomas Zilliox",
"email": "hello@tzi.fr",
"homepage": "http://tzi.fr"
}
],
"description": "The HTML to Markdown converter for PHP ",
"homepage": "https://github.com/elephant418/Markdownify",
"keywords": [
"markdown",
"markdownify"
],
"time": "2016-09-21T13:01:43+00:00"
},
{
"name": "psr/log",
"version": "1.0.2",

View File

@ -5,12 +5,12 @@
*/
use Michelf\MarkdownExtra;
use Markdownify\Converter;
require_once("include/oembed.php");
require_once("include/event.php");
require_once("include/html2bbcode.php");
require_once("include/bbcode.php");
require_once("library/markdownify/markdownify.php");
function get_bb_tag_pos($s, $name, $occurance = 1) {
@ -367,7 +367,6 @@ function bb2diaspora_itemwallwall(&$item,$uplink = false) {
function bb2diaspora_itembody($item, $force_update = false, $have_channel = false, $uplink = false) {
if(! get_iconfig($item,'diaspora','fields')) {
$force_update = true;
}
@ -454,7 +453,7 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals
return html_entity_decode($body);
}
function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
// Re-enabling the converter again.
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
@ -496,11 +495,10 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
// Now convert HTML to Markdown
$md = new Markdownify(false, false, false);
$md = new Converter(Converter::LINK_AFTER_CONTENT, false, false);
$Text = $md->parseString($Text);
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text);
@ -522,7 +520,7 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
$Text = trim($Text);
call_hooks('bb2diaspora',$Text);
call_hooks('bb2diaspora', $Text);
return $Text;
}

View File

@ -1,29 +0,0 @@
Markdownify
===========
* handle non-markdownifiable lists (e.g. `<ul><li id="foobar">asdf</li></ul>`)
* organize methods better (e.g. keep flushlinebreaks & setlinebreaks close to each other)
* take a look at function names etc.
* is the new (in rev. 93) lastclosedtag property needed?
* word wrapping (some work is done but it's still very buggy)
Markdownify Extra
=================
* handle table alignment with KEEP_HTML=false
* handle tables without headings when KEEP_HTML=false is set
* handle Markdown inside non-markdownable tags
Implementation Thoughts
=======================
* non-markdownifiable lists and markdown inside non-markdownable tags as well as the current
table implementation could be rewritten by using a rollback mechanism.
example:
<ul><li>asdf</li><li id="foobar">asdf</li></ul>
When we reach `<ul>` we know the conversion might fail, so we create a snapshot of the current parser state.
We keep on parsing, and when we reach `<li id="foobar">` we have to roll back to that snapshot and keep this
list in HTML format.

View File

@ -1,51 +0,0 @@
<?php
/**
 * Minimal web demo for the HTML to Markdown converter.
 *
 * When POST field "input" is non-empty it is converted with Markdownify or
 * Markdownify_Extra (selected by the "extra" field) and the result is shown
 * in a <pre> block; otherwise the input form is rendered.
 *
 * NOTE(review): an unchecked checkbox is not submitted at all, so "leap" and
 * "keepHTML" fall back to the MDFY_* defaults instead of being switched off
 * -- confirm whether that is intended.
 */
error_reporting(E_ALL);
if (!empty($_POST['input'])) {
    include 'markdownify_extra.php';
    if (!isset($_POST['leap'])) {
        $leap = MDFY_LINKS_EACH_PARAGRAPH;
    } else {
        $leap = $_POST['leap'];
    }

    if (!isset($_POST['keepHTML'])) {
        $keephtml = MDFY_KEEPHTML;
    } else {
        $keephtml = $_POST['keepHTML'];
    }
    if (!empty($_POST['extra'])) {
        $md = new Markdownify_Extra($leap, MDFY_BODYWIDTH, $keephtml);
    } else {
        $md = new Markdownify($leap, MDFY_BODYWIDTH, $keephtml);
    }
    if (ini_get('magic_quotes_gpc')) {
        # only relevant on old PHP versions that still add slashes to request data
        $_POST['input'] = stripslashes($_POST['input']);
    }
    $output = $md->parseString($_POST['input']);
} else {
    $_POST['input'] = '';
}
# PHP_SELF includes client supplied path info (/demo.php/"><script>...), so it
# must be escaped before it is written into an HTML attribute.
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>HTML to Markdown Converter</title>
</head>
<body>
<?php if (empty($_POST['input'])): ?>
<form action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" method="post">
<fieldset>
<legend>HTML Input</legend>
<textarea style="width:100%;" cols="85" rows="40" name="input"><?php echo htmlspecialchars($_POST['input'], ENT_NOQUOTES, 'UTF-8'); ?></textarea>
</fieldset>
<label for="extra">Markdownify Extra: <input name="extra" checked="checked" id="extra" type="checkbox" value="1" /></label>
<label for="leap">Links after each block elem: <input name="leap" id="leap" type="checkbox" value="1" /></label>
<label for="keepHTML">keep HTML: <input name="keepHTML" id="keepHTML" type="checkbox" value="1" checked="checked" /></label>
<input type="submit" name="submit" value="submit" />
</form>
<?php else: ?>
<h1 style="text-align:right;"><a href="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>">BACK</a></h1>
<pre><?php echo htmlspecialchars($output, ENT_NOQUOTES, 'UTF-8'); ?></pre>
<?php endif; ?>
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@ -1,33 +0,0 @@
#!/usr/bin/php
<?php
require dirname(__FILE__) .'/markdownify_extra.php';
/**
 * Read a command line option from $_SERVER['argv'].
 *
 * Looks for "--$name" among the script arguments (argv[0], the script name,
 * is never treated as an option).
 *
 * @param string $name    option name without the leading "--"
 * @param mixed  $default value returned when the option is not present
 * @return mixed $default when the option is absent; true when it is present
 *               without a value (last argument, or followed by another
 *               "--option"); otherwise the argument following it as a string
 */
function param($name, $default = false) {
    $flag = '--'.$name;
    # argv is missing when the script is not run from the command line
    $args = (isset($_SERVER['argv']) && is_array($_SERVER['argv'])) ? array_values($_SERVER['argv']) : array();
    if (!in_array($flag, $args, true)) {
        return $default;
    }
    # each() was removed in PHP 8.0, so scan by index instead; start at 1
    # because the old each()/current() loop never inspected argv[0] either
    $count = count($args);
    for ($i = 1; $i < $count; $i++) {
        if ($args[$i] !== $flag) {
            continue;
        }
        $value = isset($args[$i + 1]) ? $args[$i + 1] : false;
        if ($value === false || substr($value, 0, 2) == '--') {
            return true;
        }
        return $value;
    }
    # the flag only matched argv[0]: treat it as present without a value
    return true;
}
# read the whole HTML document from standard input
$input = stream_get_contents(STDIN);

# converter options taken from the command line
$linksAfterEachParagraph = param('links');
$bodyWidth = param('width');
$keepHTML = param('html', true);

# --no_extra selects the plain converter, otherwise Markdown Extra syntax is used
$parserClass = param('no_extra') ? 'Markdownify' : 'Markdownify_Extra';
$parser = new $parserClass($linksAfterEachParagraph, $bodyWidth, $keepHTML);

# write the Markdown result followed by a newline
$markdown = $parser->parseString($input);
echo $markdown ."\n";

View File

@ -1,489 +0,0 @@
<?php
/**
* Class to convert HTML to Markdown with PHP Markdown Extra syntax support.
*
* @version 1.0.0 alpha
* @author Milian Wolff (<mail@milianw.de>, <http://milianw.de>)
* @license LGPL, see LICENSE_LGPL.txt and the summary below
* @copyright (C) 2007 Milian Wolff
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* standard Markdownify class
*/
require_once dirname(__FILE__).'/markdownify.php';
/**
 * HTML to Markdown converter with PHP Markdown Extra syntax support.
 *
 * Extends Markdownify with handlers for header ids, tables, definition lists,
 * footnotes and abbreviations.
 *
 * NOTE(review): the constructor is a PHP 4 style (class-named) constructor that
 * delegates to parent::Markdownify(); PHP 8 no longer treats such methods as
 * constructors. The parent class is not visible here, so this is left as is.
 */
class Markdownify_Extra extends Markdownify {
    /**
     * table data, including rows with content and the maximum width of each col
     *
     * @var array
     */
    var $table = array();
    /**
     * current col
     *
     * @var int
     */
    var $col = -1;
    /**
     * current row
     *
     * @var int
     */
    var $row = 0;
    /**
     * lookahead RegEx matching a convertible table header row (built in the constructor)
     *
     * @var string
     */
    var $tableLookaheadHeader = '';
    /**
     * RegEx fragment matching the contents and closing tag of one <td> (built in the constructor)
     *
     * @var string
     */
    var $tdSubstitute = '';
    /**
     * sprintf() template for the lookahead RegEx matching the table body (built in the constructor)
     *
     * @var string
     */
    var $tableLookaheadBody = '';

    /**
     * constructor, see Markdownify::Markdownify() for more information
     *
     * Registers the additional markdownable tags/attributes and builds the
     * lookahead expressions used by handleTag_table().
     */
    function Markdownify_Extra($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
        parent::Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);

        ### new markdownable tags & attributes
        # header ids: # foo {bar}
        $this->isMarkdownable['h1']['id'] = 'optional';
        $this->isMarkdownable['h2']['id'] = 'optional';
        $this->isMarkdownable['h3']['id'] = 'optional';
        $this->isMarkdownable['h4']['id'] = 'optional';
        $this->isMarkdownable['h5']['id'] = 'optional';
        $this->isMarkdownable['h6']['id'] = 'optional';
        # tables
        $this->isMarkdownable['table'] = array();
        $this->isMarkdownable['th'] = array(
            'align' => 'optional',
        );
        $this->isMarkdownable['td'] = array(
            'align' => 'optional',
        );
        $this->isMarkdownable['tr'] = array();
        array_push($this->ignore, 'thead');
        array_push($this->ignore, 'tbody');
        array_push($this->ignore, 'tfoot');
        # definition lists
        $this->isMarkdownable['dl'] = array();
        $this->isMarkdownable['dd'] = array();
        $this->isMarkdownable['dt'] = array();
        # footnotes
        $this->isMarkdownable['fnref'] = array(
            'target' => 'required',
        );
        $this->isMarkdownable['footnotes'] = array();
        $this->isMarkdownable['fn'] = array(
            'name' => 'required',
        );
        $this->parser->blockElements['fnref'] = false;
        $this->parser->blockElements['fn'] = true;
        $this->parser->blockElements['footnotes'] = true;
        # abbr
        $this->isMarkdownable['abbr'] = array(
            'title' => 'required',
        );

        # build RegEx lookahead to decide whether a table can be parsed or not
        # (the multi-line literals below use the "x" modifier; their text is kept verbatim)
        $inlineTags = array_keys($this->parser->blockElements, false);
        $colContents = '(?:[^<]|<(?:'.implode('|', $inlineTags).'|[^a-z]))+';
        $this->tableLookaheadHeader = '{
^\s*(?:<thead\s*>)?\s* # open optional thead
<tr\s*>\s*(?: # start required row with headers
<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
\s*'.$colContents.'\s* # contents
</th>\s* # close header
)+</tr> # close row with headers
\s*(?:</thead>)? # close optional thead
}sxi';
        $this->tdSubstitute = '\s*'.$colContents.'\s* # contents
</td>\s*';
        $this->tableLookaheadBody = '{
\s*(?:<tbody\s*>)?\s* # open optional tbody
(?:<tr\s*>\s* # start row
%s # cols to be substituted
</tr>)+ # close row
\s*(?:</tbody>)? # close optional tbody
\s*</table> # close table
}sxi';
    }

    /**
     * handle header tags (<h1> - <h6>)
     *
     * Remembers the id attribute of the opening tag and appends it as
     * " {#id}" before the closing tag is handled by the parent.
     *
     * @param int $level 1-6
     * @return void
     */
    function handleHeader($level) {
        # NOTE: a static local is shared by all instances of this class
        static $id = null;
        if ($this->parser->isStartTag) {
            if (isset($this->parser->tagAttributes['id'])) {
                $id = $this->parser->tagAttributes['id'];
            }
        } else {
            if (!is_null($id)) {
                $this->out(' {#'.$id.'}');
                $id = null;
            }
        }
        parent::handleHeader($level);
    }

    /**
     * handle <abbr> tags
     *
     * The abbreviation text is written inline; the tag itself is remembered on
     * the 'abbr' stack (once per distinct text) for flushStacked_abbr().
     *
     * @param void
     * @return void
     */
    function handleTag_abbr() {
        if ($this->parser->isStartTag) {
            $this->stack();
            $this->buffer();
        } else {
            $tag = $this->unstack();
            $tag['text'] = $this->unbuffer();
            $add = true;
            foreach ($this->stack['abbr'] as $stacked) {
                if ($stacked['text'] == $tag['text']) {
                    /** TODO: differing abbr definitions, i.e. different titles for same text **/
                    $add = false;
                    break;
                }
            }
            $this->out($tag['text']);
            if ($add) {
                array_push($this->stack['abbr'], $tag);
            }
        }
    }

    /**
     * flush stacked abbr tags
     *
     * Writes a "*[text]: title" definition for every stacked abbreviation that
     * has not been written yet and marks it as written.
     *
     * @param void
     * @return void
     */
    function flushStacked_abbr() {
        $out = array();
        foreach ($this->stack['abbr'] as $k => $tag) {
            if (!isset($tag['unstacked'])) {
                array_push($out, ' *['.$tag['text'].']: '.$tag['title']);
                $tag['unstacked'] = true;
                $this->stack['abbr'][$k] = $tag;
            }
        }
        if (!empty($out)) {
            $this->out("\n\n".implode("\n", $out));
        }
    }

    /**
     * handle <table> tags
     *
     * On the opening tag, decides (when keepHTML is set) whether the upcoming
     * table matches the lookahead expressions and can be converted; otherwise
     * it is kept as HTML text. On the closing tag, the collected rows are
     * written in Markdown Extra table syntax.
     *
     * @param void
     * @return void
     */
    function handleTag_table() {
        if ($this->parser->isStartTag) {
            # check if upcoming table can be converted
            if ($this->keepHTML) {
                if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
                    # header seems good, now check body
                    # get align & number of cols
                    $headerLength = strlen($matches[0]);
                    preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
                    $regEx = '';
                    $i = 1;
                    $aligns = array();
                    foreach ($cols[2] as $align) {
                        $align = strtolower($align);
                        array_push($aligns, $align);
                        if (empty($align)) {
                            $align = 'left'; # default value
                        }
                        # every column adds one capture group (the quote), hence backreference \$i
                        $td = '\s+align=("|\')'.$align.'\\'.$i;
                        $i++;
                        if ($align == 'left') {
                            # look for empty align or left
                            $td = '(?:'.$td.')?';
                        }
                        $td = '<td'.$td.'\s*>';
                        $regEx .= $td.$this->tdSubstitute;
                    }
                    $regEx = sprintf($this->tableLookaheadBody, $regEx);
                    # flags must be an int: passing null is deprecated since PHP 8.1
                    if (preg_match($regEx, $this->parser->html, $bodyMatches, 0, $headerLength)) {
                        # this is a markdownable table tag!
                        $this->table = array(
                            'rows' => array(),
                            'col_widths' => array(),
                            'aligns' => $aligns,
                        );
                        $this->row = 0;
                    } else {
                        # non markdownable table
                        $this->handleTagToText();
                    }
                } else {
                    # non markdownable table
                    $this->handleTagToText();
                }
            } else {
                $this->table = array(
                    'rows' => array(),
                    'col_widths' => array(),
                    'aligns' => array(),
                );
                $this->row = 0;
            }
        } else {
            # finally build the table in Markdown Extra syntax
            $separator = array();
            # separator with correct align identifiers
            foreach ($this->table['aligns'] as $col => $align) {
                if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
                    break;
                }
                $left = ' ';
                $right = ' ';
                switch ($align) {
                    case 'left':
                        $left = ':';
                        break;
                    case 'center':
                        $right = ':';
                        $left = ':';
                        break;
                    case 'right':
                        $right = ':';
                        break;
                }
                array_push($separator, $left.str_repeat('-', $this->table['col_widths'][$col]).$right);
            }
            $separator = '|'.implode('|', $separator).'|';

            $rows = array();
            # add padding
            array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
            $header = array_shift($this->table['rows']);
            if (!is_array($header)) {
                # table without any cells: implode() must not be handed null
                $header = array();
            }
            array_push($rows, '| '.implode(' | ', $header).' |');
            array_push($rows, $separator);
            foreach ($this->table['rows'] as $row) {
                array_push($rows, '| '.implode(' | ', $row).' |');
            }
            $this->out(implode("\n".$this->indent, $rows));
            $this->table = array();
            $this->setLineBreaks(2);
        }
    }

    /**
     * properly pad content so it is aligned as wished
     * should be used with array_walk_recursive on $this->table['rows']
     *
     * @param string &$content cell text, padded in place
     * @param int $col column index of the cell
     * @return void
     */
    function alignTdContent(&$content, $col) {
        # columns without a recorded alignment are padded like 'left'
        $align = isset($this->table['aligns'][$col]) ? $this->table['aligns'][$col] : '';
        # never hand a negative count to str_repeat()
        $paddingNeeded = max(0, $this->table['col_widths'][$col] - $this->strlen($content));
        switch ($align) {
            case 'right':
                $content = str_repeat(' ', $paddingNeeded).$content;
                break;
            case 'center':
                $left = (int) floor($paddingNeeded / 2);
                $right = $paddingNeeded - $left;
                $content = str_repeat(' ', $left).$content.str_repeat(' ', $right);
                break;
            case 'left':
            default:
                $content .= str_repeat(' ', $paddingNeeded);
                break;
        }
    }

    /**
     * handle <tr> tags
     *
     * Resets the column counter on the opening tag and advances the row
     * counter on the closing tag.
     *
     * @param void
     * @return void
     */
    function handleTag_tr() {
        if ($this->parser->isStartTag) {
            $this->col = -1;
        } else {
            $this->row++;
        }
    }

    /**
     * handle <td> tags
     *
     * Buffers the cell content and, on the closing tag, stores it in the
     * current row while tracking the widest content of the column.
     *
     * @param void
     * @return void
     */
    function handleTag_td() {
        if ($this->parser->isStartTag) {
            $this->col++;
            if (!isset($this->table['col_widths'][$this->col])) {
                $this->table['col_widths'][$this->col] = 0;
            }
            $this->buffer();
        } else {
            $buffer = trim($this->unbuffer());
            $this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer));
            $this->table['rows'][$this->row][$this->col] = $buffer;
        }
    }

    /**
     * handle <th> tags
     *
     * Without keepHTML the column alignment is not known from a lookahead, so
     * it is recorded here from the align attribute; the cell itself is
     * handled like a <td>.
     *
     * @param void
     * @return void
     */
    function handleTag_th() {
        if (!$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$this->col+1])) {
            if (isset($this->parser->tagAttributes['align'])) {
                $this->table['aligns'][$this->col+1] = $this->parser->tagAttributes['align'];
            } else {
                $this->table['aligns'][$this->col+1] = '';
            }
        }
        $this->handleTag_td();
    }

    /**
     * handle <dl> tags
     *
     * @param void
     * @return void
     */
    function handleTag_dl() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(2);
        }
    }

    /**
     * handle <dt> tags
     *
     * @param void
     * @return void
     **/
    function handleTag_dt() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(1);
        }
    }

    /**
     * handle <dd> tags
     *
     * @param void
     * @return void
     */
    function handleTag_dd() {
        if ($this->parser->isStartTag) {
            if (substr(ltrim($this->parser->html), 0, 3) == '<p>') {
                # next comes a paragraph, so we'll need an extra line
                $this->out("\n".$this->indent);
            } elseif (substr($this->output, -2) == "\n\n") {
                $this->output = substr($this->output, 0, -1);
            }
            $this->out(': ');
            $this->indent(' ', false);
        } else {
            # lookahead for next dt
            if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') {
                $this->setLineBreaks(2);
            } else {
                $this->setLineBreaks(1);
            }
            $this->indent(' ');
        }
    }

    /**
     * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
     *
     * @param void
     * @return void
     */
    function handleTag_fnref() {
        $this->out('[^'.$this->parser->tagAttributes['target'].']');
    }

    /**
     * handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * @param void
     * @return void
     */
    function handleTag_fn() {
        if ($this->parser->isStartTag) {
            $this->out('[^'.$this->parser->tagAttributes['name'].']:');
            $this->setLineBreaks(1);
        } else {
            $this->setLineBreaks(2);
        }
        $this->indent(' ');
    }

    /**
     * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
     * and markdownify_extra::_makeFootnotes())
     *
     * @param void
     * @return void
     */
    function handleTag_footnotes() {
        if (!$this->parser->isStartTag) {
            $this->setLineBreaks(2);
        }
    }

    /**
     * parse a HTML string, clean up footnotes prior
     *
     * @param string $html input
     * @return string Markdown formatted output
     */
    function parseString($html) {
        /** TODO: custom markdown-extra options, e.g. titles & classes **/
        # <sup id="fnref:..."><a href="#fn..." rel="footnote">...</a></sup>
        # => <fnref target="..." />
        $html = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html);
        # <div class="footnotes">
        # <hr />
        # <ol>
        #
        # <li id="fn:...">...</li>
        # ...
        #
        # </ol>
        # </div>
        # =>
        # <footnotes>
        # <fn name="...">...</fn>
        # ...
        # </footnotes>
        $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array($this, '_makeFootnotes'), $html);
        return parent::parseString($html);
    }

    /**
     * replace HTML representation of footnotes with something more easily parsable
     *
     * @note this is a callback to be used in parseString()
     *
     * @param array $matches $matches[1] holds the <li> items of the footnote list
     * @return string
     */
    function _makeFootnotes($matches) {
        # <li id="fn:1">
        # ...
        # <a href="#fnref:block" rev="footnote">&#8617;</a></p>
        # </li>
        # => <fn name="1">...</fn>
        # remove footnote link
        $fns = preg_replace('@\s*(&#160;\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>&#8617;</a>\s*@s', '', $matches[1]);
        # remove empty paragraph
        $fns = preg_replace('@<p>\s*</p>@s', '', $fns);
        # <li id="fn:1">...</li> -> <fn name="1">...</fn>
        $fns = str_replace('<li id="fn:', '<fn name="', $fns);
        $fns = '<footnotes>'.$fns.'</footnotes>';
        return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns);
    }
}

View File

@ -1,618 +0,0 @@
<?php
/**
* parseHTML is a HTML parser which works with PHP 4 and above.
* It tries to handle invalid HTML to some degree.
*
* @version 1.0 beta
* @author Milian Wolff (mail@milianw.de, http://milianw.de)
* @license LGPL, see LICENSE_LGPL.txt and the summary below
* @copyright (C) 2007 Milian Wolff
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Small pull parser for (possibly invalid) HTML.
 *
 * Usage: assign the markup to $html, then call nextNode() repeatedly; after
 * each successful call the node* / tag* properties describe the current node
 * and $html holds the markup that is still to be parsed.
 */
class parseHTML {
    /**
     * tags which are always empty (<br /> etc.)
     *
     * @var array<string>
     */
    var $emptyTags = array(
        'br',
        'hr',
        'input',
        'img',
        'area',
        'link',
        'meta',
        'param',
    );
    /**
     * tags with preformatted text
     * whitespaces wont be touched in them
     *
     * @var array<string>
     */
    var $preformattedTags = array(
        'script',
        'style',
        'pre',
        'code',
    );
    /**
     * suppress HTML tags inside preformatted tags (see above)
     *
     * @var bool
     */
    var $noTagsInCode = false;
    /**
     * html to be parsed
     *
     * @var string
     */
    var $html = '';
    /**
     * node type:
     *
     * - tag (see isStartTag)
     * - text (includes cdata)
     * - comment
     * - doctype
     * - pi (processing instruction)
     *
     * @var string
     */
    var $nodeType = '';
    /**
     * current node content, i.e. either a
     * simple string (text node), or something like
     * <tag attrib="value"...>
     *
     * @var string
     */
    var $node = '';
    /**
     * whether current node is an opening tag (<a>) or not (</a>)
     * set to NULL if current node is not a tag
     * NOTE: empty tags (<br />) set this to true as well!
     *
     * @var bool | null
     */
    var $isStartTag = null;
    /**
     * whether current node is an empty tag (<br />) or not (<a></a>)
     *
     * @var bool | null
     */
    var $isEmptyTag = null;
    /**
     * tag name
     *
     * @var string | null
     */
    var $tagName = '';
    /**
     * attributes of current tag
     *
     * @var array (attribName=>value) | null
     */
    var $tagAttributes = null;
    /**
     * whether the current tag is a block element
     *
     * @var bool | null
     */
    var $isBlockElement = null;
    /**
     * keep whitespace
     *
     * @var int
     */
    var $keepWhitespace = 0;
    /**
     * list of open tags
     * count this to get current depth
     *
     * @var array
     */
    var $openTags = array();
    /**
     * whether a whitespace-only text node at the current position is dropped
     * (kept per parser instance so that parsers do not influence each other)
     *
     * @var bool
     */
    var $skipWhitespace = true;
    /**
     * list of block elements
     *
     * NOTE: 'del' and 'ins' are listed twice; PHP keeps the later value, so
     * both are treated as inline elements.
     *
     * @var array
     * TODO: what shall we do with <del> and <ins> ?!
     */
    var $blockElements = array (
        # tag name => <bool> is block
        # block elements
        'address' => true,
        'blockquote' => true,
        'center' => true,
        'del' => true,
        'dir' => true,
        'div' => true,
        'dl' => true,
        'fieldset' => true,
        'form' => true,
        'h1' => true,
        'h2' => true,
        'h3' => true,
        'h4' => true,
        'h5' => true,
        'h6' => true,
        'hr' => true,
        'ins' => true,
        'isindex' => true,
        'menu' => true,
        'noframes' => true,
        'noscript' => true,
        'ol' => true,
        'p' => true,
        'pre' => true,
        'table' => true,
        'ul' => true,
        # set table elements and list items to block as well
        'thead' => true,
        'tbody' => true,
        'tfoot' => true,
        'td' => true,
        'tr' => true,
        'th' => true,
        'li' => true,
        'dd' => true,
        'dt' => true,
        # header items and html / body as well
        'html' => true,
        'body' => true,
        'head' => true,
        'meta' => true,
        'link' => true,
        'style' => true,
        'title' => true,
        # unfancy media tags, when indented should be rendered as block
        'map' => true,
        'object' => true,
        'param' => true,
        'embed' => true,
        'area' => true,
        # inline elements
        'a' => false,
        'abbr' => false,
        'acronym' => false,
        'applet' => false,
        'b' => false,
        'basefont' => false,
        'bdo' => false,
        'big' => false,
        'br' => false,
        'button' => false,
        'cite' => false,
        'code' => false,
        'del' => false,
        'dfn' => false,
        'em' => false,
        'font' => false,
        'i' => false,
        'img' => false,
        'ins' => false,
        'input' => false,
        'iframe' => false,
        'kbd' => false,
        'label' => false,
        'q' => false,
        'samp' => false,
        'script' => false,
        'select' => false,
        'small' => false,
        'span' => false,
        'strong' => false,
        'sub' => false,
        'sup' => false,
        'textarea' => false,
        'tt' => false,
        'var' => false,
    );

    /**
     * get next node, set $this->html prior!
     *
     * @param void
     * @return bool false once the input is used up, true when a node was read
     */
    function nextNode() {
        # compare against '' instead of using empty(): a remaining "0" is valid text
        if ((string) $this->html === '') {
            # we are done with parsing the html string
            return false;
        }
        if ($this->isStartTag && !$this->isEmptyTag) {
            # the previous node opened an element
            array_push($this->openTags, $this->tagName);
            if (in_array($this->tagName, $this->preformattedTags)) {
                # dont truncate whitespaces for <code> or <pre> contents
                $this->keepWhitespace++;
            }
        }

        if ($this->html[0] == '<') {
            $token = substr($this->html, 0, 9);
            if (substr($token, 0, 2) == '<?') {
                # xml prolog or other pi's
                /** TODO **/
                $this->setNode('pi', $this->offsetAfter('>'));
                return true;
            }
            if (substr($token, 0, 4) == '<!--') {
                # comment
                $pos = strpos($this->html, '-->');
                if ($pos === false) {
                    # could not find a closing -->, use next gt instead
                    # this is firefox' behaviour
                    $pos = $this->offsetAfter('>');
                } else {
                    $pos += 3;
                }
                $this->setNode('comment', $pos);
                $this->skipWhitespace = true;
                return true;
            }
            if ($token == '<!DOCTYPE') {
                # doctype
                $this->setNode('doctype', $this->offsetAfter('>'));
                $this->skipWhitespace = true;
                return true;
            }
            if ($token == '<![CDATA[') {
                # cdata, use text node
                # remove leading <![CDATA[
                $this->html = (string) substr($this->html, 9);
                $end = strpos($this->html, ']]>');
                if ($end === false) {
                    # unterminated section: everything that is left is its text
                    $this->setNode('text', strlen($this->html));
                } else {
                    $this->setNode('text', $end + 3);
                    # remove trailing ]]>
                    $this->node = substr($this->node, 0, -3);
                }
                $this->handleWhitespaces();
                $this->skipWhitespace = true;
                return true;
            }
            if ($this->parseTag()) {
                # seems to be a tag
                # whitespace directly after a block element is not significant
                $this->skipWhitespace = $this->isBlockElement ? true : false;
                return true;
            }
            # not a valid tag: parseTag() replaced the leading "<" by "&lt;",
            # so what follows is handled as text
        }
        if ($this->keepWhitespace) {
            $this->skipWhitespace = false;
        }
        # when we get here it seems to be a text node
        $pos = strpos($this->html, '<');
        if ($pos === false) {
            $pos = strlen($this->html);
        }
        $this->setNode('text', $pos);
        $this->handleWhitespaces();
        if ($this->skipWhitespace && $this->node === ' ') {
            return $this->nextNode();
        }
        $this->skipWhitespace = false;
        return true;
    }

    /**
     * offset just behind the first occurrence of $needle in $this->html,
     * or the length of $this->html when $needle does not occur
     *
     * @param string $needle
     * @return int
     */
    function offsetAfter($needle) {
        $pos = strpos($this->html, $needle);
        if ($pos === false) {
            return strlen($this->html);
        }
        return $pos + strlen($needle);
    }

    /**
     * parse tag, set tag name and attributes, see if it's a closing tag and so forth...
     *
     * Expects $this->html to start with "<". On failure the leading "<" is
     * replaced by "&lt;" (see invalidTag()) and false is returned.
     *
     * @param void
     * @return bool
     */
    function parseTag() {
        static $a_ord, $z_ord, $special_ords;
        if (!isset($a_ord)) {
            $a_ord = ord('a');
            $z_ord = ord('z');
            $special_ords = array(
                ord(':'), // for xml:lang
                ord('-'), // for http-equiv
            );
        }

        $tagName = '';

        $pos = 1;
        # a lone "<" at the end of the input counts as start tag and is rejected below
        $isStartTag = !(isset($this->html[$pos]) && $this->html[$pos] == '/');
        if (!$isStartTag) {
            $pos++;
        }
        # get tagName
        while (isset($this->html[$pos])) {
            $pos_ord = ord(strtolower($this->html[$pos]));
            if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || ($tagName !== '' && is_numeric($this->html[$pos]))) {
                $tagName .= $this->html[$pos];
                $pos++;
            } else {
                # step back onto the last character of the name
                $pos--;
                break;
            }
        }

        $tagName = strtolower($tagName);
        if ($tagName === '' || !isset($this->blockElements[$tagName])) {
            # something went wrong => invalid tag
            $this->invalidTag();
            return false;
        }
        if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
            # we suppress all HTML tags inside code tags
            $this->invalidTag();
            return false;
        }

        # get tag attributes
        /** TODO: in html 4 attributes do not need to be quoted **/
        $isEmptyTag = false;
        $attributes = array();
        $currAttrib = '';
        while (isset($this->html[$pos + 1])) {
            $pos++;
            $char = $this->html[$pos];
            # current plus following character; the input may end right here
            $pair = $char.(isset($this->html[$pos + 1]) ? $this->html[$pos + 1] : '');
            # close tag
            if ($char === '>' || $pair === '/>') {
                if ($char === '/') {
                    $isEmptyTag = true;
                    $pos++;
                }
                break;
            }

            $pos_ord = ord(strtolower($char));
            if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || in_array($pos_ord, $special_ords)) {
                # attribute name
                $currAttrib .= $char;
            } elseif (in_array($char, array(' ', "\t", "\n", "\r"), true)) {
                # drop whitespace (including the CR of CRLF line ends)
            } elseif (in_array($pair, array('="', "='"), true)) {
                # get attribute value
                $pos++;
                $await = $this->html[$pos]; # single or double quote
                $pos++;
                $value = '';
                while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
                    $value .= $this->html[$pos];
                    $pos++;
                }
                $attributes[$currAttrib] = $value;
                $currAttrib = '';
            } else {
                $this->invalidTag();
                return false;
            }
        }
        if (!isset($this->html[$pos]) || $this->html[$pos] != '>') {
            # input ended before the tag was closed
            $this->invalidTag();
            return false;
        }

        if ($currAttrib !== '') {
            # html 4 allows something like <option selected> instead of <option selected="selected">
            $attributes[$currAttrib] = $currAttrib;
        }
        if (!$isStartTag) {
            if (!empty($attributes) || $tagName != end($this->openTags)) {
                # end tags must not contain any attributes
                # or maybe we did not expect a different tag to be closed
                $this->invalidTag();
                return false;
            }
            array_pop($this->openTags);
            if (in_array($tagName, $this->preformattedTags)) {
                $this->keepWhitespace--;
            }
        }
        $pos++;
        $this->node = substr($this->html, 0, $pos);
        $this->html = substr($this->html, $pos);
        $this->tagName = $tagName;
        $this->tagAttributes = $attributes;
        $this->isStartTag = $isStartTag;
        $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags);
        if ($this->isEmptyTag) {
            # might be not well formed
            $this->node = preg_replace('# */? *>$#', ' />', $this->node);
        }
        $this->nodeType = 'tag';
        $this->isBlockElement = $this->blockElements[$tagName];
        return true;
    }

    /**
     * handle invalid tags
     *
     * Replaces the first character of $this->html (the "<") by "&lt;".
     *
     * @param void
     * @return void
     */
    function invalidTag() {
        $this->html = substr_replace($this->html, '&lt;', 0, 1);
    }

    /**
     * update all vars and make $this->html shorter
     *
     * @param string $type see description for $this->nodeType
     * @param int $pos to which position shall we cut?
     * @return void
     */
    function setNode($type, $pos) {
        if ($this->nodeType == 'tag') {
            # set tag specific vars to null
            # $type == tag should not be called here
            # see this::parseTag() for more
            $this->tagName = null;
            $this->tagAttributes = null;
            $this->isStartTag = null;
            $this->isEmptyTag = null;
            $this->isBlockElement = null;
        }
        $this->nodeType = $type;
        $this->node = substr($this->html, 0, $pos);
        $this->html = substr($this->html, $pos);
    }

    /**
     * check if $this->html begins with $str
     *
     * @param string $str
     * @return bool
     */
    function match($str) {
        return substr($this->html, 0, strlen($str)) == $str;
    }

    /**
     * truncate whitespaces
     *
     * @param void
     * @return void
     */
    function handleWhitespaces() {
        if ($this->keepWhitespace) {
            # <pre> or <code> before...
            return;
        }
        # truncate multiple whitespaces to a single one
        $this->node = preg_replace('#\s+#s', ' ', $this->node);
    }

    /**
     * normalize self::node
     *
     * Rebuilds the markup of the current tag from tagName / tagAttributes:
     * double quoted attribute values and " /" before the ">" of empty tags.
     *
     * @param void
     * @return void
     */
    function normalizeNode() {
        $this->node = '<';
        if (!$this->isStartTag) {
            $this->node .= '/'.$this->tagName.'>';
            return;
        }
        $this->node .= $this->tagName;
        foreach ($this->tagAttributes as $name => $value) {
            $this->node .= ' '.$name.'="'.str_replace('"', '&quot;', $value).'"';
        }
        if ($this->isEmptyTag) {
            $this->node .= ' /';
        }
        $this->node .= '>';
    }
}
/**
 * Pretty-print an HTML string: block level tags are put on lines of their own
 * and indented by one $indent per open block element.
 *
 * @param string $html markup to re-indent
 * @param string $indent optional, indentation added per nesting level
 * @param bool $noTagsInCode optional, passed on to parseHTML::$noTagsInCode
 * @return string
 */
function indentHTML($html, $indent = " ", $noTagsInCode = false) {
    $parser = new parseHTML;
    $parser->noTagsInCode = $noTagsInCode;
    $parser->html = $html;

    $out = '';
    $atLineStart = true; # previous node ended its line (block elem, <br />, comment, ...)
    $levels = array(); # one $indent entry per open block element

    while ($parser->nextNode()) {
        $isTag = ($parser->nodeType == 'tag');
        if ($isTag) {
            $parser->normalizeNode();
        }

        if (!($isTag && $parser->isBlockElement)) {
            # inline tag, text, comment, pi or doctype
            if ($isTag && $parser->tagName == 'br') {
                $out .= $parser->node."\n";
                $atLineStart = true;
                continue;
            }
            if ($atLineStart && !$parser->keepWhitespace) {
                $out .= implode('', $levels);
                $parser->node = ltrim($parser->node);
            }
            $out .= $parser->node;
            if (in_array($parser->nodeType, array('comment', 'pi', 'doctype'))) {
                $out .= "\n";
            } else {
                $atLineStart = false;
            }
            continue;
        }

        # block level tag
        $isPreOrCode = ($parser->tagName == 'code' || $parser->tagName == 'pre');
        if (!$parser->keepWhitespace && !$atLineStart && !$isPreOrCode) {
            # finish the line of inline content that precedes this tag
            $out = rtrim($out)."\n";
        }
        if ($parser->isStartTag) {
            $out .= implode('', $levels);
            if (!$parser->isEmptyTag) {
                $levels[] = $indent;
            }
        } else {
            array_pop($levels);
            if (!$isPreOrCode) {
                $out .= implode('', $levels);
            }
        }
        $out .= $parser->node;
        if (!$parser->keepWhitespace && (!$isPreOrCode || !$parser->isStartTag)) {
            $out .= "\n";
        }
        $atLineStart = true;
    }
    return $out;
}
/*
# testcase / example
error_reporting(E_ALL);
$html = '<p>Simple block on one line:</p>
<div>foo</div>
<p>And nested without indentation:</p>
<div>
<div>
<div>
foo
</div>
<div style=">"/>
</div>
<div>bar</div>
</div>
<p>And with attributes:</p>
<div>
<div id="foo">
</div>
</div>
<p>This was broken in 1.0.2b7:</p>
<div class="inlinepage">
<div class="toggleableend">
foo
</div>
</div>';
#$html = '<a href="asdfasdf" title=\'asdf\' foo="bar">asdf</a>';
echo indentHTML($html);
die();
*/

View File

@ -7,6 +7,9 @@ $baseDir = dirname($vendorDir);
return array(
'Hubzilla\\Import\\Import' => $baseDir . '/include/Import/Importer.php',
'Markdownify\\Converter' => $vendorDir . '/pixel418/markdownify/src/Converter.php',
'Markdownify\\ConverterExtra' => $vendorDir . '/pixel418/markdownify/src/ConverterExtra.php',
'Markdownify\\Parser' => $vendorDir . '/pixel418/markdownify/src/Parser.php',
'Michelf\\Markdown' => $vendorDir . '/michelf/php-markdown/Michelf/Markdown.php',
'Michelf\\MarkdownExtra' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
'Michelf\\MarkdownInterface' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
@ -18,6 +21,8 @@ return array(
'Psr\\Log\\LoggerInterface' => $vendorDir . '/psr/log/Psr/Log/LoggerInterface.php',
'Psr\\Log\\LoggerTrait' => $vendorDir . '/psr/log/Psr/Log/LoggerTrait.php',
'Psr\\Log\\NullLogger' => $vendorDir . '/psr/log/Psr/Log/NullLogger.php',
'Psr\\Log\\Test\\DummyTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
'Psr\\Log\\Test\\LoggerInterfaceTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
'Sabre\\CalDAV\\Backend\\AbstractBackend' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
'Sabre\\CalDAV\\Backend\\BackendInterface' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
'Sabre\\CalDAV\\Backend\\NotificationSupport' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
@ -268,7 +273,6 @@ return array(
'Sabre\\HTTP\\URLUtil' => $vendorDir . '/sabre/http/lib/URLUtil.php',
'Sabre\\HTTP\\Util' => $vendorDir . '/sabre/http/lib/Util.php',
'Sabre\\HTTP\\Version' => $vendorDir . '/sabre/http/lib/Version.php',
'Sabre\\Uri\\InvalidUriException' => $vendorDir . '/sabre/uri/lib/InvalidUriException.php',
'Sabre\\Uri\\Version' => $vendorDir . '/sabre/uri/lib/Version.php',
'Sabre\\VObject\\BirthdayCalendarGenerator' => $vendorDir . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
'Sabre\\VObject\\Cli' => $vendorDir . '/sabre/vobject/lib/Cli.php',
@ -357,6 +361,9 @@ return array(
'Sabre\\Xml\\Writer' => $vendorDir . '/sabre/xml/lib/Writer.php',
'Sabre\\Xml\\XmlDeserializable' => $vendorDir . '/sabre/xml/lib/XmlDeserializable.php',
'Sabre\\Xml\\XmlSerializable' => $vendorDir . '/sabre/xml/lib/XmlSerializable.php',
'Test\\Markdownify\\ConverterExtraTest' => $vendorDir . '/pixel418/markdownify/test/ConverterExtraTest.php',
'Test\\Markdownify\\ConverterTest' => $vendorDir . '/pixel418/markdownify/test/ConverterTest.php',
'Test\\Markdownify\\ConverterTestCase' => $vendorDir . '/pixel418/markdownify/test/ConverterTestCase.php',
'Zotlabs\\Access\\AccessList' => $baseDir . '/Zotlabs/Access/AccessList.php',
'Zotlabs\\Access\\PermissionLimits' => $baseDir . '/Zotlabs/Access/PermissionLimits.php',
'Zotlabs\\Access\\PermissionRoles' => $baseDir . '/Zotlabs/Access/PermissionRoles.php',

View File

@ -7,6 +7,7 @@ $baseDir = dirname($vendorDir);
return array(
'Zotlabs\\' => array($baseDir . '/Zotlabs'),
'Test\\Markdownify\\' => array($vendorDir . '/pixel418/markdownify/test'),
'Sabre\\Xml\\' => array($vendorDir . '/sabre/xml/lib'),
'Sabre\\VObject\\' => array($vendorDir . '/sabre/vobject/lib'),
'Sabre\\Uri\\' => array($vendorDir . '/sabre/uri/lib'),
@ -17,5 +18,6 @@ return array(
'Sabre\\CardDAV\\' => array($vendorDir . '/sabre/dav/lib/CardDAV'),
'Sabre\\CalDAV\\' => array($vendorDir . '/sabre/dav/lib/CalDAV'),
'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'),
'Markdownify\\' => array($vendorDir . '/pixel418/markdownify/src'),
'Hubzilla\\' => array($baseDir . '/include'),
);

View File

@ -21,6 +21,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
array (
'Zotlabs\\' => 8,
),
'T' =>
array (
'Test\\Markdownify\\' => 17,
),
'S' =>
array (
'Sabre\\Xml\\' => 10,
@ -37,6 +41,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
array (
'Psr\\Log\\' => 8,
),
'M' =>
array (
'Markdownify\\' => 12,
),
'H' =>
array (
'Hubzilla\\' => 9,
@ -48,6 +56,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
array (
0 => __DIR__ . '/../..' . '/Zotlabs',
),
'Test\\Markdownify\\' =>
array (
0 => __DIR__ . '/..' . '/pixel418/markdownify/test',
),
'Sabre\\Xml\\' =>
array (
0 => __DIR__ . '/..' . '/sabre/xml/lib',
@ -88,6 +100,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
array (
0 => __DIR__ . '/..' . '/psr/log/Psr/Log',
),
'Markdownify\\' =>
array (
0 => __DIR__ . '/..' . '/pixel418/markdownify/src',
),
'Hubzilla\\' =>
array (
0 => __DIR__ . '/../..' . '/include',
@ -106,6 +122,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
public static $classMap = array (
'Hubzilla\\Import\\Import' => __DIR__ . '/../..' . '/include/Import/Importer.php',
'Markdownify\\Converter' => __DIR__ . '/..' . '/pixel418/markdownify/src/Converter.php',
'Markdownify\\ConverterExtra' => __DIR__ . '/..' . '/pixel418/markdownify/src/ConverterExtra.php',
'Markdownify\\Parser' => __DIR__ . '/..' . '/pixel418/markdownify/src/Parser.php',
'Michelf\\Markdown' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/Markdown.php',
'Michelf\\MarkdownExtra' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
'Michelf\\MarkdownInterface' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
@ -117,6 +136,8 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
'Psr\\Log\\LoggerInterface' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerInterface.php',
'Psr\\Log\\LoggerTrait' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerTrait.php',
'Psr\\Log\\NullLogger' => __DIR__ . '/..' . '/psr/log/Psr/Log/NullLogger.php',
'Psr\\Log\\Test\\DummyTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
'Psr\\Log\\Test\\LoggerInterfaceTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
'Sabre\\CalDAV\\Backend\\AbstractBackend' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
'Sabre\\CalDAV\\Backend\\BackendInterface' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
'Sabre\\CalDAV\\Backend\\NotificationSupport' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
@ -367,7 +388,6 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
'Sabre\\HTTP\\URLUtil' => __DIR__ . '/..' . '/sabre/http/lib/URLUtil.php',
'Sabre\\HTTP\\Util' => __DIR__ . '/..' . '/sabre/http/lib/Util.php',
'Sabre\\HTTP\\Version' => __DIR__ . '/..' . '/sabre/http/lib/Version.php',
'Sabre\\Uri\\InvalidUriException' => __DIR__ . '/..' . '/sabre/uri/lib/InvalidUriException.php',
'Sabre\\Uri\\Version' => __DIR__ . '/..' . '/sabre/uri/lib/Version.php',
'Sabre\\VObject\\BirthdayCalendarGenerator' => __DIR__ . '/..' . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
'Sabre\\VObject\\Cli' => __DIR__ . '/..' . '/sabre/vobject/lib/Cli.php',
@ -456,6 +476,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
'Sabre\\Xml\\Writer' => __DIR__ . '/..' . '/sabre/xml/lib/Writer.php',
'Sabre\\Xml\\XmlDeserializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlDeserializable.php',
'Sabre\\Xml\\XmlSerializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlSerializable.php',
'Test\\Markdownify\\ConverterExtraTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterExtraTest.php',
'Test\\Markdownify\\ConverterTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTest.php',
'Test\\Markdownify\\ConverterTestCase' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTestCase.php',
'Zotlabs\\Access\\AccessList' => __DIR__ . '/../..' . '/Zotlabs/Access/AccessList.php',
'Zotlabs\\Access\\PermissionLimits' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionLimits.php',
'Zotlabs\\Access\\PermissionRoles' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionRoles.php',

View File

@ -518,5 +518,63 @@
"keywords": [
"markdown"
]
},
{
"name": "pixel418/markdownify",
"version": "v2.2.1",
"version_normalized": "2.2.1.0",
"source": {
"type": "git",
"url": "https://github.com/Elephant418/Markdownify.git",
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8"
},
"time": "2016-09-21T13:01:43+00:00",
"type": "lib",
"installation-source": "dist",
"autoload": {
"psr-4": {
"Markdownify\\": "src",
"Test\\Markdownify\\": "test"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"LGPL"
],
"authors": [
{
"name": "Peter Kruithof",
"email": "pkruithof@gmail.com",
"homepage": "http://pkruithof.tumblr.com/"
},
{
"name": "Milian Wolff",
"email": "mail@milianw.de",
"homepage": "http://milianw.de"
},
{
"name": "Thomas Zilliox",
"email": "hello@tzi.fr",
"homepage": "http://tzi.fr"
}
],
"description": "The HTML to Markdown converter for PHP ",
"homepage": "https://github.com/elephant418/Markdownify",
"keywords": [
"markdown",
"markdownify"
]
}
]

View File

@ -0,0 +1,76 @@
CHANGELOG
==============
21/09/2016 v2.2.1
--------------
* Fix: Moving trailing whitespace from inline elements outside of the element
* Feature: Use PSR-4
* Feature: PHP 7.0 support in continuous integration
* Doc: Update of the README
07/09/2016 v2.2.0
--------------
* Fix: Reset state between each parsing
19/02/2016 v2.1.11
--------------
* Fix: Empty table cell conversion
10/02/2016 v2.1.10
--------------
* Fix: Handle nested table.
01/04/2015 v2.1.9
--------------
* Fix: Handle HTML breaks & spaces in a less destructive way.
26/03/2015 v2.1.8
--------------
* Fix: Use alternative italic character
* Fix: Handle HTML breaks inside another tag
* Fix: Handle HTML spaces around tags
07/11/2014 v2.1.7
--------------
* Change composer name to "elephant418/markdownify"
14/07/2014 v2.1.6
--------------
* Fix: Simulate a paragraph for inline text preceding block element
* Fix: Nested lists
* Fix: setKeepHTML method
* Feature: PHP 5.5 & 5.6 support in continuous integration
16/03/2014 v2.1.5
--------------
Add display settings
* Test: Add tests for footnotes after every paragraph or not
* Feature: Allow to display link reference in paragraph, without footnotes
27/02/2014 v2.1.4
--------------
Improve how ConverterExtra handles id & class attributes:
* Feature: Allow id & class attributes on links
* Feature: Allow class attributes on headings

63
vendor/pixel418/markdownify/README.md vendored Normal file
View File

@ -0,0 +1,63 @@
# Markdownify
[![Build Status](https://travis-ci.org/Elephant418/Markdownify.png?branch=master)](https://travis-ci.org/Elephant418/Markdownify?branch=master)
[![Total Downloads](https://poser.pugx.org/pixel418/markdownify/downloads)](https://packagist.org/packages/pixel418/markdownify)
[![License LGPL](https://poser.pugx.org/pixel418/markdownify/license)](https://opensource.org/licenses/lgpl-2.1.php)
The HTML to Markdown converter for PHP
[Code example](#code-example) | [How to Install](#how-to-install) | [How to Contribute](#how-to-contribute) | [Author & Community](#author--community)
Code example
--------
### Markdown
```php
$converter = new Markdownify\Converter;
$converter->parseString('<h1>Heading</h1>');
// Returns: # Heading
```
### Markdown Extra [as defined by @michelf](http://michelf.ca/projects/php-markdown/extra/)
```php
$converter = new Markdownify\ConverterExtra;
$converter->parseString('<h1 id="md">Heading</h1>');
// Returns: # Heading {#md}
```
How to Install
--------
This library package requires `PHP 5.3` or later.<br>
Install [Composer](http://getcomposer.org/doc/01-basic-usage.md#installation) and run the following command to get the latest version:
```sh
composer require pixel418/markdownify
```
How to Contribute
--------
1. Fork the Markdownify repository
2. Create a new branch for each feature or improvement
3. Send a pull request from each feature branch to the **v2.x** branch
If you don't know much about pull requests, you can read [the GitHub article](https://help.github.com/articles/using-pull-requests)
Author & Community
--------
Markdownify is under [LGPL License](http://opensource.org/licenses/LGPL-2.1)<br>
It was created by [Milian Wolff](http://milianw.de)<br>
It was converted to a Symfony Bundle by [Peter Kruithof](https://github.com/pkruithof)<br>
It is maintained by [Thomas ZILLIOX](http://tzi.fr)

View File

@ -0,0 +1,38 @@
{
"name": "pixel418/markdownify",
"type": "lib",
"description": "The HTML to Markdown converter for PHP ",
"keywords": ["markdown", "markdownify"],
"license": "LGPL",
"homepage": "https://github.com/elephant418/Markdownify",
"authors": [
{
"name": "Milian Wolff",
"email": "mail@milianw.de",
"homepage": "http://milianw.de"
},
{
"name": "Peter Kruithof",
"email": "pkruithof@gmail.com",
"homepage": "http://pkruithof.tumblr.com/"
},
{
"name": "Thomas Zilliox",
"email": "hello@tzi.fr",
"homepage": "http://tzi.fr"
}
],
"require": {
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "^4.8"
},
"autoload": {
"psr-4": {
"Markdownify\\": "src",
"Test\\Markdownify\\": "test"
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,573 @@
<?php
/* This file is part of the Markdownify project, which is under LGPL license */
namespace Markdownify;
class ConverterExtra extends Converter
{
/**
* table data, including rows with content and the maximum width of each col
*
* @var array
*/
protected $table = array();
/**
* current col
*
* @var int
*/
protected $col = -1;
/**
* current row
*
* @var int
*/
protected $row = 0;
/**
 * constructor, see Markdownify::Markdownify() for more information
 *
 * On top of the parent setup this registers the Markdown Extra tags and
 * attributes (heading/link id & class, tables, definition lists,
 * footnotes, abbr), adds the custom footnote tags to the parser's block
 * element map and builds the lookahead regular expressions used by
 * handleTag_table().
 */
public function __construct($linksAfterEachParagraph = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML)
{
    parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML);

    // new markdownable tags & attributes
    // header ids: # foo {bar}
    for ($level = 1; $level <= 6; $level++) {
        $heading = 'h' . $level;
        $this->isMarkdownable[$heading]['id'] = 'optional';
        $this->isMarkdownable[$heading]['class'] = 'optional';
    }

    // tables
    $this->isMarkdownable['table'] = array();
    $this->isMarkdownable['th'] = array('align' => 'optional');
    $this->isMarkdownable['td'] = array('align' => 'optional');
    $this->isMarkdownable['tr'] = array();
    array_push($this->ignore, 'thead', 'tbody', 'tfoot');

    // definition lists
    $this->isMarkdownable['dl'] = array();
    $this->isMarkdownable['dd'] = array();
    $this->isMarkdownable['dt'] = array();

    // link class
    $this->isMarkdownable['a']['id'] = 'optional';
    $this->isMarkdownable['a']['class'] = 'optional';

    // footnotes
    $this->isMarkdownable['fnref'] = array('target' => 'required');
    $this->isMarkdownable['footnotes'] = array();
    $this->isMarkdownable['fn'] = array('name' => 'required');
    $this->parser->blockElements['fnref'] = false;
    $this->parser->blockElements['fn'] = true;
    $this->parser->blockElements['footnotes'] = true;

    // abbr
    $this->isMarkdownable['abbr'] = array('title' => 'required');

    // build RegEx lookahead to decide whether a table can be parsed or not
    // cell contents may only contain text and inline tags
    $inlineTags = array_keys($this->parser->blockElements, false);
    $colContents = '(?:[^<]|<(?:' . implode('|', $inlineTags) . '|[^a-z]))*';
    $this->tableLookaheadHeader = '{
^\s*(?:<thead\s*>)?\s* # open optional thead
<tr\s*>\s*(?: # start required row with headers
<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
\s*' . $colContents . '\s* # contents
</th>\s* # close header
)+</tr> # close row with headers
\s*(?:</thead>)? # close optional thead
}sxi';
    $this->tdSubstitute = '\s*' . $colContents . '\s* # contents
</td>\s*';
    $this->tableLookaheadBody = '{
\s*(?:<tbody\s*>)?\s* # open optional tbody
(?:<tr\s*>\s* # start row
%s # cols to be substituted
</tr>)+ # close row
\s*(?:</tbody>)? # close optional tbody
\s*</table> # close table
}sxi';
}
/**
 * handle header tags (<h1> - <h6>)
 *
 * On the start tag the id/class selector of the heading is remembered on
 * the stack; on the end tag it is written as " {selector}" if non-empty.
 * The actual heading markup is produced by the parent implementation.
 *
 * @param int $level 1-6
 * @return void
 */
protected function handleHeader($level)
{
    if (!$this->parser->isStartTag) {
        $opened = $this->unstack();
        if (!empty($opened['cssSelector'])) {
            // {#id.class}
            $this->out(' {' . $opened['cssSelector'] . '}');
        }
    } else {
        $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector();
        $this->stack();
    }
    parent::handleHeader($level);
}
/**
 * handle <a> tags parsing
 *
 * Runs the parent handling first, then stores the id/class selector of
 * the current tag in its attributes under the key 'cssSelector'.
 *
 * @param void
 * @return void
 */
protected function handleTag_a_parser()
{
    parent::handleTag_a_parser();
    $selector = $this->getCurrentCssSelector();
    $this->parser->tagAttributes['cssSelector'] = $selector;
}
/**
 * handle <a> tags conversion
 *
 * Appends "{selector}" to the link produced by the parent implementation
 * when the tag carries a non-empty 'cssSelector'.
 *
 * @param array $tag
 * @param string $buffer
 * @return string The markdownified link
 */
protected function handleTag_a_converter($tag, $buffer)
{
    $link = parent::handleTag_a_converter($tag, $buffer);
    if (empty($tag['cssSelector'])) {
        return $link;
    }
    // [This link][id]{#id.class}
    return $link . '{' . $tag['cssSelector'] . '}';
}
/**
 * handle <abbr> tags
 *
 * The start tag is stacked and its content buffered. On the end tag the
 * buffered text is written out and the tag is kept in $this->stack['abbr']
 * unless an entry with the same text is already there.
 *
 * @param void
 * @return void
 */
protected function handleTag_abbr()
{
    if ($this->parser->isStartTag) {
        $this->stack();
        $this->buffer();
        return;
    }

    $tag = $this->unstack();
    $tag['text'] = $this->unbuffer();

    $alreadyKnown = false;
    foreach ($this->stack['abbr'] as $stacked) {
        if ($stacked['text'] == $tag['text']) {
            /** TODO: differing abbr definitions, i.e. different titles for same text **/
            $alreadyKnown = true;
            break;
        }
    }

    $this->out($tag['text']);
    if (!$alreadyKnown) {
        array_push($this->stack['abbr'], $tag);
    }
}
/**
 * flush stacked abbr tags
 *
 * Writes one "*[text]: title" line for every stacked abbr that has not
 * been written yet and marks those entries as 'unstacked'.
 *
 * @param void
 * @return void
 */
protected function flushStacked_abbr()
{
    $definitions = array();
    foreach ($this->stack['abbr'] as $index => $abbr) {
        if (isset($abbr['unstacked'])) {
            // already written by an earlier flush
            continue;
        }
        $definitions[] = ' *[' . $abbr['text'] . ']: ' . $abbr['title'];
        $this->stack['abbr'][$index]['unstacked'] = true;
    }
    if ($definitions) {
        $this->out("\n\n" . implode("\n", $definitions));
    }
}
/**
 * handle <table> tags
 *
 * Start tag: resets the table state. With keepHTML enabled the upcoming
 * markup is first checked against the lookahead expressions built in the
 * constructor; a table that does not match is passed to handleTagToText()
 * instead of being converted.
 *
 * End tag: writes the collected rows in Markdown Extra table syntax
 * (header row, separator row with alignment colons, body rows). A table
 * without any collected row produces no output.
 *
 * @param void
 * @return void
 */
protected function handleTag_table()
{
    if ($this->parser->isStartTag) {
        $aligns = array();
        // check if upcoming table can be converted
        if ($this->keepHTML) {
            $convertible = false;
            if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
                // header seems good, now check body
                $headerLength = strlen($matches[0]);
                // get align & number of cols
                preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
                $regEx = '';
                $i = 1;
                foreach ($cols[2] as $align) {
                    $align = strtolower($align);
                    array_push($aligns, $align);
                    if (empty($align)) {
                        $align = 'left'; // default value
                    }
                    // every column adds one capture group (the quote), hence \1, \2, ...
                    $td = '\s+align=("|\')' . $align . '\\' . $i;
                    $i++;
                    if ($align == 'left') {
                        // look for empty align or left
                        $td = '(?:' . $td . ')?';
                    }
                    $td = '<td' . $td . '\s*>';
                    $regEx .= $td . $this->tdSubstitute;
                }
                $regEx = sprintf($this->tableLookaheadBody, $regEx);
                // $flags has to be an int, passing null is deprecated as of PHP 8.1
                $convertible = (bool) preg_match($regEx, $this->parser->html, $bodyMatches, 0, $headerLength);
            }
            if (!$convertible) {
                // non markdownable table
                $this->handleTagToText();
                return;
            }
        }
        // this is a markdownable table tag!
        $this->table = array(
            'rows' => array(),
            'col_widths' => array(),
            'aligns' => $aligns,
        );
        $this->row = 0;
        return;
    }

    // finally build the table in Markdown Extra syntax
    if (!isset($this->table['aligns'])) {
        $this->table['aligns'] = array();
    }
    if (empty($this->table['rows'])) {
        // no cell was collected (e.g. <table></table>): there is no header
        // row to build, so skip the output instead of imploding null
        $this->table = array();
        $this->setLineBreaks(2);
        return;
    }

    // separator with correct align identifiers
    $separator = array();
    foreach ($this->table['aligns'] as $col => $align) {
        if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
            break;
        }
        $left = ' ';
        $right = ' ';
        switch ($align) {
            case 'left':
                $left = ':';
                break;
            case 'center':
                $left = ':';
                $right = ':';
                break;
            case 'right':
                $right = ':';
                break;
        }
        array_push($separator, $left . str_repeat('-', $this->table['col_widths'][$col]) . $right);
    }
    $separator = '|' . implode('|', $separator) . '|';

    $rows = array();
    // add padding
    array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
    $header = array_shift($this->table['rows']);
    array_push($rows, '| ' . implode(' | ', $header) . ' |');
    array_push($rows, $separator);
    foreach ($this->table['rows'] as $row) {
        array_push($rows, '| ' . implode(' | ', $row) . ' |');
    }
    $this->out(implode("\n" . $this->indent, $rows));
    $this->table = array();
    $this->setLineBreaks(2);
}
/**
 * properly pad content so it is aligned as wished
 * should be used with array_walk_recursive on $this->table['rows']
 *
 * The cell is padded with spaces up to the width recorded for its column.
 * A column without a recorded alignment is registered as 'left'; any
 * alignment other than 'right' or 'center' is padded like 'left'.
 *
 * @param string &$content cell text, modified in place
 * @param int $col column index
 * @return void
 */
protected function alignTdContent(&$content, $col)
{
    if (!isset($this->table['aligns'][$col])) {
        $this->table['aligns'][$col] = 'left';
    }
    $align = $this->table['aligns'][$col];
    $missing = $this->table['col_widths'][$col] - $this->strlen($content);

    if ($align == 'right') {
        $content = str_repeat(' ', $missing) . $content;
    } elseif ($align == 'center') {
        // the odd space, if any, goes to the right
        $before = floor($missing / 2);
        $after = $missing - $before;
        $content = str_repeat(' ', $before) . $content . str_repeat(' ', $after);
    } else {
        $content .= str_repeat(' ', $missing);
    }
}
/**
 * handle <tr> tags
 *
 * A start tag resets the column counter, an end tag advances the row
 * counter.
 *
 * @param void
 * @return void
 */
protected function handleTag_tr()
{
    if (!$this->parser->isStartTag) {
        $this->row++;
        return;
    }
    $this->col = -1;
}
/**
 * handle <td> tags
 *
 * The start tag moves to the next column and starts buffering. The end
 * tag stores the trimmed buffer as cell content of the current row and
 * widens the recorded column width if the cell is longer.
 *
 * @param void
 * @return void
 */
protected function handleTag_td()
{
    $opening = $this->parser->isStartTag;
    if ($opening) {
        $this->col++;
    } else {
        $cell = trim($this->unbuffer());
    }

    $column = $this->col;
    if (!isset($this->table['col_widths'][$column])) {
        $this->table['col_widths'][$column] = 0;
    }

    if ($opening) {
        $this->buffer();
        return;
    }
    $this->table['col_widths'][$column] = max($this->table['col_widths'][$column], $this->strlen($cell));
    $this->table['rows'][$this->row][$column] = $cell;
}
/**
 * handle <th> tags
 *
 * Without keepHTML the column alignments are not known from the table
 * lookahead, so they are taken from the align attribute of the header
 * cells while the first row is read. The cell itself is handled like a
 * <td>.
 *
 * The alignment is only recorded on the start tag: an end tag carries no
 * attributes, and handling it here would pre-fill the slot of the next
 * column with '' so that the align of that column would be ignored.
 *
 * @param void
 * @return void
 */
protected function handleTag_th()
{
    // handleTag_td() has not advanced the column yet, so this cell is col + 1
    $column = $this->col + 1;
    if ($this->parser->isStartTag
        && !$this->keepHTML
        && !isset($this->table['rows'][1])
        && !isset($this->table['aligns'][$column])
    ) {
        if (isset($this->parser->tagAttributes['align'])) {
            // lower case like the keepHTML path in handleTag_table()
            $this->table['aligns'][$column] = strtolower($this->parser->tagAttributes['align']);
        } else {
            $this->table['aligns'][$column] = '';
        }
    }
    $this->handleTag_td();
}
/**
 * handle <dl> tags
 *
 * Nothing is done for the start tag; the end tag requests two line breaks.
 *
 * @param void
 * @return void
 */
protected function handleTag_dl()
{
    if ($this->parser->isStartTag) {
        return;
    }
    $this->setLineBreaks(2);
}
/**
 * handle <dt> tags
 *
 * Nothing is done for the start tag; the end tag requests one line break.
 *
 * @param void
 * @return void
 **/
protected function handleTag_dt()
{
    if ($this->parser->isStartTag) {
        return;
    }
    $this->setLineBreaks(1);
}
/**
 * handle <dd> tags
 *
 * Start tag: writes the ": " definition marker (preceded by an extra line
 * when a <p> follows, or after removing one of two trailing newlines from
 * the output) and calls indent(). End tag: requests two line breaks when
 * a <dt> follows, otherwise one, and calls indent() again.
 *
 * @param void
 * @return void
 */
protected function handleTag_dd()
{
    $upcoming = ltrim($this->parser->html);

    if (!$this->parser->isStartTag) {
        // lookahead for next dt
        $this->setLineBreaks(substr($upcoming, 0, 4) == '<dt>' ? 2 : 1);
        $this->indent(' ');
        return;
    }

    if (substr($upcoming, 0, 3) == '<p>') {
        // next comes a paragraph, so we'll need an extra line
        $this->out("\n" . $this->indent);
    } elseif (substr($this->output, -2) == "\n\n") {
        $this->output = substr($this->output, 0, -1);
    }
    $this->out(': ');
    $this->indent(' ', false);
}
/**
 * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
 *
 * Writes the reference as "[^target]".
 *
 * @param void
 * @return void
 */
protected function handleTag_fnref()
{
    $target = $this->parser->tagAttributes['target'];
    $this->out('[^' . $target . ']');
}
/**
 * handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
 * and markdownify_extra::_makeFootnotes())
 *
 * The start tag writes the "[^name]:" label and requests one line break,
 * the end tag requests two. indent() is called in both cases.
 *
 * @param void
 * @return void
 */
protected function handleTag_fn()
{
    $opening = $this->parser->isStartTag;
    if ($opening) {
        $this->out('[^' . $this->parser->tagAttributes['name'] . ']:');
    }
    $this->setLineBreaks($opening ? 1 : 2);
    $this->indent(' ');
}
/**
 * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
 * and markdownify_extra::_makeFootnotes())
 *
 * Nothing is done for the start tag; the end tag requests two line breaks.
 *
 * @param void
 * @return void
 */
protected function handleTag_footnotes()
{
    if ($this->parser->isStartTag) {
        return;
    }
    $this->setLineBreaks(2);
}
/**
 * parse a HTML string, clean up footnotes prior
 *
 * Footnote references and the footnote list are rewritten to the custom
 * <fnref>, <footnotes> and <fn> tags before the parent parser runs.
 *
 * @param string $html input
 * @return string Markdown formatted output
 */
public function parseString($html)
{
    /** TODO: custom markdown-extra options, e.g. titles & classes **/

    // <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
    // => <fnref target="..." />
    $referencePattern = '@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us';
    $html = preg_replace($referencePattern, '<fnref target="$1" />', $html);

    // <div class="footnotes">
    // <hr />
    // <ol>
    //
    // <li id="fn:...">...</li>
    // ...
    //
    // </ol>
    // </div>
    // =>
    // <footnotes>
    // <fn name="...">...</fn>
    // ...
    // </footnotes>
    $listPattern = '#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us';
    $html = preg_replace_callback($listPattern, array($this, '_makeFootnotes'), $html);

    return parent::parseString($html);
}
/**
 * replace HTML representation of footnotes with something more easily parsable
 *
 * @note this is a callback to be used in parseString()
 *
 * @param array $matches match of the footnotes <div>; $matches[1] holds
 *                       the <li> items of the list
 * @return string the items wrapped in <footnotes>, each as <fn name="...">
 */
protected function _makeFootnotes($matches)
{
    // <li id="fn:1">
    // ...
    // <a href="#fnref:block" rev="footnote">&#8617;</a></p>
    // </li>
    // => <fn name="1">...</fn>

    // remove footnote link
    $fns = preg_replace('@\s*(&#160;\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>&#8617;</a>\s*@s', '', $matches[1]);
    // remove empty paragraph
    $fns = preg_replace('@<p>\s*</p>@s', '', $fns);
    // <li id="fn:1">...</li> -> <fn name="1">...</fn>
    $fns = str_replace('<li id="fn:', '<fn name="', $fns);
    $fns = '<footnotes>' . $fns . '</footnotes>';
    // close every item: a </li> directly before the next <fn or the closing
    // </footnotes> becomes </fn>; the lookahead is not consumed, so the
    // replacement needs no back reference
    return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>', $fns);
}
/**
 * build the "#id.class1.class2" selector of the current tag
 *
 * The id and class attributes of the tag currently held by the parser are
 * decoded and joined. Empty attributes contribute nothing, so an empty
 * id="" or class="" no longer produces a dangling "#" or ".". Class names
 * may be separated by any whitespace.
 *
 * @param void
 * @return string the selector, '' if the tag has neither id nor class
 */
protected function getCurrentCssSelector()
{
    $cssSelector = '';
    if (isset($this->parser->tagAttributes['id'])) {
        $id = (string) $this->decode($this->parser->tagAttributes['id']);
        if ($id !== '') {
            $cssSelector .= '#' . $id;
        }
    }
    if (isset($this->parser->tagAttributes['class'])) {
        $classes = preg_split('/\s+/', (string) $this->decode($this->parser->tagAttributes['class']), -1, PREG_SPLIT_NO_EMPTY);
        if (!empty($classes)) {
            $cssSelector .= '.' . implode('.', $classes);
        }
    }
    return $cssSelector;
}
}

View File

@ -0,0 +1,564 @@
<?php
/* This file is part of the Markdownify project, which is under LGPL license */
namespace Markdownify;
class Parser
{
public static $skipWhitespace = true;
public static $a_ord;
public static $z_ord;
public static $special_ords;
/**
* tags which are always empty (<br /> etc.)
*
* @var array<string>
*/
public $emptyTags = array(
'br',
'hr',
'input',
'img',
'area',
'link',
'meta',
'param',
);
/**
* tags with preformatted text
* whitespaces wont be touched in them
*
* @var array<string>
*/
public $preformattedTags = array(
'script',
'style',
'pre',
'code',
);
/**
* suppress HTML tags inside preformatted tags (see above)
*
* @var bool
*/
public $noTagsInCode = false;
/**
* html to be parsed
*
* @var string
*/
public $html = '';
/**
* node type:
*
* - tag (see isStartTag)
* - text (includes cdata)
* - comment
* - doctype
* - pi (processing instruction)
*
* @var string
*/
public $nodeType = '';
/**
* current node content, i.e. either a
* simple string (text node), or something like
* <tag attrib="value"...>
*
* @var string
*/
public $node = '';
/**
* whether current node is an opening tag (<a>) or not (</a>)
* set to NULL if current node is not a tag
* NOTE: empty tags (<br />) set this to true as well!
*
* @var bool | null
*/
public $isStartTag = null;
/**
* whether current node is an empty tag (<br />) or not (<a></a>)
*
* @var bool | null
*/
public $isEmptyTag = null;
/**
* tag name
*
* @var string | null
*/
public $tagName = '';
/**
* attributes of current tag
*
* @var array (attribName=>value) | null
*/
public $tagAttributes = null;
/**
* whether or not the actual context is a inline context
*
* @var bool | null
*/
public $isInlineContext = null;
/**
* whether the current tag is a block element
*
* @var bool | null
*/
public $isBlockElement = null;
/**
* whether the previous tag (browser) is a block element
*
* @var bool | null
*/
public $isNextToInlineContext = null;
/**
* keep whitespace
*
* @var int
*/
public $keepWhitespace = 0;
/**
* list of open tags
* count this to get current depth
*
* @var array
*/
public $openTags = array();
/**
* list of block elements
*
* @var array
* TODO: what shall we do with <del> and <ins> ?!
*/
public $blockElements = array(
// tag name => <bool> is block
// block elements
'address' => true,
'blockquote' => true,
'center' => true,
'del' => true,
'dir' => true,
'div' => true,
'dl' => true,
'fieldset' => true,
'form' => true,
'h1' => true,
'h2' => true,
'h3' => true,
'h4' => true,
'h5' => true,
'h6' => true,
'hr' => true,
'ins' => true,
'isindex' => true,
'menu' => true,
'noframes' => true,
'noscript' => true,
'ol' => true,
'p' => true,
'pre' => true,
'table' => true,
'ul' => true,
// set table elements and list items to block as well
'thead' => true,
'tbody' => true,
'tfoot' => true,
'td' => true,
'tr' => true,
'th' => true,
'li' => true,
'dd' => true,
'dt' => true,
// header items and html / body as well
'html' => true,
'body' => true,
'head' => true,
'meta' => true,
'link' => true,
'style' => true,
'title' => true,
// unfancy media tags, when indented should be rendered as block
'map' => true,
'object' => true,
'param' => true,
'embed' => true,
'area' => true,
// inline elements
'a' => false,
'abbr' => false,
'acronym' => false,
'applet' => false,
'b' => false,
'basefont' => false,
'bdo' => false,
'big' => false,
'br' => false,
'button' => false,
'cite' => false,
'code' => false,
'del' => false,
'dfn' => false,
'em' => false,
'font' => false,
'i' => false,
'img' => false,
'ins' => false,
'input' => false,
'iframe' => false,
'kbd' => false,
'label' => false,
'q' => false,
'samp' => false,
'script' => false,
'select' => false,
'small' => false,
'span' => false,
'strong' => false,
'sub' => false,
'sup' => false,
'textarea' => false,
'tt' => false,
'var' => false,
);
/**
 * get next node, set $this->html prior!
 *
 * Cuts the next node off the front of $this->html and exposes it through
 * $this->node / $this->nodeType (one of 'pi', 'comment', 'doctype', 'text'
 * or, via parseTag(), 'tag'). Markup that parseTag() rejects is escaped by
 * invalidTag() and returned as part of the following text node.
 *
 * Unterminated processing instructions, comments, doctypes and CDATA
 * sections extend to the end of the input.
 *
 * @param void
 * @return bool false when the input is exhausted, true when a node was read
 */
public function nextNode()
{
    // empty() must not be used here: it is also true for the string "0" and
    // would silently drop a document (or document tail) consisting of "0".
    // Non-strings cover null and the false that substr() returns on old PHP.
    if (!is_string($this->html) || $this->html === '') {
        // we are done with parsing the html string
        return false;
    }
    if ($this->isStartTag && !$this->isEmptyTag) {
        // the node returned by the previous call opened an element
        array_push($this->openTags, $this->tagName);
        if (in_array($this->tagName, $this->preformattedTags)) {
            // dont truncate whitespaces for <code> or <pre> contents
            $this->keepWhitespace++;
        }
    }
    if ($this->html[0] == '<') {
        $token = substr($this->html, 0, 9);
        if (substr($token, 0, 2) == '<?') {
            // xml prolog or other pi's
            /** TODO **/
            // trigger_error('this might need some work', E_USER_NOTICE);
            $gt = strpos($this->html, '>');
            $this->setNode('pi', $gt === false ? strlen($this->html) : $gt + 1);
            return true;
        }
        if (substr($token, 0, 4) == '<!--') {
            // comment
            $end = strpos($this->html, '-->');
            if ($end !== false) {
                $end += 3;
            } else {
                // could not find a closing -->, use next gt instead
                // this is firefox' behaviour
                $gt = strpos($this->html, '>');
                $end = $gt === false ? strlen($this->html) : $gt + 1;
            }
            $this->setNode('comment', $end);
            static::$skipWhitespace = true;
            return true;
        }
        if (strcasecmp($token, '<!DOCTYPE') === 0) {
            // doctype (the keyword is case-insensitive, e.g. <!doctype html>)
            $gt = strpos($this->html, '>');
            $this->setNode('doctype', $gt === false ? strlen($this->html) : $gt + 1);
            static::$skipWhitespace = true;
            return true;
        }
        if ($token == '<![CDATA[') {
            // cdata, use text node
            // remove leading <![CDATA[
            $this->html = (string) substr($this->html, 9);
            $end = strpos($this->html, ']]>');
            if ($end === false) {
                // unterminated section: everything that is left is its content
                $this->setNode('text', strlen($this->html));
            } else {
                $this->setNode('text', $end);
                // remove trailing ]]>
                $this->html = substr($this->html, 3);
            }
            $this->handleWhitespaces();
            static::$skipWhitespace = true;
            return true;
        }
        if ($this->parseTag()) {
            // seems to be a tag
            // whitespace following a block element is insignificant
            static::$skipWhitespace = $this->isBlockElement ? true : false;
            return true;
        }
    }
    if ($this->keepWhitespace) {
        static::$skipWhitespace = false;
    }
    // when we get here it seems to be a text node
    $pos = strpos($this->html, '<');
    if ($pos === false) {
        $pos = strlen($this->html);
    }
    $this->setNode('text', $pos);
    $this->handleWhitespaces();
    if (static::$skipWhitespace && $this->node === ' ') {
        // insignificant whitespace only: skip it and deliver the node after it
        return $this->nextNode();
    }
    $this->isInlineContext = true;
    static::$skipWhitespace = false;
    return true;
}
/**
 * parse tag, set tag name and attributes, see if it's a closing tag and so forth...
 *
 * Expects $this->html to start with "<". On success the tag is cut off the
 * front of $this->html and $this->node, $this->tagName, $this->tagAttributes,
 * $this->isStartTag, $this->isEmptyTag, $this->isBlockElement and the inline
 * context flags are updated. On failure invalidTag() escapes the leading "<"
 * and nothing else is changed.
 *
 * A tag is rejected when its name is not a key of $this->blockElements, when
 * it appears inside <code> while $this->noTagsInCode is set, when it is
 * truncated or contains unexpected characters (e.g. unquoted attribute
 * values), or when an end tag carries attributes or does not close the
 * innermost open tag.
 *
 * @param void
 * @return bool true when a valid tag was parsed
 */
protected function parseTag()
{
    if (!isset(static::$a_ord)) {
        static::$a_ord = ord('a');
        static::$z_ord = ord('z');
        static::$special_ords = array(
            ord(':'), // for xml:lang
            ord('-'), // for http-equiv
        );
    }
    $tagName = '';
    $pos = 1;
    // guard the offset: the input may consist of a single "<"
    $isStartTag = !isset($this->html[$pos]) || $this->html[$pos] != '/';
    if (!$isStartTag) {
        $pos++;
    }
    // get tagName: letters, plus digits after the first character (h1..h6)
    while (isset($this->html[$pos])) {
        $char = $this->html[$pos];
        $pos_ord = ord(strtolower($char));
        if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || ($tagName !== '' && is_numeric($char))) {
            $tagName .= $char;
            $pos++;
        } else {
            // step back onto the last character of the name
            $pos--;
            break;
        }
    }
    $tagName = strtolower($tagName);
    if ($tagName === '' || !isset($this->blockElements[$tagName])) {
        // something went wrong => invalid tag
        $this->invalidTag();
        return false;
    }
    if ($this->noTagsInCode && end($this->openTags) === 'code' && !($tagName == 'code' && !$isStartTag)) {
        // we supress all HTML tags inside code tags
        $this->invalidTag();
        return false;
    }
    // get tag attributes
    /** TODO: in html 4 attributes do not need to be quoted **/
    $isEmptyTag = false;
    $attributes = array();
    $currAttrib = '';
    // true once whitespace followed the attribute name collected so far
    $nameEnded = false;
    while (isset($this->html[$pos + 1])) {
        $pos++;
        $char = $this->html[$pos];
        // look-ahead may run past the end of a truncated tag
        $next = isset($this->html[$pos + 1]) ? $this->html[$pos + 1] : '';
        // close tag
        if ($char === '>' || $char . $next === '/>') {
            if ($char === '/') {
                $isEmptyTag = true;
                $pos++;
            }
            break;
        }
        $pos_ord = ord(strtolower($char));
        if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || in_array($pos_ord, static::$special_ords, true)) {
            // attribute name
            if ($nameEnded && $currAttrib !== '') {
                // the previous name was a value-less attribute (<option selected ...>);
                // store it instead of gluing both names together
                $attributes[$currAttrib] = $currAttrib;
                $currAttrib = '';
            }
            $nameEnded = false;
            $currAttrib .= $char;
        } elseif (in_array($char, array(' ', "\t", "\n", "\r", "\f"), true)) {
            // drop whitespace, but remember that the current name is complete
            $nameEnded = true;
        } elseif ($char === '=' && ($next === '"' || $next === "'")) {
            // get attribute value
            $pos++;
            $await = $next; // single or double quote
            $pos++;
            $value = '';
            while (isset($this->html[$pos]) && $this->html[$pos] !== $await) {
                $value .= $this->html[$pos];
                $pos++;
            }
            $attributes[$currAttrib] = $value;
            $currAttrib = '';
            $nameEnded = false;
        } else {
            $this->invalidTag();
            return false;
        }
    }
    if (!isset($this->html[$pos]) || $this->html[$pos] !== '>') {
        // truncated tag or unterminated attribute value
        $this->invalidTag();
        return false;
    }
    if ($currAttrib !== '') {
        // html 4 allows something like <option selected> instead of <option selected="selected">
        $attributes[$currAttrib] = $currAttrib;
    }
    if (!$isStartTag) {
        if (!empty($attributes) || $tagName !== end($this->openTags)) {
            // end tags must not contain any attributes
            // or maybe we did not expect a different tag to be closed
            $this->invalidTag();
            return false;
        }
        array_pop($this->openTags);
        if (in_array($tagName, $this->preformattedTags)) {
            $this->keepWhitespace--;
        }
    }
    $pos++;
    $this->node = substr($this->html, 0, $pos);
    $this->html = substr($this->html, $pos);
    $this->tagName = $tagName;
    $this->tagAttributes = $attributes;
    $this->isStartTag = $isStartTag;
    $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags);
    if ($this->isEmptyTag) {
        // might be not well formed
        $this->node = preg_replace('# */? *>$#', ' />', $this->node);
    }
    $this->nodeType = 'tag';
    $this->isBlockElement = $this->blockElements[$tagName];
    $this->isNextToInlineContext = $isStartTag && $this->isInlineContext;
    $this->isInlineContext = !$this->isBlockElement;
    return true;
}
/**
 * neutralise markup that could not be parsed as a tag
 *
 * The first character of $this->html (the "<" that started the rejected
 * tag) is swapped for its entity, so the remainder is read as plain text.
 *
 * @param void
 * @return void
 */
protected function invalidTag()
{
    $remainder = substr($this->html, 1);
    $this->html = '&lt;' . $remainder;
}
/**
 * make the first $pos bytes of $this->html the current node
 *
 * The node is removed from the front of $this->html. When the previous node
 * was a tag, all tag specific state is cleared first; tags themselves are
 * never set through this method, parseTag() takes care of them.
 *
 * @param string $type see description for $this->nodeType
 * @param int $pos to which position shall we cut?
 * @return void
 */
protected function setNode($type, $pos)
{
    if ($this->nodeType == 'tag') {
        // forget everything that described the previous tag
        $tagState = array('tagName', 'tagAttributes', 'isStartTag', 'isEmptyTag', 'isBlockElement');
        foreach ($tagState as $property) {
            $this->$property = null;
        }
    }
    $head = substr($this->html, 0, $pos);
    $tail = substr($this->html, $pos);
    $this->nodeType = $type;
    $this->node = $head;
    $this->html = $tail;
}
/**
 * check if $this->html begins with $str
 *
 * The comparison is byte-wise and case-sensitive. A loose == on the
 * extracted prefix is avoided on purpose: PHP compares numeric looking
 * strings by value ("1e2" == "100"), and on PHP < 8 substr() may return
 * false, which is loosely equal to "0".
 *
 * @param string $str
 * @return bool true when $this->html starts with $str (always true for '')
 */
protected function match($str)
{
    $str = (string) $str;

    return strncmp((string) $this->html, $str, strlen($str)) === 0;
}
/**
 * collapse whitespace in the current node
 *
 * Every run of whitespace in $this->node becomes a single space, unless
 * $this->keepWhitespace is set (we are inside <pre> or <code>), in which
 * case the node is left exactly as it is.
 *
 * @param void
 * @return void
 */
protected function handleWhitespaces()
{
    if (!$this->keepWhitespace) {
        // truncate multiple whitespaces to a single one
        $this->node = preg_replace('#\s+#s', ' ', $this->node);
    }
}
/**
 * rebuild self::node from the parsed tag data
 *
 * End tags become "</name>". Start tags become "<name attr="value" ...>"
 * with double quotes inside values written as &quot;, and with " /" before
 * the closing bracket when the tag is an empty tag.
 *
 * @param void
 * @return void
 */
protected function normalizeNode()
{
    if (!$this->isStartTag) {
        $this->node = '<' . '/' . $this->tagName . '>';
        return;
    }
    $markup = '<' . $this->tagName;
    foreach ($this->tagAttributes as $attrName => $attrValue) {
        $escaped = str_replace('"', '&quot;', $attrValue);
        $markup .= ' ' . $attrName . '="' . $escaped . '"';
    }
    $markup .= $this->isEmptyTag ? ' />' : '>';
    $this->node = $markup;
}
}