Merge pull request #688 from dawnbreak/markdown
⬆️ 🔨 Upgrade Markdownify library.
This commit is contained in:
commit
215bd07f0b
@ -29,7 +29,8 @@
|
||||
"ext-xml" : "*",
|
||||
"ext-openssl" : "*",
|
||||
"sabre/dav" : "~3.2",
|
||||
"michelf/php-markdown" : "^1.7"
|
||||
"michelf/php-markdown" : "^1.7",
|
||||
"pixel418/markdownify": "^2.2"
|
||||
},
|
||||
"require-dev" : {
|
||||
"php" : ">=5.6",
|
||||
|
58
composer.lock
generated
58
composer.lock
generated
@ -4,7 +4,7 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "4b24468c1f8babe7c8804fba8ee602f7",
|
||||
"content-hash": "c0cafbf9fd702be588f6b392b9742cb6",
|
||||
"packages": [
|
||||
{
|
||||
"name": "michelf/php-markdown",
|
||||
@ -57,6 +57,62 @@
|
||||
],
|
||||
"time": "2016-10-29T18:58:20+00:00"
|
||||
},
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"version": "v2.2.1",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Elephant418/Markdownify.git",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"type": "lib",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"LGPL"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"keywords": [
|
||||
"markdown",
|
||||
"markdownify"
|
||||
],
|
||||
"time": "2016-09-21T13:01:43+00:00"
|
||||
},
|
||||
{
|
||||
"name": "psr/log",
|
||||
"version": "1.0.2",
|
||||
|
@ -5,12 +5,12 @@
|
||||
*/
|
||||
|
||||
use Michelf\MarkdownExtra;
|
||||
use Markdownify\Converter;
|
||||
|
||||
require_once("include/oembed.php");
|
||||
require_once("include/event.php");
|
||||
require_once("include/html2bbcode.php");
|
||||
require_once("include/bbcode.php");
|
||||
require_once("library/markdownify/markdownify.php");
|
||||
|
||||
|
||||
function get_bb_tag_pos($s, $name, $occurance = 1) {
|
||||
@ -367,7 +367,6 @@ function bb2diaspora_itemwallwall(&$item,$uplink = false) {
|
||||
|
||||
function bb2diaspora_itembody($item, $force_update = false, $have_channel = false, $uplink = false) {
|
||||
|
||||
|
||||
if(! get_iconfig($item,'diaspora','fields')) {
|
||||
$force_update = true;
|
||||
}
|
||||
@ -454,7 +453,7 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals
|
||||
return html_entity_decode($body);
|
||||
}
|
||||
|
||||
function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
||||
|
||||
// Re-enabling the converter again.
|
||||
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
|
||||
@ -496,11 +495,10 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
|
||||
|
||||
// Now convert HTML to Markdown
|
||||
$md = new Markdownify(false, false, false);
|
||||
$md = new Converter(Converter::LINK_AFTER_CONTENT, false, false);
|
||||
$Text = $md->parseString($Text);
|
||||
|
||||
|
||||
|
||||
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
|
||||
|
||||
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text);
|
||||
@ -522,7 +520,7 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
|
||||
$Text = trim($Text);
|
||||
|
||||
call_hooks('bb2diaspora',$Text);
|
||||
call_hooks('bb2diaspora', $Text);
|
||||
|
||||
return $Text;
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
Markdownify
|
||||
===========
|
||||
* handle non-markdownifiable lists (e.g. `<ul><li id="foobar">asdf</li></ul>`)
|
||||
* organize methods better (e.g. keep flushlinebreaks & setlinebreaks close to each other)
|
||||
* take a look at function names etc.
|
||||
* is the new (in rev. 93) lastclosedtag property needed?
|
||||
* word wrapping (some work is done but it's still very buggy)
|
||||
|
||||
|
||||
Markdownify Extra
|
||||
=================
|
||||
|
||||
* handle table alignment with KEEP_HTML=false
|
||||
* handle tables without headings when KEEP_HTML=false is set
|
||||
* handle Markdown inside non-markdownable tags
|
||||
|
||||
|
||||
Implementation Thoughts
|
||||
=======================
|
||||
* non-markdownifiable lists and markdown inside non-markdownable tags as well as the current
|
||||
table implementation could be rewritten by using a rollback mechanism.
|
||||
|
||||
example:
|
||||
|
||||
<ul><li>asdf</li><li id="foobar">asdf</li></ul>
|
||||
|
||||
we come to `<ul>`, know that this might fail and create a snapshot of our current parser
|
||||
we keep on parsing, and when we reach `<li id="foobar">` we have to roll back and keep this
|
||||
list in HTML format.
|
@ -1,51 +0,0 @@
|
||||
<?php
/**
 * Minimal browser demo for the HTML to Markdown converter.
 *
 * Without POSTed input the page shows a form; with input it runs the
 * converter and prints the resulting Markdown inside a <pre> block.
 */
error_reporting(E_ALL);
if (!empty($_POST['input'])) {
	include 'markdownify_extra.php';
	// An unchecked checkbox is not submitted at all, so fall back to the library defaults.
	if (!isset($_POST['leap'])) {
		$leap = MDFY_LINKS_EACH_PARAGRAPH;
	} else {
		$leap = $_POST['leap'];
	}

	if (!isset($_POST['keepHTML'])) {
		$keephtml = MDFY_KEEPHTML;
	} else {
		$keephtml = $_POST['keepHTML'];
	}
	if (!empty($_POST['extra'])) {
		$md = new Markdownify_Extra($leap, MDFY_BODYWIDTH, $keephtml);
	} else {
		$md = new Markdownify($leap, MDFY_BODYWIDTH, $keephtml);
	}
	// Undo automatic slash-escaping of request data where the setting is still reported as enabled.
	if (ini_get('magic_quotes_gpc')) {
		$_POST['input'] = stripslashes($_POST['input']);
	}
	$output = $md->parseString($_POST['input']);
} else {
	$_POST['input'] = '';
}
// PHP_SELF contains request-controlled path info; escape it before it is
// written into an HTML attribute, otherwise it is a reflected XSS vector.
$self = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8');
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
	<head>
		<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
		<title>HTML to Markdown Converter</title>
	</head>
	<body>
		<?php if (empty($_POST['input'])): ?>
		<form action="<?php echo $self; ?>" method="post">
			<fieldset>
				<legend>HTML Input</legend>
				<textarea style="width:100%;" cols="85" rows="40" name="input"><?php echo htmlspecialchars($_POST['input'], ENT_NOQUOTES, 'UTF-8'); ?></textarea>
			</fieldset>
			<label for="extra">Markdownify Extra: <input name="extra" checked="checked" id="extra" type="checkbox" value="1" /></label>
			<label for="leap">Links after each block elem: <input name="leap" id="leap" type="checkbox" value="1" /></label>
			<label for="keepHTML">keep HTML: <input name="keepHTML" id="keepHTML" type="checkbox" value="1" checked="checked" /></label>
			<input type="submit" name="submit" value="submit" />
		</form>
		<?php else: ?>
		<h1 style="text-align:right;"><a href="<?php echo $self; ?>">BACK</a></h1>
		<pre><?php echo htmlspecialchars($output, ENT_NOQUOTES, 'UTF-8'); ?></pre>
		<?php endif; ?>
	</body>
</html>
|
File diff suppressed because it is too large
Load Diff
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/php
|
||||
<?php
|
||||
require dirname(__FILE__) .'/markdownify_extra.php';
|
||||
|
||||
/**
 * Read a command-line option of the form `--name [value]` from $_SERVER['argv'].
 *
 * @param string $name    option name without the leading dashes
 * @param mixed  $default value returned when `--name` is not present at all
 * @return mixed $default when the option is absent; true when it is present
 *               without a value (it is the last argument or is followed by
 *               another `--option`); otherwise the argument that follows it
 */
function param($name, $default = false) {
	$flag = '--'.$name;
	// Reindex so that "the following argument" is simply position + 1.
	$args = array_values($_SERVER['argv']);
	$pos = array_search($flag, $args, true);
	if ($pos === false) {
		return $default;
	}
	// each() was removed in PHP 8.0, so look the value up by position
	// instead of walking the array's internal pointer.
	$value = isset($args[$pos + 1]) ? $args[$pos + 1] : false;
	if ($value === false || substr($value, 0, 2) == '--') {
		return true;
	}
	return $value;
}
|
||||
|
||||
|
||||
$input = stream_get_contents(STDIN);
|
||||
|
||||
$linksAfterEachParagraph = param('links');
|
||||
$bodyWidth = param('width');
|
||||
$keepHTML = param('html', true);
|
||||
|
||||
if (param('no_extra')) {
|
||||
$parser = new Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
} else {
|
||||
$parser = new Markdownify_Extra($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
}
|
||||
|
||||
echo $parser->parseString($input) ."\n";
|
@ -1,489 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* Class to convert HTML to Markdown with PHP Markdown Extra syntax support.
|
||||
*
|
||||
* @version 1.0.0 alpha
|
||||
* @author Milian Wolff (<mail@milianw.de>, <http://milianw.de>)
|
||||
* @license LGPL, see LICENSE_LGPL.txt and the summary below
|
||||
* @copyright (C) 2007 Milian Wolff
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* standard Markdownify class
|
||||
*/
|
||||
require_once dirname(__FILE__).'/markdownify.php';
|
||||
|
||||
class Markdownify_Extra extends Markdownify {
|
||||
/**
|
||||
* table data, including rows with content and the maximum width of each col
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
var $table = array();
|
||||
/**
|
||||
* current col
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $col = -1;
|
||||
/**
|
||||
* current row
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $row = 0;
|
||||
/**
|
||||
* constructor, see Markdownify::Markdownify() for more information
|
||||
*/
|
||||
function Markdownify_Extra($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
|
||||
parent::Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
|
||||
### new markdownable tags & attributes
|
||||
# header ids: # foo {bar}
|
||||
$this->isMarkdownable['h1']['id'] = 'optional';
|
||||
$this->isMarkdownable['h2']['id'] = 'optional';
|
||||
$this->isMarkdownable['h3']['id'] = 'optional';
|
||||
$this->isMarkdownable['h4']['id'] = 'optional';
|
||||
$this->isMarkdownable['h5']['id'] = 'optional';
|
||||
$this->isMarkdownable['h6']['id'] = 'optional';
|
||||
# tables
|
||||
$this->isMarkdownable['table'] = array();
|
||||
$this->isMarkdownable['th'] = array(
|
||||
'align' => 'optional',
|
||||
);
|
||||
$this->isMarkdownable['td'] = array(
|
||||
'align' => 'optional',
|
||||
);
|
||||
$this->isMarkdownable['tr'] = array();
|
||||
array_push($this->ignore, 'thead');
|
||||
array_push($this->ignore, 'tbody');
|
||||
array_push($this->ignore, 'tfoot');
|
||||
# definition lists
|
||||
$this->isMarkdownable['dl'] = array();
|
||||
$this->isMarkdownable['dd'] = array();
|
||||
$this->isMarkdownable['dt'] = array();
|
||||
# footnotes
|
||||
$this->isMarkdownable['fnref'] = array(
|
||||
'target' => 'required',
|
||||
);
|
||||
$this->isMarkdownable['footnotes'] = array();
|
||||
$this->isMarkdownable['fn'] = array(
|
||||
'name' => 'required',
|
||||
);
|
||||
$this->parser->blockElements['fnref'] = false;
|
||||
$this->parser->blockElements['fn'] = true;
|
||||
$this->parser->blockElements['footnotes'] = true;
|
||||
# abbr
|
||||
$this->isMarkdownable['abbr'] = array(
|
||||
'title' => 'required',
|
||||
);
|
||||
# build RegEx lookahead to decide whether the table can be parsed or not
|
||||
$inlineTags = array_keys($this->parser->blockElements, false);
|
||||
$colContents = '(?:[^<]|<(?:'.implode('|', $inlineTags).'|[^a-z]))+';
|
||||
$this->tableLookaheadHeader = '{
|
||||
^\s*(?:<thead\s*>)?\s* # open optional thead
|
||||
<tr\s*>\s*(?: # start required row with headers
|
||||
<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
|
||||
\s*'.$colContents.'\s* # contents
|
||||
</th>\s* # close header
|
||||
)+</tr> # close row with headers
|
||||
\s*(?:</thead>)? # close optional thead
|
||||
}sxi';
|
||||
$this->tdSubstitute = '\s*'.$colContents.'\s* # contents
|
||||
</td>\s*';
|
||||
$this->tableLookaheadBody = '{
|
||||
\s*(?:<tbody\s*>)?\s* # open optional tbody
|
||||
(?:<tr\s*>\s* # start row
|
||||
%s # cols to be substituted
|
||||
</tr>)+ # close row
|
||||
\s*(?:</tbody>)? # close optional tbody
|
||||
\s*</table> # close table
|
||||
}sxi';
|
||||
}
|
||||
/**
|
||||
* handle header tags (<h1> - <h6>)
|
||||
*
|
||||
* @param int $level 1-6
|
||||
* @return void
|
||||
*/
|
||||
function handleHeader($level) {
|
||||
static $id = null;
|
||||
if ($this->parser->isStartTag) {
|
||||
if (isset($this->parser->tagAttributes['id'])) {
|
||||
$id = $this->parser->tagAttributes['id'];
|
||||
}
|
||||
} else {
|
||||
if (!is_null($id)) {
|
||||
$this->out(' {#'.$id.'}');
|
||||
$id = null;
|
||||
}
|
||||
}
|
||||
parent::handleHeader($level);
|
||||
}
|
||||
/**
|
||||
* handle <abbr> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_abbr() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->stack();
|
||||
$this->buffer();
|
||||
} else {
|
||||
$tag = $this->unstack();
|
||||
$tag['text'] = $this->unbuffer();
|
||||
$add = true;
|
||||
foreach ($this->stack['abbr'] as $stacked) {
|
||||
if ($stacked['text'] == $tag['text']) {
|
||||
/** TODO: differing abbr definitions, i.e. different titles for same text **/
|
||||
$add = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
$this->out($tag['text']);
|
||||
if ($add) {
|
||||
array_push($this->stack['abbr'], $tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* flush stacked abbr tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function flushStacked_abbr() {
|
||||
$out = array();
|
||||
foreach ($this->stack['abbr'] as $k => $tag) {
|
||||
if (!isset($tag['unstacked'])) {
|
||||
array_push($out, ' *['.$tag['text'].']: '.$tag['title']);
|
||||
$tag['unstacked'] = true;
|
||||
$this->stack['abbr'][$k] = $tag;
|
||||
}
|
||||
}
|
||||
if (!empty($out)) {
|
||||
$this->out("\n\n".implode("\n", $out));
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <table> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_table() {
|
||||
if ($this->parser->isStartTag) {
|
||||
# check if upcoming table can be converted
|
||||
if ($this->keepHTML) {
|
||||
if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
|
||||
# header seems good, now check body
|
||||
# get align & number of cols
|
||||
preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
|
||||
$regEx = '';
|
||||
$i = 1;
|
||||
$aligns = array();
|
||||
foreach ($cols[2] as $align) {
|
||||
$align = strtolower($align);
|
||||
array_push($aligns, $align);
|
||||
if (empty($align)) {
|
||||
$align = 'left'; # default value
|
||||
}
|
||||
$td = '\s+align=("|\')'.$align.'\\'.$i;
|
||||
$i++;
|
||||
if ($align == 'left') {
|
||||
# look for empty align or left
|
||||
$td = '(?:'.$td.')?';
|
||||
}
|
||||
$td = '<td'.$td.'\s*>';
|
||||
$regEx .= $td.$this->tdSubstitute;
|
||||
}
|
||||
$regEx = sprintf($this->tableLookaheadBody, $regEx);
|
||||
if (preg_match($regEx, $this->parser->html, $matches, null, strlen($matches[0]))) {
|
||||
# this is a markdownable table tag!
|
||||
$this->table = array(
|
||||
'rows' => array(),
|
||||
'col_widths' => array(),
|
||||
'aligns' => $aligns,
|
||||
);
|
||||
$this->row = 0;
|
||||
} else {
|
||||
# non markdownable table
|
||||
$this->handleTagToText();
|
||||
}
|
||||
} else {
|
||||
# non markdownable table
|
||||
$this->handleTagToText();
|
||||
}
|
||||
} else {
|
||||
$this->table = array(
|
||||
'rows' => array(),
|
||||
'col_widths' => array(),
|
||||
'aligns' => array(),
|
||||
);
|
||||
$this->row = 0;
|
||||
}
|
||||
} else {
|
||||
# finally build the table in Markdown Extra syntax
|
||||
$separator = array();
|
||||
# separator with the correct alignment identifiers
|
||||
foreach($this->table['aligns'] as $col => $align) {
|
||||
if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
|
||||
break;
|
||||
}
|
||||
$left = ' ';
|
||||
$right = ' ';
|
||||
switch ($align) {
|
||||
case 'left':
|
||||
$left = ':';
|
||||
break;
|
||||
case 'center':
|
||||
$right = ':';
|
||||
$left = ':';
|
||||
case 'right':
|
||||
$right = ':';
|
||||
break;
|
||||
}
|
||||
array_push($separator, $left.str_repeat('-', $this->table['col_widths'][$col]).$right);
|
||||
}
|
||||
$separator = '|'.implode('|', $separator).'|';
|
||||
|
||||
$rows = array();
|
||||
# add padding
|
||||
array_walk_recursive($this->table['rows'], array(&$this, 'alignTdContent'));
|
||||
$header = array_shift($this->table['rows']);
|
||||
array_push($rows, '| '.implode(' | ', $header).' |');
|
||||
array_push($rows, $separator);
|
||||
foreach ($this->table['rows'] as $row) {
|
||||
array_push($rows, '| '.implode(' | ', $row).' |');
|
||||
}
|
||||
$this->out(implode("\n".$this->indent, $rows));
|
||||
$this->table = array();
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* properly pad content so it is aligned as wished
|
||||
* should be used with array_walk_recursive on $this->table['rows']
|
||||
*
|
||||
* @param string &$content
|
||||
* @param int $col
|
||||
* @return void
|
||||
*/
|
||||
function alignTdContent(&$content, $col) {
|
||||
switch ($this->table['aligns'][$col]) {
|
||||
default:
|
||||
case 'left':
|
||||
$content .= str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content));
|
||||
break;
|
||||
case 'right':
|
||||
$content = str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)).$content;
|
||||
break;
|
||||
case 'center':
|
||||
$paddingNeeded = $this->table['col_widths'][$col] - $this->strlen($content);
|
||||
$left = floor($paddingNeeded / 2);
|
||||
$right = $paddingNeeded - $left;
|
||||
$content = str_repeat(' ', $left).$content.str_repeat(' ', $right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <tr> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_tr() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->col = -1;
|
||||
} else {
|
||||
$this->row++;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <td> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_td() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->col++;
|
||||
if (!isset($this->table['col_widths'][$this->col])) {
|
||||
$this->table['col_widths'][$this->col] = 0;
|
||||
}
|
||||
$this->buffer();
|
||||
} else {
|
||||
$buffer = trim($this->unbuffer());
|
||||
$this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer));
|
||||
$this->table['rows'][$this->row][$this->col] = $buffer;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <th> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_th() {
|
||||
if (!$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$this->col+1])) {
|
||||
if (isset($this->parser->tagAttributes['align'])) {
|
||||
$this->table['aligns'][$this->col+1] = $this->parser->tagAttributes['align'];
|
||||
} else {
|
||||
$this->table['aligns'][$this->col+1] = '';
|
||||
}
|
||||
}
|
||||
$this->handleTag_td();
|
||||
}
|
||||
/**
|
||||
* handle <dl> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_dl() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <dt> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
**/
|
||||
function handleTag_dt() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(1);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <dd> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_dd() {
|
||||
if ($this->parser->isStartTag) {
|
||||
if (substr(ltrim($this->parser->html), 0, 3) == '<p>') {
|
||||
# next comes a paragraph, so we'll need an extra line
|
||||
$this->out("\n".$this->indent);
|
||||
} elseif (substr($this->output, -2) == "\n\n") {
|
||||
$this->output = substr($this->output, 0, -1);
|
||||
}
|
||||
$this->out(': ');
|
||||
$this->indent(' ', false);
|
||||
} else {
|
||||
# lookahead for next dt
|
||||
if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') {
|
||||
$this->setLineBreaks(2);
|
||||
} else {
|
||||
$this->setLineBreaks(1);
|
||||
}
|
||||
$this->indent(' ');
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_fnref() {
|
||||
$this->out('[^'.$this->parser->tagAttributes['target'].']');
|
||||
}
|
||||
/**
|
||||
* handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
|
||||
* and markdownify_extra::_makeFootnotes())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_fn() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->out('[^'.$this->parser->tagAttributes['name'].']:');
|
||||
$this->setLineBreaks(1);
|
||||
} else {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
$this->indent(' ');
|
||||
}
|
||||
/**
|
||||
* handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
|
||||
* and markdownify_extra::_makeFootnotes())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_footnotes() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* parse an HTML string, cleaning up footnotes first
|
||||
*
|
||||
* @param string $html HTML input
|
||||
* @return string Markdown formatted output
|
||||
*/
|
||||
function parseString($html) {
|
||||
/** TODO: custom markdown-extra options, e.g. titles & classes **/
|
||||
# <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
|
||||
# => <fnref target="..." />
|
||||
$html = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html);
|
||||
# <div class="footnotes">
|
||||
# <hr />
|
||||
# <ol>
|
||||
#
|
||||
# <li id="fn:...">...</li>
|
||||
# ...
|
||||
#
|
||||
# </ol>
|
||||
# </div>
|
||||
# =>
|
||||
# <footnotes>
|
||||
# <fn name="...">...</fn>
|
||||
# ...
|
||||
# </footnotes>
|
||||
$html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array(&$this, '_makeFootnotes'), $html);
|
||||
return parent::parseString($html);
|
||||
}
|
||||
/**
|
||||
* replace HTML representation of footnotes with something more easily parsable
|
||||
*
|
||||
* @note this is a callback to be used in parseString()
|
||||
*
|
||||
* @param array $matches
|
||||
* @return string
|
||||
*/
|
||||
function _makeFootnotes($matches) {
|
||||
# <li id="fn:1">
|
||||
# ...
|
||||
# <a href="#fnref:block" rev="footnote">↩</a></p>
|
||||
# </li>
|
||||
# => <fn name="1">...</fn>
|
||||
# remove footnote link
|
||||
$fns = preg_replace('@\s*( \s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>↩</a>\s*@s', '', $matches[1]);
|
||||
# remove empty paragraph
|
||||
$fns = preg_replace('@<p>\s*</p>@s', '', $fns);
|
||||
# <li id="fn:1">...</li> -> <fn name="1">...</fn>
|
||||
$fns = str_replace('<li id="fn:', '<fn name="', $fns);
|
||||
|
||||
$fns = '<footnotes>'.$fns.'</footnotes>';
|
||||
return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns);
|
||||
}
|
||||
}
|
@ -1,618 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* parseHTML is an HTML parser which works with PHP 4 and above.
|
||||
* It tries to handle invalid HTML to some degree.
|
||||
*
|
||||
* @version 1.0 beta
|
||||
* @author Milian Wolff (mail@milianw.de, http://milianw.de)
|
||||
* @license LGPL, see LICENSE_LGPL.txt and the summary below
|
||||
* @copyright (C) 2007 Milian Wolff
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
class parseHTML {
|
||||
/**
|
||||
* tags which are always empty (<br /> etc.)
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
var $emptyTags = array(
|
||||
'br',
|
||||
'hr',
|
||||
'input',
|
||||
'img',
|
||||
'area',
|
||||
'link',
|
||||
'meta',
|
||||
'param',
|
||||
);
|
||||
/**
|
||||
* tags with preformatted text
|
||||
* whitespace won't be touched inside them
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
var $preformattedTags = array(
|
||||
'script',
|
||||
'style',
|
||||
'pre',
|
||||
'code',
|
||||
);
|
||||
/**
|
||||
* suppress HTML tags inside preformatted tags (see above)
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
var $noTagsInCode = false;
|
||||
/**
|
||||
* html to be parsed
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $html = '';
|
||||
/**
|
||||
* node type:
|
||||
*
|
||||
* - tag (see isStartTag)
|
||||
* - text (includes cdata)
|
||||
* - comment
|
||||
* - doctype
|
||||
* - pi (processing instruction)
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $nodeType = '';
|
||||
/**
|
||||
* current node content, i.e. either a
|
||||
* simple string (text node), or something like
|
||||
* <tag attrib="value"...>
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $node = '';
|
||||
/**
|
||||
* whether current node is an opening tag (<a>) or not (</a>)
|
||||
* set to NULL if current node is not a tag
|
||||
* NOTE: empty tags (<br />) set this to true as well!
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isStartTag = null;
|
||||
/**
|
||||
* whether current node is an empty tag (<br />) or not (<a></a>)
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isEmptyTag = null;
|
||||
/**
|
||||
* tag name
|
||||
*
|
||||
* @var string | null
|
||||
*/
|
||||
var $tagName = '';
|
||||
/**
|
||||
* attributes of current tag
|
||||
*
|
||||
* @var array (attribName=>value) | null
|
||||
*/
|
||||
var $tagAttributes = null;
|
||||
/**
|
||||
* whether the current tag is a block element
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isBlockElement = null;
|
||||
|
||||
/**
|
||||
* keep whitespace
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $keepWhitespace = 0;
|
||||
/**
|
||||
* list of open tags
|
||||
* count this to get current depth
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
var $openTags = array();
|
||||
/**
|
||||
* list of block elements
|
||||
*
|
||||
* @var array
|
||||
* TODO: what shall we do with <del> and <ins> ?!
|
||||
*/
|
||||
var $blockElements = array (
|
||||
# tag name => <bool> is block
|
||||
# block elements
|
||||
'address' => true,
|
||||
'blockquote' => true,
|
||||
'center' => true,
|
||||
'del' => true,
|
||||
'dir' => true,
|
||||
'div' => true,
|
||||
'dl' => true,
|
||||
'fieldset' => true,
|
||||
'form' => true,
|
||||
'h1' => true,
|
||||
'h2' => true,
|
||||
'h3' => true,
|
||||
'h4' => true,
|
||||
'h5' => true,
|
||||
'h6' => true,
|
||||
'hr' => true,
|
||||
'ins' => true,
|
||||
'isindex' => true,
|
||||
'menu' => true,
|
||||
'noframes' => true,
|
||||
'noscript' => true,
|
||||
'ol' => true,
|
||||
'p' => true,
|
||||
'pre' => true,
|
||||
'table' => true,
|
||||
'ul' => true,
|
||||
# set table elements and list items to block as well
|
||||
'thead' => true,
|
||||
'tbody' => true,
|
||||
'tfoot' => true,
|
||||
'td' => true,
|
||||
'tr' => true,
|
||||
'th' => true,
|
||||
'li' => true,
|
||||
'dd' => true,
|
||||
'dt' => true,
|
||||
# header items and html / body as well
|
||||
'html' => true,
|
||||
'body' => true,
|
||||
'head' => true,
|
||||
'meta' => true,
|
||||
'link' => true,
|
||||
'style' => true,
|
||||
'title' => true,
|
||||
# unfancy media tags, when indented should be rendered as block
|
||||
'map' => true,
|
||||
'object' => true,
|
||||
'param' => true,
|
||||
'embed' => true,
|
||||
'area' => true,
|
||||
# inline elements
|
||||
'a' => false,
|
||||
'abbr' => false,
|
||||
'acronym' => false,
|
||||
'applet' => false,
|
||||
'b' => false,
|
||||
'basefont' => false,
|
||||
'bdo' => false,
|
||||
'big' => false,
|
||||
'br' => false,
|
||||
'button' => false,
|
||||
'cite' => false,
|
||||
'code' => false,
|
||||
'del' => false,
|
||||
'dfn' => false,
|
||||
'em' => false,
|
||||
'font' => false,
|
||||
'i' => false,
|
||||
'img' => false,
|
||||
'ins' => false,
|
||||
'input' => false,
|
||||
'iframe' => false,
|
||||
'kbd' => false,
|
||||
'label' => false,
|
||||
'q' => false,
|
||||
'samp' => false,
|
||||
'script' => false,
|
||||
'select' => false,
|
||||
'small' => false,
|
||||
'span' => false,
|
||||
'strong' => false,
|
||||
'sub' => false,
|
||||
'sup' => false,
|
||||
'textarea' => false,
|
||||
'tt' => false,
|
||||
'var' => false,
|
||||
);
|
||||
/**
|
||||
* get the next node; set $this->html beforehand!
|
||||
*
|
||||
* @param void
|
||||
* @return bool
|
||||
*/
|
||||
function nextNode() {
|
||||
if (empty($this->html)) {
|
||||
# we are done with parsing the html string
|
||||
return false;
|
||||
}
|
||||
static $skipWhitespace = true;
|
||||
if ($this->isStartTag && !$this->isEmptyTag) {
|
||||
array_push($this->openTags, $this->tagName);
|
||||
if (in_array($this->tagName, $this->preformattedTags)) {
|
||||
# don't truncate whitespace in <code> or <pre> contents
|
||||
$this->keepWhitespace++;
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->html[0] == '<') {
|
||||
$token = substr($this->html, 0, 9);
|
||||
if (substr($token, 0, 2) == '<?') {
|
||||
# xml prolog or other pi's
|
||||
/** TODO **/
|
||||
#trigger_error('this might need some work', E_USER_NOTICE);
|
||||
$pos = strpos($this->html, '>');
|
||||
$this->setNode('pi', $pos + 1);
|
||||
return true;
|
||||
}
|
||||
if (substr($token, 0, 4) == '<!--') {
|
||||
# comment
|
||||
$pos = strpos($this->html, '-->');
|
||||
if ($pos === false) {
|
||||
# could not find a closing -->, use next gt instead
|
||||
# this is firefox' behaviour
|
||||
$pos = strpos($this->html, '>') + 1;
|
||||
} else {
|
||||
$pos += 3;
|
||||
}
|
||||
$this->setNode('comment', $pos);
|
||||
|
||||
$skipWhitespace = true;
|
||||
return true;
|
||||
}
|
||||
if ($token == '<!DOCTYPE') {
|
||||
# doctype
|
||||
$this->setNode('doctype', strpos($this->html, '>')+1);
|
||||
|
||||
$skipWhitespace = true;
|
||||
return true;
|
||||
}
|
||||
if ($token == '<![CDATA[') {
|
||||
# cdata, use text node
|
||||
|
||||
# remove leading <![CDATA[
|
||||
$this->html = substr($this->html, 9);
|
||||
|
||||
$this->setNode('text', strpos($this->html, ']]>')+3);
|
||||
|
||||
# remove trailing ]]> and trim
|
||||
$this->node = substr($this->node, 0, -3);
|
||||
$this->handleWhitespaces();
|
||||
|
||||
$skipWhitespace = true;
|
||||
return true;
|
||||
}
|
||||
if ($this->parseTag()) {
|
||||
# seems to be a tag
|
||||
# handle whitespaces
|
||||
if ($this->isBlockElement) {
|
||||
$skipWhitespace = true;
|
||||
} else {
|
||||
$skipWhitespace = false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if ($this->keepWhitespace) {
|
||||
$skipWhitespace = false;
|
||||
}
|
||||
# when we get here it seems to be a text node
|
||||
$pos = strpos($this->html, '<');
|
||||
if ($pos === false) {
|
||||
$pos = strlen($this->html);
|
||||
}
|
||||
$this->setNode('text', $pos);
|
||||
$this->handleWhitespaces();
|
||||
if ($skipWhitespace && $this->node == ' ') {
|
||||
return $this->nextNode();
|
||||
}
|
||||
$skipWhitespace = false;
|
||||
return true;
|
||||
}
|
||||
/**
 * Try to parse the tag at the start of $this->html.
 *
 * On success the tag text is moved from $this->html into $this->node and
 * tagName, tagAttributes, isStartTag, isEmptyTag, isBlockElement and
 * nodeType ('tag') are updated. Otherwise invalidTag() is called and
 * false is returned. A tag is rejected when its name is not a key of
 * $this->blockElements, when it appears inside <code> while noTagsInCode
 * is set, when its attributes cannot be read, when it is not closed by
 * ">", or when it is an end tag that carries attributes or does not match
 * the innermost open tag.
 *
 * @param void
 * @return bool true if a tag was parsed
 */
function parseTag() {
    static $a_ord, $z_ord, $special_ords;
    if (!isset($a_ord)) {
        $a_ord = ord('a');
        $z_ord = ord('z');
        $special_ords = array(
            ord(':'), // for xml:lang
            ord('-'), // for http-equiv
        );
    }

    $tagName = '';

    $pos = 1;
    # a lone "<" at the very end of the input has no second character
    $isStartTag = !isset($this->html[$pos]) || $this->html[$pos] != '/';
    if (!$isStartTag) {
        $pos++;
    }
    # get tagName
    while (isset($this->html[$pos])) {
        $pos_ord = ord(strtolower($this->html[$pos]));
        if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || (!empty($tagName) && is_numeric($this->html[$pos]))) {
            $tagName .= $this->html[$pos];
            $pos++;
        } else {
            # step back onto the last character of the name
            $pos--;
            break;
        }
    }

    $tagName = strtolower($tagName);
    if (empty($tagName) || !isset($this->blockElements[$tagName])) {
        # something went wrong => invalid tag
        $this->invalidTag();
        return false;
    }
    if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
        # we supress all HTML tags inside code tags
        $this->invalidTag();
        return false;
    }

    # get tag attributes
    /** TODO: in html 4 attributes do not need to be quoted **/
    $isEmptyTag = false;
    $attributes = array();
    $currAttrib = '';
    while (isset($this->html[$pos+1])) {
        $pos++;
        $char = $this->html[$pos];
        # two-character lookahead; substr() just returns a shorter string at
        # the end of the input instead of reading an undefined offset
        $pair = substr($this->html, $pos, 2);
        # close tag
        if ($char == '>' || $pair === '/>') {
            if ($char == '/') {
                $isEmptyTag = true;
                $pos++;
            }
            break;
        }

        $pos_ord = ord(strtolower($char));
        if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || in_array($pos_ord, $special_ords)) {
            # attribute name
            $currAttrib .= $char;
        } elseif (in_array($char, array(' ', "\t", "\n"), true)) {
            # drop whitespace
        } elseif (in_array($pair, array('="', "='"), true)) {
            # get attribute value
            $pos++;
            $await = $this->html[$pos]; # single or double quote
            $pos++;
            $value = '';
            while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
                $value .= $this->html[$pos];
                $pos++;
            }
            $attributes[$currAttrib] = $value;
            $currAttrib = '';
        } else {
            $this->invalidTag();
            return false;
        }
    }
    # $pos may point past the end of the input when the tag or an
    # attribute value was never terminated
    if (!isset($this->html[$pos]) || $this->html[$pos] != '>') {
        $this->invalidTag();
        return false;
    }

    if (!empty($currAttrib)) {
        # html 4 allows something like <option selected> instead of <option selected="selected">
        $attributes[$currAttrib] = $currAttrib;
    }
    if (!$isStartTag) {
        if (!empty($attributes) || $tagName != end($this->openTags)) {
            # end tags must not contain any attributes
            # or maybe we did not expect a different tag to be closed
            $this->invalidTag();
            return false;
        }
        array_pop($this->openTags);
        if (in_array($tagName, $this->preformattedTags)) {
            $this->keepWhitespace--;
        }
    }
    # move the tag from the input into the current node
    $pos++;
    $this->node = substr($this->html, 0, $pos);
    $this->html = substr($this->html, $pos);
    $this->tagName = $tagName;
    $this->tagAttributes = $attributes;
    $this->isStartTag = $isStartTag;
    $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags);
    if ($this->isEmptyTag) {
        # might be not well formed
        $this->node = preg_replace('# */? *>$#', ' />', $this->node);
    }
    $this->nodeType = 'tag';
    $this->isBlockElement = $this->blockElements[$tagName];
    return true;
}
|
||||
/**
 * Neutralise a "<" at the start of $this->html that does not open a valid tag.
 *
 * The first character of the input is replaced by the entity "&lt;", so
 * the input no longer starts with "<" and the character is read as text.
 *
 * @param void
 * @return void
 */
function invalidTag() {
    # replacing "<" by itself would leave the input unchanged
    $this->html = substr_replace($this->html, '&lt;', 0, 1);
}
|
||||
/**
 * Make the first $pos bytes of $this->html the current node.
 *
 * Sets $this->nodeType to $type, stores the cut-off prefix in $this->node
 * and keeps the remainder in $this->html. If the previous node was a tag,
 * its tag-specific fields are cleared first.
 *
 * @param string $type see description for $this->nodeType
 * @param int $pos to which position shall we cut?
 * @return void
 */
function setNode($type, $pos) {
    if ($this->nodeType == 'tag') {
        # the tag specific vars belong to the previous node, drop them
        # ($type == 'tag' is not handled here, see this::parseTag())
        foreach (array('tagName', 'tagAttributes', 'isStartTag', 'isEmptyTag', 'isBlockElement') as $field) {
            $this->$field = null;
        }
    }
    $head = substr($this->html, 0, $pos);
    $rest = substr($this->html, $pos);
    $this->nodeType = $type;
    $this->node = $head;
    $this->html = $rest;
}
|
||||
/**
 * check if $this->html begins with $str
 *
 * The comparison is byte-wise. A loose == on the two strings would
 * compare numeric strings by value (" 10" == "010") and report false
 * matches.
 *
 * @param string $str
 * @return bool
 */
function match($str) {
    $str = (string) $str;
    return strncmp((string) $this->html, $str, strlen($str)) === 0;
}
|
||||
/**
 * Collapse every run of whitespace in $this->node into a single space.
 *
 * Nothing is changed while $this->keepWhitespace is set, i.e. inside
 * <pre> or <code>.
 *
 * @param void
 * @return void
 */
function handleWhitespaces() {
    if (!$this->keepWhitespace) {
        # truncate multiple whitespaces to a single one
        $this->node = preg_replace('#\s+#s', ' ', $this->node);
    }
}
|
||||
/**
 * Rebuild $this->node from the parsed tag data in a normalized form.
 *
 * End tags become "</name>". Start tags become "<name attr="value">" with
 * every attribute value in double quotes, and " /" is inserted before the
 * closing ">" for empty tags.
 *
 * @param void
 * @return void
 */
function normalizeNode() {
    $this->node = '<';
    if (!$this->isStartTag) {
        $this->node .= '/'.$this->tagName.'>';
        return;
    }
    $this->node .= $this->tagName;
    foreach ($this->tagAttributes as $name => $value) {
        # the value is written inside double quotes, so a literal " in it
        # has to be escaped or it would end the attribute early
        $this->node .= ' '.$name.'="'.str_replace('"', '&quot;', $value).'"';
    }
    if ($this->isEmptyTag) {
        $this->node .= ' /';
    }
    $this->node .= '>';
}
|
||||
}
|
||||
|
||||
/**
 * Re-indent an HTML string.
 *
 * The string is fed through parseHTML node by node. Block-level tags are
 * put on their own line, prefixed with one $indent per open block-level
 * element; inline content is appended to the current line. <br> as well
 * as comments, processing instructions and doctypes are followed by a
 * line break. Content inside preformatted elements is not re-indented.
 *
 * @param string $html
 * @param string $indent optional, used once per nesting level
 * @param bool $noTagsInCode optional, passed on to the parser
 * @return string
 */
function indentHTML($html, $indent = " ", $noTagsInCode = false) {
    $parser = new parseHTML;
    $parser->noTagsInCode = $noTagsInCode;
    $parser->html = $html;

    $out = '';
    $atLineStart = true; # last tag was block elem
    $depth = 0;          # number of currently open block elements
    $pad = (string) $indent;

    while ($parser->nextNode()) {
        $isTag = ($parser->nodeType == 'tag');
        if ($isTag) {
            $parser->normalizeNode();
        }

        if ($isTag && $parser->isBlockElement) {
            $isPreOrCode = in_array($parser->tagName, array('code', 'pre'));
            if (!($parser->keepWhitespace || $atLineStart || $isPreOrCode)) {
                # finish the line of inline content before the block tag
                $out = rtrim($out) . "\n";
            }
            if ($parser->isStartTag) {
                $out .= str_repeat($pad, $depth);
                if (!$parser->isEmptyTag) {
                    $depth++;
                }
            } else {
                if ($depth > 0) {
                    $depth--;
                }
                if (!$isPreOrCode) {
                    $out .= str_repeat($pad, $depth);
                }
            }
            $out .= $parser->node;
            if (!$parser->keepWhitespace && (!$isPreOrCode || !$parser->isStartTag)) {
                $out .= "\n";
            }
            $atLineStart = true;
            continue;
        }

        if ($isTag && $parser->tagName == 'br') {
            $out .= $parser->node . "\n";
            $atLineStart = true;
            continue;
        }

        if ($atLineStart && !$parser->keepWhitespace) {
            # first inline node on a fresh line: indent it
            $out .= str_repeat($pad, $depth);
            $parser->node = ltrim($parser->node);
        }
        $out .= $parser->node;

        if (in_array($parser->nodeType, array('comment', 'pi', 'doctype'))) {
            $out .= "\n";
        } else {
            $atLineStart = false;
        }
    }
    return $out;
}
|
||||
/*
|
||||
# testcase / example
|
||||
error_reporting(E_ALL);
|
||||
|
||||
$html = '<p>Simple block on one line:</p>
|
||||
|
||||
<div>foo</div>
|
||||
|
||||
<p>And nested without indentation:</p>
|
||||
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
foo
|
||||
</div>
|
||||
<div style=">"/>
|
||||
</div>
|
||||
<div>bar</div>
|
||||
</div>
|
||||
|
||||
<p>And with attributes:</p>
|
||||
|
||||
<div>
|
||||
<div id="foo">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>This was broken in 1.0.2b7:</p>
|
||||
|
||||
<div class="inlinepage">
|
||||
<div class="toggleableend">
|
||||
foo
|
||||
</div>
|
||||
</div>';
|
||||
#$html = '<a href="asdfasdf" title=\'asdf\' foo="bar">asdf</a>';
|
||||
echo indentHTML($html);
|
||||
die();
|
||||
*/
|
9
vendor/composer/autoload_classmap.php
vendored
9
vendor/composer/autoload_classmap.php
vendored
@ -7,6 +7,9 @@ $baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Hubzilla\\Import\\Import' => $baseDir . '/include/Import/Importer.php',
|
||||
'Markdownify\\Converter' => $vendorDir . '/pixel418/markdownify/src/Converter.php',
|
||||
'Markdownify\\ConverterExtra' => $vendorDir . '/pixel418/markdownify/src/ConverterExtra.php',
|
||||
'Markdownify\\Parser' => $vendorDir . '/pixel418/markdownify/src/Parser.php',
|
||||
'Michelf\\Markdown' => $vendorDir . '/michelf/php-markdown/Michelf/Markdown.php',
|
||||
'Michelf\\MarkdownExtra' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
|
||||
'Michelf\\MarkdownInterface' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
|
||||
@ -18,6 +21,8 @@ return array(
|
||||
'Psr\\Log\\LoggerInterface' => $vendorDir . '/psr/log/Psr/Log/LoggerInterface.php',
|
||||
'Psr\\Log\\LoggerTrait' => $vendorDir . '/psr/log/Psr/Log/LoggerTrait.php',
|
||||
'Psr\\Log\\NullLogger' => $vendorDir . '/psr/log/Psr/Log/NullLogger.php',
|
||||
'Psr\\Log\\Test\\DummyTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Psr\\Log\\Test\\LoggerInterfaceTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Sabre\\CalDAV\\Backend\\AbstractBackend' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
|
||||
'Sabre\\CalDAV\\Backend\\BackendInterface' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
|
||||
'Sabre\\CalDAV\\Backend\\NotificationSupport' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
|
||||
@ -268,7 +273,6 @@ return array(
|
||||
'Sabre\\HTTP\\URLUtil' => $vendorDir . '/sabre/http/lib/URLUtil.php',
|
||||
'Sabre\\HTTP\\Util' => $vendorDir . '/sabre/http/lib/Util.php',
|
||||
'Sabre\\HTTP\\Version' => $vendorDir . '/sabre/http/lib/Version.php',
|
||||
'Sabre\\Uri\\InvalidUriException' => $vendorDir . '/sabre/uri/lib/InvalidUriException.php',
|
||||
'Sabre\\Uri\\Version' => $vendorDir . '/sabre/uri/lib/Version.php',
|
||||
'Sabre\\VObject\\BirthdayCalendarGenerator' => $vendorDir . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
|
||||
'Sabre\\VObject\\Cli' => $vendorDir . '/sabre/vobject/lib/Cli.php',
|
||||
@ -357,6 +361,9 @@ return array(
|
||||
'Sabre\\Xml\\Writer' => $vendorDir . '/sabre/xml/lib/Writer.php',
|
||||
'Sabre\\Xml\\XmlDeserializable' => $vendorDir . '/sabre/xml/lib/XmlDeserializable.php',
|
||||
'Sabre\\Xml\\XmlSerializable' => $vendorDir . '/sabre/xml/lib/XmlSerializable.php',
|
||||
'Test\\Markdownify\\ConverterExtraTest' => $vendorDir . '/pixel418/markdownify/test/ConverterExtraTest.php',
|
||||
'Test\\Markdownify\\ConverterTest' => $vendorDir . '/pixel418/markdownify/test/ConverterTest.php',
|
||||
'Test\\Markdownify\\ConverterTestCase' => $vendorDir . '/pixel418/markdownify/test/ConverterTestCase.php',
|
||||
'Zotlabs\\Access\\AccessList' => $baseDir . '/Zotlabs/Access/AccessList.php',
|
||||
'Zotlabs\\Access\\PermissionLimits' => $baseDir . '/Zotlabs/Access/PermissionLimits.php',
|
||||
'Zotlabs\\Access\\PermissionRoles' => $baseDir . '/Zotlabs/Access/PermissionRoles.php',
|
||||
|
2
vendor/composer/autoload_psr4.php
vendored
2
vendor/composer/autoload_psr4.php
vendored
@ -7,6 +7,7 @@ $baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Zotlabs\\' => array($baseDir . '/Zotlabs'),
|
||||
'Test\\Markdownify\\' => array($vendorDir . '/pixel418/markdownify/test'),
|
||||
'Sabre\\Xml\\' => array($vendorDir . '/sabre/xml/lib'),
|
||||
'Sabre\\VObject\\' => array($vendorDir . '/sabre/vobject/lib'),
|
||||
'Sabre\\Uri\\' => array($vendorDir . '/sabre/uri/lib'),
|
||||
@ -17,5 +18,6 @@ return array(
|
||||
'Sabre\\CardDAV\\' => array($vendorDir . '/sabre/dav/lib/CardDAV'),
|
||||
'Sabre\\CalDAV\\' => array($vendorDir . '/sabre/dav/lib/CalDAV'),
|
||||
'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'),
|
||||
'Markdownify\\' => array($vendorDir . '/pixel418/markdownify/src'),
|
||||
'Hubzilla\\' => array($baseDir . '/include'),
|
||||
);
|
||||
|
25
vendor/composer/autoload_static.php
vendored
25
vendor/composer/autoload_static.php
vendored
@ -21,6 +21,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
'Zotlabs\\' => 8,
|
||||
),
|
||||
'T' =>
|
||||
array (
|
||||
'Test\\Markdownify\\' => 17,
|
||||
),
|
||||
'S' =>
|
||||
array (
|
||||
'Sabre\\Xml\\' => 10,
|
||||
@ -37,6 +41,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
'Psr\\Log\\' => 8,
|
||||
),
|
||||
'M' =>
|
||||
array (
|
||||
'Markdownify\\' => 12,
|
||||
),
|
||||
'H' =>
|
||||
array (
|
||||
'Hubzilla\\' => 9,
|
||||
@ -48,6 +56,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/Zotlabs',
|
||||
),
|
||||
'Test\\Markdownify\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/pixel418/markdownify/test',
|
||||
),
|
||||
'Sabre\\Xml\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/sabre/xml/lib',
|
||||
@ -88,6 +100,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/psr/log/Psr/Log',
|
||||
),
|
||||
'Markdownify\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/pixel418/markdownify/src',
|
||||
),
|
||||
'Hubzilla\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/include',
|
||||
@ -106,6 +122,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
|
||||
public static $classMap = array (
|
||||
'Hubzilla\\Import\\Import' => __DIR__ . '/../..' . '/include/Import/Importer.php',
|
||||
'Markdownify\\Converter' => __DIR__ . '/..' . '/pixel418/markdownify/src/Converter.php',
|
||||
'Markdownify\\ConverterExtra' => __DIR__ . '/..' . '/pixel418/markdownify/src/ConverterExtra.php',
|
||||
'Markdownify\\Parser' => __DIR__ . '/..' . '/pixel418/markdownify/src/Parser.php',
|
||||
'Michelf\\Markdown' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/Markdown.php',
|
||||
'Michelf\\MarkdownExtra' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
|
||||
'Michelf\\MarkdownInterface' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
|
||||
@ -117,6 +136,8 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Psr\\Log\\LoggerInterface' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerInterface.php',
|
||||
'Psr\\Log\\LoggerTrait' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerTrait.php',
|
||||
'Psr\\Log\\NullLogger' => __DIR__ . '/..' . '/psr/log/Psr/Log/NullLogger.php',
|
||||
'Psr\\Log\\Test\\DummyTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Psr\\Log\\Test\\LoggerInterfaceTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Sabre\\CalDAV\\Backend\\AbstractBackend' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
|
||||
'Sabre\\CalDAV\\Backend\\BackendInterface' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
|
||||
'Sabre\\CalDAV\\Backend\\NotificationSupport' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
|
||||
@ -367,7 +388,6 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Sabre\\HTTP\\URLUtil' => __DIR__ . '/..' . '/sabre/http/lib/URLUtil.php',
|
||||
'Sabre\\HTTP\\Util' => __DIR__ . '/..' . '/sabre/http/lib/Util.php',
|
||||
'Sabre\\HTTP\\Version' => __DIR__ . '/..' . '/sabre/http/lib/Version.php',
|
||||
'Sabre\\Uri\\InvalidUriException' => __DIR__ . '/..' . '/sabre/uri/lib/InvalidUriException.php',
|
||||
'Sabre\\Uri\\Version' => __DIR__ . '/..' . '/sabre/uri/lib/Version.php',
|
||||
'Sabre\\VObject\\BirthdayCalendarGenerator' => __DIR__ . '/..' . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
|
||||
'Sabre\\VObject\\Cli' => __DIR__ . '/..' . '/sabre/vobject/lib/Cli.php',
|
||||
@ -456,6 +476,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Sabre\\Xml\\Writer' => __DIR__ . '/..' . '/sabre/xml/lib/Writer.php',
|
||||
'Sabre\\Xml\\XmlDeserializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlDeserializable.php',
|
||||
'Sabre\\Xml\\XmlSerializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlSerializable.php',
|
||||
'Test\\Markdownify\\ConverterExtraTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterExtraTest.php',
|
||||
'Test\\Markdownify\\ConverterTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTest.php',
|
||||
'Test\\Markdownify\\ConverterTestCase' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTestCase.php',
|
||||
'Zotlabs\\Access\\AccessList' => __DIR__ . '/../..' . '/Zotlabs/Access/AccessList.php',
|
||||
'Zotlabs\\Access\\PermissionLimits' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionLimits.php',
|
||||
'Zotlabs\\Access\\PermissionRoles' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionRoles.php',
|
||||
|
58
vendor/composer/installed.json
vendored
58
vendor/composer/installed.json
vendored
@ -518,5 +518,63 @@
|
||||
"keywords": [
|
||||
"markdown"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"version": "v2.2.1",
|
||||
"version_normalized": "2.2.1.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Elephant418/Markdownify.git",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"time": "2016-09-21T13:01:43+00:00",
|
||||
"type": "lib",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"LGPL"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"keywords": [
|
||||
"markdown",
|
||||
"markdownify"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
76
vendor/pixel418/markdownify/CHANGELOG.md
vendored
Normal file
76
vendor/pixel418/markdownify/CHANGELOG.md
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
CHANGELOG
|
||||
==============
|
||||
|
||||
|
||||
21/09/2016 v2.2.1
|
||||
--------------
|
||||
|
||||
* Fix: Moving trailing whitespace from inline elements outside of the element
|
||||
* Feature: Use PSR-4
|
||||
* Feature: PHP 7.0 support in continuous integration
|
||||
* Doc: Update of the README
|
||||
|
||||
|
||||
07/09/2016 v2.2.0
|
||||
--------------
|
||||
|
||||
* Fix: Reset state between each parsing
|
||||
|
||||
|
||||
19/02/2016 v2.1.11
|
||||
--------------
|
||||
|
||||
* Fix: Empty table cell conversion
|
||||
|
||||
|
||||
10/02/2016 v2.1.10
|
||||
--------------
|
||||
|
||||
* Fix: Handle nested table.
|
||||
|
||||
|
||||
01/04/2015 v2.1.9
|
||||
--------------
|
||||
|
||||
* Fix: Handle HTML breaks & spaces in a less destructive way.
|
||||
|
||||
|
||||
26/03/2015 v2.1.8
|
||||
--------------
|
||||
|
||||
* Fix: Use alternative italic character
|
||||
* Fix: Handle HTML breaks inside another tag
|
||||
* Fix: Handle HTML spaces around tags
|
||||
|
||||
|
||||
07/11/2014 v2.1.7
|
||||
--------------
|
||||
|
||||
* Change composer name to "elephant418/markdownify"
|
||||
|
||||
|
||||
14/07/2014 v2.1.6
|
||||
--------------
|
||||
|
||||
* Fix: Simulate a paragraph for inline text preceding block element
|
||||
* Fix: Nested lists
|
||||
* Fix: setKeepHTML method
|
||||
* Feature: PHP 5.5 & 5.6 support in continuous integration
|
||||
|
||||
|
||||
16/03/2014 v2.1.5
|
||||
--------------
|
||||
|
||||
Add display settings
|
||||
|
||||
* Test: Add tests for footnotes after every paragraph or not
|
||||
* Feature: Allow to display link reference in paragraph, without footnotes
|
||||
|
||||
|
||||
27/02/2014 v2.1.4
|
||||
--------------
|
||||
|
||||
Improve how ConverterExtra handle id & class attributes:
|
||||
|
||||
* Feature: Allow id & class attributes on links
|
||||
* Feature: Allow class attributes on headings
|
63
vendor/pixel418/markdownify/README.md
vendored
Normal file
63
vendor/pixel418/markdownify/README.md
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
# Markdownify
|
||||
|
||||
[Build Status](https://travis-ci.org/Elephant418/Markdownify?branch=master)
|
||||
[Latest Stable Version](https://packagist.org/packages/pixel418/markdownify)
|
||||
[License](https://opensource.org/licenses/lgpl-2.1.php)
|
||||
|
||||
The HTML to Markdown converter for PHP
|
||||
|
||||
[Code example](#code-example) | [How to Install](#how-to-install) | [How to Contribute](#how-to-contribute) | [Author & Community](#author--community)
|
||||
|
||||
|
||||
|
||||
Code example
|
||||
--------
|
||||
|
||||
### Markdown
|
||||
|
||||
```php
|
||||
$converter = new Markdownify\Converter;
|
||||
$converter->parseString('<h1>Heading</h1>');
|
||||
// Returns: # Heading
|
||||
```
|
||||
|
||||
### Markdown Extra [as defined by @michelf](http://michelf.ca/projects/php-markdown/extra/)
|
||||
|
||||
```php
|
||||
$converter = new Markdownify\ConverterExtra;
|
||||
$converter->parseString('<h1 id="md">Heading</h1>');
|
||||
// Returns: # Heading {#md}
|
||||
```
|
||||
|
||||
|
||||
|
||||
How to Install
|
||||
--------
|
||||
|
||||
This library package requires `PHP 5.3` or later.<br>
|
||||
Install [Composer](http://getcomposer.org/doc/01-basic-usage.md#installation) and run the following command to get the latest version:
|
||||
|
||||
```sh
|
||||
composer require pixel418/markdownify
|
||||
```
|
||||
|
||||
|
||||
|
||||
How to Contribute
|
||||
--------
|
||||
|
||||
1. Fork the Markdownify repository
|
||||
2. Create a new branch for each feature or improvement
|
||||
3. Send a pull request from each feature branch to the **v2.x** branch
|
||||
|
||||
If you don't know much about pull requests, you can read [the GitHub article](https://help.github.com/articles/using-pull-requests)
|
||||
|
||||
|
||||
|
||||
Author & Community
|
||||
--------
|
||||
|
||||
Markdownify is under [LGPL License](http://opensource.org/licenses/LGPL-2.1)<br>
|
||||
It was created by [Milian Wolff](http://milianw.de)<br>
|
||||
It was converted to a Symfony Bundle by [Peter Kruithof](https://github.com/pkruithof)<br>
|
||||
It is maintained by [Thomas ZILLIOX](http://tzi.fr)
|
38
vendor/pixel418/markdownify/composer.json
vendored
Normal file
38
vendor/pixel418/markdownify/composer.json
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"type": "lib",
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"keywords": ["markdown", "markdownify"],
|
||||
"license": "LGPL",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
|
||||
},
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
}
|
||||
}
|
1400
vendor/pixel418/markdownify/src/Converter.php
vendored
Normal file
1400
vendor/pixel418/markdownify/src/Converter.php
vendored
Normal file
File diff suppressed because it is too large
Load Diff
573
vendor/pixel418/markdownify/src/ConverterExtra.php
vendored
Normal file
573
vendor/pixel418/markdownify/src/ConverterExtra.php
vendored
Normal file
@ -0,0 +1,573 @@
|
||||
<?php
|
||||
|
||||
/* This file is part of the Markdownify project, which is under LGPL license */
|
||||
|
||||
namespace Markdownify;
|
||||
|
||||
class ConverterExtra extends Converter
|
||||
{
|
||||
|
||||
/**
|
||||
* table data, including rows with content and the maximum width of each col
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $table = array();
|
||||
|
||||
/**
|
||||
* current col
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $col = -1;
|
||||
|
||||
/**
|
||||
* current row
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $row = 0;
|
||||
|
||||
/**
 * Set up the Markdown Extra converter on top of the base converter.
 *
 * The arguments are handed unchanged to the parent constructor. Afterwards
 * the additional markdownable tags and attributes are registered (heading
 * and link id/class, tables, definition lists, footnotes, abbr), the
 * footnote elements are announced to the parser, and the regular
 * expressions used to decide whether a table can be parsed are built.
 */
public function __construct($linksAfterEachParagraph = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML)
{
    parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML);

    // new markdownable tags & attributes
    // header ids: # foo {bar}
    for ($level = 1; $level <= 6; $level++) {
        $heading = 'h' . $level;
        $this->isMarkdownable[$heading]['id'] = 'optional';
        $this->isMarkdownable[$heading]['class'] = 'optional';
    }

    // tables
    $this->isMarkdownable['table'] = array();
    $this->isMarkdownable['th'] = array('align' => 'optional');
    $this->isMarkdownable['td'] = array('align' => 'optional');
    $this->isMarkdownable['tr'] = array();
    array_push($this->ignore, 'thead', 'tbody', 'tfoot');

    // definition lists
    foreach (array('dl', 'dd', 'dt') as $listTag) {
        $this->isMarkdownable[$listTag] = array();
    }

    // link class
    $this->isMarkdownable['a']['id'] = 'optional';
    $this->isMarkdownable['a']['class'] = 'optional';

    // footnotes
    $this->isMarkdownable['fnref'] = array('target' => 'required');
    $this->isMarkdownable['footnotes'] = array();
    $this->isMarkdownable['fn'] = array('name' => 'required');
    $this->parser->blockElements['fnref'] = false;
    $this->parser->blockElements['fn'] = true;
    $this->parser->blockElements['footnotes'] = true;

    // abbr
    $this->isMarkdownable['abbr'] = array('title' => 'required');

    // build RegEx lookahead to decide whether table can be parsed or not
    $inlineTags = array_keys($this->parser->blockElements, false);
    $colContents = sprintf('(?:[^<]|<(?:%s|[^a-z]))*', implode('|', $inlineTags));

    // each pattern is assembled line by line and joined with "\n"
    $this->tableLookaheadHeader = implode("\n", array(
        '{',
        '^\s*(?:<thead\s*>)?\s* # open optional thead',
        '<tr\s*>\s*(?: # start required row with headers',
        '<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align',
        '\s*' . $colContents . '\s* # contents',
        '</th>\s* # close header',
        ')+</tr> # close row with headers',
        '\s*(?:</thead>)? # close optional thead',
        '}sxi',
    ));
    $this->tdSubstitute = implode("\n", array(
        '\s*' . $colContents . '\s* # contents',
        '</td>\s*',
    ));
    $this->tableLookaheadBody = implode("\n", array(
        '{',
        '\s*(?:<tbody\s*>)?\s* # open optional tbody',
        '(?:<tr\s*>\s* # start row',
        '%s # cols to be substituted',
        '</tr>)+ # close row',
        '\s*(?:</tbody>)? # close optional tbody',
        '\s*</table> # close table',
        '}sxi',
    ));
}
|
||||
|
||||
/**
 * Handle heading tags (<h1> - <h6>), adding the Markdown Extra attribute block.
 *
 * On the start tag the current CSS selector is stored with the stacked
 * tag; on the end tag a non-empty selector is written as " {selector}".
 * In both cases the parent handler is called afterwards.
 *
 * @param int $level 1-6
 * @return void
 */
protected function handleHeader($level)
{
    if (!$this->parser->isStartTag) {
        $closed = $this->unstack();
        if (!empty($closed['cssSelector'])) {
            // {#id.class}
            $this->out(' {' . $closed['cssSelector'] . '}');
        }
    } else {
        $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector();
        $this->stack();
    }
    parent::handleHeader($level);
}
|
||||
|
||||
/**
 * Parse an <a> tag: run the parent handler, then store the current CSS
 * selector in the tag attributes under the key 'cssSelector'.
 *
 * @param void
 * @return void
 */
protected function handleTag_a_parser()
{
    parent::handleTag_a_parser();
    $selector = $this->getCurrentCssSelector();
    $this->parser->tagAttributes['cssSelector'] = $selector;
}
|
||||
|
||||
/**
 * Convert an <a> tag: take the parent's output and append "{selector}"
 * when the tag carries a non-empty 'cssSelector'.
 *
 * @param array $tag
 * @param string $buffer
 * @return string The markdownified link
 */
protected function handleTag_a_converter($tag, $buffer)
{
    $link = parent::handleTag_a_converter($tag, $buffer);
    if (empty($tag['cssSelector'])) {
        return $link;
    }

    // [This link][id]{#id.class}
    return $link . '{' . $tag['cssSelector'] . '}';
}
|
||||
|
||||
/**
 * Handle <abbr> tags.
 *
 * Start tag: calls stack() and buffer(). End tag: the buffered text is
 * written to the output unchanged, and the tag is appended to
 * $this->stack['abbr'] unless an entry with the same text is already there.
 *
 * @return void
 */
protected function handleTag_abbr()
{
    if ($this->parser->isStartTag) {
        $this->stack();
        $this->buffer();

        return;
    }

    $tag = $this->unstack();
    $tag['text'] = $this->unbuffer();

    /** TODO: differing abbr definitions, i.e. different titles for same text **/
    $alreadyKnown = false;
    foreach ($this->stack['abbr'] as $known) {
        if ($known['text'] == $tag['text']) {
            $alreadyKnown = true;
            break;
        }
    }

    $this->out($tag['text']);

    if (!$alreadyKnown) {
        array_push($this->stack['abbr'], $tag);
    }
}
|
||||
|
||||
/**
 * Flush stacked abbr tags.
 *
 * Every entry of $this->stack['abbr'] that has not been flushed yet is
 * written as an abbreviation definition line ("*[text]: title") and marked
 * with 'unstacked' so it is emitted only once.
 *
 * @return void
 */
protected function flushStacked_abbr()
{
    $definitions = array();

    foreach ($this->stack['abbr'] as $key => $tag) {
        if (isset($tag['unstacked'])) {
            continue;
        }

        // <abbr> without a title attribute is valid HTML; fall back to an
        // empty title instead of reading an undefined index.
        $title = isset($tag['title']) ? $tag['title'] : '';

        $definitions[] = ' *[' . $tag['text'] . ']: ' . $title;
        $this->stack['abbr'][$key]['unstacked'] = true;
    }

    if (!empty($definitions)) {
        $this->out("\n\n" . implode("\n", $definitions));
    }
}
|
||||
|
||||
/**
 * Handle <table> tags.
 *
 * Start tag:
 *  - without keepHTML an empty table structure is prepared unconditionally;
 *  - with keepHTML the upcoming HTML is matched against the header and body
 *    lookahead expressions. Only if both match is the table collected for
 *    conversion, otherwise handleTagToText() is called.
 *
 * End tag: the collected rows are written as a Markdown Extra table
 * (header row, separator row with alignment colons, body rows).
 *
 * @return void
 */
protected function handleTag_table()
{
    if ($this->parser->isStartTag) {
        if (!$this->keepHTML) {
            $this->table = array(
                'rows' => array(),
                'col_widths' => array(),
                'aligns' => array(),
            );
            $this->row = 0;

            return;
        }

        // check if upcoming table can be converted
        if (!preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
            // non markdownable table
            $this->handleTagToText();

            return;
        }

        // header seems good, now check body
        // get align & number of cols
        preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
        $regEx = '';
        $i = 1; // number of the quote capture group belonging to the current column
        $aligns = array();
        foreach ($cols[2] as $align) {
            $align = strtolower($align);
            array_push($aligns, $align);
            if (empty($align)) {
                $align = 'left'; // default value
            }
            $td = '\s+align=("|\')' . $align . '\\' . $i;
            $i++;
            if ($align == 'left') {
                // look for empty align or left
                $td = '(?:' . $td . ')?';
            }
            $td = '<td' . $td . '\s*>';
            $regEx .= $td . $this->tdSubstitute;
        }
        $regEx = sprintf($this->tableLookaheadBody, $regEx);

        // The body must follow directly after the matched header.
        $headerLength = strlen($matches[0]);

        // The flags argument has to be an int; passing null is deprecated
        // as of PHP 8.1.
        if (!preg_match($regEx, $this->parser->html, $matches, 0, $headerLength)) {
            // non markdownable table
            $this->handleTagToText();

            return;
        }

        // this is a markdownable table tag!
        $this->table = array(
            'rows' => array(),
            'col_widths' => array(),
            'aligns' => $aligns,
        );
        $this->row = 0;

        return;
    }

    // finally build the table in Markdown Extra syntax
    $separator = array();
    if (!isset($this->table['aligns'])) {
        $this->table['aligns'] = array();
    }

    // separator with correct align identifiers
    foreach ($this->table['aligns'] as $col => $align) {
        $hasWidth = isset($this->table['col_widths'][$col]);
        if (!$this->keepHTML && !$hasWidth) {
            break;
        }

        $left = ' ';
        $right = ' ';
        switch ($align) {
            case 'left':
                $left = ':';
                break;
            case 'center':
                $left = ':';
                $right = ':';
                break;
            case 'right':
                $right = ':';
                break;
        }

        // A column without a recorded width gets an empty dash run instead
        // of an undefined-index read.
        $width = $hasWidth ? $this->table['col_widths'][$col] : 0;
        array_push($separator, $left . str_repeat('-', $width) . $right);
    }
    $separator = '|' . implode('|', $separator) . '|';

    $rows = array();
    // add padding
    array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));

    $header = array_shift($this->table['rows']);
    if (!is_array($header)) {
        // table without any row: array_shift() returned null
        $header = array();
    }

    array_push($rows, '| ' . implode(' | ', $header) . ' |');
    array_push($rows, $separator);
    foreach ($this->table['rows'] as $row) {
        array_push($rows, '| ' . implode(' | ', $row) . ' |');
    }

    $this->out(implode("\n" . $this->indent, $rows));
    $this->table = array();
    $this->setLineBreaks(2);
}
|
||||
|
||||
/**
 * Pad a single cell with spaces so that it fills its column according to the
 * column's alignment.
 *
 * Intended as callback for array_walk_recursive() on $this->table['rows'].
 * A column without a known alignment is registered as 'left'.
 *
 * @param string &$content cell text, modified in place
 * @param int $col column index
 * @return void
 */
protected function alignTdContent(&$content, $col)
{
    if (!isset($this->table['aligns'][$col])) {
        $this->table['aligns'][$col] = 'left';
    }

    $align = $this->table['aligns'][$col];
    $gap = $this->table['col_widths'][$col] - $this->strlen($content);

    if ($align == 'left' || ($align != 'right' && $align != 'center')) {
        // 'left' and every unknown value: pad on the right
        $content .= str_repeat(' ', $gap);
    } elseif ($align == 'right') {
        $content = str_repeat(' ', $gap) . $content;
    } else {
        // center: the odd space, if any, goes to the right-hand side
        $before = (int) floor($gap / 2);
        $after = $gap - $before;
        $content = str_repeat(' ', $before) . $content . str_repeat(' ', $after);
    }
}
|
||||
|
||||
/**
 * Handle <tr> tags.
 *
 * An opening tag resets the column counter to -1; a closing tag advances
 * the row counter.
 *
 * @return void
 */
protected function handleTag_tr()
{
    if (!$this->parser->isStartTag) {
        $this->row++;

        return;
    }

    $this->col = -1;
}
|
||||
|
||||
/**
 * Handle <td> tags.
 *
 * Start tag: moves to the next column, makes sure a width entry exists for
 * it and calls buffer(). End tag: stores the trimmed buffer as the cell
 * content and widens the column if this cell is longer than the recorded
 * width.
 *
 * @return void
 */
protected function handleTag_td()
{
    if ($this->parser->isStartTag) {
        $this->col++;
        if (!isset($this->table['col_widths'][$this->col])) {
            $this->table['col_widths'][$this->col] = 0;
        }
        $this->buffer();

        return;
    }

    $cell = trim($this->unbuffer());
    $col = $this->col;

    $knownWidth = isset($this->table['col_widths'][$col]) ? $this->table['col_widths'][$col] : 0;
    $this->table['col_widths'][$col] = max($knownWidth, $this->strlen($cell));
    $this->table['rows'][$this->row][$col] = $cell;
}
|
||||
|
||||
/**
 * Handle <th> tags.
 *
 * Without keepHTML, and as long as no second row exists, the alignment of
 * the column at index col + 1 is taken from the tag's align attribute
 * (empty string when absent) unless one is already recorded. The cell
 * itself is then processed like a <td>.
 *
 * @return void
 */
protected function handleTag_th()
{
    $target = $this->col + 1;

    $needsAlign = !$this->keepHTML
        && !isset($this->table['rows'][1])
        && !isset($this->table['aligns'][$target]);

    if ($needsAlign) {
        $this->table['aligns'][$target] = isset($this->parser->tagAttributes['align'])
            ? $this->parser->tagAttributes['align']
            : '';
    }

    $this->handleTag_td();
}
|
||||
|
||||
/**
 * Handle <dl> tags: a closing tag requests two line breaks, an opening tag
 * does nothing.
 *
 * @return void
 */
protected function handleTag_dl()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(2);
}
|
||||
|
||||
/**
 * Handle <dt> tags: a closing tag requests a single line break, an opening
 * tag does nothing.
 *
 * @return void
 */
protected function handleTag_dt()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(1);
}
|
||||
|
||||
/**
 * Handle <dd> tags.
 *
 * Start tag: if a <p> follows, an extra line is written first; otherwise a
 * trailing double newline in the output is reduced by one character. Then
 * the definition marker is written and indent() is called.
 * End tag: two line breaks are requested when a <dt> follows, one otherwise,
 * and indent() is called again.
 *
 * @return void
 */
protected function handleTag_dd()
{
    $upcoming = ltrim($this->parser->html);

    if (!$this->parser->isStartTag) {
        // lookahead for next dt
        $this->setLineBreaks(strncmp($upcoming, '<dt>', 4) === 0 ? 2 : 1);
        $this->indent(' ');

        return;
    }

    if (strncmp($upcoming, '<p>', 3) === 0) {
        // next comes a paragraph, so we'll need an extra line
        $this->out("\n" . $this->indent);
    } elseif (substr($this->output, -2) == "\n\n") {
        $this->output = substr($this->output, 0, -1);
    }

    $this->out(': ');
    $this->indent(' ', false);
}
|
||||
|
||||
/**
 * Handle <fnref /> tags (the custom footnote reference tags produced in
 * parseString()): writes "[^target]" using the tag's target attribute.
 *
 * @return void
 */
protected function handleTag_fnref()
{
    $target = $this->parser->tagAttributes['target'];

    $this->out('[^' . $target . ']');
}
|
||||
|
||||
/**
 * Handle <fn> tags (the custom footnote tags produced by parseString() and
 * _makeFootnotes()).
 *
 * Start tag: writes "[^name]:" and requests one line break. End tag:
 * requests two line breaks. indent() is called in both cases.
 *
 * @return void
 */
protected function handleTag_fn()
{
    if (!$this->parser->isStartTag) {
        $this->setLineBreaks(2);
    } else {
        $label = '[^' . $this->parser->tagAttributes['name'] . ']:';
        $this->out($label);
        $this->setLineBreaks(1);
    }

    $this->indent(' ');
}
|
||||
|
||||
/**
 * Handle the <footnotes> tag (the custom wrapper produced by parseString()
 * and _makeFootnotes()): a closing tag requests two line breaks, an opening
 * tag does nothing.
 *
 * @return void
 */
protected function handleTag_footnotes()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(2);
}
|
||||
|
||||
/**
 * Parse an HTML string after rewriting footnote markup into custom tags.
 *
 * Two rewrites happen before the parent parser runs:
 *  1. <sup id="fnref:X"><a href="#fn:X" rel="footnote">N</a></sup>
 *     becomes <fnref target="X" />
 *  2. <div class="footnotes"><hr /><ol>...</ol></div>
 *     is passed to _makeFootnotes(), which returns
 *     <footnotes><fn name="...">...</fn>...</footnotes>
 *
 * @param string $html input
 * @return string Markdown formatted output
 */
public function parseString($html)
{
    /** TODO: custom markdown-extra options, e.g. titles & classes **/

    // footnote references
    $referencePattern = '@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us';
    $withReferences = preg_replace($referencePattern, '<fnref target="$1" />', $html);

    // footnote list
    $listPattern = '#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us';
    $withFootnotes = preg_replace_callback($listPattern, array($this, '_makeFootnotes'), $withReferences);

    return parent::parseString($withFootnotes);
}
|
||||
|
||||
/**
 * Replace the HTML representation of a footnote list with something that is
 * easier to parse.
 *
 *   <li id="fn:1">
 *     ...
 *     <a href="#fnref:1" rev="footnote">(arrow)</a></p>
 *   </li>
 *   => <fn name="1">...</fn>
 *
 * The result is wrapped in <footnotes>...</footnotes>.
 *
 * @note this is a callback to be used in parseString()
 *
 * @param array $matches match array; index 1 holds the content of the <ol>
 * @return string
 */
protected function _makeFootnotes($matches)
{
    // Remove the back link. The optional non-breaking space in front of it
    // and the arrow inside it are accepted both as numeric/named entities
    // and as raw UTF-8 bytes, so entity-encoded generator output and
    // already-decoded HTML are handled alike.
    $backlink = '@\s*(?:(?:&#160;|&nbsp;|\xC2\xA0)\s*)?'
        . '<a href="#fnref:[^"]+" rev="footnote"[^>]*>(?:&#8617;|\xE2\x86\xA9)</a>\s*@s';
    $fns = preg_replace($backlink, '', $matches[1]);

    // remove empty paragraph
    $fns = preg_replace('@<p>\s*</p>@s', '', $fns);

    // <li id="fn:1">...</li> -> <fn name="1">...</fn> (opening tags)
    $fns = str_replace('<li id="fn:', '<fn name="', $fns);

    $fns = '<footnotes>' . $fns . '</footnotes>';

    // Closing tags: every </li> directly followed by the next <fn or by the
    // end of the list. The lookahead captures nothing, so the replacement
    // is the closing tag only.
    return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>', $fns);
}
|
||||
|
||||
/**
 * Build a CSS-like selector ("#id.class1.class2") from the id and class
 * attributes of the tag the parser is currently on.
 *
 * Empty id or class attributes contribute nothing, so the result is an empty
 * string when the tag has neither a usable id nor a usable class.
 *
 * @return string
 */
protected function getCurrentCssSelector()
{
    $attributes = $this->parser->tagAttributes;
    $cssSelector = '';

    if (isset($attributes['id'])) {
        $id = $this->decode($attributes['id']);
        if ($id !== '' && $id !== null) {
            $cssSelector .= '#' . $id;
        }
    }

    if (isset($attributes['class'])) {
        // Class names may be separated by any run of whitespace. Splitting
        // with PREG_SPLIT_NO_EMPTY drops empty pieces without also dropping
        // a class literally named "0".
        $classes = preg_split('/\s+/', $this->decode($attributes['class']), -1, PREG_SPLIT_NO_EMPTY);
        if (!empty($classes)) {
            $cssSelector .= '.' . implode('.', $classes);
        }
    }

    return $cssSelector;
}
|
||||
}
|
564
vendor/pixel418/markdownify/src/Parser.php
vendored
Normal file
564
vendor/pixel418/markdownify/src/Parser.php
vendored
Normal file
@ -0,0 +1,564 @@
|
||||
<?php
|
||||
|
||||
/* This file is part of the Markdownify project, which is under LGPL license */
|
||||
|
||||
namespace Markdownify;
|
||||
|
||||
class Parser
|
||||
{
|
||||
public static $skipWhitespace = true;
|
||||
public static $a_ord;
|
||||
public static $z_ord;
|
||||
public static $special_ords;
|
||||
|
||||
/**
|
||||
* tags which are always empty (<br /> etc.)
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
public $emptyTags = array(
|
||||
'br',
|
||||
'hr',
|
||||
'input',
|
||||
'img',
|
||||
'area',
|
||||
'link',
|
||||
'meta',
|
||||
'param',
|
||||
);
|
||||
|
||||
/**
|
||||
* tags with preformatted text
|
||||
* whitespace inside them is left untouched
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
public $preformattedTags = array(
|
||||
'script',
|
||||
'style',
|
||||
'pre',
|
||||
'code',
|
||||
);
|
||||
|
||||
/**
|
||||
* suppress HTML tags inside <code> tags (see parseTag())
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
public $noTagsInCode = false;
|
||||
|
||||
/**
|
||||
* html to be parsed
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
public $html = '';
|
||||
|
||||
/**
|
||||
* node type:
|
||||
*
|
||||
* - tag (see isStartTag)
|
||||
* - text (includes cdata)
|
||||
* - comment
|
||||
* - doctype
|
||||
* - pi (processing instruction)
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
public $nodeType = '';
|
||||
|
||||
/**
|
||||
* current node content, i.e. either a
|
||||
* simple string (text node), or something like
|
||||
* <tag attrib="value"...>
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
public $node = '';
|
||||
|
||||
/**
|
||||
* whether the current node is an opening tag (<a>) or not (</a>)
|
||||
* set to NULL if current node is not a tag
|
||||
* NOTE: empty tags (<br />) set this to true as well!
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
public $isStartTag = null;
|
||||
|
||||
/**
|
||||
* whether the current node is an empty tag (<br />) or not (<a></a>)
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
public $isEmptyTag = null;
|
||||
|
||||
/**
|
||||
* tag name
|
||||
*
|
||||
* @var string | null
|
||||
*/
|
||||
public $tagName = '';
|
||||
|
||||
/**
|
||||
* attributes of current tag
|
||||
*
|
||||
* @var array (attribName=>value) | null
|
||||
*/
|
||||
public $tagAttributes = null;
|
||||
|
||||
/**
|
||||
* whether or not the actual context is a inline context
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
public $isInlineContext = null;
|
||||
|
||||
/**
|
||||
* whether the current tag is a block element
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
public $isBlockElement = null;
|
||||
|
||||
/**
|
||||
* whether the current start tag directly follows an inline context (text or an inline tag)
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
public $isNextToInlineContext = null;
|
||||
|
||||
/**
|
||||
* keep whitespace
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $keepWhitespace = 0;
|
||||
|
||||
/**
|
||||
* list of open tags
|
||||
* count this to get current depth
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
public $openTags = array();
|
||||
|
||||
/**
|
||||
* list of block elements
|
||||
*
|
||||
* @var array
|
||||
* TODO: what shall we do with <del> and <ins> ?!
|
||||
*/
|
||||
public $blockElements = array(
|
||||
// tag name => <bool> is block
|
||||
// block elements
|
||||
'address' => true,
|
||||
'blockquote' => true,
|
||||
'center' => true,
|
||||
'del' => true,
|
||||
'dir' => true,
|
||||
'div' => true,
|
||||
'dl' => true,
|
||||
'fieldset' => true,
|
||||
'form' => true,
|
||||
'h1' => true,
|
||||
'h2' => true,
|
||||
'h3' => true,
|
||||
'h4' => true,
|
||||
'h5' => true,
|
||||
'h6' => true,
|
||||
'hr' => true,
|
||||
'ins' => true,
|
||||
'isindex' => true,
|
||||
'menu' => true,
|
||||
'noframes' => true,
|
||||
'noscript' => true,
|
||||
'ol' => true,
|
||||
'p' => true,
|
||||
'pre' => true,
|
||||
'table' => true,
|
||||
'ul' => true,
|
||||
// set table elements and list items to block as well
|
||||
'thead' => true,
|
||||
'tbody' => true,
|
||||
'tfoot' => true,
|
||||
'td' => true,
|
||||
'tr' => true,
|
||||
'th' => true,
|
||||
'li' => true,
|
||||
'dd' => true,
|
||||
'dt' => true,
|
||||
// header items and html / body as well
|
||||
'html' => true,
|
||||
'body' => true,
|
||||
'head' => true,
|
||||
'meta' => true,
|
||||
'link' => true,
|
||||
'style' => true,
|
||||
'title' => true,
|
||||
// unfancy media tags, when indented should be rendered as block
|
||||
'map' => true,
|
||||
'object' => true,
|
||||
'param' => true,
|
||||
'embed' => true,
|
||||
'area' => true,
|
||||
// inline elements
|
||||
'a' => false,
|
||||
'abbr' => false,
|
||||
'acronym' => false,
|
||||
'applet' => false,
|
||||
'b' => false,
|
||||
'basefont' => false,
|
||||
'bdo' => false,
|
||||
'big' => false,
|
||||
'br' => false,
|
||||
'button' => false,
|
||||
'cite' => false,
|
||||
'code' => false,
|
||||
'del' => false,
|
||||
'dfn' => false,
|
||||
'em' => false,
|
||||
'font' => false,
|
||||
'i' => false,
|
||||
'img' => false,
|
||||
'ins' => false,
|
||||
'input' => false,
|
||||
'iframe' => false,
|
||||
'kbd' => false,
|
||||
'label' => false,
|
||||
'q' => false,
|
||||
'samp' => false,
|
||||
'script' => false,
|
||||
'select' => false,
|
||||
'small' => false,
|
||||
'span' => false,
|
||||
'strong' => false,
|
||||
'sub' => false,
|
||||
'sup' => false,
|
||||
'textarea' => false,
|
||||
'tt' => false,
|
||||
'var' => false,
|
||||
);
|
||||
|
||||
/**
 * Advance to the next node; $this->html must be set beforehand.
 *
 * The node is cut off the front of $this->html and described by
 * $this->nodeType / $this->node (plus the tag properties for tags).
 * Recognised in this order: processing instruction, comment, doctype,
 * CDATA (reported as text), tag, text. Whitespace-only text nodes are
 * skipped while static::$skipWhitespace is set.
 *
 * @return bool false when there is nothing left to parse, true otherwise
 */
public function nextNode()
{
    // empty() must not be used here: it is also true for the string "0" and
    // would silently drop a document (or remainder) consisting of "0".
    if (!is_string($this->html) || $this->html === '') {
        // we are done with parsing the html string
        return false;
    }

    // The previous node was an opening, non-empty tag: it is open from now on.
    if ($this->isStartTag && !$this->isEmptyTag) {
        array_push($this->openTags, $this->tagName);
        if (in_array($this->tagName, $this->preformattedTags)) {
            // dont truncate whitespaces for <code> or <pre> contents
            $this->keepWhitespace++;
        }
    }

    if ($this->html[0] == '<') {
        $token = substr($this->html, 0, 9);

        if (substr($token, 0, 2) == '<?') {
            // xml prolog or other pi's
            /** TODO **/
            // if no '>' exists, strpos() yields false and only the '<' is consumed
            $pos = strpos($this->html, '>');
            $this->setNode('pi', $pos + 1);

            return true;
        }

        if (substr($token, 0, 4) == '<!--') {
            // comment
            $pos = strpos($this->html, '-->');
            if ($pos === false) {
                // could not find a closing -->, use next gt instead
                // this is firefox' behaviour
                $pos = strpos($this->html, '>') + 1;
            } else {
                $pos += 3;
            }
            $this->setNode('comment', $pos);

            static::$skipWhitespace = true;

            return true;
        }

        if ($token == '<!DOCTYPE') {
            // doctype
            $this->setNode('doctype', strpos($this->html, '>') + 1);

            static::$skipWhitespace = true;

            return true;
        }

        if ($token == '<![CDATA[') {
            // cdata, use text node

            // remove leading <![CDATA[
            $this->html = substr($this->html, 9);

            $end = strpos($this->html, ']]>');
            if ($end === false) {
                // Unterminated section: treat the whole remainder as its
                // content instead of cutting three characters off the text.
                $this->setNode('text', strlen($this->html));
            } else {
                $this->setNode('text', $end + 3);
                // remove trailing ]]>
                $this->node = substr($this->node, 0, -3);
            }
            $this->handleWhitespaces();

            static::$skipWhitespace = true;

            return true;
        }

        if ($this->parseTag()) {
            // seems to be a tag
            // handle whitespaces
            static::$skipWhitespace = (bool) $this->isBlockElement;

            return true;
        }
    }

    if ($this->keepWhitespace) {
        static::$skipWhitespace = false;
    }

    // when we get here it seems to be a text node
    $pos = strpos($this->html, '<');
    if ($pos === 0) {
        // A rejected tag that still starts with '<' would otherwise produce
        // an empty text node on every call; look past it so that each call
        // consumes at least one character.
        $pos = strpos($this->html, '<', 1);
    }
    if ($pos === false) {
        $pos = strlen($this->html);
    }
    $this->setNode('text', $pos);
    $this->handleWhitespaces();

    if (static::$skipWhitespace && $this->node == ' ') {
        return $this->nextNode();
    }

    $this->isInlineContext = true;
    static::$skipWhitespace = false;

    return true;
}
|
||||
|
||||
/**
 * Try to parse a tag at the start of $this->html.
 *
 * On success the tag is cut off $this->html and the node/tag properties
 * (node, tagName, tagAttributes, isStartTag, isEmptyTag, isBlockElement,
 * isInlineContext, isNextToInlineContext) are updated. On failure
 * invalidTag() is called and nothing else changes.
 *
 * A tag is rejected when its name is unknown (not a key of
 * $this->blockElements), when it sits inside <code> while noTagsInCode is
 * set, when it contains characters this parser does not understand, when it
 * is not terminated by '>', or when it is an end tag that carries attributes
 * or does not close the innermost open tag.
 *
 * @return bool true if a tag was parsed
 */
protected function parseTag()
{
    if (!isset(static::$a_ord)) {
        static::$a_ord = ord('a');
        static::$z_ord = ord('z');
        static::$special_ords = array(
            ord(':'), // for xml:lang
            ord('-'), // for http-equiv
        );
    }

    $tagName = '';

    $pos = 1;
    // A lone '<' at the very end of the input has no second character.
    $isStartTag = !isset($this->html[$pos]) || $this->html[$pos] != '/';
    if (!$isStartTag) {
        $pos++;
    }

    // get tagName: letters, plus digits after the first letter (h1 ... h6)
    while (isset($this->html[$pos])) {
        $char = $this->html[$pos];
        $pos_ord = ord(strtolower($char));
        $isLetter = $pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord;
        if ($isLetter || (!empty($tagName) && is_numeric($char))) {
            $tagName .= $char;
            $pos++;
        } else {
            // step back so the attribute loop starts on this character
            $pos--;
            break;
        }
    }

    $tagName = strtolower($tagName);
    if (empty($tagName) || !isset($this->blockElements[$tagName])) {
        // something went wrong => invalid tag
        $this->invalidTag();

        return false;
    }
    if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
        // we suppress all HTML tags inside code tags
        $this->invalidTag();

        return false;
    }

    // get tag attributes
    /** TODO: in html 4 attributes do not need to be quoted **/
    $isEmptyTag = false;
    $attributes = array();
    $currAttrib = '';
    // true while whitespace separates the pending attribute name from the
    // current position
    $afterWhitespace = false;

    while (isset($this->html[$pos + 1])) {
        $pos++;
        $char = $this->html[$pos];
        // substr() instead of two offset reads: the character after $pos
        // does not exist when $pos is the last character of the input.
        $pair = substr($this->html, $pos, 2);

        // close tag
        if ($char == '>' || $pair == '/>') {
            if ($char == '/') {
                $isEmptyTag = true;
                $pos++;
            }
            break;
        }

        $pos_ord = ord(strtolower($char));
        $isNameChar = ($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord)
            || in_array($pos_ord, static::$special_ords);

        if ($isNameChar) {
            if ($afterWhitespace && $currAttrib !== '') {
                // A new name starts although the previous one never got a
                // value: the previous one is a value-less attribute such as
                // "nowrap" in <td nowrap align="left">.
                $attributes[$currAttrib] = $currAttrib;
                $currAttrib = '';
            }
            $afterWhitespace = false;
            // attribute name
            $currAttrib .= $char;
        } elseif (in_array($char, array(' ', "\t", "\n", "\r"), true)) {
            // drop whitespace
            $afterWhitespace = true;
        } elseif (in_array($pair, array('="', "='"), true)) {
            // get attribute value
            $pos++;
            $await = $this->html[$pos]; // single or double quote
            $pos++;
            $value = '';
            while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
                $value .= $this->html[$pos];
                $pos++;
            }
            $attributes[$currAttrib] = $value;
            $currAttrib = '';
            $afterWhitespace = false;
        } else {
            $this->invalidTag();

            return false;
        }
    }

    // $pos may point past the end of the input (truncated tag or unclosed
    // attribute value).
    if (!isset($this->html[$pos]) || $this->html[$pos] != '>') {
        $this->invalidTag();

        return false;
    }

    if (!empty($currAttrib)) {
        // html 4 allows something like <option selected> instead of <option selected="selected">
        $attributes[$currAttrib] = $currAttrib;
    }

    if (!$isStartTag) {
        if (!empty($attributes) || $tagName != end($this->openTags)) {
            // end tags must not contain any attributes
            // or maybe we did not expect a different tag to be closed
            $this->invalidTag();

            return false;
        }
        array_pop($this->openTags);
        if (in_array($tagName, $this->preformattedTags)) {
            $this->keepWhitespace--;
        }
    }

    $pos++;
    $this->node = substr($this->html, 0, $pos);
    $this->html = substr($this->html, $pos);
    $this->tagName = $tagName;
    $this->tagAttributes = $attributes;
    $this->isStartTag = $isStartTag;
    $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags);
    if ($this->isEmptyTag) {
        // might be not well formed
        $this->node = preg_replace('# */? *>$#', ' />', $this->node);
    }
    $this->nodeType = 'tag';
    $this->isBlockElement = $this->blockElements[$tagName];
    $this->isNextToInlineContext = $isStartTag && $this->isInlineContext;
    $this->isInlineContext = !$this->isBlockElement;

    return true;
}
|
||||
|
||||
/**
 * Handle invalid tags.
 *
 * The leading '<' of $this->html is replaced by its HTML entity, so the
 * remainder no longer starts with a tag opener and nextNode() consumes it
 * as text. Replacing it with a literal '<' would leave the input unchanged
 * and nextNode() would return an empty text node on every call.
 *
 * @return void
 */
protected function invalidTag()
{
    $this->html = substr_replace($this->html, '&lt;', 0, 1);
}
|
||||
|
||||
/**
 * Make the first $pos characters of $this->html the current node of the
 * given type and remove them from $this->html.
 *
 * If the previous node was a tag, the tag specific properties are reset to
 * null first. This method is not meant for $type == 'tag'; parseTag() sets
 * those properties itself.
 *
 * @param string $type see description for $this->nodeType
 * @param int $pos to which position shall we cut?
 * @return void
 */
protected function setNode($type, $pos)
{
    $previousWasTag = $this->nodeType == 'tag';
    if ($previousWasTag) {
        $this->tagName
            = $this->tagAttributes
            = $this->isStartTag
            = $this->isEmptyTag
            = $this->isBlockElement
            = null;
    }

    $remaining = $this->html;

    $this->nodeType = $type;
    $this->node = substr($remaining, 0, $pos);
    $this->html = substr($remaining, $pos);
}
|
||||
|
||||
/**
 * Check if $this->html begins with $str.
 *
 * The comparison is byte-wise. A loose == comparison would treat
 * numeric-looking strings of equal length but different content
 * (e.g. "1e1" and "010") as equal.
 *
 * @param string $str
 * @return bool
 */
protected function match($str)
{
    return strncmp($this->html, $str, strlen($str)) === 0;
}
|
||||
|
||||
/**
 * Collapse every run of whitespace in the current node into one space.
 *
 * Nothing happens while $this->keepWhitespace is non-zero, i.e. inside a
 * preformatted tag.
 *
 * @return void
 */
protected function handleWhitespaces()
{
    if (!$this->keepWhitespace) {
        // truncate multiple whitespaces to a single one
        $collapsed = preg_replace('#\s+#s', ' ', $this->node);
        $this->node = $collapsed;
    }
}
|
||||
|
||||
/**
 * Rebuild $this->node from the parsed tag properties.
 *
 * End tags become "</name>". Start tags become
 * "<name attr="value" ...>", with " /" before the closing bracket for
 * empty tags. Double quotes inside attribute values are written as the
 * &quot; entity so that they cannot terminate the attribute early.
 *
 * @return void
 */
protected function normalizeNode()
{
    $this->node = '<';
    if (!$this->isStartTag) {
        $this->node .= '/' . $this->tagName . '>';

        return;
    }

    $this->node .= $this->tagName;
    // tagAttributes is null until a tag has been parsed; treat that as "no attributes"
    foreach ((array) $this->tagAttributes as $name => $value) {
        $this->node .= ' ' . $name . '="' . str_replace('"', '&quot;', $value) . '"';
    }
    if ($this->isEmptyTag) {
        $this->node .= ' /';
    }
    $this->node .= '>';
}
|
||||
}
|
Reference in New Issue
Block a user