⬆️ 🔨 Upgrade Markdownify library.
The currently bundled version 2.0.0 (alpha) throws deprecation warnings with PHP 7.1 and PHPUnit. Upgrade the HTML-to-Markdown converter for PHP to the current Markdownify 2.2.1. The library is now managed with Composer.
This commit is contained in:
parent
8e1716065e
commit
6c79e0c077
@ -29,7 +29,8 @@
|
||||
"ext-xml" : "*",
|
||||
"ext-openssl" : "*",
|
||||
"sabre/dav" : "~3.2",
|
||||
"michelf/php-markdown" : "^1.7"
|
||||
"michelf/php-markdown" : "^1.7",
|
||||
"pixel418/markdownify": "^2.2"
|
||||
},
|
||||
"require-dev" : {
|
||||
"php" : ">=5.6",
|
||||
|
58
composer.lock
generated
58
composer.lock
generated
@ -4,7 +4,7 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "4b24468c1f8babe7c8804fba8ee602f7",
|
||||
"content-hash": "c0cafbf9fd702be588f6b392b9742cb6",
|
||||
"packages": [
|
||||
{
|
||||
"name": "michelf/php-markdown",
|
||||
@ -57,6 +57,62 @@
|
||||
],
|
||||
"time": "2016-10-29T18:58:20+00:00"
|
||||
},
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"version": "v2.2.1",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Elephant418/Markdownify.git",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"type": "lib",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"LGPL"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"keywords": [
|
||||
"markdown",
|
||||
"markdownify"
|
||||
],
|
||||
"time": "2016-09-21T13:01:43+00:00"
|
||||
},
|
||||
{
|
||||
"name": "psr/log",
|
||||
"version": "1.0.2",
|
||||
|
@ -5,12 +5,12 @@
|
||||
*/
|
||||
|
||||
use Michelf\MarkdownExtra;
|
||||
use Markdownify\Converter;
|
||||
|
||||
require_once("include/oembed.php");
|
||||
require_once("include/event.php");
|
||||
require_once("include/html2bbcode.php");
|
||||
require_once("include/bbcode.php");
|
||||
require_once("library/markdownify/markdownify.php");
|
||||
|
||||
|
||||
function get_bb_tag_pos($s, $name, $occurance = 1) {
|
||||
@ -367,7 +367,6 @@ function bb2diaspora_itemwallwall(&$item,$uplink = false) {
|
||||
|
||||
function bb2diaspora_itembody($item, $force_update = false, $have_channel = false, $uplink = false) {
|
||||
|
||||
|
||||
if(! get_iconfig($item,'diaspora','fields')) {
|
||||
$force_update = true;
|
||||
}
|
||||
@ -454,7 +453,7 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals
|
||||
return html_entity_decode($body);
|
||||
}
|
||||
|
||||
function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
||||
|
||||
// Re-enabling the converter again.
|
||||
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
|
||||
@ -496,11 +495,10 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
|
||||
|
||||
// Now convert HTML to Markdown
|
||||
$md = new Markdownify(false, false, false);
|
||||
$md = new Converter(Converter::LINK_AFTER_CONTENT, false, false);
|
||||
$Text = $md->parseString($Text);
|
||||
|
||||
|
||||
|
||||
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
|
||||
|
||||
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text);
|
||||
@ -522,7 +520,7 @@ function bb2diaspora($Text,$preserve_nl = false, $fordiaspora = true) {
|
||||
|
||||
$Text = trim($Text);
|
||||
|
||||
call_hooks('bb2diaspora',$Text);
|
||||
call_hooks('bb2diaspora', $Text);
|
||||
|
||||
return $Text;
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
Markdownify
|
||||
===========
|
||||
* handle non-markdownifiable lists (i.e. `<ul><li id="foobar">asdf</li></ul>`)
|
||||
* organize methods better (i.e. flushlinebreaks & setlinebreaks close to each other)
|
||||
* take a look at function names etc.
|
||||
* is the new (in rev. 93) lastclosedtag property needed?
|
||||
* word wrapping (some work is done but it's still very buggy)
|
||||
|
||||
|
||||
Markdownify Extra
|
||||
=================
|
||||
|
||||
* handle table alignment with KEEP_HTML=false
|
||||
* handle tables without headings when KEEP_HTML=false is set
|
||||
* handle Markdown inside non-markdownable tags
|
||||
|
||||
|
||||
Implementation Thoughts
|
||||
=======================
|
||||
* non-markdownifiable lists and markdown inside non-markdownable tags as well as the current
|
||||
table implementation could be rewritten by using a rollback mechanism.
|
||||
|
||||
example:
|
||||
|
||||
<ul><li>asdf</li><li id="foobar">asdf</li></ul>
|
||||
|
||||
we come to `<ul>`, know that this might fail and create a snapshot of our current parser
|
||||
we keep on parsing and when we reach `<li id="foobar">` we gotta rollback and keep this
|
||||
list in HTML format.
|
@ -1,51 +0,0 @@
|
||||
<?php
|
||||
error_reporting(E_ALL);
|
||||
if (!empty($_POST['input'])) {
|
||||
include 'markdownify_extra.php';
|
||||
if (!isset($_POST['leap'])) {
|
||||
$leap = MDFY_LINKS_EACH_PARAGRAPH;
|
||||
} else {
|
||||
$leap = $_POST['leap'];
|
||||
}
|
||||
|
||||
if (!isset($_POST['keepHTML'])) {
|
||||
$keephtml = MDFY_KEEPHTML;
|
||||
} else {
|
||||
$keephtml = $_POST['keepHTML'];
|
||||
}
|
||||
if (!empty($_POST['extra'])) {
|
||||
$md = new Markdownify_Extra($leap, MDFY_BODYWIDTH, $keephtml);
|
||||
} else {
|
||||
$md = new Markdownify($leap, MDFY_BODYWIDTH, $keephtml);
|
||||
}
|
||||
if (ini_get('magic_quotes_gpc')) {
|
||||
$_POST['input'] = stripslashes($_POST['input']);
|
||||
}
|
||||
$output = $md->parseString($_POST['input']);
|
||||
} else {
|
||||
$_POST['input'] = '';
|
||||
}
|
||||
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
||||
<title>HTML to Markdown Converter</title>
|
||||
</head>
|
||||
<body>
|
||||
<?php if (empty($_POST['input'])): ?>
|
||||
<form action="<?php echo $_SERVER['PHP_SELF']; ?>" method="post">
|
||||
<fieldset>
|
||||
<legend>HTML Input</legend>
|
||||
<textarea style="width:100%;" cols="85" rows="40" name="input"><?php echo htmlspecialchars($_POST['input'], ENT_NOQUOTES, 'UTF-8'); ?></textarea>
|
||||
</fieldset>
|
||||
<label for="extra">Markdownify Extra: <input name="extra" checked="checked" id="extra" type="checkbox" value="1" /></label>
|
||||
<label for="leap">Links after each block elem: <input name="leap" id="leap" type="checkbox" value="1" /></label>
|
||||
<label for="keepHTML">keep HTML: <input name="keepHTML" id="keepHTML" type="checkbox" value="1" checked="checked" /></label>
|
||||
<input type="submit" name="submit" value="submit" />
|
||||
</form>
|
||||
<?php else: ?>
|
||||
<h1 style="text-align:right;"><a href="<?php echo $_SERVER['PHP_SELF']; ?>">BACK</a></h1>
|
||||
<pre><?php echo htmlspecialchars($output, ENT_NOQUOTES, 'UTF-8'); ?></pre>
|
||||
<?php endif; ?>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because it is too large
Load Diff
@ -1,33 +0,0 @@
|
||||
#!/usr/bin/php
|
||||
<?php
|
||||
require dirname(__FILE__) .'/markdownify_extra.php';
|
||||
|
||||
function param($name, $default = false) {
|
||||
if (!in_array('--'.$name, $_SERVER['argv']))
|
||||
return $default;
|
||||
reset($_SERVER['argv']);
|
||||
while (each($_SERVER['argv'])) {
|
||||
if (current($_SERVER['argv']) == '--'.$name)
|
||||
break;
|
||||
}
|
||||
$value = next($_SERVER['argv']);
|
||||
if ($value === false || substr($value, 0, 2) == '--')
|
||||
return true;
|
||||
else
|
||||
return $value;
|
||||
}
|
||||
|
||||
|
||||
$input = stream_get_contents(STDIN);
|
||||
|
||||
$linksAfterEachParagraph = param('links');
|
||||
$bodyWidth = param('width');
|
||||
$keepHTML = param('html', true);
|
||||
|
||||
if (param('no_extra')) {
|
||||
$parser = new Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
} else {
|
||||
$parser = new Markdownify_Extra($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
}
|
||||
|
||||
echo $parser->parseString($input) ."\n";
|
@ -1,489 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* Class to convert HTML to Markdown with PHP Markdown Extra syntax support.
|
||||
*
|
||||
* @version 1.0.0 alpha
|
||||
* @author Milian Wolff (<mail@milianw.de>, <http://milianw.de>)
|
||||
* @license LGPL, see LICENSE_LGPL.txt and the summary below
|
||||
* @copyright (C) 2007 Milian Wolff
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* standard Markdownify class
|
||||
*/
|
||||
require_once dirname(__FILE__).'/markdownify.php';
|
||||
|
||||
class Markdownify_Extra extends Markdownify {
|
||||
/**
|
||||
* table data, including rows with content and the maximum width of each col
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
var $table = array();
|
||||
/**
|
||||
* current col
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $col = -1;
|
||||
/**
|
||||
* current row
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $row = 0;
|
||||
/**
|
||||
* constructor, see Markdownify::Markdownify() for more information
|
||||
*/
|
||||
function Markdownify_Extra($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
|
||||
parent::Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);
|
||||
|
||||
### new markdownable tags & attributes
|
||||
# header ids: # foo {bar}
|
||||
$this->isMarkdownable['h1']['id'] = 'optional';
|
||||
$this->isMarkdownable['h2']['id'] = 'optional';
|
||||
$this->isMarkdownable['h3']['id'] = 'optional';
|
||||
$this->isMarkdownable['h4']['id'] = 'optional';
|
||||
$this->isMarkdownable['h5']['id'] = 'optional';
|
||||
$this->isMarkdownable['h6']['id'] = 'optional';
|
||||
# tables
|
||||
$this->isMarkdownable['table'] = array();
|
||||
$this->isMarkdownable['th'] = array(
|
||||
'align' => 'optional',
|
||||
);
|
||||
$this->isMarkdownable['td'] = array(
|
||||
'align' => 'optional',
|
||||
);
|
||||
$this->isMarkdownable['tr'] = array();
|
||||
array_push($this->ignore, 'thead');
|
||||
array_push($this->ignore, 'tbody');
|
||||
array_push($this->ignore, 'tfoot');
|
||||
# definition lists
|
||||
$this->isMarkdownable['dl'] = array();
|
||||
$this->isMarkdownable['dd'] = array();
|
||||
$this->isMarkdownable['dt'] = array();
|
||||
# footnotes
|
||||
$this->isMarkdownable['fnref'] = array(
|
||||
'target' => 'required',
|
||||
);
|
||||
$this->isMarkdownable['footnotes'] = array();
|
||||
$this->isMarkdownable['fn'] = array(
|
||||
'name' => 'required',
|
||||
);
|
||||
$this->parser->blockElements['fnref'] = false;
|
||||
$this->parser->blockElements['fn'] = true;
|
||||
$this->parser->blockElements['footnotes'] = true;
|
||||
# abbr
|
||||
$this->isMarkdownable['abbr'] = array(
|
||||
'title' => 'required',
|
||||
);
|
||||
# build RegEx lookahead to decide whether the table can be parsed or not
|
||||
$inlineTags = array_keys($this->parser->blockElements, false);
|
||||
$colContents = '(?:[^<]|<(?:'.implode('|', $inlineTags).'|[^a-z]))+';
|
||||
$this->tableLookaheadHeader = '{
|
||||
^\s*(?:<thead\s*>)?\s* # open optional thead
|
||||
<tr\s*>\s*(?: # start required row with headers
|
||||
<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
|
||||
\s*'.$colContents.'\s* # contents
|
||||
</th>\s* # close header
|
||||
)+</tr> # close row with headers
|
||||
\s*(?:</thead>)? # close optional thead
|
||||
}sxi';
|
||||
$this->tdSubstitute = '\s*'.$colContents.'\s* # contents
|
||||
</td>\s*';
|
||||
$this->tableLookaheadBody = '{
|
||||
\s*(?:<tbody\s*>)?\s* # open optional tbody
|
||||
(?:<tr\s*>\s* # start row
|
||||
%s # cols to be substituted
|
||||
</tr>)+ # close row
|
||||
\s*(?:</tbody>)? # close optional tbody
|
||||
\s*</table> # close table
|
||||
}sxi';
|
||||
}
|
||||
/**
|
||||
* handle header tags (<h1> - <h6>)
|
||||
*
|
||||
* @param int $level 1-6
|
||||
* @return void
|
||||
*/
|
||||
function handleHeader($level) {
|
||||
static $id = null;
|
||||
if ($this->parser->isStartTag) {
|
||||
if (isset($this->parser->tagAttributes['id'])) {
|
||||
$id = $this->parser->tagAttributes['id'];
|
||||
}
|
||||
} else {
|
||||
if (!is_null($id)) {
|
||||
$this->out(' {#'.$id.'}');
|
||||
$id = null;
|
||||
}
|
||||
}
|
||||
parent::handleHeader($level);
|
||||
}
|
||||
/**
|
||||
* handle <abbr> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_abbr() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->stack();
|
||||
$this->buffer();
|
||||
} else {
|
||||
$tag = $this->unstack();
|
||||
$tag['text'] = $this->unbuffer();
|
||||
$add = true;
|
||||
foreach ($this->stack['abbr'] as $stacked) {
|
||||
if ($stacked['text'] == $tag['text']) {
|
||||
/** TODO: differing abbr definitions, i.e. different titles for same text **/
|
||||
$add = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
$this->out($tag['text']);
|
||||
if ($add) {
|
||||
array_push($this->stack['abbr'], $tag);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* flush stacked abbr tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function flushStacked_abbr() {
|
||||
$out = array();
|
||||
foreach ($this->stack['abbr'] as $k => $tag) {
|
||||
if (!isset($tag['unstacked'])) {
|
||||
array_push($out, ' *['.$tag['text'].']: '.$tag['title']);
|
||||
$tag['unstacked'] = true;
|
||||
$this->stack['abbr'][$k] = $tag;
|
||||
}
|
||||
}
|
||||
if (!empty($out)) {
|
||||
$this->out("\n\n".implode("\n", $out));
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <table> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_table() {
|
||||
if ($this->parser->isStartTag) {
|
||||
# check if upcoming table can be converted
|
||||
if ($this->keepHTML) {
|
||||
if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
|
||||
# header seems good, now check body
|
||||
# get align & number of cols
|
||||
preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
|
||||
$regEx = '';
|
||||
$i = 1;
|
||||
$aligns = array();
|
||||
foreach ($cols[2] as $align) {
|
||||
$align = strtolower($align);
|
||||
array_push($aligns, $align);
|
||||
if (empty($align)) {
|
||||
$align = 'left'; # default value
|
||||
}
|
||||
$td = '\s+align=("|\')'.$align.'\\'.$i;
|
||||
$i++;
|
||||
if ($align == 'left') {
|
||||
# look for empty align or left
|
||||
$td = '(?:'.$td.')?';
|
||||
}
|
||||
$td = '<td'.$td.'\s*>';
|
||||
$regEx .= $td.$this->tdSubstitute;
|
||||
}
|
||||
$regEx = sprintf($this->tableLookaheadBody, $regEx);
|
||||
if (preg_match($regEx, $this->parser->html, $matches, null, strlen($matches[0]))) {
|
||||
# this is a markdownable table tag!
|
||||
$this->table = array(
|
||||
'rows' => array(),
|
||||
'col_widths' => array(),
|
||||
'aligns' => $aligns,
|
||||
);
|
||||
$this->row = 0;
|
||||
} else {
|
||||
# non markdownable table
|
||||
$this->handleTagToText();
|
||||
}
|
||||
} else {
|
||||
# non markdownable table
|
||||
$this->handleTagToText();
|
||||
}
|
||||
} else {
|
||||
$this->table = array(
|
||||
'rows' => array(),
|
||||
'col_widths' => array(),
|
||||
'aligns' => array(),
|
||||
);
|
||||
$this->row = 0;
|
||||
}
|
||||
} else {
|
||||
# finally build the table in Markdown Extra syntax
|
||||
$separator = array();
|
||||
# separator with correct alignment identifiers
|
||||
foreach($this->table['aligns'] as $col => $align) {
|
||||
if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
|
||||
break;
|
||||
}
|
||||
$left = ' ';
|
||||
$right = ' ';
|
||||
switch ($align) {
|
||||
case 'left':
|
||||
$left = ':';
|
||||
break;
|
||||
case 'center':
|
||||
$right = ':';
|
||||
$left = ':';
|
||||
case 'right':
|
||||
$right = ':';
|
||||
break;
|
||||
}
|
||||
array_push($separator, $left.str_repeat('-', $this->table['col_widths'][$col]).$right);
|
||||
}
|
||||
$separator = '|'.implode('|', $separator).'|';
|
||||
|
||||
$rows = array();
|
||||
# add padding
|
||||
array_walk_recursive($this->table['rows'], array(&$this, 'alignTdContent'));
|
||||
$header = array_shift($this->table['rows']);
|
||||
array_push($rows, '| '.implode(' | ', $header).' |');
|
||||
array_push($rows, $separator);
|
||||
foreach ($this->table['rows'] as $row) {
|
||||
array_push($rows, '| '.implode(' | ', $row).' |');
|
||||
}
|
||||
$this->out(implode("\n".$this->indent, $rows));
|
||||
$this->table = array();
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* properly pad content so it is aligned as wished
|
||||
* should be used with array_walk_recursive on $this->table['rows']
|
||||
*
|
||||
* @param string &$content
|
||||
* @param int $col
|
||||
* @return void
|
||||
*/
|
||||
function alignTdContent(&$content, $col) {
|
||||
switch ($this->table['aligns'][$col]) {
|
||||
default:
|
||||
case 'left':
|
||||
$content .= str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content));
|
||||
break;
|
||||
case 'right':
|
||||
$content = str_repeat(' ', $this->table['col_widths'][$col] - $this->strlen($content)).$content;
|
||||
break;
|
||||
case 'center':
|
||||
$paddingNeeded = $this->table['col_widths'][$col] - $this->strlen($content);
|
||||
$left = floor($paddingNeeded / 2);
|
||||
$right = $paddingNeeded - $left;
|
||||
$content = str_repeat(' ', $left).$content.str_repeat(' ', $right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <tr> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_tr() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->col = -1;
|
||||
} else {
|
||||
$this->row++;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <td> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_td() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->col++;
|
||||
if (!isset($this->table['col_widths'][$this->col])) {
|
||||
$this->table['col_widths'][$this->col] = 0;
|
||||
}
|
||||
$this->buffer();
|
||||
} else {
|
||||
$buffer = trim($this->unbuffer());
|
||||
$this->table['col_widths'][$this->col] = max($this->table['col_widths'][$this->col], $this->strlen($buffer));
|
||||
$this->table['rows'][$this->row][$this->col] = $buffer;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <th> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_th() {
|
||||
if (!$this->keepHTML && !isset($this->table['rows'][1]) && !isset($this->table['aligns'][$this->col+1])) {
|
||||
if (isset($this->parser->tagAttributes['align'])) {
|
||||
$this->table['aligns'][$this->col+1] = $this->parser->tagAttributes['align'];
|
||||
} else {
|
||||
$this->table['aligns'][$this->col+1] = '';
|
||||
}
|
||||
}
|
||||
$this->handleTag_td();
|
||||
}
|
||||
/**
|
||||
* handle <dl> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_dl() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <dt> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
**/
|
||||
function handleTag_dt() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(1);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <dd> tags
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_dd() {
|
||||
if ($this->parser->isStartTag) {
|
||||
if (substr(ltrim($this->parser->html), 0, 3) == '<p>') {
|
||||
# next comes a paragraph, so we'll need an extra line
|
||||
$this->out("\n".$this->indent);
|
||||
} elseif (substr($this->output, -2) == "\n\n") {
|
||||
$this->output = substr($this->output, 0, -1);
|
||||
}
|
||||
$this->out(': ');
|
||||
$this->indent(' ', false);
|
||||
} else {
|
||||
# lookahead for next dt
|
||||
if (substr(ltrim($this->parser->html), 0, 4) == '<dt>') {
|
||||
$this->setLineBreaks(2);
|
||||
} else {
|
||||
$this->setLineBreaks(1);
|
||||
}
|
||||
$this->indent(' ');
|
||||
}
|
||||
}
|
||||
/**
|
||||
* handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_fnref() {
|
||||
$this->out('[^'.$this->parser->tagAttributes['target'].']');
|
||||
}
|
||||
/**
|
||||
* handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
|
||||
* and markdownify_extra::_makeFootnotes())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_fn() {
|
||||
if ($this->parser->isStartTag) {
|
||||
$this->out('[^'.$this->parser->tagAttributes['name'].']:');
|
||||
$this->setLineBreaks(1);
|
||||
} else {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
$this->indent(' ');
|
||||
}
|
||||
/**
|
||||
* handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
|
||||
* and markdownify_extra::_makeFootnotes())
|
||||
*
|
||||
* @param void
|
||||
* @return void
|
||||
*/
|
||||
function handleTag_footnotes() {
|
||||
if (!$this->parser->isStartTag) {
|
||||
$this->setLineBreaks(2);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* parse a HTML string, clean up footnotes prior
|
||||
*
|
||||
* @param string $HTML input
|
||||
* @return string Markdown formatted output
|
||||
*/
|
||||
function parseString($html) {
|
||||
/** TODO: custom markdown-extra options, e.g. titles & classes **/
|
||||
# <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
|
||||
# => <fnref target="..." />
|
||||
$html = preg_replace('@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us', '<fnref target="$1" />', $html);
|
||||
# <div class="footnotes">
|
||||
# <hr />
|
||||
# <ol>
|
||||
#
|
||||
# <li id="fn:...">...</li>
|
||||
# ...
|
||||
#
|
||||
# </ol>
|
||||
# </div>
|
||||
# =>
|
||||
# <footnotes>
|
||||
# <fn name="...">...</fn>
|
||||
# ...
|
||||
# </footnotes>
|
||||
$html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us', array(&$this, '_makeFootnotes'), $html);
|
||||
return parent::parseString($html);
|
||||
}
|
||||
/**
|
||||
* replace HTML representation of footnotes with something more easily parsable
|
||||
*
|
||||
* @note this is a callback to be used in parseString()
|
||||
*
|
||||
* @param array $matches
|
||||
* @return string
|
||||
*/
|
||||
function _makeFootnotes($matches) {
|
||||
# <li id="fn:1">
|
||||
# ...
|
||||
# <a href="#fnref:block" rev="footnote">↩</a></p>
|
||||
# </li>
|
||||
# => <fn name="1">...</fn>
|
||||
# remove footnote link
|
||||
$fns = preg_replace('@\s*( \s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>↩</a>\s*@s', '', $matches[1]);
|
||||
# remove empty paragraph
|
||||
$fns = preg_replace('@<p>\s*</p>@s', '', $fns);
|
||||
# <li id="fn:1">...</li> -> <fn name="1">...</fn>
|
||||
$fns = str_replace('<li id="fn:', '<fn name="', $fns);
|
||||
|
||||
$fns = '<footnotes>'.$fns.'</footnotes>';
|
||||
return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>$1', $fns);
|
||||
}
|
||||
}
|
@ -1,618 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* parseHTML is a HTML parser which works with PHP 4 and above.
|
||||
* It tries to handle invalid HTML to some degree.
|
||||
*
|
||||
* @version 1.0 beta
|
||||
* @author Milian Wolff (mail@milianw.de, http://milianw.de)
|
||||
* @license LGPL, see LICENSE_LGPL.txt and the summary below
|
||||
* @copyright (C) 2007 Milian Wolff
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
class parseHTML {
|
||||
/**
|
||||
* tags which are always empty (<br /> etc.)
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
var $emptyTags = array(
|
||||
'br',
|
||||
'hr',
|
||||
'input',
|
||||
'img',
|
||||
'area',
|
||||
'link',
|
||||
'meta',
|
||||
'param',
|
||||
);
|
||||
/**
|
||||
* tags with preformatted text
|
||||
* whitespace won't be touched in them
|
||||
*
|
||||
* @var array<string>
|
||||
*/
|
||||
var $preformattedTags = array(
|
||||
'script',
|
||||
'style',
|
||||
'pre',
|
||||
'code',
|
||||
);
|
||||
/**
|
||||
* suppress HTML tags inside preformatted tags (see above)
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
var $noTagsInCode = false;
|
||||
/**
|
||||
* html to be parsed
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $html = '';
|
||||
/**
|
||||
* node type:
|
||||
*
|
||||
* - tag (see isStartTag)
|
||||
* - text (includes cdata)
|
||||
* - comment
|
||||
* - doctype
|
||||
* - pi (processing instruction)
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $nodeType = '';
|
||||
/**
|
||||
* current node content, i.e. either a
|
||||
* simple string (text node), or something like
|
||||
* <tag attrib="value"...>
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
var $node = '';
|
||||
/**
|
||||
* whether current node is an opening tag (<a>) or not (</a>)
|
||||
* set to NULL if current node is not a tag
|
||||
* NOTE: empty tags (<br />) set this to true as well!
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isStartTag = null;
|
||||
/**
|
||||
* whether current node is an empty tag (<br />) or not (<a></a>)
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isEmptyTag = null;
|
||||
/**
|
||||
* tag name
|
||||
*
|
||||
* @var string | null
|
||||
*/
|
||||
var $tagName = '';
|
||||
/**
|
||||
* attributes of current tag
|
||||
*
|
||||
* @var array (attribName=>value) | null
|
||||
*/
|
||||
var $tagAttributes = null;
|
||||
/**
|
||||
* whether the current tag is a block element
|
||||
*
|
||||
* @var bool | null
|
||||
*/
|
||||
var $isBlockElement = null;
|
||||
|
||||
/**
|
||||
* keep whitespace
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
var $keepWhitespace = 0;
|
||||
/**
|
||||
* list of open tags
|
||||
* count this to get current depth
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
var $openTags = array();
|
||||
/**
|
||||
* list of block elements
|
||||
*
|
||||
* @var array
|
||||
* TODO: what shall we do with <del> and <ins> ?!
|
||||
*/
|
||||
var $blockElements = array (
|
||||
# tag name => <bool> is block
|
||||
# block elements
|
||||
'address' => true,
|
||||
'blockquote' => true,
|
||||
'center' => true,
|
||||
'del' => true,
|
||||
'dir' => true,
|
||||
'div' => true,
|
||||
'dl' => true,
|
||||
'fieldset' => true,
|
||||
'form' => true,
|
||||
'h1' => true,
|
||||
'h2' => true,
|
||||
'h3' => true,
|
||||
'h4' => true,
|
||||
'h5' => true,
|
||||
'h6' => true,
|
||||
'hr' => true,
|
||||
'ins' => true,
|
||||
'isindex' => true,
|
||||
'menu' => true,
|
||||
'noframes' => true,
|
||||
'noscript' => true,
|
||||
'ol' => true,
|
||||
'p' => true,
|
||||
'pre' => true,
|
||||
'table' => true,
|
||||
'ul' => true,
|
||||
# set table elements and list items to block as well
|
||||
'thead' => true,
|
||||
'tbody' => true,
|
||||
'tfoot' => true,
|
||||
'td' => true,
|
||||
'tr' => true,
|
||||
'th' => true,
|
||||
'li' => true,
|
||||
'dd' => true,
|
||||
'dt' => true,
|
||||
# header items and html / body as well
|
||||
'html' => true,
|
||||
'body' => true,
|
||||
'head' => true,
|
||||
'meta' => true,
|
||||
'link' => true,
|
||||
'style' => true,
|
||||
'title' => true,
|
||||
# unfancy media tags, when indented should be rendered as block
|
||||
'map' => true,
|
||||
'object' => true,
|
||||
'param' => true,
|
||||
'embed' => true,
|
||||
'area' => true,
|
||||
# inline elements
|
||||
'a' => false,
|
||||
'abbr' => false,
|
||||
'acronym' => false,
|
||||
'applet' => false,
|
||||
'b' => false,
|
||||
'basefont' => false,
|
||||
'bdo' => false,
|
||||
'big' => false,
|
||||
'br' => false,
|
||||
'button' => false,
|
||||
'cite' => false,
|
||||
'code' => false,
|
||||
'del' => false,
|
||||
'dfn' => false,
|
||||
'em' => false,
|
||||
'font' => false,
|
||||
'i' => false,
|
||||
'img' => false,
|
||||
'ins' => false,
|
||||
'input' => false,
|
||||
'iframe' => false,
|
||||
'kbd' => false,
|
||||
'label' => false,
|
||||
'q' => false,
|
||||
'samp' => false,
|
||||
'script' => false,
|
||||
'select' => false,
|
||||
'small' => false,
|
||||
'span' => false,
|
||||
'strong' => false,
|
||||
'sub' => false,
|
||||
'sup' => false,
|
||||
'textarea' => false,
|
||||
'tt' => false,
|
||||
'var' => false,
|
||||
);
|
||||
/**
 * Extract the next node from the front of $this->html.
 *
 * $this->html must be set before the first call. Each call cuts one node
 * (processing instruction, comment, doctype, CDATA text, tag or plain text)
 * off the beginning of $this->html and stores it in $this->node, with
 * $this->nodeType describing what was found.
 *
 * Note: $skipWhitespace is a function-level static, so its value is carried
 * over between calls (and between parser instances).
 *
 * @param void
 * @return bool false when $this->html is exhausted, true when a node was extracted
 */
function nextNode() {
    # do not use empty() here: a remaining "0" is still content to be parsed
    if (!isset($this->html) || (string) $this->html === '') {
        # we are done with parsing the html string
        return false;
    }
    static $skipWhitespace = true;
    if ($this->isStartTag && !$this->isEmptyTag) {
        # the previously returned node opened an element: remember it
        array_push($this->openTags, $this->tagName);
        if (in_array($this->tagName, $this->preformattedTags)) {
            # dont truncate whitespaces for <code> or <pre> contents
            $this->keepWhitespace++;
        }
    }

    if ($this->html[0] == '<') {
        $token = substr($this->html, 0, 9);
        if (substr($token, 0, 2) === '<?') {
            # xml prolog or other pi's
            /** TODO **/
            #trigger_error('this might need some work', E_USER_NOTICE);
            $pos = strpos($this->html, '>');
            # an unterminated pi swallows the rest of the input
            $this->setNode('pi', $pos === false ? strlen($this->html) : $pos + 1);
            return true;
        }
        if (substr($token, 0, 4) === '<!--') {
            # comment
            $pos = strpos($this->html, '-->');
            if ($pos === false) {
                # could not find a closing -->, use next gt instead
                # this is firefox' behaviour
                $pos = strpos($this->html, '>');
                # no gt either: the comment runs to the end of the input
                $pos = ($pos === false) ? strlen($this->html) : $pos + 1;
            } else {
                $pos += 3;
            }
            $this->setNode('comment', $pos);

            $skipWhitespace = true;
            return true;
        }
        if (strcasecmp($token, '<!DOCTYPE') === 0) {
            # doctype (matched case-insensitively, "<!doctype html>" is common)
            $pos = strpos($this->html, '>');
            $this->setNode('doctype', $pos === false ? strlen($this->html) : $pos + 1);

            $skipWhitespace = true;
            return true;
        }
        if ($token === '<![CDATA[') {
            # cdata, use text node

            # remove leading <![CDATA[
            $this->html = substr($this->html, 9);

            $end = strpos($this->html, ']]>');
            if ($end === false) {
                # unterminated section: everything that is left is its content
                $this->setNode('text', strlen($this->html));
            } else {
                $this->setNode('text', $end + 3);
                # remove trailing ]]>
                $this->node = substr($this->node, 0, -3);
            }
            $this->handleWhitespaces();

            $skipWhitespace = true;
            return true;
        }
        if ($this->parseTag()) {
            # seems to be a tag
            # whitespace directly after a block element is skipped,
            # whitespace after an inline element is kept
            $skipWhitespace = $this->isBlockElement ? true : false;
            return true;
        }
    }
    if ($this->keepWhitespace) {
        $skipWhitespace = false;
    }
    # when we get here it seems to be a text node
    $pos = strpos($this->html, '<');
    if ($pos === false) {
        $pos = strlen($this->html);
    }
    $this->setNode('text', $pos);
    $this->handleWhitespaces();
    if ($skipWhitespace && $this->node == ' ') {
        # whitespace-only text node that should be skipped: move on to the next node
        return $this->nextNode();
    }
    $skipWhitespace = false;
    return true;
}
|
||||
/**
 * Parse the tag at the beginning of $this->html.
 *
 * On success the tag is cut off $this->html and $this->node, $this->tagName,
 * $this->tagAttributes, $this->isStartTag, $this->isEmptyTag,
 * $this->isBlockElement and $this->nodeType are updated. On failure
 * invalidTag() is called and nothing else is modified.
 *
 * @param void
 * @return bool true if a valid tag was parsed, false otherwise
 */
function parseTag() {
    static $a_ord, $z_ord, $special_ords;
    if (!isset($a_ord)) {
        $a_ord = ord('a');
        $z_ord = ord('z');
        $special_ords = array(
            ord(':'), // for xml:lang
            ord('-'), // for http-equiv
        );
    }

    $tagName = '';

    $pos = 1;
    # guard the offset: the input may consist of a single "<"
    $isStartTag = !isset($this->html[$pos]) || $this->html[$pos] != '/';
    if (!$isStartTag) {
        $pos++;
    }
    # get tagName
    while (isset($this->html[$pos])) {
        $pos_ord = ord(strtolower($this->html[$pos]));
        if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || (!empty($tagName) && is_numeric($this->html[$pos]))) {
            $tagName .= $this->html[$pos];
            $pos++;
        } else {
            # step back onto the last character of the name
            $pos--;
            break;
        }
    }

    $tagName = strtolower($tagName);
    if (empty($tagName) || !isset($this->blockElements[$tagName])) {
        # something went wrong => invalid tag
        $this->invalidTag();
        return false;
    }
    if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
        # we suppress all HTML tags inside code tags
        $this->invalidTag();
        return false;
    }

    # get tag attributes
    /** TODO: in html 4 attributes do not need to be quoted **/
    $isEmptyTag = false;
    $attributes = array();
    $currAttrib = '';
    while (isset($this->html[$pos+1])) {
        $pos++;
        # current character plus one character of lookahead; only one
        # character long when $pos is the last offset of the input
        $pair = substr($this->html, $pos, 2);
        # close tag
        if ($this->html[$pos] == '>' || $pair == '/>') {
            if ($this->html[$pos] == '/') {
                $isEmptyTag = true;
                $pos++;
            }
            break;
        }

        $pos_ord = ord(strtolower($this->html[$pos]));
        if (($pos_ord >= $a_ord && $pos_ord <= $z_ord) || in_array($pos_ord, $special_ords)) {
            # attribute name
            $currAttrib .= $this->html[$pos];
        } elseif (in_array($this->html[$pos], array(' ', "\t", "\n", "\r"))) {
            # drop whitespace (including the CR of CRLF line endings)
        } elseif (in_array($pair, array('="', "='"))) {
            # get attribute value
            $pos++;
            $await = $this->html[$pos]; # single or double quote
            $pos++;
            $value = '';
            while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
                $value .= $this->html[$pos];
                $pos++;
            }
            $attributes[$currAttrib] = $value;
            $currAttrib = '';
        } else {
            $this->invalidTag();
            return false;
        }
    }
    # $pos may point past the end of the input when the tag is truncated
    if (!isset($this->html[$pos]) || $this->html[$pos] != '>') {
        $this->invalidTag();
        return false;
    }

    if (!empty($currAttrib)) {
        # html 4 allows something like <option selected> instead of <option selected="selected">
        $attributes[$currAttrib] = $currAttrib;
    }
    if (!$isStartTag) {
        if (!empty($attributes) || $tagName != end($this->openTags)) {
            # end tags must not contain any attributes
            # or maybe we did not expect a different tag to be closed
            $this->invalidTag();
            return false;
        }
        array_pop($this->openTags);
        if (in_array($tagName, $this->preformattedTags)) {
            $this->keepWhitespace--;
        }
    }
    $pos++;
    $this->node = substr($this->html, 0, $pos);
    $this->html = substr($this->html, $pos);
    $this->tagName = $tagName;
    $this->tagAttributes = $attributes;
    $this->isStartTag = $isStartTag;
    $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags);
    if ($this->isEmptyTag) {
        # might be not well formed
        $this->node = preg_replace('# */? *>$#', ' />', $this->node);
    }
    $this->nodeType = 'tag';
    $this->isBlockElement = $this->blockElements[$tagName];
    return true;
}
|
||||
/**
 * Handle an invalid tag.
 *
 * Replaces the first character of $this->html (the "<" that started the
 * invalid tag) with the entity "&lt;", so that it is no longer mistaken for
 * the start of a tag.
 *
 * @param void
 * @return void
 */
function invalidTag() {
    $this->html = substr_replace($this->html, '&lt;', 0, 1);
}
|
||||
/**
 * Store the first $pos bytes of $this->html as the current node.
 *
 * The node type is set to $type, $this->node receives the cut-off part and
 * $this->html keeps the remainder. If the previous node was a tag, all tag
 * specific properties are reset to null first.
 *
 * @param string $type see description for $this->nodeType
 * @param int $pos length of the part that is cut off $this->html
 * @return void
 */
function setNode($type, $pos) {
    if ($this->nodeType == 'tag') {
        # the previous node was a tag: clear everything this::parseTag() set
        # ($type == tag is not expected here, tags are handled by parseTag())
        $this->tagName = $this->tagAttributes = $this->isStartTag = $this->isEmptyTag = $this->isBlockElement = null;
    }
    $remaining = $this->html;
    $this->nodeType = $type;
    $this->node = substr($remaining, 0, $pos);
    $this->html = substr($remaining, $pos);
}
|
||||
/**
 * Check whether $this->html begins with $str.
 *
 * The comparison is a binary-safe, case-sensitive prefix comparison; it does
 * not use PHP's loose "==", which would treat numeric strings such as "100"
 * and "1e2" as equal.
 *
 * @param string $str
 * @return bool
 */
function match($str) {
    return strncmp((string) $this->html, $str, strlen($str)) === 0;
}
|
||||
/**
 * Collapse whitespace in the current node.
 *
 * Every run of whitespace in $this->node becomes a single space, unless
 * $this->keepWhitespace is set (we are inside <pre> or <code>).
 *
 * @param void
 * @return void
 */
function handleWhitespaces() {
    if (!$this->keepWhitespace) {
        # truncate multiple whitespaces to a single one
        $this->node = preg_replace('#\s+#s', ' ', $this->node);
    }
}
|
||||
/**
 * Rebuild $this->node from the parsed tag data.
 *
 * End tags become "</name>". Start tags become "<name attr="value" ...>",
 * with " /" before the closing ">" for empty tags. Double quotes inside
 * attribute values are written as "&quot;" so that they cannot terminate the
 * double-quoted attribute.
 *
 * @param void
 * @return void
 */
function normalizeNode() {
    $this->node = '<';
    if (!$this->isStartTag) {
        $this->node .= '/'.$this->tagName.'>';
        return;
    }
    $this->node .= $this->tagName;
    foreach ($this->tagAttributes as $name => $value) {
        $this->node .= ' '.$name.'="'.str_replace('"', '&quot;', $value).'"';
    }
    if ($this->isEmptyTag) {
        $this->node .= ' /';
    }
    $this->node .= '>';
}
|
||||
}
|
||||
|
||||
/**
 * Re-indent an HTML string.
 *
 * The string is tokenized with parseHTML. Block level tags are put on their
 * own line and prefixed with one $indent per currently open block element;
 * inline content is appended as-is. Inside elements for which the parser
 * keeps whitespace, no line breaks or indentation are added.
 *
 * @param string $html
 * @param string $indent optional, string used for one level of indentation
 * @param bool $noTagsInCode optional, passed on to the parser
 * @return string
 */
function indentHTML($html, $indent = " ", $noTagsInCode = false) {
    $parser = new parseHTML;
    $parser->noTagsInCode = $noTagsInCode;
    $parser->html = $html;

    $output = '';
    $afterBlock = true; # last tag was block elem
    $depth = 0;         # number of currently open block elements

    while ($parser->nextNode()) {
        $isTag = $parser->nodeType == 'tag';
        if ($isTag) {
            $parser->normalizeNode();
        }

        if (!($isTag && $parser->isBlockElement)) {
            # inline tag, text, comment, pi or doctype
            if ($isTag && $parser->tagName == 'br') {
                # a line break in the source also ends the output line
                $output .= $parser->node."\n";
                $afterBlock = true;
                continue;
            }
            if ($afterBlock && !$parser->keepWhitespace) {
                # first inline content on a fresh line: indent it
                $output .= str_repeat($indent, $depth);
                $parser->node = ltrim($parser->node);
            }
            $output .= $parser->node;

            if (in_array($parser->nodeType, array('comment', 'pi', 'doctype'))) {
                $output .= "\n";
            } else {
                $afterBlock = false;
            }
            continue;
        }

        # block level tag
        $isPreOrCode = ($parser->tagName == 'code' || $parser->tagName == 'pre');
        if (!$parser->keepWhitespace && !$afterBlock && !$isPreOrCode) {
            # finish the line of inline content that precedes this tag
            $output = rtrim($output)."\n";
        }
        if ($parser->isStartTag) {
            $output .= str_repeat($indent, $depth);
            if (!$parser->isEmptyTag) {
                $depth++;
            }
        } else {
            if ($depth > 0) {
                $depth--;
            }
            if (!$isPreOrCode) {
                $output .= str_repeat($indent, $depth);
            }
        }
        $output .= $parser->node;
        if (!$parser->keepWhitespace && !($isPreOrCode && $parser->isStartTag)) {
            $output .= "\n";
        }
        $afterBlock = true;
    }
    return $output;
}
|
||||
/*
|
||||
# testcase / example
|
||||
error_reporting(E_ALL);
|
||||
|
||||
$html = '<p>Simple block on one line:</p>
|
||||
|
||||
<div>foo</div>
|
||||
|
||||
<p>And nested without indentation:</p>
|
||||
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
foo
|
||||
</div>
|
||||
<div style=">"/>
|
||||
</div>
|
||||
<div>bar</div>
|
||||
</div>
|
||||
|
||||
<p>And with attributes:</p>
|
||||
|
||||
<div>
|
||||
<div id="foo">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>This was broken in 1.0.2b7:</p>
|
||||
|
||||
<div class="inlinepage">
|
||||
<div class="toggleableend">
|
||||
foo
|
||||
</div>
|
||||
</div>';
|
||||
#$html = '<a href="asdfasdf" title=\'asdf\' foo="bar">asdf</a>';
|
||||
echo indentHTML($html);
|
||||
die();
|
||||
*/
|
9
vendor/composer/autoload_classmap.php
vendored
9
vendor/composer/autoload_classmap.php
vendored
@ -7,6 +7,9 @@ $baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Hubzilla\\Import\\Import' => $baseDir . '/include/Import/Importer.php',
|
||||
'Markdownify\\Converter' => $vendorDir . '/pixel418/markdownify/src/Converter.php',
|
||||
'Markdownify\\ConverterExtra' => $vendorDir . '/pixel418/markdownify/src/ConverterExtra.php',
|
||||
'Markdownify\\Parser' => $vendorDir . '/pixel418/markdownify/src/Parser.php',
|
||||
'Michelf\\Markdown' => $vendorDir . '/michelf/php-markdown/Michelf/Markdown.php',
|
||||
'Michelf\\MarkdownExtra' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
|
||||
'Michelf\\MarkdownInterface' => $vendorDir . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
|
||||
@ -18,6 +21,8 @@ return array(
|
||||
'Psr\\Log\\LoggerInterface' => $vendorDir . '/psr/log/Psr/Log/LoggerInterface.php',
|
||||
'Psr\\Log\\LoggerTrait' => $vendorDir . '/psr/log/Psr/Log/LoggerTrait.php',
|
||||
'Psr\\Log\\NullLogger' => $vendorDir . '/psr/log/Psr/Log/NullLogger.php',
|
||||
'Psr\\Log\\Test\\DummyTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Psr\\Log\\Test\\LoggerInterfaceTest' => $vendorDir . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Sabre\\CalDAV\\Backend\\AbstractBackend' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
|
||||
'Sabre\\CalDAV\\Backend\\BackendInterface' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
|
||||
'Sabre\\CalDAV\\Backend\\NotificationSupport' => $vendorDir . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
|
||||
@ -268,7 +273,6 @@ return array(
|
||||
'Sabre\\HTTP\\URLUtil' => $vendorDir . '/sabre/http/lib/URLUtil.php',
|
||||
'Sabre\\HTTP\\Util' => $vendorDir . '/sabre/http/lib/Util.php',
|
||||
'Sabre\\HTTP\\Version' => $vendorDir . '/sabre/http/lib/Version.php',
|
||||
'Sabre\\Uri\\InvalidUriException' => $vendorDir . '/sabre/uri/lib/InvalidUriException.php',
|
||||
'Sabre\\Uri\\Version' => $vendorDir . '/sabre/uri/lib/Version.php',
|
||||
'Sabre\\VObject\\BirthdayCalendarGenerator' => $vendorDir . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
|
||||
'Sabre\\VObject\\Cli' => $vendorDir . '/sabre/vobject/lib/Cli.php',
|
||||
@ -357,6 +361,9 @@ return array(
|
||||
'Sabre\\Xml\\Writer' => $vendorDir . '/sabre/xml/lib/Writer.php',
|
||||
'Sabre\\Xml\\XmlDeserializable' => $vendorDir . '/sabre/xml/lib/XmlDeserializable.php',
|
||||
'Sabre\\Xml\\XmlSerializable' => $vendorDir . '/sabre/xml/lib/XmlSerializable.php',
|
||||
'Test\\Markdownify\\ConverterExtraTest' => $vendorDir . '/pixel418/markdownify/test/ConverterExtraTest.php',
|
||||
'Test\\Markdownify\\ConverterTest' => $vendorDir . '/pixel418/markdownify/test/ConverterTest.php',
|
||||
'Test\\Markdownify\\ConverterTestCase' => $vendorDir . '/pixel418/markdownify/test/ConverterTestCase.php',
|
||||
'Zotlabs\\Access\\AccessList' => $baseDir . '/Zotlabs/Access/AccessList.php',
|
||||
'Zotlabs\\Access\\PermissionLimits' => $baseDir . '/Zotlabs/Access/PermissionLimits.php',
|
||||
'Zotlabs\\Access\\PermissionRoles' => $baseDir . '/Zotlabs/Access/PermissionRoles.php',
|
||||
|
2
vendor/composer/autoload_psr4.php
vendored
2
vendor/composer/autoload_psr4.php
vendored
@ -7,6 +7,7 @@ $baseDir = dirname($vendorDir);
|
||||
|
||||
return array(
|
||||
'Zotlabs\\' => array($baseDir . '/Zotlabs'),
|
||||
'Test\\Markdownify\\' => array($vendorDir . '/pixel418/markdownify/test'),
|
||||
'Sabre\\Xml\\' => array($vendorDir . '/sabre/xml/lib'),
|
||||
'Sabre\\VObject\\' => array($vendorDir . '/sabre/vobject/lib'),
|
||||
'Sabre\\Uri\\' => array($vendorDir . '/sabre/uri/lib'),
|
||||
@ -17,5 +18,6 @@ return array(
|
||||
'Sabre\\CardDAV\\' => array($vendorDir . '/sabre/dav/lib/CardDAV'),
|
||||
'Sabre\\CalDAV\\' => array($vendorDir . '/sabre/dav/lib/CalDAV'),
|
||||
'Psr\\Log\\' => array($vendorDir . '/psr/log/Psr/Log'),
|
||||
'Markdownify\\' => array($vendorDir . '/pixel418/markdownify/src'),
|
||||
'Hubzilla\\' => array($baseDir . '/include'),
|
||||
);
|
||||
|
25
vendor/composer/autoload_static.php
vendored
25
vendor/composer/autoload_static.php
vendored
@ -21,6 +21,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
'Zotlabs\\' => 8,
|
||||
),
|
||||
'T' =>
|
||||
array (
|
||||
'Test\\Markdownify\\' => 17,
|
||||
),
|
||||
'S' =>
|
||||
array (
|
||||
'Sabre\\Xml\\' => 10,
|
||||
@ -37,6 +41,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
'Psr\\Log\\' => 8,
|
||||
),
|
||||
'M' =>
|
||||
array (
|
||||
'Markdownify\\' => 12,
|
||||
),
|
||||
'H' =>
|
||||
array (
|
||||
'Hubzilla\\' => 9,
|
||||
@ -48,6 +56,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/Zotlabs',
|
||||
),
|
||||
'Test\\Markdownify\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/pixel418/markdownify/test',
|
||||
),
|
||||
'Sabre\\Xml\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/sabre/xml/lib',
|
||||
@ -88,6 +100,10 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/psr/log/Psr/Log',
|
||||
),
|
||||
'Markdownify\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/..' . '/pixel418/markdownify/src',
|
||||
),
|
||||
'Hubzilla\\' =>
|
||||
array (
|
||||
0 => __DIR__ . '/../..' . '/include',
|
||||
@ -106,6 +122,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
|
||||
public static $classMap = array (
|
||||
'Hubzilla\\Import\\Import' => __DIR__ . '/../..' . '/include/Import/Importer.php',
|
||||
'Markdownify\\Converter' => __DIR__ . '/..' . '/pixel418/markdownify/src/Converter.php',
|
||||
'Markdownify\\ConverterExtra' => __DIR__ . '/..' . '/pixel418/markdownify/src/ConverterExtra.php',
|
||||
'Markdownify\\Parser' => __DIR__ . '/..' . '/pixel418/markdownify/src/Parser.php',
|
||||
'Michelf\\Markdown' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/Markdown.php',
|
||||
'Michelf\\MarkdownExtra' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownExtra.php',
|
||||
'Michelf\\MarkdownInterface' => __DIR__ . '/..' . '/michelf/php-markdown/Michelf/MarkdownInterface.php',
|
||||
@ -117,6 +136,8 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Psr\\Log\\LoggerInterface' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerInterface.php',
|
||||
'Psr\\Log\\LoggerTrait' => __DIR__ . '/..' . '/psr/log/Psr/Log/LoggerTrait.php',
|
||||
'Psr\\Log\\NullLogger' => __DIR__ . '/..' . '/psr/log/Psr/Log/NullLogger.php',
|
||||
'Psr\\Log\\Test\\DummyTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Psr\\Log\\Test\\LoggerInterfaceTest' => __DIR__ . '/..' . '/psr/log/Psr/Log/Test/LoggerInterfaceTest.php',
|
||||
'Sabre\\CalDAV\\Backend\\AbstractBackend' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/AbstractBackend.php',
|
||||
'Sabre\\CalDAV\\Backend\\BackendInterface' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/BackendInterface.php',
|
||||
'Sabre\\CalDAV\\Backend\\NotificationSupport' => __DIR__ . '/..' . '/sabre/dav/lib/CalDAV/Backend/NotificationSupport.php',
|
||||
@ -367,7 +388,6 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Sabre\\HTTP\\URLUtil' => __DIR__ . '/..' . '/sabre/http/lib/URLUtil.php',
|
||||
'Sabre\\HTTP\\Util' => __DIR__ . '/..' . '/sabre/http/lib/Util.php',
|
||||
'Sabre\\HTTP\\Version' => __DIR__ . '/..' . '/sabre/http/lib/Version.php',
|
||||
'Sabre\\Uri\\InvalidUriException' => __DIR__ . '/..' . '/sabre/uri/lib/InvalidUriException.php',
|
||||
'Sabre\\Uri\\Version' => __DIR__ . '/..' . '/sabre/uri/lib/Version.php',
|
||||
'Sabre\\VObject\\BirthdayCalendarGenerator' => __DIR__ . '/..' . '/sabre/vobject/lib/BirthdayCalendarGenerator.php',
|
||||
'Sabre\\VObject\\Cli' => __DIR__ . '/..' . '/sabre/vobject/lib/Cli.php',
|
||||
@ -456,6 +476,9 @@ class ComposerStaticInit7b34d7e50a62201ec5d5e526a5b8b35d
|
||||
'Sabre\\Xml\\Writer' => __DIR__ . '/..' . '/sabre/xml/lib/Writer.php',
|
||||
'Sabre\\Xml\\XmlDeserializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlDeserializable.php',
|
||||
'Sabre\\Xml\\XmlSerializable' => __DIR__ . '/..' . '/sabre/xml/lib/XmlSerializable.php',
|
||||
'Test\\Markdownify\\ConverterExtraTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterExtraTest.php',
|
||||
'Test\\Markdownify\\ConverterTest' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTest.php',
|
||||
'Test\\Markdownify\\ConverterTestCase' => __DIR__ . '/..' . '/pixel418/markdownify/test/ConverterTestCase.php',
|
||||
'Zotlabs\\Access\\AccessList' => __DIR__ . '/../..' . '/Zotlabs/Access/AccessList.php',
|
||||
'Zotlabs\\Access\\PermissionLimits' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionLimits.php',
|
||||
'Zotlabs\\Access\\PermissionRoles' => __DIR__ . '/../..' . '/Zotlabs/Access/PermissionRoles.php',
|
||||
|
58
vendor/composer/installed.json
vendored
58
vendor/composer/installed.json
vendored
@ -518,5 +518,63 @@
|
||||
"keywords": [
|
||||
"markdown"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"version": "v2.2.1",
|
||||
"version_normalized": "2.2.1.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/Elephant418/Markdownify.git",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/Elephant418/Markdownify/zipball/0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"reference": "0160677f04c784550dd10fd72fdf3994967db848",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"time": "2016-09-21T13:01:43+00:00",
|
||||
"type": "lib",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"LGPL"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"keywords": [
|
||||
"markdown",
|
||||
"markdownify"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
76
vendor/pixel418/markdownify/CHANGELOG.md
vendored
Normal file
76
vendor/pixel418/markdownify/CHANGELOG.md
vendored
Normal file
@ -0,0 +1,76 @@
|
||||
CHANGELOG
|
||||
==============
|
||||
|
||||
|
||||
21/09/2016 v2.2.1
|
||||
--------------
|
||||
|
||||
* Fix: Moving trailing whitespace from inline elements outside of the element
|
||||
* Feature: Use PSR-4
|
||||
* Feature: PHP 7.0 support in continuous integration
|
||||
* Doc: Update of the README
|
||||
|
||||
|
||||
07/09/2016 v2.2.0
|
||||
--------------
|
||||
|
||||
* Fix: Reset state between each parsing
|
||||
|
||||
|
||||
19/02/2016 v2.1.11
|
||||
--------------
|
||||
|
||||
* Fix: Empty table cell conversion
|
||||
|
||||
|
||||
10/02/2016 v2.1.10
|
||||
--------------
|
||||
|
||||
* Fix: Handle nested table.
|
||||
|
||||
|
||||
01/04/2015 v2.1.9
|
||||
--------------
|
||||
|
||||
* Fix: Handle HTML breaks & spaces in a less destructive way.
|
||||
|
||||
|
||||
26/03/2015 v2.1.8
|
||||
--------------
|
||||
|
||||
* Fix: Use alternative italic character
|
||||
* Fix: Handle HTML breaks inside another tag
|
||||
* Fix: Handle HTML spaces around tags
|
||||
|
||||
|
||||
07/11/2014 v2.1.7
|
||||
--------------
|
||||
|
||||
* Change composer name to "elephant418/markdownify"
|
||||
|
||||
|
||||
14/07/2014 v2.1.6
|
||||
--------------
|
||||
|
||||
* Fix: Simulate a paragraph for inline text preceding block element
|
||||
* Fix: Nested lists
|
||||
* Fix: setKeepHTML method
|
||||
* Feature: PHP 5.5 & 5.6 support in continuous integration
|
||||
|
||||
|
||||
16/03/2014 v2.1.5
|
||||
--------------
|
||||
|
||||
Add display settings
|
||||
|
||||
* Test: Add tests for footnotes after every paragraph or not
|
||||
* Feature: Allow to display link reference in paragraph, without footnotes
|
||||
|
||||
|
||||
27/02/2014 v2.1.4
|
||||
--------------
|
||||
|
||||
Improve how ConverterExtra handle id & class attributes:
|
||||
|
||||
* Feature: Allow id & class attributes on links
|
||||
* Feature: Allow class attributes on headings
|
63
vendor/pixel418/markdownify/README.md
vendored
Normal file
63
vendor/pixel418/markdownify/README.md
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
# Markdownify
|
||||
|
||||
[Build Status](https://travis-ci.org/Elephant418/Markdownify?branch=master)
|
||||
[Packagist](https://packagist.org/packages/pixel418/markdownify)
|
||||
[License: LGPL 2.1](https://opensource.org/licenses/lgpl-2.1.php)
|
||||
|
||||
The HTML to Markdown converter for PHP
|
||||
|
||||
[Code example](#code-example) | [How to Install](#how-to-install) | [How to Contribute](#how-to-contribute) | [Author & Community](#author--community)
|
||||
|
||||
|
||||
|
||||
Code example
|
||||
--------
|
||||
|
||||
### Markdown
|
||||
|
||||
```php
|
||||
$converter = new Markdownify\Converter;
|
||||
$converter->parseString('<h1>Heading</h1>');
|
||||
// Returns: # Heading
|
||||
```
|
||||
|
||||
### Markdown Extra [as defined by @michelf](http://michelf.ca/projects/php-markdown/extra/)
|
||||
|
||||
```php
|
||||
$converter = new Markdownify\ConverterExtra;
|
||||
$converter->parseString('<h1 id="md">Heading</h1>');
|
||||
// Returns: # Heading {#md}
|
||||
```
|
||||
|
||||
|
||||
|
||||
How to Install
|
||||
--------
|
||||
|
||||
This library package requires `PHP 5.3` or later.<br>
|
||||
Install [Composer](http://getcomposer.org/doc/01-basic-usage.md#installation) and run the following command to get the latest version:
|
||||
|
||||
```sh
|
||||
composer require pixel418/markdownify
|
||||
```
|
||||
|
||||
|
||||
|
||||
How to Contribute
|
||||
--------
|
||||
|
||||
1. Fork the Markdownify repository
|
||||
2. Create a new branch for each feature or improvement
|
||||
3. Send a pull request from each feature branch to the **v2.x** branch
|
||||
|
||||
If you are not familiar with pull requests, you can read [the GitHub article](https://help.github.com/articles/using-pull-requests).
|
||||
|
||||
|
||||
|
||||
Author & Community
|
||||
--------
|
||||
|
||||
Markdownify is under [LGPL License](http://opensource.org/licenses/LGPL-2.1)<br>
|
||||
It was created by [Milian Wolff](http://milianw.de)<br>
|
||||
It was converted to a Symfony Bundle by [Peter Kruithof](https://github.com/pkruithof)<br>
|
||||
It is maintained by [Thomas ZILLIOX](http://tzi.fr)
|
38
vendor/pixel418/markdownify/composer.json
vendored
Normal file
38
vendor/pixel418/markdownify/composer.json
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"name": "pixel418/markdownify",
|
||||
"type": "lib",
|
||||
"description": "The HTML to Markdown converter for PHP ",
|
||||
"keywords": ["markdown", "markdownify"],
|
||||
"license": "LGPL",
|
||||
"homepage": "https://github.com/elephant418/Markdownify",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Milian Wolff",
|
||||
"email": "mail@milianw.de",
|
||||
"homepage": "http://milianw.de"
|
||||
|
||||
},
|
||||
{
|
||||
"name": "Peter Kruithof",
|
||||
"email": "pkruithof@gmail.com",
|
||||
"homepage": "http://pkruithof.tumblr.com/"
|
||||
},
|
||||
{
|
||||
"name": "Thomas Zilliox",
|
||||
"email": "hello@tzi.fr",
|
||||
"homepage": "http://tzi.fr"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^4.8"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"Markdownify\\": "src",
|
||||
"Test\\Markdownify\\": "test"
|
||||
}
|
||||
}
|
||||
}
|
1400
vendor/pixel418/markdownify/src/Converter.php
vendored
Normal file
1400
vendor/pixel418/markdownify/src/Converter.php
vendored
Normal file
File diff suppressed because it is too large
Load Diff
573
vendor/pixel418/markdownify/src/ConverterExtra.php
vendored
Normal file
573
vendor/pixel418/markdownify/src/ConverterExtra.php
vendored
Normal file
@ -0,0 +1,573 @@
|
||||
<?php
|
||||
|
||||
/* This file is part of the Markdownify project, which is under LGPL license */
|
||||
|
||||
namespace Markdownify;
|
||||
|
||||
class ConverterExtra extends Converter
|
||||
{
|
||||
|
||||
/**
|
||||
* table data, including rows with content and the maximum width of each col
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $table = array();
|
||||
|
||||
/**
|
||||
* current col
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $col = -1;
|
||||
|
||||
/**
|
||||
* current row
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected $row = 0;
|
||||
|
||||
/**
 * Set up the Markdown Extra converter.
 *
 * All three arguments are passed unchanged to the parent constructor.
 * Afterwards the additional tags and attributes handled by this class
 * (header ids/classes, tables, definition lists, link ids/classes, footnotes,
 * abbreviations) are registered, and the regular expressions used to decide
 * whether a table can be converted are built.
 *
 * @param mixed $linksAfterEachParagraph forwarded to the parent constructor
 * @param mixed $bodyWidth forwarded to the parent constructor
 * @param mixed $keepHTML forwarded to the parent constructor
 */
public function __construct($linksAfterEachParagraph = self::LINK_AFTER_CONTENT, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML)
{
    parent::__construct($linksAfterEachParagraph, $bodyWidth, $keepHTML);

    // new markdownable tags & attributes
    // header ids: # foo {bar}
    foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $heading) {
        $this->isMarkdownable[$heading]['id'] = 'optional';
        $this->isMarkdownable[$heading]['class'] = 'optional';
    }

    // tables
    $cellAttributes = array(
        'align' => 'optional',
    );
    $this->isMarkdownable['table'] = array();
    $this->isMarkdownable['th'] = $cellAttributes;
    $this->isMarkdownable['td'] = $cellAttributes;
    $this->isMarkdownable['tr'] = array();
    array_push($this->ignore, 'thead', 'tbody', 'tfoot');

    // definition lists
    $this->isMarkdownable['dl'] = array();
    $this->isMarkdownable['dd'] = array();
    $this->isMarkdownable['dt'] = array();

    // link class
    $this->isMarkdownable['a']['id'] = 'optional';
    $this->isMarkdownable['a']['class'] = 'optional';

    // footnotes
    $this->isMarkdownable['fnref'] = array(
        'target' => 'required',
    );
    $this->isMarkdownable['footnotes'] = array();
    $this->isMarkdownable['fn'] = array(
        'name' => 'required',
    );
    $this->parser->blockElements['fnref'] = false;
    $this->parser->blockElements['fn'] = true;
    $this->parser->blockElements['footnotes'] = true;

    // abbr
    $this->isMarkdownable['abbr'] = array(
        'title' => 'required',
    );

    // build RegEx lookahead to decide whether table can be parsed or not:
    // a cell may contain text and inline tags only
    $inlineTags = array_keys($this->parser->blockElements, false);
    $colContents = '(?:[^<]|<(?:' . implode('|', $inlineTags) . '|[^a-z]))*';
    $this->tableLookaheadHeader = '{
^\s*(?:<thead\s*>)?\s* # open optional thead
<tr\s*>\s*(?: # start required row with headers
<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
\s*' . $colContents . '\s* # contents
</th>\s* # close header
)+</tr> # close row with headers
\s*(?:</thead>)? # close optional thead
}sxi';
    $this->tdSubstitute = '\s*' . $colContents . '\s* # contents
</td>\s*';
    $this->tableLookaheadBody = '{
\s*(?:<tbody\s*>)?\s* # open optional tbody
(?:<tr\s*>\s* # start row
%s # cols to be substituted
</tr>)+ # close row
\s*(?:</tbody>)? # close optional tbody
\s*</table> # close table
}sxi';
}
|
||||
|
||||
/**
 * Handle header tags (<h1> - <h6>), adding the Markdown Extra
 * attribute block.
 *
 * On the start tag the current CSS selector is stored with the stacked tag;
 * on the end tag a non-empty selector is written as " {selector}" before the
 * parent implementation handles the header itself.
 *
 * @param int $level 1-6
 * @return void
 */
protected function handleHeader($level)
{
    if (!$this->parser->isStartTag) {
        $closed = $this->unstack();
        if (!empty($closed['cssSelector'])) {
            // {#id.class}
            $this->out(' {' . $closed['cssSelector'] . '}');
        }
    } else {
        $this->parser->tagAttributes['cssSelector'] = $this->getCurrentCssSelector();
        $this->stack();
    }

    parent::handleHeader($level);
}
|
||||
|
||||
/**
 * Parse an <a> tag.
 *
 * Runs the parent implementation first, then stores the current CSS selector
 * in the tag attributes under the key 'cssSelector'.
 *
 * @param void
 * @return void
 */
protected function handleTag_a_parser()
{
    parent::handleTag_a_parser();

    $selector = $this->getCurrentCssSelector();
    $this->parser->tagAttributes['cssSelector'] = $selector;
}
|
||||
|
||||
/**
 * Convert an <a> tag to Markdown.
 *
 * The link itself is produced by the parent implementation; if the tag
 * carries a non-empty 'cssSelector', it is appended as "{selector}".
 *
 * @param array $tag
 * @param string $buffer
 * @return string The markdownified link
 */
protected function handleTag_a_converter($tag, $buffer)
{
    $link = parent::handleTag_a_converter($tag, $buffer);

    if (empty($tag['cssSelector'])) {
        return $link;
    }

    // [This link][id]{#id.class}
    return $link . '{' . $tag['cssSelector'] . '}';
}
|
||||
|
||||
/**
 * Handle <abbr> tags.
 *
 * The opening tag is stacked and output buffering starts. On the closing
 * tag the buffered text is written out as-is, and the tag is remembered in
 * $this->stack['abbr'] unless an entry with the same text is already there.
 *
 * @return void
 */
protected function handleTag_abbr()
{
    if ($this->parser->isStartTag) {
        $this->stack();
        $this->buffer();

        return;
    }

    $tag = $this->unstack();
    $tag['text'] = $this->unbuffer();

    /** TODO: differing abbr definitions, i.e. different titles for same text **/
    $alreadyKnown = false;
    foreach ($this->stack['abbr'] as $stacked) {
        if ($stacked['text'] == $tag['text']) {
            $alreadyKnown = true;
            break;
        }
    }

    $this->out($tag['text']);

    if (!$alreadyKnown) {
        $this->stack['abbr'][] = $tag;
    }
}
|
||||
|
||||
/**
 * Flush stacked abbr tags.
 *
 * Emits one "*[text]: title" definition line for every entry in
 * $this->stack['abbr'] that has not been flushed yet, and marks each
 * emitted entry with 'unstacked' so it is not written twice.
 *
 * @return void
 */
protected function flushStacked_abbr()
{
    $out = array();
    foreach ($this->stack['abbr'] as $k => $tag) {
        if (isset($tag['unstacked'])) {
            continue;
        }
        // NOTE(review): 'title' presumably comes from the <abbr> attributes and
        // may be absent; fall back to '' instead of reading an undefined index.
        $title = isset($tag['title']) ? $tag['title'] : '';
        $out[] = ' *[' . $tag['text'] . ']: ' . $title;
        $tag['unstacked'] = true;
        $this->stack['abbr'][$k] = $tag;
    }

    if (!empty($out)) {
        $this->out("\n\n" . implode("\n", $out));
    }
}
|
||||
|
||||
/**
 * Handle <table> tags.
 *
 * Opening tag: when $this->keepHTML is set, the upcoming HTML is matched
 * against the header/body lookahead patterns; only if both match is the
 * table state initialised for Markdown output, otherwise the tag is passed
 * to handleTagToText(). Without keepHTML the table state is always
 * initialised.
 *
 * Closing tag: builds the table from the collected rows (header row,
 * alignment separator, body rows) and writes it out.
 *
 * @return void
 */
protected function handleTag_table()
{
    if ($this->parser->isStartTag) {
        // check if upcoming table can be converted
        if ($this->keepHTML) {
            if (preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
                // header seems good, now check body
                // get align & number of cols
                preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
                $regEx = '';
                $i = 1;
                $aligns = array();
                foreach ($cols[2] as $align) {
                    $align = strtolower($align);
                    array_push($aligns, $align);
                    if (empty($align)) {
                        $align = 'left'; // default value
                    }
                    // each column adds one capture group, hence the running backreference index
                    $td = '\s+align=("|\')' . $align . '\\' . $i;
                    $i++;
                    if ($align == 'left') {
                        // look for empty align or left
                        $td = '(?:' . $td . ')?';
                    }
                    $td = '<td' . $td . '\s*>';
                    $regEx .= $td . $this->tdSubstitute;
                }
                $regEx = sprintf($this->tableLookaheadBody, $regEx);
                // continue matching right after the header; flags must be an int
                // (passing null is deprecated as of PHP 8.1)
                $headerLength = strlen($matches[0]);
                if (preg_match($regEx, $this->parser->html, $matches, 0, $headerLength)) {
                    // this is a markdownable table tag!
                    $this->table = array(
                        'rows' => array(),
                        'col_widths' => array(),
                        'aligns' => $aligns,
                    );
                    $this->row = 0;
                } else {
                    // non markdownable table
                    $this->handleTagToText();
                }
            } else {
                // non markdownable table
                $this->handleTagToText();
            }
        } else {
            $this->table = array(
                'rows' => array(),
                'col_widths' => array(),
                'aligns' => array(),
            );
            $this->row = 0;
        }
    } else {
        // finally build the table in Markdown Extra syntax
        $separator = array();
        if (!isset($this->table['aligns'])) {
            $this->table['aligns'] = array();
        }
        // separator with correct align identifiers
        foreach ($this->table['aligns'] as $col => $align) {
            if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
                break;
            }
            $left = ' ';
            $right = ' ';
            switch ($align) {
                case 'left':
                    $left = ':';
                    break;
                case 'center':
                    $right = ':';
                    $left = ':';
                    break;
                case 'right':
                    $right = ':';
                    break;
            }
            array_push($separator, $left . str_repeat('-', $this->table['col_widths'][$col]) . $right);
        }
        $separator = '|' . implode('|', $separator) . '|';

        $rows = array();
        // add padding
        array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
        $header = array_shift($this->table['rows']);
        if (!is_array($header)) {
            // no cells were collected at all; avoid handing null to implode()
            $header = array();
        }
        array_push($rows, '| ' . implode(' | ', $header) . ' |');
        array_push($rows, $separator);
        foreach ($this->table['rows'] as $row) {
            array_push($rows, '| ' . implode(' | ', $row) . ' |');
        }
        $this->out(implode("\n" . $this->indent, $rows));
        $this->table = array();
        $this->setLineBreaks(2);
    }
}
|
||||
|
||||
/**
 * Pad a cell's content to its column width according to the column's
 * alignment. Meant as the callback for array_walk_recursive() on
 * $this->table['rows']; a column without a recorded alignment is
 * registered as 'left'.
 *
 * @param string &$content cell text, modified in place
 * @param int $col column index
 * @return void
 */
protected function alignTdContent(&$content, $col)
{
    if (!isset($this->table['aligns'][$col])) {
        $this->table['aligns'][$col] = 'left';
    }

    // every alignment needs the same amount of filler, only its placement differs
    $gap = $this->table['col_widths'][$col] - $this->strlen($content);

    switch ($this->table['aligns'][$col]) {
        default:
        case 'left':
            $content = $content . str_repeat(' ', $gap);
            break;
        case 'right':
            $content = str_repeat(' ', $gap) . $content;
            break;
        case 'center':
            $before = floor($gap / 2);
            $after = $gap - $before;
            $content = str_repeat(' ', $before) . $content . str_repeat(' ', $after);
            break;
    }
}
|
||||
|
||||
/**
 * Handle <tr> tags: reset the column counter when a row opens and advance
 * the row counter when it closes.
 *
 * @return void
 */
protected function handleTag_tr()
{
    if (!$this->parser->isStartTag) {
        $this->row++;

        return;
    }

    $this->col = -1;
}
|
||||
|
||||
/**
 * Handle <td> tags.
 *
 * Opening tag: advance to the next column, make sure a width entry exists
 * for it and start buffering. Closing tag: store the trimmed buffer as the
 * cell content and widen the column if this cell is the longest so far.
 *
 * @return void
 */
protected function handleTag_td()
{
    if ($this->parser->isStartTag) {
        $this->col++;
        if (!isset($this->table['col_widths'][$this->col])) {
            $this->table['col_widths'][$this->col] = 0;
        }
        $this->buffer();

        return;
    }

    $cell = trim($this->unbuffer());
    $col = $this->col;
    if (!isset($this->table['col_widths'][$col])) {
        $this->table['col_widths'][$col] = 0;
    }
    $this->table['col_widths'][$col] = max($this->table['col_widths'][$col], $this->strlen($cell));
    $this->table['rows'][$this->row][$col] = $cell;
}
|
||||
|
||||
/**
 * Handle <th> tags.
 *
 * When HTML is not kept and no second row exists yet, the alignment of the
 * upcoming column is recorded from the tag's "align" attribute (or '' when
 * absent), unless one is already recorded. The cell itself is then handled
 * exactly like a <td>.
 *
 * @return void
 */
protected function handleTag_th()
{
    if (!$this->keepHTML && !isset($this->table['rows'][1])) {
        $upcoming = $this->col + 1;
        if (!isset($this->table['aligns'][$upcoming])) {
            $this->table['aligns'][$upcoming] = isset($this->parser->tagAttributes['align'])
                ? $this->parser->tagAttributes['align']
                : '';
        }
    }

    $this->handleTag_td();
}
|
||||
|
||||
/**
 * Handle <dl> tags: request two line breaks once the list is closed.
 *
 * @return void
 */
protected function handleTag_dl()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(2);
}
|
||||
|
||||
/**
 * Handle <dt> tags: request a single line break once the term is closed.
 *
 * @return void
 */
protected function handleTag_dt()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(1);
}
|
||||
|
||||
/**
 * Handle <dd> tags.
 *
 * Opening tag: if a paragraph follows, an extra line is emitted; otherwise
 * one of two trailing newlines already in the output is removed. Then the
 * definition marker is written and the indent is changed. Closing tag:
 * line breaks depend on whether a <dt> follows, then the indent is toggled.
 *
 * @return void
 */
protected function handleTag_dd()
{
    if (!$this->parser->isStartTag) {
        // lookahead for next dt
        $dtFollows = substr(ltrim($this->parser->html), 0, 4) == '<dt>';
        $this->setLineBreaks($dtFollows ? 2 : 1);
        $this->indent(' ');

        return;
    }

    if (substr(ltrim($this->parser->html), 0, 3) == '<p>') {
        // next comes a paragraph, so we'll need an extra line
        $this->out("\n" . $this->indent);
    } elseif (substr($this->output, -2) == "\n\n") {
        $this->output = substr($this->output, 0, -1);
    }
    $this->out(': ');
    $this->indent(' ', false);
}
|
||||
|
||||
/**
 * Handle <fnref /> tags (custom footnote references produced by
 * parseString()): emit a "[^target]" reference.
 *
 * @return void
 */
protected function handleTag_fnref()
{
    $target = $this->parser->tagAttributes['target'];

    $this->out('[^' . $target . ']');
}
|
||||
|
||||
/**
 * Handle <fn> tags (custom footnotes produced by parseString() and
 * _makeFootnotes()).
 *
 * The opening tag writes the "[^name]:" label and asks for one line break;
 * the closing tag asks for two. The indent is toggled in both cases.
 *
 * @return void
 */
protected function handleTag_fn()
{
    $opening = $this->parser->isStartTag;

    if ($opening) {
        $this->out('[^' . $this->parser->tagAttributes['name'] . ']:');
    }
    $this->setLineBreaks($opening ? 1 : 2);
    $this->indent(' ');
}
|
||||
|
||||
/**
 * Handle the <footnotes> tag (custom footnote container produced by
 * parseString() and _makeFootnotes()): request two line breaks once it
 * is closed.
 *
 * @return void
 */
protected function handleTag_footnotes()
{
    if ($this->parser->isStartTag) {
        return;
    }

    $this->setLineBreaks(2);
}
|
||||
|
||||
/**
 * Parse an HTML string, rewriting footnote markup into custom tags first.
 *
 * Footnote references
 *     <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
 * become <fnref target="..." />, and the footnote list
 *     <div class="footnotes"><hr /><ol><li id="fn:...">...</li>...</ol></div>
 * is handed to _makeFootnotes(), which turns it into
 *     <footnotes><fn name="...">...</fn>...</footnotes>
 *
 * @param string $html input
 * @return string Markdown formatted output
 */
public function parseString($html)
{
    /** TODO: custom markdown-extra options, e.g. titles & classes **/
    $referencePattern = '@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us';
    $listPattern = '#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us';

    $withReferences = preg_replace($referencePattern, '<fnref target="$1" />', $html);
    $withFootnotes = preg_replace_callback($listPattern, array($this, '_makeFootnotes'), $withReferences);

    return parent::parseString($withFootnotes);
}
|
||||
|
||||
/**
 * Replace the HTML representation of footnotes with something more easily
 * parsable:
 *
 *     <li id="fn:1">
 *       ...
 *       <a href="#fnref:block" rev="footnote">(return arrow)</a></p>
 *     </li>
 *     => <fn name="1">...</fn>
 *
 * @note this is a callback to be used in parseString()
 *
 * @param array $matches match array; index 1 holds the contents of the footnote <ol>
 * @return string the footnotes wrapped in <footnotes>...</footnotes>
 */
protected function _makeFootnotes($matches)
{
    // remove footnote link; accept the no-break space and the return arrow both
    // as literal characters and as numeric entities
    // NOTE(review): renderers presumably emit the entity forms - confirm against real input
    $fns = preg_replace(
        '@\s*((?:&#160;|\xC2\xA0| )\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>(?:&#8617;|↩)</a>\s*@s',
        '',
        $matches[1]
    );
    // remove empty paragraph
    $fns = preg_replace('@<p>\s*</p>@s', '', $fns);
    // <li id="fn:1">...</li> -> <fn name="1">...</fn>
    $fns = str_replace('<li id="fn:', '<fn name="', $fns);

    $fns = '<footnotes>' . $fns . '</footnotes>';

    // the pattern has no capture group, so the replacement is just the closing tag
    return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>', $fns);
}
|
||||
|
||||
/**
 * Build a CSS-selector style string ("#id.class1.class2") from the "id" and
 * "class" attributes of the tag the parser is currently positioned on.
 *
 * @return string the selector, or '' when the tag has neither an id nor
 *                any non-empty class name
 */
protected function getCurrentCssSelector()
{
    $cssSelector = '';
    if (isset($this->parser->tagAttributes['id'])) {
        $cssSelector .= '#' . $this->decode($this->parser->tagAttributes['id']);
    }
    if (isset($this->parser->tagAttributes['class'])) {
        // class names may be separated by any run of whitespace
        $classes = preg_split('/\s+/', $this->decode($this->parser->tagAttributes['class']));
        $classes = array_filter($classes);
        if (!empty($classes)) {
            // an empty class attribute must not leave a dangling "."
            $cssSelector .= '.' . join('.', $classes);
        }
    }

    return $cssSelector;
}
|
||||
}
|
564
vendor/pixel418/markdownify/src/Parser.php
vendored
Normal file
564
vendor/pixel418/markdownify/src/Parser.php
vendored
Normal file
@ -0,0 +1,564 @@
|
||||
<?php
|
||||
|
||||
/* This file is part of the Markdownify project, which is under LGPL license */
|
||||
|
||||
namespace Markdownify;
|
||||
|
||||
/**
 * Minimal streaming HTML tokenizer.
 *
 * Assign the markup to $html, then call nextNode() repeatedly; each
 * successful call consumes one node from the front of $html and describes
 * it through the public properties ($nodeType, $node, $tagName, ...).
 */
class Parser
{
    /** @var bool whether a whitespace-only text node following the current node is skipped */
    public static $skipWhitespace = true;
    /** @var int|null ord('a'), initialised lazily by parseTag() */
    public static $a_ord;
    /** @var int|null ord('z'), initialised lazily by parseTag() */
    public static $z_ord;
    /** @var array<int>|null ordinals of the non-letter characters accepted in attribute names */
    public static $special_ords;

    /**
     * tags which are always empty (<br /> etc.)
     *
     * @var array<string>
     */
    public $emptyTags = array(
        'br',
        'hr',
        'input',
        'img',
        'area',
        'link',
        'meta',
        'param',
    );

    /**
     * tags with preformatted text
     * whitespaces won't be touched in them
     *
     * @var array<string>
     */
    public $preformattedTags = array(
        'script',
        'style',
        'pre',
        'code',
    );

    /**
     * suppress HTML tags inside preformatted tags (see above)
     *
     * @var bool
     */
    public $noTagsInCode = false;

    /**
     * html to be parsed
     *
     * @var string
     */
    public $html = '';

    /**
     * node type:
     *
     * - tag (see isStartTag)
     * - text (includes cdata)
     * - comment
     * - doctype
     * - pi (processing instruction)
     *
     * @var string
     */
    public $nodeType = '';

    /**
     * current node content, i.e. either a
     * simple string (text node), or something like
     * <tag attrib="value"...>
     *
     * @var string
     */
    public $node = '';

    /**
     * whether current node is an opening tag (<a>) or not (</a>)
     * set to NULL if current node is not a tag
     * NOTE: empty tags (<br />) set this to true as well!
     *
     * @var bool | null
     */
    public $isStartTag = null;

    /**
     * whether current node is an empty tag (<br />) or not (<a></a>)
     *
     * @var bool | null
     */
    public $isEmptyTag = null;

    /**
     * tag name
     *
     * @var string | null
     */
    public $tagName = '';

    /**
     * attributes of current tag
     *
     * @var array (attribName=>value) | null
     */
    public $tagAttributes = null;

    /**
     * whether or not the actual context is a inline context
     *
     * @var bool | null
     */
    public $isInlineContext = null;

    /**
     * whether the current tag is a block element
     *
     * @var bool | null
     */
    public $isBlockElement = null;

    /**
     * whether the current start tag directly follows an inline context
     *
     * @var bool | null
     */
    public $isNextToInlineContext = null;

    /**
     * keep whitespace (nesting depth of open preformatted tags)
     *
     * @var int
     */
    public $keepWhitespace = 0;

    /**
     * list of open tags
     * count this to get current depth
     *
     * @var array
     */
    public $openTags = array();

    /**
     * list of known elements; tags not listed here are treated as invalid
     *
     * NOTE: 'del' and 'ins' appear twice; PHP keeps the later value (false,
     * i.e. inline) at the position of the first occurrence.
     *
     * @var array
     * TODO: what shall we do with <del> and <ins> ?!
     */
    public $blockElements = array(
        // tag name => <bool> is block
        // block elements
        'address' => true,
        'blockquote' => true,
        'center' => true,
        'del' => true,
        'dir' => true,
        'div' => true,
        'dl' => true,
        'fieldset' => true,
        'form' => true,
        'h1' => true,
        'h2' => true,
        'h3' => true,
        'h4' => true,
        'h5' => true,
        'h6' => true,
        'hr' => true,
        'ins' => true,
        'isindex' => true,
        'menu' => true,
        'noframes' => true,
        'noscript' => true,
        'ol' => true,
        'p' => true,
        'pre' => true,
        'table' => true,
        'ul' => true,
        // set table elements and list items to block as well
        'thead' => true,
        'tbody' => true,
        'tfoot' => true,
        'td' => true,
        'tr' => true,
        'th' => true,
        'li' => true,
        'dd' => true,
        'dt' => true,
        // header items and html / body as well
        'html' => true,
        'body' => true,
        'head' => true,
        'meta' => true,
        'link' => true,
        'style' => true,
        'title' => true,
        // unfancy media tags, when indented should be rendered as block
        'map' => true,
        'object' => true,
        'param' => true,
        'embed' => true,
        'area' => true,
        // inline elements
        'a' => false,
        'abbr' => false,
        'acronym' => false,
        'applet' => false,
        'b' => false,
        'basefont' => false,
        'bdo' => false,
        'big' => false,
        'br' => false,
        'button' => false,
        'cite' => false,
        'code' => false,
        'del' => false,
        'dfn' => false,
        'em' => false,
        'font' => false,
        'i' => false,
        'img' => false,
        'ins' => false,
        'input' => false,
        'iframe' => false,
        'kbd' => false,
        'label' => false,
        'q' => false,
        'samp' => false,
        'script' => false,
        'select' => false,
        'small' => false,
        'span' => false,
        'strong' => false,
        'sub' => false,
        'sup' => false,
        'textarea' => false,
        'tt' => false,
        'var' => false,
    );

    /**
     * get next node, set $this->html prior!
     *
     * @return bool true when a node was read, false when the input is exhausted
     */
    public function nextNode()
    {
        // compare against '' explicitly: empty() would also stop on a remaining "0"
        if ((string) $this->html === '') {
            // we are done with parsing the html string

            return false;
        }

        if ($this->isStartTag && !$this->isEmptyTag) {
            // the previous node opened an element; track it now that we move on
            array_push($this->openTags, $this->tagName);
            if (in_array($this->tagName, $this->preformattedTags, true)) {
                // dont truncate whitespaces for <code> or <pre> contents
                $this->keepWhitespace++;
            }
        }

        if ($this->html[0] == '<') {
            $token = substr($this->html, 0, 9);
            if (substr($token, 0, 2) == '<?') {
                // xml prolog or other pi's
                /** TODO **/
                // trigger_error('this might need some work', E_USER_NOTICE);
                $pos = strpos($this->html, '>');
                $this->setNode('pi', $pos + 1);

                return true;
            }
            if (substr($token, 0, 4) == '<!--') {
                // comment
                $pos = strpos($this->html, '-->');
                if ($pos === false) {
                    // could not find a closing -->, use next gt instead
                    // this is firefox' behaviour
                    $pos = strpos($this->html, '>') + 1;
                } else {
                    $pos += 3;
                }
                $this->setNode('comment', $pos);

                static::$skipWhitespace = true;

                return true;
            }
            if (strtoupper($token) == '<!DOCTYPE') {
                // doctype (the keyword is case-insensitive in HTML)
                $this->setNode('doctype', strpos($this->html, '>') + 1);

                static::$skipWhitespace = true;

                return true;
            }
            if ($token == '<![CDATA[') {
                // cdata, use text node

                // remove leading <![CDATA[
                $this->html = (string) substr($this->html, 9);

                $end = strpos($this->html, ']]>');
                if ($end === false) {
                    // unterminated section: keep everything that is left as text
                    $this->setNode('text', strlen($this->html));
                } else {
                    $this->setNode('text', $end + 3);
                    // remove trailing ]]>
                    $this->node = substr($this->node, 0, -3);
                }
                $this->handleWhitespaces();

                static::$skipWhitespace = true;

                return true;
            }
            if ($this->parseTag()) {
                // seems to be a tag
                // handle whitespaces
                static::$skipWhitespace = (bool) $this->isBlockElement;

                return true;
            }
        }
        if ($this->keepWhitespace) {
            static::$skipWhitespace = false;
        }
        // when we get here it seems to be a text node
        $pos = strpos($this->html, '<');
        if ($pos === false) {
            $pos = strlen($this->html);
        }
        $this->setNode('text', $pos);
        $this->handleWhitespaces();
        if (static::$skipWhitespace && $this->node == ' ') {
            return $this->nextNode();
        }
        $this->isInlineContext = true;
        static::$skipWhitespace = false;

        return true;
    }

    /**
     * parse tag, set tag name and attributes, see if it's a closing tag and so forth...
     *
     * On failure the leading "<" of $this->html is escaped (see invalidTag())
     * so the caller can treat the markup as text.
     *
     * @return bool true when a valid tag was consumed
     */
    protected function parseTag()
    {
        if (!isset(static::$a_ord)) {
            static::$a_ord = ord('a');
            static::$z_ord = ord('z');
            static::$special_ords = array(
                ord(':'), // for xml:lang
                ord('-'), // for http-equiv
            );
        }

        $tagName = '';

        $pos = 1;
        $isStartTag = !isset($this->html[$pos]) || $this->html[$pos] != '/';
        if (!$isStartTag) {
            $pos++;
        }
        // get tagName
        while (isset($this->html[$pos])) {
            $pos_ord = ord(strtolower($this->html[$pos]));
            if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || ($tagName !== '' && is_numeric($this->html[$pos]))) {
                $tagName .= $this->html[$pos];
                $pos++;
            } else {
                // step back onto the last character of the name
                $pos--;
                break;
            }
        }

        $tagName = strtolower($tagName);
        if ($tagName === '' || !isset($this->blockElements[$tagName])) {
            // something went wrong => invalid tag
            $this->invalidTag();

            return false;
        }
        if ($this->noTagsInCode && end($this->openTags) == 'code' && !($tagName == 'code' && !$isStartTag)) {
            // we suppress all HTML tags inside code tags
            $this->invalidTag();

            return false;
        }

        // get tag attributes
        /** TODO: in html 4 attributes do not need to be quoted **/
        $isEmptyTag = false;
        $attributes = array();
        $currAttrib = '';
        while (isset($this->html[$pos + 1])) {
            $pos++;
            $char = $this->html[$pos];
            // substr() never reads past the end of the input, unlike offset access
            $pair = substr($this->html, $pos, 2);
            // close tag
            if ($char == '>' || $pair == '/>') {
                if ($char == '/') {
                    $isEmptyTag = true;
                    $pos++;
                }
                break;
            }

            $pos_ord = ord(strtolower($char));
            if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || in_array($pos_ord, static::$special_ords, true)) {
                // attribute name
                $currAttrib .= $char;
            } elseif (in_array($char, array(' ', "\t", "\n"), true)) {
                // drop whitespace
            } elseif (in_array($pair, array('="', "='"), true)) {
                // get attribute value
                $pos++;
                $await = $this->html[$pos]; // single or double quote
                $pos++;
                $value = '';
                while (isset($this->html[$pos]) && $this->html[$pos] != $await) {
                    $value .= $this->html[$pos];
                    $pos++;
                }
                $attributes[$currAttrib] = $value;
                $currAttrib = '';
            } else {
                $this->invalidTag();

                return false;
            }
        }
        if (!isset($this->html[$pos]) || $this->html[$pos] != '>') {
            // input ended before the tag was closed
            $this->invalidTag();

            return false;
        }

        if ($currAttrib !== '') {
            // html 4 allows something like <option selected> instead of <option selected="selected">
            $attributes[$currAttrib] = $currAttrib;
        }
        if (!$isStartTag) {
            if (!empty($attributes) || $tagName != end($this->openTags)) {
                // end tags must not contain any attributes
                // or maybe we did not expect a different tag to be closed
                $this->invalidTag();

                return false;
            }
            array_pop($this->openTags);
            if (in_array($tagName, $this->preformattedTags, true)) {
                $this->keepWhitespace--;
            }
        }
        $pos++;
        $this->node = substr($this->html, 0, $pos);
        $this->html = substr($this->html, $pos);
        $this->tagName = $tagName;
        $this->tagAttributes = $attributes;
        $this->isStartTag = $isStartTag;
        $this->isEmptyTag = $isEmptyTag || in_array($tagName, $this->emptyTags, true);
        if ($this->isEmptyTag) {
            // might be not well formed
            $this->node = preg_replace('# */? *>$#', ' />', $this->node);
        }
        $this->nodeType = 'tag';
        $this->isBlockElement = $this->blockElements[$tagName];
        $this->isNextToInlineContext = $isStartTag && $this->isInlineContext;
        $this->isInlineContext = !$this->isBlockElement;

        return true;
    }

    /**
     * handle invalid tags: escape the leading "<" so the markup is consumed
     * as text instead of being offered to parseTag() again
     *
     * @return void
     */
    protected function invalidTag()
    {
        $this->html = substr_replace($this->html, '&lt;', 0, 1);
    }

    /**
     * update all vars and make $this->html shorter
     *
     * @param string $type see description for $this->nodeType
     * @param int $pos to which position shall we cut?
     * @return void
     */
    protected function setNode($type, $pos)
    {
        if ($this->nodeType == 'tag') {
            // set tag specific vars to null
            // $type == tag should not be called here
            // see this::parseTag() for more
            $this->tagName = null;
            $this->tagAttributes = null;
            $this->isStartTag = null;
            $this->isEmptyTag = null;
            $this->isBlockElement = null;
        }
        $this->nodeType = $type;
        $this->node = substr($this->html, 0, $pos);
        $this->html = substr($this->html, $pos);
    }

    /**
     * check if $this->html begins with $str
     *
     * @param string $str
     * @return bool
     */
    protected function match($str)
    {
        return substr($this->html, 0, strlen($str)) == $str;
    }

    /**
     * truncate whitespaces (unless inside a preformatted element)
     *
     * @return void
     */
    protected function handleWhitespaces()
    {
        if ($this->keepWhitespace) {
            // <pre> or <code> before...

            return;
        }
        // truncate multiple whitespaces to a single one
        $this->node = preg_replace('#\s+#s', ' ', $this->node);
    }

    /**
     * normalize self::node: rebuild the tag markup from the parsed name and
     * attributes, escaping double quotes inside attribute values
     *
     * @return void
     */
    protected function normalizeNode()
    {
        $this->node = '<';
        if (!$this->isStartTag) {
            $this->node .= '/' . $this->tagName . '>';

            return;
        }
        $this->node .= $this->tagName;
        foreach ($this->tagAttributes as $name => $value) {
            $this->node .= ' ' . $name . '="' . str_replace('"', '&quot;', $value) . '"';
        }
        if ($this->isEmptyTag) {
            $this->node .= ' /';
        }
        $this->node .= '>';
    }
}
|
Reference in New Issue
Block a user