🔨✅ Add html2markdown unit tests.
A small refactoring to make the HTML-to-Markdown conversion testable. Add unit tests that pin down the behavior of the HTML2Markdown library that is now in use. Its output differs in some respects from the old pixel418/markdownify library.
This commit is contained in:
parent
8ce98e38dc
commit
0f0e23445a
@ -453,15 +453,24 @@ function bb2diaspora_itembody($item, $force_update = false, $have_channel = fals
|
|||||||
return html_entity_decode($body);
|
return html_entity_decode($body);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Prepare bbcode for Diaspora.
|
||||||
|
*
|
||||||
|
* @hooks bb2diaspora
|
||||||
|
* * \e string The prepared text for diaspora.
|
||||||
|
*
|
||||||
|
* @param string $Text bbcode
|
||||||
|
* @param boolean $preserve_nl (default false) preserve new lines
|
||||||
|
* @param boolean $fordiaspora (default true, but unused)
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
||||||
|
|
||||||
// Re-enabling the converter again.
|
// Re-enabling the converter again.
|
||||||
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
|
// The bbcode parser now handles youtube-links (and the other stuff) correctly.
|
||||||
// Additionally the html code is now fixed so that lists are now working.
|
// Additionally the html code is now fixed so that lists are now working.
|
||||||
|
|
||||||
/*
|
// Transform #tags, strip off the [url] and replace spaces with underscore
|
||||||
* Transform #tags, strip off the [url] and replace spaces with underscore
|
|
||||||
*/
|
|
||||||
$Text = preg_replace_callback('/#\[([zu])rl\=(\w+.*?)\](\w+.*?)\[\/[(zu)]rl\]/i', create_function('$match',
|
$Text = preg_replace_callback('/#\[([zu])rl\=(\w+.*?)\](\w+.*?)\[\/[(zu)]rl\]/i', create_function('$match',
|
||||||
'return \'#\'. str_replace(\' \', \'_\', $match[3]);'
|
'return \'#\'. str_replace(\' \', \'_\', $match[3]);'
|
||||||
), $Text);
|
), $Text);
|
||||||
@ -473,7 +482,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
|||||||
// strip map tags, as the rendering is performed in bbcode() and the resulting output
|
// strip map tags, as the rendering is performed in bbcode() and the resulting output
|
||||||
// is not compatible with Diaspora (at least in the case of openstreetmap and probably
|
// is not compatible with Diaspora (at least in the case of openstreetmap and probably
|
||||||
// due to the inclusion of an html iframe)
|
// due to the inclusion of an html iframe)
|
||||||
|
|
||||||
$Text = preg_replace("/\[map\=(.*?)\]/ism", '$1', $Text);
|
$Text = preg_replace("/\[map\=(.*?)\]/ism", '$1', $Text);
|
||||||
$Text = preg_replace("/\[map\](.*?)\[\/map\]/ism", '$1', $Text);
|
$Text = preg_replace("/\[map\](.*?)\[\/map\]/ism", '$1', $Text);
|
||||||
|
|
||||||
@ -491,15 +499,12 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
|||||||
$Text = bbcode($Text, $preserve_nl, false);
|
$Text = bbcode($Text, $preserve_nl, false);
|
||||||
|
|
||||||
// Markdownify does not preserve previously escaped html entities such as <> and &.
|
// Markdownify does not preserve previously escaped html entities such as <> and &.
|
||||||
|
|
||||||
$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
|
$Text = str_replace(array('<','>','&'),array('&_lt_;','&_gt_;','&_amp_;'),$Text);
|
||||||
|
|
||||||
// Now convert HTML to Markdown
|
// Now convert HTML to Markdown
|
||||||
$md = new HtmlConverter();
|
$Text = html2markdown($Text);
|
||||||
$Text = $md->convert($Text);
|
|
||||||
|
|
||||||
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
|
// It also adds backslashes to our attempt at getting around the html entity preservation for some weird reason.
|
||||||
|
|
||||||
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text);
|
$Text = str_replace(array('&\\_lt\\_;','&\\_gt\\_;','&\\_amp\\_;'),array('<','>','&'),$Text);
|
||||||
|
|
||||||
// If the text going into bbcode() has a plain URL in it, i.e.
|
// If the text going into bbcode() has a plain URL in it, i.e.
|
||||||
@ -516,7 +521,6 @@ function bb2diaspora($Text, $preserve_nl = false, $fordiaspora = true) {
|
|||||||
|
|
||||||
// Remove any leading or trailing whitespace, as this will mess up
|
// Remove any leading or trailing whitespace, as this will mess up
|
||||||
// the Diaspora signature verification and cause the item to disappear
|
// the Diaspora signature verification and cause the item to disappear
|
||||||
|
|
||||||
$Text = trim($Text);
|
$Text = trim($Text);
|
||||||
|
|
||||||
call_hooks('bb2diaspora', $Text);
|
call_hooks('bb2diaspora', $Text);
|
||||||
@ -563,3 +567,31 @@ function format_event_diaspora($ev) {
|
|||||||
|
|
||||||
return $o;
|
return $o;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Convert a HTML text into Markdown.
 *
 * This function uses the library league/html-to-markdown for this task.
 *
 * If the converter rejects the HTML with an InvalidArgumentException, the
 * failure (including the converter's own message) is logged and an empty
 * string is returned instead of propagating the exception.
 *
 * @see HTMLToMarkdown
 *
 * @param string $html The HTML code to convert
 * @return string Markdown representation of the given HTML text, empty on error
 */
function html2markdown(string $html) : string {
	// Default result; stays empty when the conversion fails.
	$markdown = '';
	$converter = new HtmlConverter();

	try {
		$markdown = $converter->convert($html);
	} catch (InvalidArgumentException $e) {
		// Include the exception message so the log says why the input was
		// rejected, not only that it was.
		logger("Invalid HTML. HTMLToMarkdown library threw an exception: " . $e->getMessage());
	}

	return $markdown;
}
|
||||||
|
149
tests/unit/includes/MarkdownTest.php
Normal file
149
tests/unit/includes/MarkdownTest.php
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
<?php
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2017 Hubzilla
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Zotlabs\Tests\Unit\includes;
|
||||||
|
|
||||||
|
use Zotlabs\Tests\Unit\UnitTestCase;
|
||||||
|
use phpmock\phpunit\PHPMock;
|
||||||
|
|
||||||
|
require_once 'include/markdown.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Unit Test case for markdown functions.
|
||||||
|
*/
|
||||||
|
class MarkdownTest extends UnitTestCase {
|
||||||
|
use PHPMock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::html2markdown
|
||||||
|
* @dataProvider html2markdownProvider
|
||||||
|
*/
|
||||||
|
public function testHtml2markdown($html, $markdown) {
|
||||||
|
$this->assertEquals($markdown, html2markdown($html));
|
||||||
|
}
|
||||||
|
|
||||||
|
public function html2markdownProvider() {
|
||||||
|
return [
|
||||||
|
'empty text' => [
|
||||||
|
'',
|
||||||
|
''
|
||||||
|
],
|
||||||
|
'space and nbsp only' => [
|
||||||
|
' ',
|
||||||
|
''
|
||||||
|
],
|
||||||
|
'strong, b, em, i, bib' => [
|
||||||
|
'<strong>strong</strong> <b>bold</b> <em>em</em> <i>italic</i> <b>bo<i>italic</i>ld</b>',
|
||||||
|
'**strong** **bold** _em_ _italic_ **bo_italic_ld**'
|
||||||
|
],
|
||||||
|
'empty tags' => [
|
||||||
|
'text1 <b></b> text2 <i></i>',
|
||||||
|
'text1 text2'
|
||||||
|
],
|
||||||
|
'HTML entities, lt does not work' => [
|
||||||
|
'& gt > lt <',
|
||||||
|
'& gt > lt'
|
||||||
|
],
|
||||||
|
'escaped HTML entities' => [
|
||||||
|
'& lt < gt >',
|
||||||
|
'& lt < gt >'
|
||||||
|
],
|
||||||
|
'our escaped HTML entities' => [
|
||||||
|
'&_lt_; &_gt_; &_amp_;',
|
||||||
|
'&\_lt\_; &\_gt\_; &\_amp\_;'
|
||||||
|
],
|
||||||
|
'linebreak' => [
|
||||||
|
"line1<br>line2\nline3",
|
||||||
|
"line1 \nline2 line3"
|
||||||
|
],
|
||||||
|
'headlines' => [
|
||||||
|
'<h1>header1</h1><h3>Header 3</h3>',
|
||||||
|
"header1\n=======\n\n### Header 3"
|
||||||
|
],
|
||||||
|
'unordered list' => [
|
||||||
|
'<ul><li>Item 1</li><li>Item 2</li><li>Item <b>3</b></li></ul>',
|
||||||
|
"- Item 1\n- Item 2\n- Item **3**"
|
||||||
|
],
|
||||||
|
'ordered list' => [
|
||||||
|
'<ol><li>Item 1</li><li>Item 2</li><li>Item <b>3</b></li></ol>',
|
||||||
|
"1. Item 1\n2. Item 2\n3. Item **3**"
|
||||||
|
],
|
||||||
|
'nested lists' => [
|
||||||
|
'<ul><li>Item 1<ol><li>Item 1a</li><li>Item <b>1b</b></ol></li><li>Item 2</li></ul>',
|
||||||
|
"- Item 1\n 1. Item 1a\n 2. Item **1b**\n- Item 2"
|
||||||
|
],
|
||||||
|
'img' => [
|
||||||
|
'<img src="/path/to/img.png" alt="alt text" title="title text">',
|
||||||
|
''
|
||||||
|
],
|
||||||
|
'link' => [
|
||||||
|
'<a href="http://hubzilla.org" title="Hubzilla">link</a>',
|
||||||
|
'[link](http://hubzilla.org "Hubzilla")'
|
||||||
|
],
|
||||||
|
'img link' => [
|
||||||
|
'<a href="http://hubzilla.org" title="Hubzilla"><img src="/img/hubzilla.png" alt="alt img text" title="img title"></a>',
|
||||||
|
'[](http://hubzilla.org "Hubzilla")'
|
||||||
|
],
|
||||||
|
'script' => [
|
||||||
|
"<script>alert('test');</script>",
|
||||||
|
"<script>alert('test');</script>"
|
||||||
|
],
|
||||||
|
'blockquote, issue #793' => [
|
||||||
|
'<blockquote>something</blockquote>blah',
|
||||||
|
"> something\n\nblah"
|
||||||
|
],
|
||||||
|
'code' => [
|
||||||
|
'<code><p>HTML text</p></code>',
|
||||||
|
'`<p>HTML text</p>`'
|
||||||
|
],
|
||||||
|
'pre' => [
|
||||||
|
'<pre> line with spaces </pre>',
|
||||||
|
'` line with spaces `'
|
||||||
|
],
|
||||||
|
'div p' => [
|
||||||
|
'<div>div</div><div><p>p</p></div>',
|
||||||
|
"<div>div</div><div>p\n\n</div>"
|
||||||
|
]
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/*public function testHtml2markdownException() {
|
||||||
|
//$this->expectException(\InvalidArgumentException::class);
|
||||||
|
// need to stub logger() for this to work
|
||||||
|
$this->assertEquals('', html2markdown('<<invalid'));
|
||||||
|
}*/
|
||||||
|
|
||||||
|
/* public function testBB2diasporaMardown() {
|
||||||
|
//stub bbcode() and return our HTML, we just need to test the HTML2Markdown library.
|
||||||
|
$html1 = 'test<b>bold</b><br><i>i</i><ul><li>li1</li><li>li2</li></ul><br>';
|
||||||
|
$bb1 = 'test';
|
||||||
|
|
||||||
|
// php-mock cannot mock a global function that is called from another global function.
|
||||||
|
// If the calling function is in a namespace it does work.
|
||||||
|
$bbc = $this->getFunctionMock(__NAMESPACE__, "bbcode");
|
||||||
|
$bbc->expects($this->once())->willReturn('test<b>bold</b><br><i>i</i><ul><li>li1</li><li>li2</li></ul><br>');
|
||||||
|
|
||||||
|
$this->assertEquals($bb1, bb2diaspora($html1));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
@ -11,6 +11,9 @@ use Zotlabs\Tests\Unit\UnitTestCase;
|
|||||||
*/
|
*/
|
||||||
class TextTest extends UnitTestCase {
|
class TextTest extends UnitTestCase {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::valid_email_regex
|
||||||
|
*/
|
||||||
public function testGoodEmail() {
|
public function testGoodEmail() {
|
||||||
$this->assertTrue(valid_email_regex('ken@spaz.org'));
|
$this->assertTrue(valid_email_regex('ken@spaz.org'));
|
||||||
$this->assertTrue(valid_email_regex('ken@restivo.org'));
|
$this->assertTrue(valid_email_regex('ken@restivo.org'));
|
||||||
@ -18,11 +21,17 @@ class TextTest extends UnitTestCase {
|
|||||||
$this->assertTrue(valid_email_regex('foo+nobody@hubzilla.org'));
|
$this->assertTrue(valid_email_regex('foo+nobody@hubzilla.org'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::valid_email_regex
|
||||||
|
*/
|
||||||
public function testBadEmail() {
|
public function testBadEmail() {
|
||||||
$this->assertFalse(valid_email_regex('nobody!uses!these!any.more'));
|
$this->assertFalse(valid_email_regex('nobody!uses!these!any.more'));
|
||||||
$this->assertFalse(valid_email_regex('foo@bar@hubzilla.org'));
|
$this->assertFalse(valid_email_regex('foo@bar@hubzilla.org'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::purify_html
|
||||||
|
*/
|
||||||
public function testPurifyHTML() {
|
public function testPurifyHTML() {
|
||||||
// linebreaks
|
// linebreaks
|
||||||
$htmlbr = 'first line<br />
|
$htmlbr = 'first line<br />
|
||||||
@ -46,6 +55,9 @@ empty line above';
|
|||||||
$this->assertEquals('<ul><li>item1</li></ul>', purify_html('<ul data-accordion-menu-unknown><li>item1</li></ul>'));
|
$this->assertEquals('<ul><li>item1</li></ul>', purify_html('<ul data-accordion-menu-unknown><li>item1</li></ul>'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::purify_html
|
||||||
|
*/
|
||||||
public function testPurifyHTML_html() {
|
public function testPurifyHTML_html() {
|
||||||
$this->assertEquals('<div id="id01"><p class="class01">ids und classes</p></div>', purify_html('<div id="id01"><p class="class01">ids und classes</p></div>'));
|
$this->assertEquals('<div id="id01"><p class="class01">ids und classes</p></div>', purify_html('<div id="id01"><p class="class01">ids und classes</p></div>'));
|
||||||
$this->assertEquals('<div><p>close missing tags</p></div>', purify_html('<div><p>close missing tags'));
|
$this->assertEquals('<div><p>close missing tags</p></div>', purify_html('<div><p>close missing tags'));
|
||||||
@ -59,6 +71,9 @@ empty line above';
|
|||||||
$this->assertEquals('', purify_html('<iframe width="560" height="315" src="https://www.youtube.com/embed/kiNGx5oL7hk" frameborder="0" allowfullscreen></iframe>'));
|
$this->assertEquals('', purify_html('<iframe width="560" height="315" src="https://www.youtube.com/embed/kiNGx5oL7hk" frameborder="0" allowfullscreen></iframe>'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::purify_html
|
||||||
|
*/
|
||||||
public function testPurifyHTML_js() {
|
public function testPurifyHTML_js() {
|
||||||
$this->assertEquals('<div></div>', purify_html('<div><img src="javascript:evil();" onload="evil();"></div>'));
|
$this->assertEquals('<div></div>', purify_html('<div><img src="javascript:evil();" onload="evil();"></div>'));
|
||||||
$this->assertEquals('<a href="#">link</a>', purify_html('<a href="#" onclick="alert(\'xss\')">link</a>'));
|
$this->assertEquals('<a href="#">link</a>', purify_html('<a href="#" onclick="alert(\'xss\')">link</a>'));
|
||||||
@ -66,6 +81,9 @@ empty line above';
|
|||||||
$this->assertEquals('', purify_html('<script>alter("42")</script>'));
|
$this->assertEquals('', purify_html('<script>alter("42")</script>'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @covers ::purify_html
|
||||||
|
*/
|
||||||
public function testPurifyHTML_css() {
|
public function testPurifyHTML_css() {
|
||||||
$this->assertEquals('<p style="color:#FF0000;background-color:#fff;">red</p>', purify_html('<p style="color:red; background-color:#fff">red</p>'));
|
$this->assertEquals('<p style="color:#FF0000;background-color:#fff;">red</p>', purify_html('<p style="color:red; background-color:#fff">red</p>'));
|
||||||
$this->assertEquals('<p>invalid color</p>', purify_html('<p style="color:invalid; background-color:#jjkkmm">invalid color</p>'));
|
$this->assertEquals('<p>invalid color</p>', purify_html('<p style="color:invalid; background-color:#jjkkmm">invalid color</p>'));
|
||||||
|
Reference in New Issue
Block a user