some important stuff we'll need
This commit is contained in:
48
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
48
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
@@ -0,0 +1,48 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition that uses different definitions depending on context.
|
||||
*
|
||||
* The del and ins tags are notable because they allow different types of
|
||||
* elements depending on whether or not they're in a block or inline context.
|
||||
* Chameleon allows this behavior to happen by using two different
|
||||
* definitions depending on context. While this somewhat generalized,
|
||||
* it is specifically intended for those two tags.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when inline. Usually stricter.
|
||||
*/
|
||||
public $inline;
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when block.
|
||||
*/
|
||||
public $block;
|
||||
|
||||
public $type = 'chameleon';
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
*/
|
||||
public function __construct($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
$this->elements = $this->block->elements;
|
||||
}
|
||||
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
} else {
|
||||
return $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
90
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
Normal file
90
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
Normal file
@@ -0,0 +1,90 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $type = 'custom';
|
||||
public $allow_empty = false;
|
||||
/**
|
||||
* Allowed child pattern as defined by the DTD
|
||||
*/
|
||||
public $dtd_regex;
|
||||
/**
|
||||
* PCRE regex derived from $dtd_regex
|
||||
* @private
|
||||
*/
|
||||
private $_pcre_regex;
|
||||
/**
|
||||
* @param $dtd_regex Allowed child pattern from the DTD
|
||||
*/
|
||||
public function __construct($dtd_regex) {
|
||||
$this->dtd_regex = $dtd_regex;
|
||||
$this->_compileRegex();
|
||||
}
|
||||
/**
|
||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||
*/
|
||||
protected function _compileRegex() {
|
||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||
if ($raw{0} != '(') {
|
||||
$raw = "($raw)";
|
||||
}
|
||||
$el = '[#a-zA-Z0-9_.-]+';
|
||||
$reg = $raw;
|
||||
|
||||
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
|
||||
// DOING! Seriously: if there's problems, please report them.
|
||||
|
||||
// collect all elements into the $elements array
|
||||
preg_match_all("/$el/", $reg, $matches);
|
||||
foreach ($matches[0] as $match) {
|
||||
$this->elements[$match] = true;
|
||||
}
|
||||
|
||||
// setup all elements as parentheticals with leading commas
|
||||
$reg = preg_replace("/$el/", '(,\\0)', $reg);
|
||||
|
||||
// remove commas when they were not solicited
|
||||
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
|
||||
|
||||
// remove all non-paranthetical commas: they are handled by first regex
|
||||
$reg = preg_replace("/,\(/", '(', $reg);
|
||||
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) continue;
|
||||
|
||||
$is_child = ($nesting == 0); // direct
|
||||
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$list_of_children .= $token->name . ',';
|
||||
}
|
||||
}
|
||||
// add leading comma to deal with stray comma declarations
|
||||
$list_of_children = ',' . rtrim($list_of_children, ',');
|
||||
$okay =
|
||||
preg_match(
|
||||
'/^,?'.$this->_pcre_regex.'$/',
|
||||
$list_of_children
|
||||
);
|
||||
|
||||
return (bool) $okay;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
20
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
Normal file
20
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
Normal file
@@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition that disallows all elements.
|
||||
* @warning validateChildren() in this class is actually never called, because
|
||||
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
||||
* before child definitions are parsed in earnest by
|
||||
* HTMLPurifier_Strategy_FixNesting.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $allow_empty = true;
|
||||
public $type = 'empty';
|
||||
public function __construct() {}
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
120
lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
Normal file
120
lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for list containers ul and ol.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $type = 'list';
|
||||
// lying a little bit, so that we can handle ul and ol ourselves
|
||||
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
|
||||
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
// Flag for subclasses
|
||||
$this->whitespace = false;
|
||||
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
$seen_li = false;
|
||||
$need_close_li = false;
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
if ($nesting == 1 && $need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
$nesting--;
|
||||
$need_close_li = false;
|
||||
}
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
if ($token->name === 'li') {
|
||||
// good
|
||||
$seen_li = true;
|
||||
} elseif ($token->name === 'ul' || $token->name === 'ol') {
|
||||
// we want to tuck this into the previous li
|
||||
$need_close_li = true;
|
||||
$nesting++;
|
||||
if (!$seen_li) {
|
||||
// create a new li element
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
} else {
|
||||
// backtrack until </li> found
|
||||
while(true) {
|
||||
$t = array_pop($result);
|
||||
if ($t instanceof HTMLPurifier_Token_End) {
|
||||
// XXX actually, these invariants could very plausibly be violated
|
||||
// if we are doing silly things with modifying the set of allowed elements.
|
||||
// FORTUNATELY, it doesn't make a difference, since the allowed
|
||||
// elements are hard-coded here!
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
// XXX this should have a helper for it...
|
||||
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
|
||||
break;
|
||||
} else {
|
||||
if (!$t->is_whitespace) {
|
||||
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// start wrapping (this doesn't precisely mimic
|
||||
// browser behavior, but what browsers do is kind of
|
||||
// hard to mimic in a standards compliant way
|
||||
// XXX Actually, this has no impact in practice,
|
||||
// because this gets handled earlier. Arguably,
|
||||
// we should rip out all of that processing
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
$nesting++;
|
||||
$seen_li = true;
|
||||
$need_close_li = true;
|
||||
}
|
||||
}
|
||||
$result[] = $token;
|
||||
}
|
||||
if ($need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) {
|
||||
return false;
|
||||
}
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
26
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php
Normal file
26
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, and allows no children.
|
||||
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
||||
* really, one shouldn't inherit from the other. Only altered behavior
|
||||
* is to overload a returned false with an array. Thus, it will never
|
||||
* return false.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
public $allow_empty = true;
|
||||
public $type = 'optional';
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
// we assume that $tokens_of_children is not modified
|
||||
if ($result === false) {
|
||||
if (empty($tokens_of_children)) return true;
|
||||
elseif ($this->whitespace) return $tokens_of_children;
|
||||
else return array();
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
117
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
Normal file
117
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
Normal file
@@ -0,0 +1,117 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, but disallows empty children.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Lookup table of allowed elements.
|
||||
* @public
|
||||
*/
|
||||
public $elements = array();
|
||||
/**
|
||||
* Whether or not the last passed node was all whitespace.
|
||||
*/
|
||||
protected $whitespace = false;
|
||||
/**
|
||||
* @param $elements List of allowed element names (lowercase).
|
||||
*/
|
||||
public function __construct($elements) {
|
||||
if (is_string($elements)) {
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$keys = array_keys($elements);
|
||||
if ($keys == array_keys($keys)) {
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]); // remove blank
|
||||
}
|
||||
}
|
||||
$this->elements = $elements;
|
||||
}
|
||||
public $allow_empty = false;
|
||||
public $type = 'required';
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
// Flag for subclasses
|
||||
$this->whitespace = false;
|
||||
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// whether or not we're deleting a node
|
||||
$is_deleting = false;
|
||||
|
||||
// whether or not parsed character data is allowed
|
||||
// this controls whether or not we silently drop a tag
|
||||
// or generate escaped HTML from it
|
||||
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
// some configuration
|
||||
$escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
|
||||
|
||||
// generator
|
||||
$gen = new HTMLPurifier_Generator($config, $context);
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$is_deleting = false;
|
||||
if (!isset($this->elements[$token->name])) {
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$gen->generateFromToken($token)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$gen->generateFromToken($token)
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
}
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) {
|
||||
$this->whitespace = true;
|
||||
return false;
|
||||
}
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
@@ -0,0 +1,88 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
protected $real_elements;
|
||||
protected $fake_elements;
|
||||
public $allow_empty = true;
|
||||
public $type = 'strictblockquote';
|
||||
protected $init = false;
|
||||
|
||||
/**
|
||||
* @note We don't want MakeWellFormed to auto-close inline elements since
|
||||
* they might be allowed.
|
||||
*/
|
||||
public function getAllowedElements($config) {
|
||||
$this->init($config);
|
||||
return $this->fake_elements;
|
||||
}
|
||||
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
|
||||
$this->init($config);
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
$this->elements = $this->real_elements;
|
||||
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||
$is_inline = false;
|
||||
$depth = 0;
|
||||
$ret = array();
|
||||
|
||||
// assuming that there are no comment tokens
|
||||
foreach ($result as $i => $token) {
|
||||
$token = $result[$i];
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (
|
||||
($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
|
||||
(!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
|
||||
) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
|
||||
if (isset($this->elements[$token->name])) {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$ret[] = $token;
|
||||
if ($token instanceof HTMLPurifier_Token_Start) $depth++;
|
||||
if ($token instanceof HTMLPurifier_Token_End) $depth--;
|
||||
}
|
||||
if ($is_inline) $ret[] = $block_wrap_end;
|
||||
return $ret;
|
||||
}
|
||||
|
||||
private function init($config) {
|
||||
if (!$this->init) {
|
||||
$def = $config->getHTMLDefinition();
|
||||
// allow all inline elements
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
227
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
Normal file
227
lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
Normal file
@@ -0,0 +1,227 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for tables. The general idea is to extract out all of the
|
||||
* essential bits, and then reconstruct it later.
|
||||
*
|
||||
* This is a bit confusing, because the DTDs and the W3C
|
||||
* validators seem to disagree on the appropriate definition. The
|
||||
* DTD claims:
|
||||
*
|
||||
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
|
||||
*
|
||||
* But actually, the HTML4 spec then has this to say:
|
||||
*
|
||||
* The TBODY start tag is always required except when the table
|
||||
* contains only one table body and no table head or foot sections.
|
||||
* The TBODY end tag may always be safely omitted.
|
||||
*
|
||||
* So the DTD is kind of wrong. The validator is, unfortunately, kind
|
||||
* of on crack.
|
||||
*
|
||||
* The definition changed again in XHTML1.1; and in my opinion, this
|
||||
* formulation makes the most sense.
|
||||
*
|
||||
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
|
||||
*
|
||||
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
|
||||
* If we encounter a thead, tfoot or tbody, we are placed in the former
|
||||
* mode, and we *must* wrap any stray tr segments with a tbody. But if
|
||||
* we don't run into any of them, just have tr tags is OK.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $allow_empty = false;
|
||||
public $type = 'table';
|
||||
public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
|
||||
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
|
||||
public function __construct() {}
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||
// get rid of the token later.
|
||||
$tokens_of_children[] = false;
|
||||
|
||||
// only one of these elements is allowed in a table
|
||||
$caption = false;
|
||||
$thead = false;
|
||||
$tfoot = false;
|
||||
|
||||
// as many of these as you want
|
||||
$cols = array();
|
||||
$content = array();
|
||||
|
||||
$nesting = 0; // current depth so we can determine nodes
|
||||
$is_collecting = false; // are we globbing together tokens to package
|
||||
// into one of the collectors?
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
$tbody_mode = false; // if true, then we need to wrap any stray
|
||||
// <tr>s with a <tbody>.
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token === false) {
|
||||
// terminating sequence started
|
||||
} elseif ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
// handle node collection
|
||||
if ($is_collecting) {
|
||||
if ($is_child) {
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tbody':
|
||||
$tbody_mode = true;
|
||||
case 'tr':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
if ($caption !== false) break;
|
||||
$caption = $collection;
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
$tbody_mode = true;
|
||||
// XXX This breaks rendering properties with
|
||||
// Firefox, which never floats a <thead> to
|
||||
// the top. Ever. (Our scheme will float the
|
||||
// first <thead> to the top.) So maybe
|
||||
// <thead>s that are not first should be
|
||||
// turned into <tbody>? Very tricky, indeed.
|
||||
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// Oops, there's a second one! What
|
||||
// should we do? Current behavior is to
|
||||
// transmutate the first and last entries into
|
||||
// tbody tags, and then put into content.
|
||||
// Maybe a better idea is to *attach
|
||||
// it* to the existing thead or tfoot?
|
||||
// We don't do this, because Firefox
|
||||
// doesn't float an extra tfoot to the
|
||||
// bottom like it does for the first one.
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
}
|
||||
break;
|
||||
case 'colgroup':
|
||||
$cols[] = $collection;
|
||||
break;
|
||||
}
|
||||
$collection = array();
|
||||
$is_collecting = false;
|
||||
$tag_index = 0;
|
||||
} else {
|
||||
// add the node to the collection
|
||||
$collection[] = $token;
|
||||
}
|
||||
}
|
||||
|
||||
// terminate
|
||||
if ($token === false) break;
|
||||
|
||||
if ($is_child) {
|
||||
// determine what we're dealing with
|
||||
if ($token->name == 'col') {
|
||||
// the only empty tag in the possie, we can handle it
|
||||
// immediately
|
||||
$cols[] = array_merge($collection, array($token));
|
||||
$collection = array();
|
||||
$tag_index = 0;
|
||||
continue;
|
||||
}
|
||||
switch($token->name) {
|
||||
case 'caption':
|
||||
case 'colgroup':
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
case 'tbody':
|
||||
case 'tr':
|
||||
$is_collecting = true;
|
||||
$collection[] = $token;
|
||||
continue;
|
||||
default:
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$collection[] = $token;
|
||||
$tag_index++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($content)) return false;
|
||||
|
||||
$ret = array();
|
||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
|
||||
if ($tbody_mode) {
|
||||
// a little tricky, since the start of the collection may be
|
||||
// whitespace
|
||||
$inside_tbody = false;
|
||||
foreach ($content as $token_array) {
|
||||
// find the starting token
|
||||
foreach ($token_array as $t) {
|
||||
if ($t->name === 'tr' || $t->name === 'tbody') {
|
||||
break;
|
||||
}
|
||||
} // iterator variable carries over
|
||||
if ($t->name === 'tr') {
|
||||
if ($inside_tbody) {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret[] = new HTMLPurifier_Token_Start('tbody');
|
||||
$ret = array_merge($ret, $token_array);
|
||||
$inside_tbody = true;
|
||||
}
|
||||
} elseif ($t->name === 'tbody') {
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
$inside_tbody = false;
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
} else {
|
||||
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
}
|
||||
} else {
|
||||
foreach ($content as $token_array) {
|
||||
// invariant: everything in here is <tr>s
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
}
|
||||
|
||||
array_pop($tokens_of_children); // remove phantom token
|
||||
|
||||
return ($ret === $tokens_of_children) ? true : $ret;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
Reference in New Issue
Block a user