- , $tokens);
+ getElementsByTagName('body')->item(0)-> //
+ getElementsByTagName('div')->item(0) //
+ ,
+ $tokens
+ );
return $tokens;
}
-
}
/*
-Copyright 2007 Jeroen van der Meer
+Copyright 2007 Jeroen van der Meer
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-class HTML5 {
+class HTML5
+{
private $data;
private $char;
private $EOF;
@@ -69,91 +78,418 @@ class HTML5 {
private $token;
private $content_model;
private $escape = false;
- private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute',
- 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;',
- 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;',
- 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;',
- 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;',
- 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;',
- 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;',
- 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;',
- 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;',
- 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN',
- 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;',
- 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;',
- 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig',
- 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;',
- 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;',
- 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil',
- 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;',
- 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;',
- 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;',
- 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth',
- 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12',
- 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt',
- 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc',
- 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;',
- 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;',
- 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;',
- 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro',
- 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;',
- 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;',
- 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;',
- 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash',
- 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;',
- 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;',
- 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;',
- 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;',
- 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;',
- 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;',
- 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;',
- 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;',
- 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc',
- 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;',
- 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;');
+ private $entities = array(
+ 'AElig;',
+ 'AElig',
+ 'AMP;',
+ 'AMP',
+ 'Aacute;',
+ 'Aacute',
+ 'Acirc;',
+ 'Acirc',
+ 'Agrave;',
+ 'Agrave',
+ 'Alpha;',
+ 'Aring;',
+ 'Aring',
+ 'Atilde;',
+ 'Atilde',
+ 'Auml;',
+ 'Auml',
+ 'Beta;',
+ 'COPY;',
+ 'COPY',
+ 'Ccedil;',
+ 'Ccedil',
+ 'Chi;',
+ 'Dagger;',
+ 'Delta;',
+ 'ETH;',
+ 'ETH',
+ 'Eacute;',
+ 'Eacute',
+ 'Ecirc;',
+ 'Ecirc',
+ 'Egrave;',
+ 'Egrave',
+ 'Epsilon;',
+ 'Eta;',
+ 'Euml;',
+ 'Euml',
+ 'GT;',
+ 'GT',
+ 'Gamma;',
+ 'Iacute;',
+ 'Iacute',
+ 'Icirc;',
+ 'Icirc',
+ 'Igrave;',
+ 'Igrave',
+ 'Iota;',
+ 'Iuml;',
+ 'Iuml',
+ 'Kappa;',
+ 'LT;',
+ 'LT',
+ 'Lambda;',
+ 'Mu;',
+ 'Ntilde;',
+ 'Ntilde',
+ 'Nu;',
+ 'OElig;',
+ 'Oacute;',
+ 'Oacute',
+ 'Ocirc;',
+ 'Ocirc',
+ 'Ograve;',
+ 'Ograve',
+ 'Omega;',
+ 'Omicron;',
+ 'Oslash;',
+ 'Oslash',
+ 'Otilde;',
+ 'Otilde',
+ 'Ouml;',
+ 'Ouml',
+ 'Phi;',
+ 'Pi;',
+ 'Prime;',
+ 'Psi;',
+ 'QUOT;',
+ 'QUOT',
+ 'REG;',
+ 'REG',
+ 'Rho;',
+ 'Scaron;',
+ 'Sigma;',
+ 'THORN;',
+ 'THORN',
+ 'TRADE;',
+ 'Tau;',
+ 'Theta;',
+ 'Uacute;',
+ 'Uacute',
+ 'Ucirc;',
+ 'Ucirc',
+ 'Ugrave;',
+ 'Ugrave',
+ 'Upsilon;',
+ 'Uuml;',
+ 'Uuml',
+ 'Xi;',
+ 'Yacute;',
+ 'Yacute',
+ 'Yuml;',
+ 'Zeta;',
+ 'aacute;',
+ 'aacute',
+ 'acirc;',
+ 'acirc',
+ 'acute;',
+ 'acute',
+ 'aelig;',
+ 'aelig',
+ 'agrave;',
+ 'agrave',
+ 'alefsym;',
+ 'alpha;',
+ 'amp;',
+ 'amp',
+ 'and;',
+ 'ang;',
+ 'apos;',
+ 'aring;',
+ 'aring',
+ 'asymp;',
+ 'atilde;',
+ 'atilde',
+ 'auml;',
+ 'auml',
+ 'bdquo;',
+ 'beta;',
+ 'brvbar;',
+ 'brvbar',
+ 'bull;',
+ 'cap;',
+ 'ccedil;',
+ 'ccedil',
+ 'cedil;',
+ 'cedil',
+ 'cent;',
+ 'cent',
+ 'chi;',
+ 'circ;',
+ 'clubs;',
+ 'cong;',
+ 'copy;',
+ 'copy',
+ 'crarr;',
+ 'cup;',
+ 'curren;',
+ 'curren',
+ 'dArr;',
+ 'dagger;',
+ 'darr;',
+ 'deg;',
+ 'deg',
+ 'delta;',
+ 'diams;',
+ 'divide;',
+ 'divide',
+ 'eacute;',
+ 'eacute',
+ 'ecirc;',
+ 'ecirc',
+ 'egrave;',
+ 'egrave',
+ 'empty;',
+ 'emsp;',
+ 'ensp;',
+ 'epsilon;',
+ 'equiv;',
+ 'eta;',
+ 'eth;',
+ 'eth',
+ 'euml;',
+ 'euml',
+ 'euro;',
+ 'exist;',
+ 'fnof;',
+ 'forall;',
+ 'frac12;',
+ 'frac12',
+ 'frac14;',
+ 'frac14',
+ 'frac34;',
+ 'frac34',
+ 'frasl;',
+ 'gamma;',
+ 'ge;',
+ 'gt;',
+ 'gt',
+ 'hArr;',
+ 'harr;',
+ 'hearts;',
+ 'hellip;',
+ 'iacute;',
+ 'iacute',
+ 'icirc;',
+ 'icirc',
+ 'iexcl;',
+ 'iexcl',
+ 'igrave;',
+ 'igrave',
+ 'image;',
+ 'infin;',
+ 'int;',
+ 'iota;',
+ 'iquest;',
+ 'iquest',
+ 'isin;',
+ 'iuml;',
+ 'iuml',
+ 'kappa;',
+ 'lArr;',
+ 'lambda;',
+ 'lang;',
+ 'laquo;',
+ 'laquo',
+ 'larr;',
+ 'lceil;',
+ 'ldquo;',
+ 'le;',
+ 'lfloor;',
+ 'lowast;',
+ 'loz;',
+ 'lrm;',
+ 'lsaquo;',
+ 'lsquo;',
+ 'lt;',
+ 'lt',
+ 'macr;',
+ 'macr',
+ 'mdash;',
+ 'micro;',
+ 'micro',
+ 'middot;',
+ 'middot',
+ 'minus;',
+ 'mu;',
+ 'nabla;',
+ 'nbsp;',
+ 'nbsp',
+ 'ndash;',
+ 'ne;',
+ 'ni;',
+ 'not;',
+ 'not',
+ 'notin;',
+ 'nsub;',
+ 'ntilde;',
+ 'ntilde',
+ 'nu;',
+ 'oacute;',
+ 'oacute',
+ 'ocirc;',
+ 'ocirc',
+ 'oelig;',
+ 'ograve;',
+ 'ograve',
+ 'oline;',
+ 'omega;',
+ 'omicron;',
+ 'oplus;',
+ 'or;',
+ 'ordf;',
+ 'ordf',
+ 'ordm;',
+ 'ordm',
+ 'oslash;',
+ 'oslash',
+ 'otilde;',
+ 'otilde',
+ 'otimes;',
+ 'ouml;',
+ 'ouml',
+ 'para;',
+ 'para',
+ 'part;',
+ 'permil;',
+ 'perp;',
+ 'phi;',
+ 'pi;',
+ 'piv;',
+ 'plusmn;',
+ 'plusmn',
+ 'pound;',
+ 'pound',
+ 'prime;',
+ 'prod;',
+ 'prop;',
+ 'psi;',
+ 'quot;',
+ 'quot',
+ 'rArr;',
+ 'radic;',
+ 'rang;',
+ 'raquo;',
+ 'raquo',
+ 'rarr;',
+ 'rceil;',
+ 'rdquo;',
+ 'real;',
+ 'reg;',
+ 'reg',
+ 'rfloor;',
+ 'rho;',
+ 'rlm;',
+ 'rsaquo;',
+ 'rsquo;',
+ 'sbquo;',
+ 'scaron;',
+ 'sdot;',
+ 'sect;',
+ 'sect',
+ 'shy;',
+ 'shy',
+ 'sigma;',
+ 'sigmaf;',
+ 'sim;',
+ 'spades;',
+ 'sub;',
+ 'sube;',
+ 'sum;',
+ 'sup1;',
+ 'sup1',
+ 'sup2;',
+ 'sup2',
+ 'sup3;',
+ 'sup3',
+ 'sup;',
+ 'supe;',
+ 'szlig;',
+ 'szlig',
+ 'tau;',
+ 'there4;',
+ 'theta;',
+ 'thetasym;',
+ 'thinsp;',
+ 'thorn;',
+ 'thorn',
+ 'tilde;',
+ 'times;',
+ 'times',
+ 'trade;',
+ 'uArr;',
+ 'uacute;',
+ 'uacute',
+ 'uarr;',
+ 'ucirc;',
+ 'ucirc',
+ 'ugrave;',
+ 'ugrave',
+ 'uml;',
+ 'uml',
+ 'upsih;',
+ 'upsilon;',
+ 'uuml;',
+ 'uuml',
+ 'weierp;',
+ 'xi;',
+ 'yacute;',
+ 'yacute',
+ 'yen;',
+ 'yen',
+ 'yuml;',
+ 'yuml',
+ 'zeta;',
+ 'zwj;',
+ 'zwnj;'
+ );
- const PCDATA = 0;
- const RCDATA = 1;
- const CDATA = 2;
+ const PCDATA = 0;
+ const RCDATA = 1;
+ const CDATA = 2;
const PLAINTEXT = 3;
- const DOCTYPE = 0;
+ const DOCTYPE = 0;
const STARTTAG = 1;
- const ENDTAG = 2;
- const COMMENT = 3;
+ const ENDTAG = 2;
+ const COMMENT = 3;
const CHARACTR = 4;
- const EOF = 5;
-
- public function __construct($data) {
- $data = str_replace("\r\n", "\n", $data);
- $data = str_replace("\r", null, $data);
+ const EOF = 5;
+ public function __construct($data)
+ {
$this->data = $data;
$this->char = -1;
- $this->EOF = strlen($data);
+ $this->EOF = strlen($data);
$this->tree = new HTML5TreeConstructer;
$this->content_model = self::PCDATA;
$this->state = 'data';
- while($this->state !== null) {
- $this->{$this->state.'State'}();
+ while ($this->state !== null) {
+ $this->{$this->state . 'State'}();
}
}
- public function save() {
+ public function save()
+ {
return $this->tree->save();
}
- private function char() {
+ private function char()
+ {
return ($this->char < $this->EOF)
? $this->data[$this->char]
: false;
}
- private function character($s, $l = 0) {
- if($s + $l < $this->EOF) {
- if($l === 0) {
+ private function character($s, $l = 0)
+ {
+ if ($s + $l < $this->EOF) {
+ if ($l === 0) {
return $this->data[$s];
} else {
return substr($this->data, $s, $l);
@@ -161,46 +497,52 @@ class HTML5 {
}
}
- private function characters($char_class, $start) {
- return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start));
+ private function characters($char_class, $start)
+ {
+ return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));
}
- private function dataState() {
+ private function dataState()
+ {
// Consume the next input character
$this->char++;
$char = $this->char();
- if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
+ if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
/* U+0026 AMPERSAND (&)
When the content model flag is set to one of the PCDATA or RCDATA
states: switch to the entity data state. Otherwise: treat it as per
the "anything else" entry below. */
$this->state = 'entityData';
- } elseif($char === '-') {
+ } elseif ($char === '-') {
/* If the content model flag is set to either the RCDATA state or
the CDATA state, and the escape flag is false, and there are at
least three characters before this one in the input stream, and the
last four characters in the input stream, including this one, are
U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,
and U+002D HYPHEN-MINUS (""),
set the escape flag to false. */
- if(($this->content_model === self::RCDATA ||
- $this->content_model === self::CDATA) && $this->escape === true &&
- $this->character($this->char, 3) === '-->') {
+ if (($this->content_model === self::RCDATA ||
+ $this->content_model === self::CDATA) && $this->escape === true &&
+ $this->character($this->char, 3) === '-->'
+ ) {
$this->escape = false;
}
/* In any case, emit the input character as a character token.
Stay in the data state. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => $char
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ )
+ );
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Emit an end-of-file token. */
$this->EOF();
- } elseif($this->content_model === self::PLAINTEXT) {
+ } elseif ($this->content_model === self::PLAINTEXT) {
/* When the content model flag is set to the PLAINTEXT state
THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
the text and emit it as a character token. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => substr($this->data, $this->char)
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => substr($this->data, $this->char)
+ )
+ );
$this->EOF();
@@ -252,37 +599,43 @@ class HTML5 {
THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that
otherwise would also be treated as a character token and emit it
as a single character token. Stay in the data state. */
- $len = strcspn($this->data, '<&', $this->char);
+ $len = strcspn($this->data, '<&', $this->char);
$char = substr($this->data, $this->char, $len);
$this->char += $len - 1;
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => $char
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ )
+ );
$this->state = 'data';
}
}
- private function entityDataState() {
+ private function entityDataState()
+ {
// Attempt to consume an entity.
$entity = $this->entity();
// If nothing is returned, emit a U+0026 AMPERSAND character token.
// Otherwise, emit the character token that was returned.
$char = (!$entity) ? '&' : $entity;
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => $char
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => $char
+ )
+ );
// Finally, switch to the data state.
$this->state = 'data';
}
- private function tagOpenState() {
- switch($this->content_model) {
+ private function tagOpenState()
+ {
+ switch ($this->content_model) {
case self::RCDATA:
case self::CDATA:
/* If the next input character is a U+002F SOLIDUS (/) character,
@@ -290,19 +643,21 @@ class HTML5 {
input character is not a U+002F SOLIDUS (/) character, emit a
U+003C LESS-THAN SIGN character token and switch to the data
state to process the next input character. */
- if($this->character($this->char + 1) === '/') {
+ if ($this->character($this->char + 1) === '/') {
$this->char++;
$this->state = 'closeTagOpen';
} else {
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => '<'
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => '<'
+ )
+ );
$this->state = 'data';
}
- break;
+ break;
case self::PCDATA:
// If the content model flag is set to the PCDATA state
@@ -310,42 +665,44 @@ class HTML5 {
$this->char++;
$char = $this->char();
- if($char === '!') {
+ if ($char === '!') {
/* U+0021 EXCLAMATION MARK (!)
Switch to the markup declaration open state. */
$this->state = 'markupDeclarationOpen';
- } elseif($char === '/') {
+ } elseif ($char === '/') {
/* U+002F SOLIDUS (/)
Switch to the close tag open state. */
$this->state = 'closeTagOpen';
- } elseif(preg_match('/^[A-Za-z]$/', $char)) {
+ } elseif (preg_match('/^[A-Za-z]$/', $char)) {
/* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
Create a new start tag token, set its tag name to the lowercase
version of the input character (add 0x0020 to the character's code
point), then switch to the tag name state. (Don't emit the token
yet; further details will be filled in before it is emitted.) */
$this->token = array(
- 'name' => strtolower($char),
- 'type' => self::STARTTAG,
- 'attr' => array()
+ 'name' => strtolower($char),
+ 'type' => self::STARTTAG,
+ 'attr' => array()
);
$this->state = 'tagName';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Parse error. Emit a U+003C LESS-THAN SIGN character token and a
U+003E GREATER-THAN SIGN character token. Switch to the data state. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => '<>'
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => '<>'
+ )
+ );
$this->state = 'data';
- } elseif($char === '?') {
+ } elseif ($char === '?') {
/* U+003F QUESTION MARK (?)
Parse error. Switch to the bogus comment state. */
$this->state = 'bogusComment';
@@ -354,25 +711,31 @@ class HTML5 {
/* Anything else
Parse error. Emit a U+003C LESS-THAN SIGN character token and
reconsume the current input character in the data state. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => '<'
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => '<'
+ )
+ );
$this->char--;
$this->state = 'data';
}
- break;
+ break;
}
}
- private function closeTagOpenState() {
+ private function closeTagOpenState()
+ {
$next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
$the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
- if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
- (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/',
- $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) {
+ if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
+ (!$the_same || ($the_same && (!preg_match(
+ '/[\t\n\x0b\x0c >\/]/',
+ $this->character($this->char + 1 + strlen($next_node))
+ ) || $this->EOF === $this->char)))
+ ) {
/* If the content model flag is set to the RCDATA or CDATA states then
examine the next few characters. If they do not match the tag name of
the last start tag token emitted (case insensitively), or if they do but
@@ -388,10 +751,12 @@ class HTML5 {
...then there is a parse error. Emit a U+003C LESS-THAN SIGN character
token, a U+002F SOLIDUS character token, and switch to the data state
to process the next input character. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => ''
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => ''
+ )
+ );
$this->state = 'data';
@@ -402,32 +767,34 @@ class HTML5 {
$this->char++;
$char = $this->char();
- if(preg_match('/^[A-Za-z]$/', $char)) {
+ if (preg_match('/^[A-Za-z]$/', $char)) {
/* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z
Create a new end tag token, set its tag name to the lowercase version
of the input character (add 0x0020 to the character's code point), then
switch to the tag name state. (Don't emit the token yet; further details
will be filled in before it is emitted.) */
$this->token = array(
- 'name' => strtolower($char),
- 'type' => self::ENDTAG
+ 'name' => strtolower($char),
+ 'type' => self::ENDTAG
);
$this->state = 'tagName';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Parse error. Switch to the data state. */
$this->state = 'data';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
SOLIDUS character token. Reconsume the EOF character in the data state. */
- $this->emitToken(array(
- 'type' => self::CHARACTR,
- 'data' => ''
- ));
+ $this->emitToken(
+ array(
+ 'type' => self::CHARACTR,
+ 'data' => ''
+ )
+ );
$this->char--;
$this->state = 'data';
@@ -439,12 +806,13 @@ class HTML5 {
}
}
- private function tagNameState() {
+ private function tagNameState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -453,13 +821,13 @@ class HTML5 {
Switch to the before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the EOF
character in the data state. */
@@ -468,7 +836,7 @@ class HTML5 {
$this->char--;
$this->state = 'data';
- } elseif($char === '/') {
+ } elseif ($char === '/') {
/* U+002F SOLIDUS (/)
Parse error unless this is a permitted slash. Switch to the before
attribute name state. */
@@ -483,12 +851,13 @@ class HTML5 {
}
}
- private function beforeAttributeNameState() {
+ private function beforeAttributeNameState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -497,19 +866,19 @@ class HTML5 {
Stay in the before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($char === '/') {
+ } elseif ($char === '/') {
/* U+002F SOLIDUS (/)
Parse error unless this is a permitted slash. Stay in the before
attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the EOF
character in the data state. */
@@ -524,7 +893,7 @@ class HTML5 {
name to the current input character, and its value to the empty string.
Switch to the attribute name state. */
$this->token['attr'][] = array(
- 'name' => strtolower($char),
+ 'name' => strtolower($char),
'value' => null
);
@@ -532,12 +901,13 @@ class HTML5 {
}
}
- private function attributeNameState() {
+ private function attributeNameState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -546,24 +916,24 @@ class HTML5 {
Stay in the before attribute name state. */
$this->state = 'afterAttributeName';
- } elseif($char === '=') {
+ } elseif ($char === '=') {
/* U+003D EQUALS SIGN (=)
Switch to the before attribute value state. */
$this->state = 'beforeAttributeValue';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+ } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
/* U+002F SOLIDUS (/)
Parse error unless this is a permitted slash. Switch to the before
attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the EOF
character in the data state. */
@@ -583,12 +953,13 @@ class HTML5 {
}
}
- private function afterAttributeNameState() {
+ private function afterAttributeNameState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -597,24 +968,24 @@ class HTML5 {
Stay in the after attribute name state. */
$this->state = 'afterAttributeName';
- } elseif($char === '=') {
+ } elseif ($char === '=') {
/* U+003D EQUALS SIGN (=)
Switch to the before attribute value state. */
$this->state = 'beforeAttributeValue';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($char === '/' && $this->character($this->char + 1) !== '>') {
+ } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
/* U+002F SOLIDUS (/)
Parse error unless this is a permitted slash. Switch to the
before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the EOF
character in the data state. */
@@ -629,7 +1000,7 @@ class HTML5 {
name to the current input character, and its value to the empty string.
Switch to the attribute name state. */
$this->token['attr'][] = array(
- 'name' => strtolower($char),
+ 'name' => strtolower($char),
'value' => null
);
@@ -637,12 +1008,13 @@ class HTML5 {
}
}
- private function beforeAttributeValueState() {
+ private function beforeAttributeValueState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -651,24 +1023,24 @@ class HTML5 {
Stay in the before attribute value state. */
$this->state = 'beforeAttributeValue';
- } elseif($char === '"') {
+ } elseif ($char === '"') {
/* U+0022 QUOTATION MARK (")
Switch to the attribute value (double-quoted) state. */
$this->state = 'attributeValueDoubleQuoted';
- } elseif($char === '&') {
+ } elseif ($char === '&') {
/* U+0026 AMPERSAND (&)
Switch to the attribute value (unquoted) state and reconsume
this input character. */
$this->char--;
$this->state = 'attributeValueUnquoted';
- } elseif($char === '\'') {
+ } elseif ($char === '\'') {
/* U+0027 APOSTROPHE (')
Switch to the attribute value (single-quoted) state. */
$this->state = 'attributeValueSingleQuoted';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
@@ -685,22 +1057,23 @@ class HTML5 {
}
}
- private function attributeValueDoubleQuotedState() {
+ private function attributeValueDoubleQuotedState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if($char === '"') {
+ if ($char === '"') {
/* U+0022 QUOTATION MARK (")
Switch to the before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($char === '&') {
+ } elseif ($char === '&') {
/* U+0026 AMPERSAND (&)
Switch to the entity in attribute value state. */
$this->entityInAttributeValueState('double');
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the character
in the data state. */
@@ -720,22 +1093,23 @@ class HTML5 {
}
}
- private function attributeValueSingleQuotedState() {
+ private function attributeValueSingleQuotedState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if($char === '\'') {
+ if ($char === '\'') {
/* U+0022 QUOTATION MARK (')
Switch to the before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($char === '&') {
+ } elseif ($char === '&') {
/* U+0026 AMPERSAND (&)
Switch to the entity in attribute value state. */
$this->entityInAttributeValueState('single');
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
/* EOF
Parse error. Emit the current tag token. Reconsume the character
in the data state. */
@@ -755,12 +1129,13 @@ class HTML5 {
}
}
- private function attributeValueUnquotedState() {
+ private function attributeValueUnquotedState()
+ {
// Consume the next input character:
$this->char++;
$char = $this->character($this->char);
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
/* U+0009 CHARACTER TABULATION
U+000A LINE FEED (LF)
U+000B LINE TABULATION
@@ -769,12 +1144,12 @@ class HTML5 {
Switch to the before attribute name state. */
$this->state = 'beforeAttributeName';
- } elseif($char === '&') {
+ } elseif ($char === '&') {
/* U+0026 AMPERSAND (&)
Switch to the entity in attribute value state. */
$this->entityInAttributeValueState();
- } elseif($char === '>') {
+ } elseif ($char === '>') {
/* U+003E GREATER-THAN SIGN (>)
Emit the current tag token. Switch to the data state. */
$this->emitToken($this->token);
@@ -791,7 +1166,8 @@ class HTML5 {
}
}
- private function entityInAttributeValueState() {
+ private function entityInAttributeValueState()
+ {
// Attempt to consume an entity.
$entity = $this->entity();
@@ -806,7 +1182,8 @@ class HTML5 {
$this->token['attr'][$last]['value'] .= $char;
}
- private function bogusCommentState() {
+ private function bogusCommentState()
+ {
/* Consume every character up to the first U+003E GREATER-THAN SIGN
character (>) or the end of the file (EOF), whichever comes first. Emit
a comment token whose data is the concatenation of all the characters
@@ -816,10 +1193,12 @@ class HTML5 {
end of the file otherwise. (If the comment was started by the end of
the file (EOF), the token is empty.) */
$data = $this->characters('^>', $this->char);
- $this->emitToken(array(
- 'data' => $data,
- 'type' => self::COMMENT
- ));
+ $this->emitToken(
+ array(
+ 'data' => $data,
+ 'type' => self::COMMENT
+ )
+ );
$this->char += strlen($data);
@@ -827,16 +1206,17 @@ class HTML5 {
$this->state = 'data';
/* If the end of the file was reached, reconsume the EOF character. */
- if($this->char === $this->EOF) {
+ if ($this->char === $this->EOF) {
$this->char = $this->EOF - 1;
}
}
- private function markupDeclarationOpenState() {
+ private function markupDeclarationOpenState()
+ {
/* If the next two characters are both U+002D HYPHEN-MINUS (-)
characters, consume those two characters, create a comment token whose
data is the empty string, and switch to the comment state. */
- if($this->character($this->char + 1, 2) === '--') {
+ if ($this->character($this->char + 1, 2) === '--') {
$this->char += 2;
$this->state = 'comment';
$this->token = array(
@@ -844,41 +1224,42 @@ class HTML5 {
'type' => self::COMMENT
);
- /* Otherwise if the next seven chacacters are a case-insensitive match
- for the word "DOCTYPE", then consume those characters and switch to the
- DOCTYPE state. */
- } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
+ /* Otherwise if the next seven chacacters are a case-insensitive match
+ for the word "DOCTYPE", then consume those characters and switch to the
+ DOCTYPE state. */
+ } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
$this->char += 7;
$this->state = 'doctype';
- /* Otherwise, is is a parse error. Switch to the bogus comment state.
- The next character that is consumed, if any, is the first character
- that will be in the comment. */
+ /* Otherwise, is is a parse error. Switch to the bogus comment state.
+ The next character that is consumed, if any, is the first character
+ that will be in the comment. */
} else {
$this->char++;
$this->state = 'bogusComment';
}
}
- private function commentState() {
+ private function commentState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
/* U+002D HYPHEN-MINUS (-) */
- if($char === '-') {
+ if ($char === '-') {
/* Switch to the comment dash state */
$this->state = 'commentDash';
- /* EOF */
- } elseif($this->char === $this->EOF) {
+ /* EOF */
+ } elseif ($this->char === $this->EOF) {
/* Parse error. Emit the comment token. Reconsume the EOF character
in the data state. */
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
- /* Anything else */
+ /* Anything else */
} else {
/* Append the input character to the comment token's data. Stay in
the comment state. */
@@ -886,62 +1267,65 @@ class HTML5 {
}
}
- private function commentDashState() {
+ private function commentDashState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
/* U+002D HYPHEN-MINUS (-) */
- if($char === '-') {
+ if ($char === '-') {
/* Switch to the comment end state */
$this->state = 'commentEnd';
- /* EOF */
- } elseif($this->char === $this->EOF) {
+ /* EOF */
+ } elseif ($this->char === $this->EOF) {
/* Parse error. Emit the comment token. Reconsume the EOF character
in the data state. */
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
- /* Anything else */
+ /* Anything else */
} else {
/* Append a U+002D HYPHEN-MINUS (-) character and the input
character to the comment token's data. Switch to the comment state. */
- $this->token['data'] .= '-'.$char;
+ $this->token['data'] .= '-' . $char;
$this->state = 'comment';
}
}
- private function commentEndState() {
+ private function commentEndState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if($char === '>') {
+ if ($char === '>') {
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($char === '-') {
+ } elseif ($char === '-') {
$this->token['data'] .= '-';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
} else {
- $this->token['data'] .= '--'.$char;
+ $this->token['data'] .= '--' . $char;
$this->state = 'comment';
}
}
- private function doctypeState() {
+ private function doctypeState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
$this->state = 'beforeDoctypeName';
} else {
@@ -950,15 +1334,16 @@ class HTML5 {
}
}
- private function beforeDoctypeNameState() {
+ private function beforeDoctypeNameState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
// Stay in the before DOCTYPE name state.
- } elseif(preg_match('/^[a-z]$/', $char)) {
+ } elseif (preg_match('/^[a-z]$/', $char)) {
$this->token = array(
'name' => strtoupper($char),
'type' => self::DOCTYPE,
@@ -967,21 +1352,25 @@ class HTML5 {
$this->state = 'doctypeName';
- } elseif($char === '>') {
- $this->emitToken(array(
- 'name' => null,
- 'type' => self::DOCTYPE,
- 'error' => true
- ));
+ } elseif ($char === '>') {
+ $this->emitToken(
+ array(
+ 'name' => null,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ )
+ );
$this->state = 'data';
- } elseif($this->char === $this->EOF) {
- $this->emitToken(array(
- 'name' => null,
- 'type' => self::DOCTYPE,
- 'error' => true
- ));
+ } elseif ($this->char === $this->EOF) {
+ $this->emitToken(
+ array(
+ 'name' => null,
+ 'type' => self::DOCTYPE,
+ 'error' => true
+ )
+ );
$this->char--;
$this->state = 'data';
@@ -997,22 +1386,23 @@ class HTML5 {
}
}
- private function doctypeNameState() {
+ private function doctypeNameState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
$this->state = 'AfterDoctypeName';
- } elseif($char === '>') {
+ } elseif ($char === '>') {
$this->emitToken($this->token);
$this->state = 'data';
- } elseif(preg_match('/^[a-z]$/', $char)) {
+ } elseif (preg_match('/^[a-z]$/', $char)) {
$this->token['name'] .= strtoupper($char);
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
@@ -1026,19 +1416,20 @@ class HTML5 {
: true;
}
- private function afterDoctypeNameState() {
+ private function afterDoctypeNameState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
+ if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
// Stay in the DOCTYPE name state.
- } elseif($char === '>') {
+ } elseif ($char === '>') {
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
@@ -1049,16 +1440,17 @@ class HTML5 {
}
}
- private function bogusDoctypeState() {
+ private function bogusDoctypeState()
+ {
/* Consume the next input character: */
$this->char++;
$char = $this->char();
- if($char === '>') {
+ if ($char === '>') {
$this->emitToken($this->token);
$this->state = 'data';
- } elseif($this->char === $this->EOF) {
+ } elseif ($this->char === $this->EOF) {
$this->emitToken($this->token);
$this->char--;
$this->state = 'data';
@@ -1068,22 +1460,23 @@ class HTML5 {
}
}
- private function entity() {
+ private function entity()
+ {
$start = $this->char;
// This section defines how to consume an entity. This definition is
// used when parsing entities in text and in attributes.
// The behaviour depends on the identity of the next character (the
- // one immediately after the U+0026 AMPERSAND character):
+ // one immediately after the U+0026 AMPERSAND character):
- switch($this->character($this->char + 1)) {
+ switch ($this->character($this->char + 1)) {
// U+0023 NUMBER SIGN (#)
case '#':
// The behaviour further depends on the character after the
// U+0023 NUMBER SIGN:
- switch($this->character($this->char + 1)) {
+ switch ($this->character($this->char + 1)) {
// U+0078 LATIN SMALL LETTER X
// U+0058 LATIN CAPITAL LETTER X
case 'x':
@@ -1096,7 +1489,7 @@ class HTML5 {
// words, 0-9, A-F, a-f).
$char = 1;
$char_class = '0-9A-Fa-f';
- break;
+ break;
// Anything else
default:
@@ -1105,7 +1498,7 @@ class HTML5 {
// NINE (i.e. just 0-9).
$char = 0;
$char_class = '0-9';
- break;
+ break;
}
// Consume as many characters as match the range of characters
@@ -1116,7 +1509,7 @@ class HTML5 {
$cond = strlen($e_name) > 0;
// The rest of the parsing happens bellow.
- break;
+ break;
// Anything else
default:
@@ -1126,12 +1519,12 @@ class HTML5 {
$e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
$len = strlen($e_name);
- for($c = 1; $c <= $len; $c++) {
+ for ($c = 1; $c <= $len; $c++) {
$id = substr($e_name, 0, $c);
$this->char++;
- if(in_array($id, $this->entities)) {
- if ($e_name[$c-1] !== ';') {
+ if (in_array($id, $this->entities)) {
+ if ($e_name[$c - 1] !== ';') {
if ($c < $len && $e_name[$c] == ';') {
$this->char++; // consume extra semicolon
}
@@ -1143,10 +1536,10 @@ class HTML5 {
$cond = isset($entity);
// The rest of the parsing happens bellow.
- break;
+ break;
}
- if(!$cond) {
+ if (!$cond) {
// If no match can be made, then this is a parse error. No
// characters are consumed, and nothing is returned.
$this->char = $start;
@@ -1155,81 +1548,157 @@ class HTML5 {
// Return a character token for the character corresponding to the
// entity name (as given by the second column of the entities table).
- return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8');
+ return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
}
- private function emitToken($token) {
+ private function emitToken($token)
+ {
$emit = $this->tree->emitToken($token);
- if(is_int($emit)) {
+ if (is_int($emit)) {
$this->content_model = $emit;
- } elseif($token['type'] === self::ENDTAG) {
+ } elseif ($token['type'] === self::ENDTAG) {
$this->content_model = self::PCDATA;
}
}
- private function EOF() {
+ private function EOF()
+ {
$this->state = null;
- $this->tree->emitToken(array(
- 'type' => self::EOF
- ));
+ $this->tree->emitToken(
+ array(
+ 'type' => self::EOF
+ )
+ );
}
}
-class HTML5TreeConstructer {
+class HTML5TreeConstructer
+{
public $stack = array();
private $phase;
private $mode;
private $dom;
private $foster_parent = null;
- private $a_formatting = array();
+ private $a_formatting = array();
private $head_pointer = null;
private $form_pointer = null;
- private $scoping = array('button','caption','html','marquee','object','table','td','th');
- private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u');
- private $special = array('address','area','base','basefont','bgsound',
- 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl',
- 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5',
- 'h6','head','hr','iframe','image','img','input','isindex','li','link',
- 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup',
- 'option','p','param','plaintext','pre','script','select','spacer','style',
- 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
+ private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th');
+ private $formatting = array(
+ 'a',
+ 'b',
+ 'big',
+ 'em',
+ 'font',
+ 'i',
+ 'nobr',
+ 's',
+ 'small',
+ 'strike',
+ 'strong',
+ 'tt',
+ 'u'
+ );
+ private $special = array(
+ 'address',
+ 'area',
+ 'base',
+ 'basefont',
+ 'bgsound',
+ 'blockquote',
+ 'body',
+ 'br',
+ 'center',
+ 'col',
+ 'colgroup',
+ 'dd',
+ 'dir',
+ 'div',
+ 'dl',
+ 'dt',
+ 'embed',
+ 'fieldset',
+ 'form',
+ 'frame',
+ 'frameset',
+ 'h1',
+ 'h2',
+ 'h3',
+ 'h4',
+ 'h5',
+ 'h6',
+ 'head',
+ 'hr',
+ 'iframe',
+ 'image',
+ 'img',
+ 'input',
+ 'isindex',
+ 'li',
+ 'link',
+ 'listing',
+ 'menu',
+ 'meta',
+ 'noembed',
+ 'noframes',
+ 'noscript',
+ 'ol',
+ 'optgroup',
+ 'option',
+ 'p',
+ 'param',
+ 'plaintext',
+ 'pre',
+ 'script',
+ 'select',
+ 'spacer',
+ 'style',
+ 'tbody',
+ 'textarea',
+ 'tfoot',
+ 'thead',
+ 'title',
+ 'tr',
+ 'ul',
+ 'wbr'
+ );
// The different phases.
const INIT_PHASE = 0;
const ROOT_PHASE = 1;
const MAIN_PHASE = 2;
- const END_PHASE = 3;
+ const END_PHASE = 3;
// The different insertion modes for the main phase.
const BEFOR_HEAD = 0;
- const IN_HEAD = 1;
+ const IN_HEAD = 1;
const AFTER_HEAD = 2;
- const IN_BODY = 3;
- const IN_TABLE = 4;
+ const IN_BODY = 3;
+ const IN_TABLE = 4;
const IN_CAPTION = 5;
- const IN_CGROUP = 6;
- const IN_TBODY = 7;
- const IN_ROW = 8;
- const IN_CELL = 9;
- const IN_SELECT = 10;
+ const IN_CGROUP = 6;
+ const IN_TBODY = 7;
+ const IN_ROW = 8;
+ const IN_CELL = 9;
+ const IN_SELECT = 10;
const AFTER_BODY = 11;
- const IN_FRAME = 12;
+ const IN_FRAME = 12;
const AFTR_FRAME = 13;
// The different types of elements.
- const SPECIAL = 0;
- const SCOPING = 1;
+ const SPECIAL = 0;
+ const SCOPING = 1;
const FORMATTING = 2;
- const PHRASING = 3;
+ const PHRASING = 3;
- const MARKER = 0;
+ const MARKER = 0;
- public function __construct() {
+ public function __construct()
+ {
$this->phase = self::INIT_PHASE;
$this->mode = self::BEFOR_HEAD;
$this->dom = new DOMDocument;
@@ -1241,16 +1710,26 @@ class HTML5TreeConstructer {
}
// Process tag tokens
- public function emitToken($token) {
- switch($this->phase) {
- case self::INIT_PHASE: return $this->initPhase($token); break;
- case self::ROOT_PHASE: return $this->rootElementPhase($token); break;
- case self::MAIN_PHASE: return $this->mainPhase($token); break;
- case self::END_PHASE : return $this->trailingEndPhase($token); break;
+ public function emitToken($token)
+ {
+ switch ($this->phase) {
+ case self::INIT_PHASE:
+ return $this->initPhase($token);
+ break;
+ case self::ROOT_PHASE:
+ return $this->rootElementPhase($token);
+ break;
+ case self::MAIN_PHASE:
+ return $this->mainPhase($token);
+ break;
+ case self::END_PHASE :
+ return $this->trailingEndPhase($token);
+ break;
}
}
- private function initPhase($token) {
+ private function initPhase($token)
+ {
/* Initially, the tree construction stage must handle each token
emitted from the tokenisation stage as follows: */
@@ -1262,13 +1741,14 @@ class HTML5TreeConstructer {
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE
An end-of-file token */
- if((isset($token['error']) && $token['error']) ||
- $token['type'] === HTML5::COMMENT ||
- $token['type'] === HTML5::STARTTAG ||
- $token['type'] === HTML5::ENDTAG ||
- $token['type'] === HTML5::EOF ||
- ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
- !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
+ if ((isset($token['error']) && $token['error']) ||
+ $token['type'] === HTML5::COMMENT ||
+ $token['type'] === HTML5::STARTTAG ||
+ $token['type'] === HTML5::ENDTAG ||
+ $token['type'] === HTML5::EOF ||
+ ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
+ !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
+ ) {
/* This specification does not define how to handle this case. In
particular, user agents may ignore the entirety of this specification
altogether for such documents, and instead invoke special parse modes
@@ -1277,8 +1757,8 @@ class HTML5TreeConstructer {
$this->phase = self::ROOT_PHASE;
return $this->rootElementPhase($token);
- /* A DOCTYPE token marked as being correct */
- } elseif(isset($token['error']) && !$token['error']) {
+ /* A DOCTYPE token marked as being correct */
+ } elseif (isset($token['error']) && !$token['error']) {
/* Append a DocumentType node to the Document node, with the name
attribute set to the name given in the DOCTYPE token (which will be
"HTML"), and the other attributes specific to DocumentType objects
@@ -1289,52 +1769,58 @@ class HTML5TreeConstructer {
stage. */
$this->phase = self::ROOT_PHASE;
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
- $token['data'])) {
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif (isset($token['data']) && preg_match(
+ '/^[\t\n\x0b\x0c ]+$/',
+ $token['data']
+ )
+ ) {
/* Append that character to the Document node. */
$text = $this->dom->createTextNode($token['data']);
$this->dom->appendChild($text);
}
}
- private function rootElementPhase($token) {
+ private function rootElementPhase($token)
+ {
/* After the initial phase, as each token is emitted from the tokenisation
stage, it must be processed as described in this section. */
/* A DOCTYPE token */
- if($token['type'] === HTML5::DOCTYPE) {
+ if ($token['type'] === HTML5::DOCTYPE) {
// Parse error. Ignore the token.
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the Document object with the data
attribute set to the data given in the comment token. */
$comment = $this->dom->createComment($token['data']);
$this->dom->appendChild($comment);
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- } elseif($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append that character to the Document node. */
$text = $this->dom->createTextNode($token['data']);
$this->dom->appendChild($text);
- /* A character token that is not one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
- (FF), or U+0020 SPACE
- A start tag token
- An end tag token
- An end-of-file token */
- } elseif(($token['type'] === HTML5::CHARACTR &&
- !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
- $token['type'] === HTML5::STARTTAG ||
- $token['type'] === HTML5::ENDTAG ||
- $token['type'] === HTML5::EOF) {
+ /* A character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
+ (FF), or U+0020 SPACE
+ A start tag token
+ An end tag token
+ An end-of-file token */
+ } elseif (($token['type'] === HTML5::CHARACTR &&
+ !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+ $token['type'] === HTML5::STARTTAG ||
+ $token['type'] === HTML5::ENDTAG ||
+ $token['type'] === HTML5::EOF
+ ) {
/* Create an HTMLElement node with the tag name html, in the HTML
namespace. Append it to the Document object. Switch to the main
phase and reprocess the current token. */
@@ -1347,15 +1833,16 @@ class HTML5TreeConstructer {
}
}
- private function mainPhase($token) {
+ private function mainPhase($token)
+ {
/* Tokens in the main phase must be handled as follows: */
/* A DOCTYPE token */
- if($token['type'] === HTML5::DOCTYPE) {
+ if ($token['type'] === HTML5::DOCTYPE) {
// Parse error. Ignore the token.
- /* A start tag token with the tag name "html" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
+ /* A start tag token with the tag name "html" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
/* If this start tag token was not the first start tag token, then
it is a parse error. */
@@ -1363,59 +1850,91 @@ class HTML5TreeConstructer {
is already present on the top element of the stack of open elements.
If it is not, add the attribute and its corresponding value to that
element. */
- foreach($token['attr'] as $attr) {
- if(!$this->stack[0]->hasAttribute($attr['name'])) {
+ foreach ($token['attr'] as $attr) {
+ if (!$this->stack[0]->hasAttribute($attr['name'])) {
$this->stack[0]->setAttribute($attr['name'], $attr['value']);
}
}
- /* An end-of-file token */
- } elseif($token['type'] === HTML5::EOF) {
+ /* An end-of-file token */
+ } elseif ($token['type'] === HTML5::EOF) {
/* Generate implied end tags. */
$this->generateImpliedEndTags();
- /* Anything else. */
+ /* Anything else. */
} else {
/* Depends on the insertion mode: */
- switch($this->mode) {
- case self::BEFOR_HEAD: return $this->beforeHead($token); break;
- case self::IN_HEAD: return $this->inHead($token); break;
- case self::AFTER_HEAD: return $this->afterHead($token); break;
- case self::IN_BODY: return $this->inBody($token); break;
- case self::IN_TABLE: return $this->inTable($token); break;
- case self::IN_CAPTION: return $this->inCaption($token); break;
- case self::IN_CGROUP: return $this->inColumnGroup($token); break;
- case self::IN_TBODY: return $this->inTableBody($token); break;
- case self::IN_ROW: return $this->inRow($token); break;
- case self::IN_CELL: return $this->inCell($token); break;
- case self::IN_SELECT: return $this->inSelect($token); break;
- case self::AFTER_BODY: return $this->afterBody($token); break;
- case self::IN_FRAME: return $this->inFrameset($token); break;
- case self::AFTR_FRAME: return $this->afterFrameset($token); break;
- case self::END_PHASE: return $this->trailingEndPhase($token); break;
+ switch ($this->mode) {
+ case self::BEFOR_HEAD:
+ return $this->beforeHead($token);
+ break;
+ case self::IN_HEAD:
+ return $this->inHead($token);
+ break;
+ case self::AFTER_HEAD:
+ return $this->afterHead($token);
+ break;
+ case self::IN_BODY:
+ return $this->inBody($token);
+ break;
+ case self::IN_TABLE:
+ return $this->inTable($token);
+ break;
+ case self::IN_CAPTION:
+ return $this->inCaption($token);
+ break;
+ case self::IN_CGROUP:
+ return $this->inColumnGroup($token);
+ break;
+ case self::IN_TBODY:
+ return $this->inTableBody($token);
+ break;
+ case self::IN_ROW:
+ return $this->inRow($token);
+ break;
+ case self::IN_CELL:
+ return $this->inCell($token);
+ break;
+ case self::IN_SELECT:
+ return $this->inSelect($token);
+ break;
+ case self::AFTER_BODY:
+ return $this->afterBody($token);
+ break;
+ case self::IN_FRAME:
+ return $this->inFrameset($token);
+ break;
+ case self::AFTR_FRAME:
+ return $this->afterFrameset($token);
+ break;
+ case self::END_PHASE:
+ return $this->trailingEndPhase($token);
+ break;
}
}
}
- private function beforeHead($token) {
+ private function beforeHead($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data attribute
set to the data given in the comment token. */
$this->insertComment($token['data']);
- /* A start tag token with the tag name "head" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
+ /* A start tag token with the tag name "head" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
/* Create an element for the token, append the new element to the
current node and push it onto the stack of open elements. */
$element = $this->insertElement($token);
@@ -1426,32 +1945,38 @@ class HTML5TreeConstructer {
/* Change the insertion mode to "in head". */
$this->mode = self::IN_HEAD;
- /* A start tag token whose tag name is one of: "base", "link", "meta",
- "script", "style", "title". Or an end tag with the tag name "html".
- Or a character token that is not one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE. Or any other start tag token */
- } elseif($token['type'] === HTML5::STARTTAG ||
- ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
- ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
- $token['data']))) {
+ /* A start tag token whose tag name is one of: "base", "link", "meta",
+ "script", "style", "title". Or an end tag with the tag name "html".
+ Or a character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE. Or any other start tag token */
+ } elseif ($token['type'] === HTML5::STARTTAG ||
+ ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
+ ($token['type'] === HTML5::CHARACTR && !preg_match(
+ '/^[\t\n\x0b\x0c ]$/',
+ $token['data']
+ ))
+ ) {
/* Act as if a start tag token with the tag name "head" and no
attributes had been seen, then reprocess the current token. */
- $this->beforeHead(array(
- 'name' => 'head',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
+ $this->beforeHead(
+ array(
+ 'name' => 'head',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
return $this->inHead($token);
- /* Any other end tag */
- } elseif($token['type'] === HTML5::ENDTAG) {
+ /* Any other end tag */
+ } elseif ($token['type'] === HTML5::ENDTAG) {
/* Parse error. Ignore the token. */
}
}
- private function inHead($token) {
+ private function inHead($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
@@ -1461,30 +1986,34 @@ class HTML5TreeConstructer {
THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
or script element, append the character to the current node regardless
of its content. */
- if(($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
- $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
- array('title', 'style', 'script')))) {
+ if (($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
+ $token['type'] === HTML5::CHARACTR && in_array(
+ end($this->stack)->nodeName,
+ array('title', 'style', 'script')
+ ))
+ ) {
/* Append the character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data attribute
set to the data given in the comment token. */
$this->insertComment($token['data']);
- } elseif($token['type'] === HTML5::ENDTAG &&
- in_array($token['name'], array('title', 'style', 'script'))) {
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('title', 'style', 'script'))
+ ) {
array_pop($this->stack);
return HTML5::PCDATA;
- /* A start tag with the tag name "title" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
+ /* A start tag with the tag name "title" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
/* Create an element for the token and append the new element to the
node pointed to by the head element pointer, or, if that is null
(innerHTML case), to the current node. */
- if($this->head_pointer !== null) {
+ if ($this->head_pointer !== null) {
$element = $this->insertElement($token, false);
$this->head_pointer->appendChild($element);
@@ -1495,12 +2024,12 @@ class HTML5TreeConstructer {
/* Switch the tokeniser's content model flag to the RCDATA state. */
return HTML5::RCDATA;
- /* A start tag with the tag name "style" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
+ /* A start tag with the tag name "style" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
/* Create an element for the token and append the new element to the
node pointed to by the head element pointer, or, if that is null
(innerHTML case), to the current node. */
- if($this->head_pointer !== null) {
+ if ($this->head_pointer !== null) {
$element = $this->insertElement($token, false);
$this->head_pointer->appendChild($element);
@@ -1511,8 +2040,8 @@ class HTML5TreeConstructer {
/* Switch the tokeniser's content model flag to the CDATA state. */
return HTML5::CDATA;
- /* A start tag with the tag name "script" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
+ /* A start tag with the tag name "script" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
/* Create an element for the token. */
$element = $this->insertElement($token, false);
$this->head_pointer->appendChild($element);
@@ -1520,13 +2049,16 @@ class HTML5TreeConstructer {
/* Switch the tokeniser's content model flag to the CDATA state. */
return HTML5::CDATA;
- /* A start tag with the tag name "base", "link", or "meta" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('base', 'link', 'meta'))) {
+ /* A start tag with the tag name "base", "link", or "meta" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array('base', 'link', 'meta')
+ )
+ ) {
/* Create an element for the token and append the new element to the
node pointed to by the head element pointer, or, if that is null
(innerHTML case), to the current node. */
- if($this->head_pointer !== null) {
+ if ($this->head_pointer !== null) {
$element = $this->insertElement($token, false);
$this->head_pointer->appendChild($element);
array_pop($this->stack);
@@ -1535,14 +2067,14 @@ class HTML5TreeConstructer {
$this->insertElement($token);
}
- /* An end tag with the tag name "head" */
- } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
+ /* An end tag with the tag name "head" */
+ } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
/* If the current node is a head element, pop the current node off
the stack of open elements. */
- if($this->head_pointer->isSameNode(end($this->stack))) {
+ if ($this->head_pointer->isSameNode(end($this->stack))) {
array_pop($this->stack);
- /* Otherwise, this is a parse error. */
+ /* Otherwise, this is a parse error. */
} else {
// k
}
@@ -1550,22 +2082,25 @@ class HTML5TreeConstructer {
/* Change the insertion mode to "after head". */
$this->mode = self::AFTER_HEAD;
- /* A start tag with the tag name "head" or an end tag except "html". */
- } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
- ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
+ /* A start tag with the tag name "head" or an end tag except "html". */
+ } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
+ ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
+ ) {
// Parse error. Ignore the token.
- /* Anything else */
+ /* Anything else */
} else {
/* If the current node is a head element, act as if an end tag
token with the tag name "head" had been seen. */
- if($this->head_pointer->isSameNode(end($this->stack))) {
- $this->inHead(array(
- 'name' => 'head',
- 'type' => HTML5::ENDTAG
- ));
+ if ($this->head_pointer->isSameNode(end($this->stack))) {
+ $this->inHead(
+ array(
+ 'name' => 'head',
+ 'type' => HTML5::ENDTAG
+ )
+ );
- /* Otherwise, change the insertion mode to "after head". */
+ /* Otherwise, change the insertion mode to "after head". */
} else {
$this->mode = self::AFTER_HEAD;
}
@@ -1575,66 +2110,74 @@ class HTML5TreeConstructer {
}
}
- private function afterHead($token) {
+ private function afterHead($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data attribute
set to the data given in the comment token. */
$this->insertComment($token['data']);
- /* A start tag token with the tag name "body" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
+ /* A start tag token with the tag name "body" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
/* Insert a body element for the token. */
$this->insertElement($token);
/* Change the insertion mode to "in body". */
$this->mode = self::IN_BODY;
- /* A start tag token with the tag name "frameset" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
+ /* A start tag token with the tag name "frameset" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
/* Insert a frameset element for the token. */
$this->insertElement($token);
/* Change the insertion mode to "in frameset". */
$this->mode = self::IN_FRAME;
- /* A start tag token whose tag name is one of: "base", "link", "meta",
- "script", "style", "title" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('base', 'link', 'meta', 'script', 'style', 'title'))) {
+ /* A start tag token whose tag name is one of: "base", "link", "meta",
+ "script", "style", "title" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array('base', 'link', 'meta', 'script', 'style', 'title')
+ )
+ ) {
/* Parse error. Switch the insertion mode back to "in head" and
reprocess the token. */
$this->mode = self::IN_HEAD;
return $this->inHead($token);
- /* Anything else */
+ /* Anything else */
} else {
/* Act as if a start tag token with the tag name "body" and no
attributes had been seen, and then reprocess the current token. */
- $this->afterHead(array(
- 'name' => 'body',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
+ $this->afterHead(
+ array(
+ 'name' => 'body',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
return $this->inBody($token);
}
}
- private function inBody($token) {
+ private function inBody($token)
+ {
/* Handle the token as follows: */
- switch($token['type']) {
+ switch ($token['type']) {
/* A character token */
case HTML5::CHARACTR:
/* Reconstruct the active formatting elements, if any. */
@@ -1642,1015 +2185,1159 @@ class HTML5TreeConstructer {
/* Append the token's character to the current node. */
$this->insertText($token['data']);
- break;
+ break;
/* A comment token */
case HTML5::COMMENT:
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$this->insertComment($token['data']);
- break;
+ break;
case HTML5::STARTTAG:
- switch($token['name']) {
- /* A start tag token whose tag name is one of: "script",
- "style" */
- case 'script': case 'style':
- /* Process the token as if the insertion mode had been "in
- head". */
- return $this->inHead($token);
- break;
+ switch ($token['name']) {
+ /* A start tag token whose tag name is one of: "script",
+ "style" */
+ case 'script':
+ case 'style':
+ /* Process the token as if the insertion mode had been "in
+ head". */
+ return $this->inHead($token);
+ break;
- /* A start tag token whose tag name is one of: "base", "link",
- "meta", "title" */
- case 'base': case 'link': case 'meta': case 'title':
- /* Parse error. Process the token as if the insertion mode
- had been "in head". */
- return $this->inHead($token);
- break;
+ /* A start tag token whose tag name is one of: "base", "link",
+ "meta", "title" */
+ case 'base':
+ case 'link':
+ case 'meta':
+ case 'title':
+ /* Parse error. Process the token as if the insertion mode
+ had been "in head". */
+ return $this->inHead($token);
+ break;
- /* A start tag token with the tag name "body" */
- case 'body':
- /* Parse error. If the second element on the stack of open
- elements is not a body element, or, if the stack of open
- elements has only one node on it, then ignore the token.
- (innerHTML case) */
- if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
- // Ignore
+ /* A start tag token with the tag name "body" */
+ case 'body':
+ /* Parse error. If the second element on the stack of open
+ elements is not a body element, or, if the stack of open
+ elements has only one node on it, then ignore the token.
+ (innerHTML case) */
+ if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
+ // Ignore
- /* Otherwise, for each attribute on the token, check to see
- if the attribute is already present on the body element (the
- second element) on the stack of open elements. If it is not,
- add the attribute and its corresponding value to that
- element. */
- } else {
- foreach($token['attr'] as $attr) {
- if(!$this->stack[1]->hasAttribute($attr['name'])) {
- $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+ /* Otherwise, for each attribute on the token, check to see
+ if the attribute is already present on the body element (the
+ second element) on the stack of open elements. If it is not,
+ add the attribute and its corresponding value to that
+ element. */
+ } else {
+ foreach ($token['attr'] as $attr) {
+ if (!$this->stack[1]->hasAttribute($attr['name'])) {
+ $this->stack[1]->setAttribute($attr['name'], $attr['value']);
+ }
}
}
- }
- break;
+ break;
- /* A start tag whose tag name is one of: "address",
- "blockquote", "center", "dir", "div", "dl", "fieldset",
- "listing", "menu", "ol", "p", "ul" */
- case 'address': case 'blockquote': case 'center': case 'dir':
- case 'div': case 'dl': case 'fieldset': case 'listing':
- case 'menu': case 'ol': case 'p': case 'ul':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- break;
-
- /* A start tag whose tag name is "form" */
- case 'form':
- /* If the form element pointer is not null, ignore the
- token with a parse error. */
- if($this->form_pointer !== null) {
- // Ignore.
-
- /* Otherwise: */
- } else {
- /* If the stack of open elements has a p element in
- scope, then act as if an end tag with the tag name p
- had been seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
+ /* A start tag whose tag name is one of: "address",
+ "blockquote", "center", "dir", "div", "dl", "fieldset",
+ "listing", "menu", "ol", "p", "ul" */
+ case 'address':
+ case 'blockquote':
+ case 'center':
+ case 'dir':
+ case 'div':
+ case 'dl':
+ case 'fieldset':
+ case 'listing':
+ case 'menu':
+ case 'ol':
+ case 'p':
+ case 'ul':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
- /* Insert an HTML element for the token, and set the
- form element pointer to point to the element created. */
- $element = $this->insertElement($token);
- $this->form_pointer = $element;
- }
- break;
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
- /* A start tag whose tag name is "li", "dd" or "dt" */
- case 'li': case 'dd': case 'dt':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
+ /* A start tag whose tag name is "form" */
+ case 'form':
+ /* If the form element pointer is not null, ignore the
+ token with a parse error. */
+ if ($this->form_pointer !== null) {
+ // Ignore.
- $stack_length = count($this->stack) - 1;
-
- for($n = $stack_length; 0 <= $n; $n--) {
- /* 1. Initialise node to be the current node (the
- bottommost node of the stack). */
- $stop = false;
- $node = $this->stack[$n];
- $cat = $this->getElementCategory($node->tagName);
-
- /* 2. If node is an li, dd or dt element, then pop all
- the nodes from the current node up to node, including
- node, then stop this algorithm. */
- if($token['name'] === $node->tagName || ($token['name'] !== 'li'
- && ($node->tagName === 'dd' || $node->tagName === 'dt'))) {
- for($x = $stack_length; $x >= $n ; $x--) {
- array_pop($this->stack);
+ /* Otherwise: */
+ } else {
+ /* If the stack of open elements has a p element in
+ scope, then act as if an end tag with the tag name p
+ had been seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
- break;
+ /* Insert an HTML element for the token, and set the
+ form element pointer to point to the element created. */
+ $element = $this->insertElement($token);
+ $this->form_pointer = $element;
+ }
+ break;
+
+ /* A start tag whose tag name is "li", "dd" or "dt" */
+ case 'li':
+ case 'dd':
+ case 'dt':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
- /* 3. If node is not in the formatting category, and is
- not in the phrasing category, and is not an address or
- div element, then stop this algorithm. */
- if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
- $node->tagName !== 'address' && $node->tagName !== 'div') {
- break;
+ $stack_length = count($this->stack) - 1;
+
+ for ($n = $stack_length; 0 <= $n; $n--) {
+ /* 1. Initialise node to be the current node (the
+ bottommost node of the stack). */
+ $stop = false;
+ $node = $this->stack[$n];
+ $cat = $this->getElementCategory($node->tagName);
+
+ /* 2. If node is an li, dd or dt element, then pop all
+ the nodes from the current node up to node, including
+ node, then stop this algorithm. */
+ if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
+ && ($node->tagName === 'dd' || $node->tagName === 'dt'))
+ ) {
+ for ($x = $stack_length; $x >= $n; $x--) {
+ array_pop($this->stack);
+ }
+
+ break;
+ }
+
+ /* 3. If node is not in the formatting category, and is
+ not in the phrasing category, and is not an address or
+ div element, then stop this algorithm. */
+ if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
+ $node->tagName !== 'address' && $node->tagName !== 'div'
+ ) {
+ break;
+ }
}
- }
- /* Finally, insert an HTML element with the same tag
- name as the token's. */
- $this->insertElement($token);
- break;
+ /* Finally, insert an HTML element with the same tag
+ name as the token's. */
+ $this->insertElement($token);
+ break;
- /* A start tag token whose tag name is "plaintext" */
- case 'plaintext':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been
- seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
+ /* A start tag token whose tag name is "plaintext" */
+ case 'plaintext':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been
+ seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ }
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
- return HTML5::PLAINTEXT;
- break;
+ return HTML5::PLAINTEXT;
+ break;
- /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
- "h5", "h6" */
- case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
+ /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1':
+ case 'h2':
+ case 'h3':
+ case 'h4':
+ case 'h5':
+ case 'h6':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ }
- /* If the stack of open elements has in scope an element whose
- tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
- this is a parse error; pop elements from the stack until an
- element with one of those tag names has been popped from the
- stack. */
- while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
+ /* If the stack of open elements has in scope an element whose
+ tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+ this is a parse error; pop elements from the stack until an
+ element with one of those tag names has been popped from the
+ stack. */
+ while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
+ array_pop($this->stack);
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+ break;
+
+ /* A start tag whose tag name is "a" */
+ case 'a':
+ /* If the list of active formatting elements contains
+ an element whose tag name is "a" between the end of the
+ list and the last marker on the list (or the start of
+ the list if there is no marker on the list), then this
+ is a parse error; act as if an end tag with the tag name
+ "a" had been seen, then remove that element from the list
+ of active formatting elements and the stack of open
+ elements if the end tag didn't already remove it (it
+ might not have if the element is not in table scope). */
+ $leng = count($this->a_formatting);
+
+ for ($n = $leng - 1; $n >= 0; $n--) {
+ if ($this->a_formatting[$n] === self::MARKER) {
+ break;
+
+ } elseif ($this->a_formatting[$n]->nodeName === 'a') {
+ $this->emitToken(
+ array(
+ 'name' => 'a',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ break;
+ }
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ /* A start tag whose tag name is one of: "b", "big", "em", "font",
+ "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+ case 'b':
+ case 'big':
+ case 'em':
+ case 'font':
+ case 'i':
+ case 'nobr':
+ case 's':
+ case 'small':
+ case 'strike':
+ case 'strong':
+ case 'tt':
+ case 'u':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $el = $this->insertElement($token);
+
+ /* Add that element to the list of active formatting
+ elements. */
+ $this->a_formatting[] = $el;
+ break;
+
+ /* A start tag token whose tag name is "button" */
+ case 'button':
+ /* If the stack of open elements has a button element in scope,
+ then this is a parse error; act as if an end tag with the tag
+ name "button" had been seen, then reprocess the token. (We don't
+ do that. Unnecessary.) */
+ if ($this->elementInScope('button')) {
+ $this->inBody(
+ array(
+ 'name' => 'button',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ }
+
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+ break;
+
+ /* A start tag token whose tag name is one of: "marquee", "object" */
+ case 'marquee':
+ case 'object':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Insert a marker at the end of the list of active
+ formatting elements. */
+ $this->a_formatting[] = self::MARKER;
+ break;
+
+ /* A start tag token whose tag name is "xmp" */
+ case 'xmp':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Switch the content model flag to the CDATA state. */
+ return HTML5::CDATA;
+ break;
+
+ /* A start tag whose tag name is "table" */
+ case 'table':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ }
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in table". */
+ $this->mode = self::IN_TABLE;
+ break;
+
+ /* A start tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
+ case 'area':
+ case 'basefont':
+ case 'bgsound':
+ case 'br':
+ case 'embed':
+ case 'img':
+ case 'param':
+ case 'spacer':
+ case 'wbr':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Immediately pop the current node off the stack of open elements. */
array_pop($this->stack);
- }
+ break;
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
- break;
-
- /* A start tag whose tag name is "a" */
- case 'a':
- /* If the list of active formatting elements contains
- an element whose tag name is "a" between the end of the
- list and the last marker on the list (or the start of
- the list if there is no marker on the list), then this
- is a parse error; act as if an end tag with the tag name
- "a" had been seen, then remove that element from the list
- of active formatting elements and the stack of open
- elements if the end tag didn't already remove it (it
- might not have if the element is not in table scope). */
- $leng = count($this->a_formatting);
-
- for($n = $leng - 1; $n >= 0; $n--) {
- if($this->a_formatting[$n] === self::MARKER) {
- break;
-
- } elseif($this->a_formatting[$n]->nodeName === 'a') {
- $this->emitToken(array(
- 'name' => 'a',
- 'type' => HTML5::ENDTAG
- ));
- break;
+ /* A start tag whose tag name is "hr" */
+ case 'hr':
+ /* If the stack of open elements has a p element in scope,
+ then act as if an end tag with the tag name p had been seen. */
+ if ($this->elementInScope('p')) {
+ $this->emitToken(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
- }
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
- /* Insert an HTML element for the token. */
- $el = $this->insertElement($token);
+ /* Immediately pop the current node off the stack of open elements. */
+ array_pop($this->stack);
+ break;
- /* Add that element to the list of active formatting
- elements. */
- $this->a_formatting[] = $el;
+ /* A start tag whose tag name is "image" */
+ case 'image':
+ /* Parse error. Change the token's tag name to "img" and
+ reprocess it. (Don't ask.) */
+ $token['name'] = 'img';
+ return $this->inBody($token);
+ break;
+
+ /* A start tag whose tag name is "input" */
+ case 'input':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an input element for the token. */
+ $element = $this->insertElement($token, false);
+
+ /* If the form element pointer is not null, then associate the
+ input element with the form element pointed to by the form
+ element pointer. */
+ $this->form_pointer !== null
+ ? $this->form_pointer->appendChild($element)
+ : end($this->stack)->appendChild($element);
+
+ /* Pop that input element off the stack of open elements. */
+ array_pop($this->stack);
+ break;
+
+ /* A start tag whose tag name is "isindex" */
+ case 'isindex':
+ /* Parse error. */
+ // w/e
+
+ /* If the form element pointer is not null,
+ then ignore the token. */
+ if ($this->form_pointer === null) {
+ /* Act as if a start tag token with the tag name "form" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'body',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'hr',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
+
+ /* Act as if a start tag token with the tag name "p" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
+
+ /* Act as if a start tag token with the tag name "label"
+ had been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'label',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
+
+ /* Act as if a stream of character tokens had been seen. */
+ $this->insertText(
+ 'This is a searchable index. ' .
+ 'Insert your search keywords here: '
+ );
+
+ /* Act as if a start tag token with the tag name "input"
+ had been seen, with all the attributes from the "isindex"
+ token, except with the "name" attribute set to the value
+ "isindex" (ignoring any explicit "name" attribute). */
+ $attr = $token['attr'];
+ $attr[] = array('name' => 'name', 'value' => 'isindex');
+
+ $this->inBody(
+ array(
+ 'name' => 'input',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => $attr
+ )
+ );
+
+ /* Act as if a stream of character tokens had been seen
+ (see below for what they should say). */
+ $this->insertText(
+ 'This is a searchable index. ' .
+ 'Insert your search keywords here: '
+ );
+
+ /* Act as if an end tag token with the tag name "label"
+ had been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'label',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+
+ /* Act as if an end tag token with the tag name "p" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'p',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+
+ /* Act as if a start tag token with the tag name "hr" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'hr',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+
+ /* Act as if an end tag token with the tag name "form" had
+ been seen. */
+ $this->inBody(
+ array(
+ 'name' => 'form',
+ 'type' => HTML5::ENDTAG
+ )
+ );
+ }
+ break;
+
+ /* A start tag whose tag name is "textarea" */
+ case 'textarea':
+ $this->insertElement($token);
+
+ /* Switch the tokeniser's content model flag to the
+ RCDATA state. */
+ return HTML5::RCDATA;
+ break;
+
+ /* A start tag whose tag name is one of: "iframe", "noembed",
+ "noframes" */
+ case 'iframe':
+ case 'noembed':
+ case 'noframes':
+ $this->insertElement($token);
+
+ /* Switch the tokeniser's content model flag to the CDATA state. */
+ return HTML5::CDATA;
+ break;
+
+ /* A start tag whose tag name is "select" */
+ case 'select':
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ /* Insert an HTML element for the token. */
+ $this->insertElement($token);
+
+ /* Change the insertion mode to "in select". */
+ $this->mode = self::IN_SELECT;
+ break;
+
+ /* A start or end tag whose tag name is one of: "caption", "col",
+ "colgroup", "frame", "frameset", "head", "option", "optgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr". */
+ case 'caption':
+ case 'col':
+ case 'colgroup':
+ case 'frame':
+ case 'frameset':
+ case 'head':
+ case 'option':
+ case 'optgroup':
+ case 'tbody':
+ case 'td':
+ case 'tfoot':
+ case 'th':
+ case 'thead':
+ case 'tr':
+ // Parse error. Ignore the token.
+ break;
+
+ /* A start or end tag whose tag name is one of: "event-source",
+ "section", "nav", "article", "aside", "header", "footer",
+ "datagrid", "command" */
+ case 'event-source':
+ case 'section':
+ case 'nav':
+ case 'article':
+ case 'aside':
+ case 'header':
+ case 'footer':
+ case 'datagrid':
+ case 'command':
+ // Work in progress!
+ break;
+
+ /* A start tag token not covered by the previous entries */
+ default:
+ /* Reconstruct the active formatting elements, if any. */
+ $this->reconstructActiveFormattingElements();
+
+ $this->insertElement($token, true, true);
+ break;
+ }
break;
- /* A start tag whose tag name is one of: "b", "big", "em", "font",
- "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
- case 'b': case 'big': case 'em': case 'font': case 'i':
- case 'nobr': case 's': case 'small': case 'strike':
- case 'strong': case 'tt': case 'u':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $el = $this->insertElement($token);
-
- /* Add that element to the list of active formatting
- elements. */
- $this->a_formatting[] = $el;
- break;
-
- /* A start tag token whose tag name is "button" */
- case 'button':
- /* If the stack of open elements has a button element in scope,
- then this is a parse error; act as if an end tag with the tag
- name "button" had been seen, then reprocess the token. (We don't
- do that. Unnecessary.) */
- if($this->elementInScope('button')) {
- $this->inBody(array(
- 'name' => 'button',
- 'type' => HTML5::ENDTAG
- ));
- }
-
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Insert a marker at the end of the list of active
- formatting elements. */
- $this->a_formatting[] = self::MARKER;
- break;
-
- /* A start tag token whose tag name is one of: "marquee", "object" */
- case 'marquee': case 'object':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Insert a marker at the end of the list of active
- formatting elements. */
- $this->a_formatting[] = self::MARKER;
- break;
-
- /* A start tag token whose tag name is "xmp" */
- case 'xmp':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Switch the content model flag to the CDATA state. */
- return HTML5::CDATA;
- break;
-
- /* A start tag whose tag name is "table" */
- case 'table':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Change the insertion mode to "in table". */
- $this->mode = self::IN_TABLE;
- break;
-
- /* A start tag whose tag name is one of: "area", "basefont",
- "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
- case 'area': case 'basefont': case 'bgsound': case 'br':
- case 'embed': case 'img': case 'param': case 'spacer':
- case 'wbr':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- break;
-
- /* A start tag whose tag name is "hr" */
- case 'hr':
- /* If the stack of open elements has a p element in scope,
- then act as if an end tag with the tag name p had been seen. */
- if($this->elementInScope('p')) {
- $this->emitToken(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
- }
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Immediately pop the current node off the stack of open elements. */
- array_pop($this->stack);
- break;
-
- /* A start tag whose tag name is "image" */
- case 'image':
- /* Parse error. Change the token's tag name to "img" and
- reprocess it. (Don't ask.) */
- $token['name'] = 'img';
- return $this->inBody($token);
- break;
-
- /* A start tag whose tag name is "input" */
- case 'input':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an input element for the token. */
- $element = $this->insertElement($token, false);
-
- /* If the form element pointer is not null, then associate the
- input element with the form element pointed to by the form
- element pointer. */
- $this->form_pointer !== null
- ? $this->form_pointer->appendChild($element)
- : end($this->stack)->appendChild($element);
-
- /* Pop that input element off the stack of open elements. */
- array_pop($this->stack);
- break;
-
- /* A start tag whose tag name is "isindex" */
- case 'isindex':
- /* Parse error. */
- // w/e
-
- /* If the form element pointer is not null,
- then ignore the token. */
- if($this->form_pointer === null) {
- /* Act as if a start tag token with the tag name "form" had
- been seen. */
- $this->inBody(array(
- 'name' => 'body',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
-
- /* Act as if a start tag token with the tag name "hr" had
- been seen. */
- $this->inBody(array(
- 'name' => 'hr',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
-
- /* Act as if a start tag token with the tag name "p" had
- been seen. */
- $this->inBody(array(
- 'name' => 'p',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
-
- /* Act as if a start tag token with the tag name "label"
- had been seen. */
- $this->inBody(array(
- 'name' => 'label',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
-
- /* Act as if a stream of character tokens had been seen. */
- $this->insertText('This is a searchable index. '.
- 'Insert your search keywords here: ');
-
- /* Act as if a start tag token with the tag name "input"
- had been seen, with all the attributes from the "isindex"
- token, except with the "name" attribute set to the value
- "isindex" (ignoring any explicit "name" attribute). */
- $attr = $token['attr'];
- $attr[] = array('name' => 'name', 'value' => 'isindex');
-
- $this->inBody(array(
- 'name' => 'input',
- 'type' => HTML5::STARTTAG,
- 'attr' => $attr
- ));
-
- /* Act as if a stream of character tokens had been seen
- (see below for what they should say). */
- $this->insertText('This is a searchable index. '.
- 'Insert your search keywords here: ');
-
- /* Act as if an end tag token with the tag name "label"
- had been seen. */
- $this->inBody(array(
- 'name' => 'label',
- 'type' => HTML5::ENDTAG
- ));
-
- /* Act as if an end tag token with the tag name "p" had
- been seen. */
- $this->inBody(array(
- 'name' => 'p',
- 'type' => HTML5::ENDTAG
- ));
-
- /* Act as if a start tag token with the tag name "hr" had
- been seen. */
- $this->inBody(array(
- 'name' => 'hr',
- 'type' => HTML5::ENDTAG
- ));
-
- /* Act as if an end tag token with the tag name "form" had
- been seen. */
- $this->inBody(array(
- 'name' => 'form',
- 'type' => HTML5::ENDTAG
- ));
- }
- break;
-
- /* A start tag whose tag name is "textarea" */
- case 'textarea':
- $this->insertElement($token);
-
- /* Switch the tokeniser's content model flag to the
- RCDATA state. */
- return HTML5::RCDATA;
- break;
-
- /* A start tag whose tag name is one of: "iframe", "noembed",
- "noframes" */
- case 'iframe': case 'noembed': case 'noframes':
- $this->insertElement($token);
-
- /* Switch the tokeniser's content model flag to the CDATA state. */
- return HTML5::CDATA;
- break;
-
- /* A start tag whose tag name is "select" */
- case 'select':
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- /* Insert an HTML element for the token. */
- $this->insertElement($token);
-
- /* Change the insertion mode to "in select". */
- $this->mode = self::IN_SELECT;
- break;
-
- /* A start or end tag whose tag name is one of: "caption", "col",
- "colgroup", "frame", "frameset", "head", "option", "optgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr". */
- case 'caption': case 'col': case 'colgroup': case 'frame':
- case 'frameset': case 'head': case 'option': case 'optgroup':
- case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead':
- case 'tr':
- // Parse error. Ignore the token.
- break;
-
- /* A start or end tag whose tag name is one of: "event-source",
- "section", "nav", "article", "aside", "header", "footer",
- "datagrid", "command" */
- case 'event-source': case 'section': case 'nav': case 'article':
- case 'aside': case 'header': case 'footer': case 'datagrid':
- case 'command':
- // Work in progress!
- break;
-
- /* A start tag token not covered by the previous entries */
- default:
- /* Reconstruct the active formatting elements, if any. */
- $this->reconstructActiveFormattingElements();
-
- $this->insertElement($token, true, true);
- break;
- }
- break;
-
case HTML5::ENDTAG:
- switch($token['name']) {
- /* An end tag with the tag name "body" */
- case 'body':
- /* If the second element in the stack of open elements is
- not a body element, this is a parse error. Ignore the token.
- (innerHTML case) */
- if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
- // Ignore.
+ switch ($token['name']) {
+ /* An end tag with the tag name "body" */
+ case 'body':
+ /* If the second element in the stack of open elements is
+ not a body element, this is a parse error. Ignore the token.
+ (innerHTML case) */
+ if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
+ // Ignore.
- /* If the current node is not the body element, then this
- is a parse error. */
- } elseif(end($this->stack)->nodeName !== 'body') {
- // Parse error.
- }
-
- /* Change the insertion mode to "after body". */
- $this->mode = self::AFTER_BODY;
- break;
-
- /* An end tag with the tag name "html" */
- case 'html':
- /* Act as if an end tag with tag name "body" had been seen,
- then, if that token wasn't ignored, reprocess the current
- token. */
- $this->inBody(array(
- 'name' => 'body',
- 'type' => HTML5::ENDTAG
- ));
-
- return $this->afterBody($token);
- break;
-
- /* An end tag whose tag name is one of: "address", "blockquote",
- "center", "dir", "div", "dl", "fieldset", "listing", "menu",
- "ol", "pre", "ul" */
- case 'address': case 'blockquote': case 'center': case 'dir':
- case 'div': case 'dl': case 'fieldset': case 'listing':
- case 'menu': case 'ol': case 'pre': case 'ul':
- /* If the stack of open elements has an element in scope
- with the same tag name as that of the token, then generate
- implied end tags. */
- if($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags();
-
- /* Now, if the current node is not an element with
- the same tag name as that of the token, then this
- is a parse error. */
- // w/e
-
- /* If the stack of open elements has an element in
- scope with the same tag name as that of the token,
- then pop elements from this stack until an element
- with that tag name has been popped from the stack. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->stack[$n]->nodeName === $token['name']) {
- $n = -1;
- }
-
- array_pop($this->stack);
+ /* If the current node is not the body element, then this
+ is a parse error. */
+ } elseif (end($this->stack)->nodeName !== 'body') {
+ // Parse error.
}
- }
- break;
- /* An end tag whose tag name is "form" */
- case 'form':
- /* If the stack of open elements has an element in scope
- with the same tag name as that of the token, then generate
- implied end tags. */
- if($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags();
+ /* Change the insertion mode to "after body". */
+ $this->mode = self::AFTER_BODY;
+ break;
- }
+ /* An end tag with the tag name "html" */
+ case 'html':
+ /* Act as if an end tag with tag name "body" had been seen,
+ then, if that token wasn't ignored, reprocess the current
+ token. */
+ $this->inBody(
+ array(
+ 'name' => 'body',
+ 'type' => HTML5::ENDTAG
+ )
+ );
- if(end($this->stack)->nodeName !== $token['name']) {
- /* Now, if the current node is not an element with the
- same tag name as that of the token, then this is a parse
- error. */
- // w/e
-
- } else {
- /* Otherwise, if the current node is an element with
- the same tag name as that of the token pop that element
- from the stack. */
- array_pop($this->stack);
- }
-
- /* In any case, set the form element pointer to null. */
- $this->form_pointer = null;
- break;
-
- /* An end tag whose tag name is "p" */
- case 'p':
- /* If the stack of open elements has a p element in scope,
- then generate implied end tags, except for p elements. */
- if($this->elementInScope('p')) {
- $this->generateImpliedEndTags(array('p'));
-
- /* If the current node is not a p element, then this is
- a parse error. */
- // k
-
- /* If the stack of open elements has a p element in
- scope, then pop elements from this stack until the stack
- no longer has a p element in scope. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->elementInScope('p')) {
- array_pop($this->stack);
-
- } else {
- break;
- }
- }
- }
- break;
-
- /* An end tag whose tag name is "dd", "dt", or "li" */
- case 'dd': case 'dt': case 'li':
- /* If the stack of open elements has an element in scope
- whose tag name matches the tag name of the token, then
- generate implied end tags, except for elements with the
- same tag name as the token. */
- if($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags(array($token['name']));
-
- /* If the current node is not an element with the same
- tag name as the token, then this is a parse error. */
- // w/e
+ return $this->afterBody($token);
+ break;
+ /* An end tag whose tag name is one of: "address", "blockquote",
+ "center", "dir", "div", "dl", "fieldset", "listing", "menu",
+ "ol", "pre", "ul" */
+ case 'address':
+ case 'blockquote':
+ case 'center':
+ case 'dir':
+ case 'div':
+ case 'dl':
+ case 'fieldset':
+ case 'listing':
+ case 'menu':
+ case 'ol':
+ case 'pre':
+ case 'ul':
/* If the stack of open elements has an element in scope
- whose tag name matches the tag name of the token, then
- pop elements from this stack until an element with that
- tag name has been popped from the stack. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->stack[$n]->nodeName === $token['name']) {
- $n = -1;
- }
+ with the same tag name as that of the token, then generate
+ implied end tags. */
+ if ($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
- array_pop($this->stack);
- }
- }
- break;
+ /* Now, if the current node is not an element with
+ the same tag name as that of the token, then this
+ is a parse error. */
+ // w/e
- /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
- "h5", "h6" */
- case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
- $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+ /* If the stack of open elements has an element in
+ scope with the same tag name as that of the token,
+ then pop elements from this stack until an element
+ with that tag name has been popped from the stack. */
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
- /* If the stack of open elements has in scope an element whose
- tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
- generate implied end tags. */
- if($this->elementInScope($elements)) {
- $this->generateImpliedEndTags();
-
- /* Now, if the current node is not an element with the same
- tag name as that of the token, then this is a parse error. */
- // w/e
-
- /* If the stack of open elements has in scope an element
- whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
- "h6", then pop elements from the stack until an element
- with one of those tag names has been popped from the stack. */
- while($this->elementInScope($elements)) {
- array_pop($this->stack);
- }
- }
- break;
-
- /* An end tag whose tag name is one of: "a", "b", "big", "em",
- "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
- case 'a': case 'b': case 'big': case 'em': case 'font':
- case 'i': case 'nobr': case 's': case 'small': case 'strike':
- case 'strong': case 'tt': case 'u':
- /* 1. Let the formatting element be the last element in
- the list of active formatting elements that:
- * is between the end of the list and the last scope
- marker in the list, if any, or the start of the list
- otherwise, and
- * has the same tag name as the token.
- */
- while(true) {
- for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
- if($this->a_formatting[$a] === self::MARKER) {
- break;
-
- } elseif($this->a_formatting[$a]->tagName === $token['name']) {
- $formatting_element = $this->a_formatting[$a];
- $in_stack = in_array($formatting_element, $this->stack, true);
- $fe_af_pos = $a;
- break;
- }
- }
-
- /* If there is no such node, or, if that node is
- also in the stack of open elements but the element
- is not in scope, then this is a parse error. Abort
- these steps. The token is ignored. */
- if(!isset($formatting_element) || ($in_stack &&
- !$this->elementInScope($token['name']))) {
- break;
-
- /* Otherwise, if there is such a node, but that node
- is not in the stack of open elements, then this is a
- parse error; remove the element from the list, and
- abort these steps. */
- } elseif(isset($formatting_element) && !$in_stack) {
- unset($this->a_formatting[$fe_af_pos]);
- $this->a_formatting = array_merge($this->a_formatting);
- break;
- }
-
- /* 2. Let the furthest block be the topmost node in the
- stack of open elements that is lower in the stack
- than the formatting element, and is not an element in
- the phrasing or formatting categories. There might
- not be one. */
- $fe_s_pos = array_search($formatting_element, $this->stack, true);
- $length = count($this->stack);
-
- for($s = $fe_s_pos + 1; $s < $length; $s++) {
- $category = $this->getElementCategory($this->stack[$s]->nodeName);
-
- if($category !== self::PHRASING && $category !== self::FORMATTING) {
- $furthest_block = $this->stack[$s];
- }
- }
-
- /* 3. If there is no furthest block, then the UA must
- skip the subsequent steps and instead just pop all
- the nodes from the bottom of the stack of open
- elements, from the current node up to the formatting
- element, and remove the formatting element from the
- list of active formatting elements. */
- if(!isset($furthest_block)) {
- for($n = $length - 1; $n >= $fe_s_pos; $n--) {
array_pop($this->stack);
}
+ }
+ break;
+
+ /* An end tag whose tag name is "form" */
+ case 'form':
+ /* If the stack of open elements has an element in scope
+ with the same tag name as that of the token, then generate
+ implied end tags. */
+ if ($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags();
- unset($this->a_formatting[$fe_af_pos]);
- $this->a_formatting = array_merge($this->a_formatting);
- break;
}
- /* 4. Let the common ancestor be the element
- immediately above the formatting element in the stack
- of open elements. */
- $common_ancestor = $this->stack[$fe_s_pos - 1];
+ if (end($this->stack)->nodeName !== $token['name']) {
+ /* Now, if the current node is not an element with the
+ same tag name as that of the token, then this is a parse
+ error. */
+ // w/e
- /* 5. If the furthest block has a parent node, then
- remove the furthest block from its parent node. */
- if($furthest_block->parentNode !== null) {
- $furthest_block->parentNode->removeChild($furthest_block);
+ } else {
+ /* Otherwise, if the current node is an element with
+ the same tag name as that of the token pop that element
+ from the stack. */
+ array_pop($this->stack);
}
- /* 6. Let a bookmark note the position of the
- formatting element in the list of active formatting
- elements relative to the elements on either side
- of it in the list. */
- $bookmark = $fe_af_pos;
+ /* In any case, set the form element pointer to null. */
+ $this->form_pointer = null;
+ break;
- /* 7. Let node and last node be the furthest block.
- Follow these steps: */
- $node = $furthest_block;
- $last_node = $furthest_block;
+ /* An end tag whose tag name is "p" */
+ case 'p':
+ /* If the stack of open elements has a p element in scope,
+ then generate implied end tags, except for p elements. */
+ if ($this->elementInScope('p')) {
+ $this->generateImpliedEndTags(array('p'));
- while(true) {
- for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
- /* 7.1 Let node be the element immediately
- prior to node in the stack of open elements. */
- $node = $this->stack[$n];
+ /* If the current node is not a p element, then this is
+ a parse error. */
+ // k
- /* 7.2 If node is not in the list of active
- formatting elements, then remove node from
- the stack of open elements and then go back
- to step 1. */
- if(!in_array($node, $this->a_formatting, true)) {
- unset($this->stack[$n]);
- $this->stack = array_merge($this->stack);
+ /* If the stack of open elements has a p element in
+ scope, then pop elements from this stack until the stack
+ no longer has a p element in scope. */
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->elementInScope('p')) {
+ array_pop($this->stack);
} else {
break;
}
}
+ }
+ break;
- /* 7.3 Otherwise, if node is the formatting
- element, then go to the next step in the overall
- algorithm. */
- if($node === $formatting_element) {
+ /* An end tag whose tag name is "dd", "dt", or "li" */
+ case 'dd':
+ case 'dt':
+ case 'li':
+ /* If the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then
+ generate implied end tags, except for elements with the
+ same tag name as the token. */
+ if ($this->elementInScope($token['name'])) {
+ $this->generateImpliedEndTags(array($token['name']));
+
+ /* If the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
+ // w/e
+
+ /* If the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then
+ pop elements from this stack until an element with that
+ tag name has been popped from the stack. */
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
+
+ array_pop($this->stack);
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
+ "h5", "h6" */
+ case 'h1':
+ case 'h2':
+ case 'h3':
+ case 'h4':
+ case 'h5':
+ case 'h6':
+ $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+ /* If the stack of open elements has in scope an element whose
+ tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
+ generate implied end tags. */
+ if ($this->elementInScope($elements)) {
+ $this->generateImpliedEndTags();
+
+ /* Now, if the current node is not an element with the same
+ tag name as that of the token, then this is a parse error. */
+ // w/e
+
+ /* If the stack of open elements has in scope an element
+ whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
+ "h6", then pop elements from the stack until an element
+ with one of those tag names has been popped from the stack. */
+ while ($this->elementInScope($elements)) {
+ array_pop($this->stack);
+ }
+ }
+ break;
+
+ /* An end tag whose tag name is one of: "a", "b", "big", "em",
+ "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
+ case 'a':
+ case 'b':
+ case 'big':
+ case 'em':
+ case 'font':
+ case 'i':
+ case 'nobr':
+ case 's':
+ case 'small':
+ case 'strike':
+ case 'strong':
+ case 'tt':
+ case 'u':
+ /* 1. Let the formatting element be the last element in
+ the list of active formatting elements that:
+ * is between the end of the list and the last scope
+ marker in the list, if any, or the start of the list
+ otherwise, and
+ * has the same tag name as the token.
+ */
+ while (true) {
+ for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
+ if ($this->a_formatting[$a] === self::MARKER) {
+ break;
+
+ } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
+ $formatting_element = $this->a_formatting[$a];
+ $in_stack = in_array($formatting_element, $this->stack, true);
+ $fe_af_pos = $a;
+ break;
+ }
+ }
+
+ /* If there is no such node, or, if that node is
+ also in the stack of open elements but the element
+ is not in scope, then this is a parse error. Abort
+ these steps. The token is ignored. */
+ if (!isset($formatting_element) || ($in_stack &&
+ !$this->elementInScope($token['name']))
+ ) {
break;
- /* 7.4 Otherwise, if last node is the furthest
- block, then move the aforementioned bookmark to
- be immediately after the node in the list of
- active formatting elements. */
- } elseif($last_node === $furthest_block) {
- $bookmark = array_search($node, $this->a_formatting, true) + 1;
+ /* Otherwise, if there is such a node, but that node
+ is not in the stack of open elements, then this is a
+ parse error; remove the element from the list, and
+ abort these steps. */
+ } elseif (isset($formatting_element) && !$in_stack) {
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
}
- /* 7.5 If node has any children, perform a
- shallow clone of node, replace the entry for
- node in the list of active formatting elements
- with an entry for the clone, replace the entry
- for node in the stack of open elements with an
- entry for the clone, and let node be the clone. */
- if($node->hasChildNodes()) {
- $clone = $node->cloneNode();
- $s_pos = array_search($node, $this->stack, true);
- $a_pos = array_search($node, $this->a_formatting, true);
+ /* 2. Let the furthest block be the topmost node in the
+ stack of open elements that is lower in the stack
+ than the formatting element, and is not an element in
+ the phrasing or formatting categories. There might
+ not be one. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ $length = count($this->stack);
- $this->stack[$s_pos] = $clone;
- $this->a_formatting[$a_pos] = $clone;
- $node = $clone;
+ for ($s = $fe_s_pos + 1; $s < $length; $s++) {
+ $category = $this->getElementCategory($this->stack[$s]->nodeName);
+
+ if ($category !== self::PHRASING && $category !== self::FORMATTING) {
+ $furthest_block = $this->stack[$s];
+ }
}
- /* 7.6 Insert last node into node, first removing
- it from its previous parent node if any. */
- if($last_node->parentNode !== null) {
+ /* 3. If there is no furthest block, then the UA must
+ skip the subsequent steps and instead just pop all
+ the nodes from the bottom of the stack of open
+ elements, from the current node up to the formatting
+ element, and remove the formatting element from the
+ list of active formatting elements. */
+ if (!isset($furthest_block)) {
+ for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
+ array_pop($this->stack);
+ }
+
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+ break;
+ }
+
+ /* 4. Let the common ancestor be the element
+ immediately above the formatting element in the stack
+ of open elements. */
+ $common_ancestor = $this->stack[$fe_s_pos - 1];
+
+ /* 5. If the furthest block has a parent node, then
+ remove the furthest block from its parent node. */
+ if ($furthest_block->parentNode !== null) {
+ $furthest_block->parentNode->removeChild($furthest_block);
+ }
+
+ /* 6. Let a bookmark note the position of the
+ formatting element in the list of active formatting
+ elements relative to the elements on either side
+ of it in the list. */
+ $bookmark = $fe_af_pos;
+
+ /* 7. Let node and last node be the furthest block.
+ Follow these steps: */
+ $node = $furthest_block;
+ $last_node = $furthest_block;
+
+ while (true) {
+ for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
+ /* 7.1 Let node be the element immediately
+ prior to node in the stack of open elements. */
+ $node = $this->stack[$n];
+
+ /* 7.2 If node is not in the list of active
+ formatting elements, then remove node from
+ the stack of open elements and then go back
+ to step 1. */
+ if (!in_array($node, $this->a_formatting, true)) {
+ unset($this->stack[$n]);
+ $this->stack = array_merge($this->stack);
+
+ } else {
+ break;
+ }
+ }
+
+ /* 7.3 Otherwise, if node is the formatting
+ element, then go to the next step in the overall
+ algorithm. */
+ if ($node === $formatting_element) {
+ break;
+
+ /* 7.4 Otherwise, if last node is the furthest
+ block, then move the aforementioned bookmark to
+ be immediately after the node in the list of
+ active formatting elements. */
+ } elseif ($last_node === $furthest_block) {
+ $bookmark = array_search($node, $this->a_formatting, true) + 1;
+ }
+
+ /* 7.5 If node has any children, perform a
+ shallow clone of node, replace the entry for
+ node in the list of active formatting elements
+ with an entry for the clone, replace the entry
+ for node in the stack of open elements with an
+ entry for the clone, and let node be the clone. */
+ if ($node->hasChildNodes()) {
+ $clone = $node->cloneNode();
+ $s_pos = array_search($node, $this->stack, true);
+ $a_pos = array_search($node, $this->a_formatting, true);
+
+ $this->stack[$s_pos] = $clone;
+ $this->a_formatting[$a_pos] = $clone;
+ $node = $clone;
+ }
+
+ /* 7.6 Insert last node into node, first removing
+ it from its previous parent node if any. */
+ if ($last_node->parentNode !== null) {
+ $last_node->parentNode->removeChild($last_node);
+ }
+
+ $node->appendChild($last_node);
+
+ /* 7.7 Let last node be node. */
+ $last_node = $node;
+ }
+
+ /* 8. Insert whatever last node ended up being in
+ the previous step into the common ancestor node,
+ first removing it from its previous parent node if
+ any. */
+ if ($last_node->parentNode !== null) {
$last_node->parentNode->removeChild($last_node);
}
- $node->appendChild($last_node);
+ $common_ancestor->appendChild($last_node);
- /* 7.7 Let last node be node. */
- $last_node = $node;
- }
+ /* 9. Perform a shallow clone of the formatting
+ element. */
+ $clone = $formatting_element->cloneNode();
- /* 8. Insert whatever last node ended up being in
- the previous step into the common ancestor node,
- first removing it from its previous parent node if
- any. */
- if($last_node->parentNode !== null) {
- $last_node->parentNode->removeChild($last_node);
- }
-
- $common_ancestor->appendChild($last_node);
-
- /* 9. Perform a shallow clone of the formatting
- element. */
- $clone = $formatting_element->cloneNode();
-
- /* 10. Take all of the child nodes of the furthest
- block and append them to the clone created in the
- last step. */
- while($furthest_block->hasChildNodes()) {
- $child = $furthest_block->firstChild;
- $furthest_block->removeChild($child);
- $clone->appendChild($child);
- }
-
- /* 11. Append that clone to the furthest block. */
- $furthest_block->appendChild($clone);
-
- /* 12. Remove the formatting element from the list
- of active formatting elements, and insert the clone
- into the list of active formatting elements at the
- position of the aforementioned bookmark. */
- $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
- unset($this->a_formatting[$fe_af_pos]);
- $this->a_formatting = array_merge($this->a_formatting);
-
- $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
- $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
- $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
-
- /* 13. Remove the formatting element from the stack
- of open elements, and insert the clone into the stack
- of open elements immediately after (i.e. in a more
- deeply nested position than) the position of the
- furthest block in that stack. */
- $fe_s_pos = array_search($formatting_element, $this->stack, true);
- $fb_s_pos = array_search($furthest_block, $this->stack, true);
- unset($this->stack[$fe_s_pos]);
-
- $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
- $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
- $this->stack = array_merge($s_part1, array($clone), $s_part2);
-
- /* 14. Jump back to step 1 in this series of steps. */
- unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
- }
- break;
-
- /* An end tag token whose tag name is one of: "button",
- "marquee", "object" */
- case 'button': case 'marquee': case 'object':
- /* If the stack of open elements has an element in scope whose
- tag name matches the tag name of the token, then generate implied
- tags. */
- if($this->elementInScope($token['name'])) {
- $this->generateImpliedEndTags();
-
- /* Now, if the current node is not an element with the same
- tag name as the token, then this is a parse error. */
- // k
-
- /* Now, if the stack of open elements has an element in scope
- whose tag name matches the tag name of the token, then pop
- elements from the stack until that element has been popped from
- the stack, and clear the list of active formatting elements up
- to the last marker. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->stack[$n]->nodeName === $token['name']) {
- $n = -1;
+ /* 10. Take all of the child nodes of the furthest
+ block and append them to the clone created in the
+ last step. */
+ while ($furthest_block->hasChildNodes()) {
+ $child = $furthest_block->firstChild;
+ $furthest_block->removeChild($child);
+ $clone->appendChild($child);
}
- array_pop($this->stack);
+ /* 11. Append that clone to the furthest block. */
+ $furthest_block->appendChild($clone);
+
+ /* 12. Remove the formatting element from the list
+ of active formatting elements, and insert the clone
+ into the list of active formatting elements at the
+ position of the aforementioned bookmark. */
+ $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
+ unset($this->a_formatting[$fe_af_pos]);
+ $this->a_formatting = array_merge($this->a_formatting);
+
+ $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
+ $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
+ $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
+
+ /* 13. Remove the formatting element from the stack
+ of open elements, and insert the clone into the stack
+ of open elements immediately after (i.e. in a more
+ deeply nested position than) the position of the
+ furthest block in that stack. */
+ $fe_s_pos = array_search($formatting_element, $this->stack, true);
+ $fb_s_pos = array_search($furthest_block, $this->stack, true);
+ unset($this->stack[$fe_s_pos]);
+
+ $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
+ $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
+ $this->stack = array_merge($s_part1, array($clone), $s_part2);
+
+ /* 14. Jump back to step 1 in this series of steps. */
+ unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
}
+ break;
- $marker = end(array_keys($this->a_formatting, self::MARKER, true));
-
- for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
- array_pop($this->a_formatting);
- }
- }
- break;
-
- /* Or an end tag whose tag name is one of: "area", "basefont",
- "bgsound", "br", "embed", "hr", "iframe", "image", "img",
- "input", "isindex", "noembed", "noframes", "param", "select",
- "spacer", "table", "textarea", "wbr" */
- case 'area': case 'basefont': case 'bgsound': case 'br':
- case 'embed': case 'hr': case 'iframe': case 'image':
- case 'img': case 'input': case 'isindex': case 'noembed':
- case 'noframes': case 'param': case 'select': case 'spacer':
- case 'table': case 'textarea': case 'wbr':
- // Parse error. Ignore the token.
- break;
-
- /* An end tag token not covered by the previous entries */
- default:
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- /* Initialise node to be the current node (the bottommost
- node of the stack). */
- $node = end($this->stack);
-
- /* If node has the same tag name as the end tag token,
- then: */
- if($token['name'] === $node->nodeName) {
- /* Generate implied end tags. */
+ /* An end tag token whose tag name is one of: "button",
+ "marquee", "object" */
+ case 'button':
+ case 'marquee':
+ case 'object':
+ /* If the stack of open elements has an element in scope whose
+ tag name matches the tag name of the token, then generate implied
+ tags. */
+ if ($this->elementInScope($token['name'])) {
$this->generateImpliedEndTags();
- /* If the tag name of the end tag token does not
- match the tag name of the current node, this is a
- parse error. */
+ /* Now, if the current node is not an element with the same
+ tag name as the token, then this is a parse error. */
// k
- /* Pop all the nodes from the current node up to
- node, including node, then stop this algorithm. */
- for($x = count($this->stack) - $n; $x >= $n; $x--) {
+ /* Now, if the stack of open elements has an element in scope
+ whose tag name matches the tag name of the token, then pop
+ elements from the stack until that element has been popped from
+ the stack, and clear the list of active formatting elements up
+ to the last marker. */
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->stack[$n]->nodeName === $token['name']) {
+ $n = -1;
+ }
+
array_pop($this->stack);
}
-
- } else {
- $category = $this->getElementCategory($node);
- if($category !== self::SPECIAL && $category !== self::SCOPING) {
- /* Otherwise, if node is in neither the formatting
- category nor the phrasing category, then this is a
- parse error. Stop this algorithm. The end tag token
- is ignored. */
- return false;
+ $marker = end(array_keys($this->a_formatting, self::MARKER, true));
+
+ for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
+ array_pop($this->a_formatting);
}
}
- }
+ break;
+
+ /* Or an end tag whose tag name is one of: "area", "basefont",
+ "bgsound", "br", "embed", "hr", "iframe", "image", "img",
+ "input", "isindex", "noembed", "noframes", "param", "select",
+ "spacer", "table", "textarea", "wbr" */
+ case 'area':
+ case 'basefont':
+ case 'bgsound':
+ case 'br':
+ case 'embed':
+ case 'hr':
+ case 'iframe':
+ case 'image':
+ case 'img':
+ case 'input':
+ case 'isindex':
+ case 'noembed':
+ case 'noframes':
+ case 'param':
+ case 'select':
+ case 'spacer':
+ case 'table':
+ case 'textarea':
+ case 'wbr':
+ // Parse error. Ignore the token.
+ break;
+
+ /* An end tag token not covered by the previous entries */
+ default:
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ /* Initialise node to be the current node (the bottommost
+ node of the stack). */
+ $node = end($this->stack);
+
+ /* If node has the same tag name as the end tag token,
+ then: */
+ if ($token['name'] === $node->nodeName) {
+ /* Generate implied end tags. */
+ $this->generateImpliedEndTags();
+
+ /* If the tag name of the end tag token does not
+ match the tag name of the current node, this is a
+ parse error. */
+ // k
+
+ /* Pop all the nodes from the current node up to
+ node, including node, then stop this algorithm. */
+ for ($x = count($this->stack) - $n; $x >= $n; $x--) {
+ array_pop($this->stack);
+ }
+
+ } else {
+ $category = $this->getElementCategory($node);
+
+ if ($category !== self::SPECIAL && $category !== self::SCOPING) {
+ /* Otherwise, if node is in neither the formatting
+ category nor the phrasing category, then this is a
+ parse error. Stop this algorithm. The end tag token
+ is ignored. */
+ return false;
+ }
+ }
+ }
+ break;
+ }
break;
- }
- break;
}
}
- private function inTable($token) {
+ private function inTable($token)
+ {
$clear = array('html', 'table');
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$text = $this->dom->createTextNode($token['data']);
end($this->stack)->appendChild($text);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$comment = $this->dom->createComment($token['data']);
end($this->stack)->appendChild($comment);
- /* A start tag whose tag name is "caption" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'caption') {
+ /* A start tag whose tag name is "caption" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'caption'
+ ) {
/* Clear the stack back to a table context. */
$this->clearStackToTableContext($clear);
@@ -2663,9 +3350,10 @@ class HTML5TreeConstructer {
$this->insertElement($token);
$this->mode = self::IN_CAPTION;
- /* A start tag whose tag name is "colgroup" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'colgroup') {
+ /* A start tag whose tag name is "colgroup" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'colgroup'
+ ) {
/* Clear the stack back to a table context. */
$this->clearStackToTableContext($clear);
@@ -2674,20 +3362,26 @@ class HTML5TreeConstructer {
$this->insertElement($token);
$this->mode = self::IN_CGROUP;
- /* A start tag whose tag name is "col" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'col') {
- $this->inTable(array(
- 'name' => 'colgroup',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
+ /* A start tag whose tag name is "col" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'col'
+ ) {
+ $this->inTable(
+ array(
+ 'name' => 'colgroup',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
$this->inColumnGroup($token);
- /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('tbody', 'tfoot', 'thead'))) {
+ /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array('tbody', 'tfoot', 'thead')
+ )
+ ) {
/* Clear the stack back to a table context. */
$this->clearStackToTableContext($clear);
@@ -2696,42 +3390,49 @@ class HTML5TreeConstructer {
$this->insertElement($token);
$this->mode = self::IN_TBODY;
- /* A start tag whose tag name is one of: "td", "th", "tr" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- in_array($token['name'], array('td', 'th', 'tr'))) {
+ /* A start tag whose tag name is one of: "td", "th", "tr" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ in_array($token['name'], array('td', 'th', 'tr'))
+ ) {
/* Act as if a start tag token with the tag name "tbody" had been
seen, then reprocess the current token. */
- $this->inTable(array(
- 'name' => 'tbody',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
+ $this->inTable(
+ array(
+ 'name' => 'tbody',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
return $this->inTableBody($token);
- /* A start tag whose tag name is "table" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'table') {
+ /* A start tag whose tag name is "table" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'table'
+ ) {
/* Parse error. Act as if an end tag token with the tag name "table"
had been seen, then, if that token wasn't ignored, reprocess the
current token. */
- $this->inTable(array(
- 'name' => 'table',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inTable(
+ array(
+ 'name' => 'table',
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->mainPhase($token);
- /* An end tag whose tag name is "table" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'table') {
+ /* An end tag whose tag name is "table" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'table'
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. (innerHTML case) */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
return false;
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Generate implied end tags. */
$this->generateImpliedEndTags();
@@ -2742,11 +3443,11 @@ class HTML5TreeConstructer {
/* Pop elements from this stack until a table element has been
popped from the stack. */
- while(true) {
+ while (true) {
$current = end($this->stack)->nodeName;
array_pop($this->stack);
- if($current === 'table') {
+ if ($current === 'table') {
break;
}
}
@@ -2755,14 +3456,28 @@ class HTML5TreeConstructer {
$this->resetInsertionMode();
}
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
- 'tfoot', 'th', 'thead', 'tr'))) {
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array(
+ 'body',
+ 'caption',
+ 'col',
+ 'colgroup',
+ 'html',
+ 'tbody',
+ 'td',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'tr'
+ )
+ )
+ ) {
// Parse error. Ignore the token.
- /* Anything else */
+ /* Anything else */
} else {
/* Parse error. Process the token as if the insertion mode was "in
body", with the following exception: */
@@ -2770,8 +3485,11 @@ class HTML5TreeConstructer {
/* If the current node is a table, tbody, tfoot, thead, or tr
element, then, whenever a node would be inserted into the current
node, it must instead be inserted into the foster parent element. */
- if(in_array(end($this->stack)->nodeName,
- array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ if (in_array(
+ end($this->stack)->nodeName,
+ array('table', 'tbody', 'tfoot', 'thead', 'tr')
+ )
+ ) {
/* The foster parent element is the parent element of the last
table element in the stack of open elements, if there is a
table element and it has such a parent element. If there is no
@@ -2783,21 +3501,22 @@ class HTML5TreeConstructer {
its parent node is not an element, then the foster parent
element is the element before the last table element in the
stack of open elements. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->stack[$n]->nodeName === 'table') {
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->stack[$n]->nodeName === 'table') {
$table = $this->stack[$n];
break;
}
}
- if(isset($table) && $table->parentNode !== null) {
+ if (isset($table) && $table->parentNode !== null) {
$this->foster_parent = $table->parentNode;
- } elseif(!isset($table)) {
+ } elseif (!isset($table)) {
$this->foster_parent = $this->stack[0];
- } elseif(isset($table) && ($table->parentNode === null ||
- $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
+ } elseif (isset($table) && ($table->parentNode === null ||
+ $table->parentNode->nodeType !== XML_ELEMENT_NODE)
+ ) {
$this->foster_parent = $this->stack[$n - 1];
}
}
@@ -2806,16 +3525,17 @@ class HTML5TreeConstructer {
}
}
- private function inCaption($token) {
+ private function inCaption($token)
+ {
/* An end tag whose tag name is "caption" */
- if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
+ if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. (innerHTML case) */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Generate implied end tags. */
$this->generateImpliedEndTags();
@@ -2826,11 +3546,11 @@ class HTML5TreeConstructer {
/* Pop elements from this stack until a caption element has
been popped from the stack. */
- while(true) {
+ while (true) {
$node = end($this->stack)->nodeName;
array_pop($this->stack);
- if($node === 'caption') {
+ if ($node === 'caption') {
break;
}
}
@@ -2843,99 +3563,131 @@ class HTML5TreeConstructer {
$this->mode = self::IN_TABLE;
}
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
- name is "table" */
- } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
- 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'table')) {
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+ name is "table" */
+ } elseif (($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array(
+ 'caption',
+ 'col',
+ 'colgroup',
+ 'tbody',
+ 'td',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'tr'
+ )
+ )) || ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'table')
+ ) {
/* Parse error. Act as if an end tag with the tag name "caption"
had been seen, then, if that token wasn't ignored, reprocess the
current token. */
- $this->inCaption(array(
- 'name' => 'caption',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inCaption(
+ array(
+ 'name' => 'caption',
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->inTable($token);
- /* An end tag whose tag name is one of: "body", "col", "colgroup",
- "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
- 'thead', 'tr'))) {
+ /* An end tag whose tag name is one of: "body", "col", "colgroup",
+ "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array(
+ 'body',
+ 'col',
+ 'colgroup',
+ 'html',
+ 'tbody',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'tr'
+ )
+ )
+ ) {
// Parse error. Ignore the token.
- /* Anything else */
+ /* Anything else */
} else {
/* Process the token as if the insertion mode was "in body". */
$this->inBody($token);
}
}
- private function inColumnGroup($token) {
+ private function inColumnGroup($token)
+ {
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$text = $this->dom->createTextNode($token['data']);
end($this->stack)->appendChild($text);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$comment = $this->dom->createComment($token['data']);
end($this->stack)->appendChild($comment);
- /* A start tag whose tag name is "col" */
- } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
+ /* A start tag whose tag name is "col" */
+ } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
/* Insert a col element for the token. Immediately pop the current
node off the stack of open elements. */
$this->insertElement($token);
array_pop($this->stack);
- /* An end tag whose tag name is "colgroup" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'colgroup') {
+ /* An end tag whose tag name is "colgroup" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'colgroup'
+ ) {
/* If the current node is the root html element, then this is a
parse error, ignore the token. (innerHTML case) */
- if(end($this->stack)->nodeName === 'html') {
+ if (end($this->stack)->nodeName === 'html') {
// Ignore
- /* Otherwise, pop the current node (which will be a colgroup
- element) from the stack of open elements. Switch the insertion
- mode to "in table". */
+ /* Otherwise, pop the current node (which will be a colgroup
+ element) from the stack of open elements. Switch the insertion
+ mode to "in table". */
} else {
array_pop($this->stack);
$this->mode = self::IN_TABLE;
}
- /* An end tag whose tag name is "col" */
- } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
+ /* An end tag whose tag name is "col" */
+ } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
/* Parse error. Ignore the token. */
- /* Anything else */
+ /* Anything else */
} else {
/* Act as if an end tag with the tag name "colgroup" had been seen,
and then, if that token wasn't ignored, reprocess the current token. */
- $this->inColumnGroup(array(
- 'name' => 'colgroup',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inColumnGroup(
+ array(
+ 'name' => 'colgroup',
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->inTable($token);
}
}
- private function inTableBody($token) {
+ private function inTableBody($token)
+ {
$clear = array('tbody', 'tfoot', 'thead', 'html');
/* A start tag whose tag name is "tr" */
- if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
+ if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
/* Clear the stack back to a table body context. */
$this->clearStackToTableContext($clear);
@@ -2944,29 +3696,33 @@ class HTML5TreeConstructer {
$this->insertElement($token);
$this->mode = self::IN_ROW;
- /* A start tag whose tag name is one of: "th", "td" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- ($token['name'] === 'th' || $token['name'] === 'td')) {
+ /* A start tag whose tag name is one of: "th", "td" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')
+ ) {
/* Parse error. Act as if a start tag with the tag name "tr" had
been seen, then reprocess the current token. */
- $this->inTableBody(array(
- 'name' => 'tr',
- 'type' => HTML5::STARTTAG,
- 'attr' => array()
- ));
+ $this->inTableBody(
+ array(
+ 'name' => 'tr',
+ 'type' => HTML5::STARTTAG,
+ 'attr' => array()
+ )
+ );
return $this->inRow($token);
- /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Clear the stack back to a table body context. */
$this->clearStackToTableContext($clear);
@@ -2977,18 +3733,21 @@ class HTML5TreeConstructer {
$this->mode = self::IN_TABLE;
}
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
- } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) ||
- ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) {
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
+ } elseif (($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead')
+ )) ||
+ ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')
+ ) {
/* If the stack of open elements does not have a tbody, thead, or
tfoot element in table scope, this is a parse error. Ignore the
token. (innerHTML case) */
- if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+ if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
// Ignore.
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Clear the stack back to a table body context. */
$this->clearStackToTableContext($clear);
@@ -2996,33 +3755,40 @@ class HTML5TreeConstructer {
/* Act as if an end tag with the same tag name as the current
node ("tbody", "tfoot", or "thead") had been seen, then
reprocess the current token. */
- $this->inTableBody(array(
- 'name' => end($this->stack)->nodeName,
- 'type' => HTML5::ENDTAG
- ));
+ $this->inTableBody(
+ array(
+ 'name' => end($this->stack)->nodeName,
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->mainPhase($token);
}
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "td", "th", "tr" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th", "tr" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
+ )
+ ) {
/* Parse error. Ignore the token. */
- /* Anything else */
+ /* Anything else */
} else {
/* Process the token as if the insertion mode was "in table". */
$this->inTable($token);
}
}
- private function inRow($token) {
+ private function inRow($token)
+ {
$clear = array('tr', 'html');
/* A start tag whose tag name is one of: "th", "td" */
- if($token['type'] === HTML5::STARTTAG &&
- ($token['name'] === 'th' || $token['name'] === 'td')) {
+ if ($token['type'] === HTML5::STARTTAG &&
+ ($token['name'] === 'th' || $token['name'] === 'td')
+ ) {
/* Clear the stack back to a table row context. */
$this->clearStackToTableContext($clear);
@@ -3035,15 +3801,15 @@ class HTML5TreeConstructer {
elements. */
$this->a_formatting[] = self::MARKER;
- /* An end tag whose tag name is "tr" */
- } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
+ /* An end tag whose tag name is "tr" */
+ } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. (innerHTML case) */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore.
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Clear the stack back to a table row context. */
$this->clearStackToTableContext($clear);
@@ -3055,64 +3821,77 @@ class HTML5TreeConstructer {
$this->mode = self::IN_TBODY;
}
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')
+ )
+ ) {
/* Act as if an end tag with the tag name "tr" had been seen, then,
if that token wasn't ignored, reprocess the current token. */
- $this->inRow(array(
- 'name' => 'tr',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inRow(
+ array(
+ 'name' => 'tr',
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->inCell($token);
- /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
+ /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ in_array($token['name'], array('tbody', 'tfoot', 'thead'))
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore.
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Otherwise, act as if an end tag with the tag name "tr" had
been seen, then reprocess the current token. */
- $this->inRow(array(
- 'name' => 'tr',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inRow(
+ array(
+ 'name' => 'tr',
+ 'type' => HTML5::ENDTAG
+ )
+ );
return $this->inCell($token);
}
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html", "td", "th" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html", "td", "th" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
+ )
+ ) {
/* Parse error. Ignore the token. */
- /* Anything else */
+ /* Anything else */
} else {
/* Process the token as if the insertion mode was "in table". */
$this->inTable($token);
}
}
- private function inCell($token) {
+ private function inCell($token)
+ {
/* An end tag whose tag name is one of: "td", "th" */
- if($token['type'] === HTML5::ENDTAG &&
- ($token['name'] === 'td' || $token['name'] === 'th')) {
+ if ($token['type'] === HTML5::ENDTAG &&
+ ($token['name'] === 'td' || $token['name'] === 'th')
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as that of the token, then this is a
parse error and the token must be ignored. */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore.
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Generate implied end tags, except for elements with the same
tag name as the token. */
@@ -3124,11 +3903,11 @@ class HTML5TreeConstructer {
/* Pop elements from this stack until an element with the same
tag name as the token has been popped from the stack. */
- while(true) {
+ while (true) {
$node = end($this->stack)->nodeName;
array_pop($this->stack);
- if($node === $token['name']) {
+ if ($node === $token['name']) {
break;
}
}
@@ -3142,178 +3921,223 @@ class HTML5TreeConstructer {
$this->mode = self::IN_ROW;
}
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
- 'thead', 'tr'))) {
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array(
+ 'caption',
+ 'col',
+ 'colgroup',
+ 'tbody',
+ 'td',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'tr'
+ )
+ )
+ ) {
/* If the stack of open elements does not have a td or th element
in table scope, then this is a parse error; ignore the token.
(innerHTML case) */
- if(!$this->elementInScope(array('td', 'th'), true)) {
+ if (!$this->elementInScope(array('td', 'th'), true)) {
// Ignore.
- /* Otherwise, close the cell (see below) and reprocess the current
- token. */
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
} else {
$this->closeCell();
return $this->inRow($token);
}
- /* A start tag whose tag name is one of: "caption", "col", "colgroup",
- "tbody", "td", "tfoot", "th", "thead", "tr" */
- } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
- array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
- 'thead', 'tr'))) {
+ /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+ "tbody", "td", "tfoot", "th", "thead", "tr" */
+ } elseif ($token['type'] === HTML5::STARTTAG && in_array(
+ $token['name'],
+ array(
+ 'caption',
+ 'col',
+ 'colgroup',
+ 'tbody',
+ 'td',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'tr'
+ )
+ )
+ ) {
/* If the stack of open elements does not have a td or th element
in table scope, then this is a parse error; ignore the token.
(innerHTML case) */
- if(!$this->elementInScope(array('td', 'th'), true)) {
+ if (!$this->elementInScope(array('td', 'th'), true)) {
// Ignore.
- /* Otherwise, close the cell (see below) and reprocess the current
- token. */
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
} else {
$this->closeCell();
return $this->inRow($token);
}
- /* An end tag whose tag name is one of: "body", "caption", "col",
- "colgroup", "html" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('body', 'caption', 'col', 'colgroup', 'html'))) {
+ /* An end tag whose tag name is one of: "body", "caption", "col",
+ "colgroup", "html" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array('body', 'caption', 'col', 'colgroup', 'html')
+ )
+ ) {
/* Parse error. Ignore the token. */
- /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
- "thead", "tr" */
- } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
- array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
+ /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+ "thead", "tr" */
+ } elseif ($token['type'] === HTML5::ENDTAG && in_array(
+ $token['name'],
+ array('table', 'tbody', 'tfoot', 'thead', 'tr')
+ )
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as that of the token (which can only
happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
then this is a parse error and the token must be ignored. */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// Ignore.
- /* Otherwise, close the cell (see below) and reprocess the current
- token. */
+ /* Otherwise, close the cell (see below) and reprocess the current
+ token. */
} else {
$this->closeCell();
return $this->inRow($token);
}
- /* Anything else */
+ /* Anything else */
} else {
/* Process the token as if the insertion mode was "in body". */
$this->inBody($token);
}
}
- private function inSelect($token) {
+ private function inSelect($token)
+ {
/* Handle the token as follows: */
/* A character token */
- if($token['type'] === HTML5::CHARACTR) {
+ if ($token['type'] === HTML5::CHARACTR) {
/* Append the token's character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$this->insertComment($token['data']);
- /* A start tag token whose tag name is "option" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'option') {
+ /* A start tag token whose tag name is "option" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'option'
+ ) {
/* If the current node is an option element, act as if an end tag
with the tag name "option" had been seen. */
- if(end($this->stack)->nodeName === 'option') {
- $this->inSelect(array(
- 'name' => 'option',
- 'type' => HTML5::ENDTAG
- ));
+ if (end($this->stack)->nodeName === 'option') {
+ $this->inSelect(
+ array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
/* Insert an HTML element for the token. */
$this->insertElement($token);
- /* A start tag token whose tag name is "optgroup" */
- } elseif($token['type'] === HTML5::STARTTAG &&
- $token['name'] === 'optgroup') {
+ /* A start tag token whose tag name is "optgroup" */
+ } elseif ($token['type'] === HTML5::STARTTAG &&
+ $token['name'] === 'optgroup'
+ ) {
/* If the current node is an option element, act as if an end tag
with the tag name "option" had been seen. */
- if(end($this->stack)->nodeName === 'option') {
- $this->inSelect(array(
- 'name' => 'option',
- 'type' => HTML5::ENDTAG
- ));
+ if (end($this->stack)->nodeName === 'option') {
+ $this->inSelect(
+ array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
/* If the current node is an optgroup element, act as if an end tag
with the tag name "optgroup" had been seen. */
- if(end($this->stack)->nodeName === 'optgroup') {
- $this->inSelect(array(
- 'name' => 'optgroup',
- 'type' => HTML5::ENDTAG
- ));
+ if (end($this->stack)->nodeName === 'optgroup') {
+ $this->inSelect(
+ array(
+ 'name' => 'optgroup',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
/* Insert an HTML element for the token. */
$this->insertElement($token);
- /* An end tag token whose tag name is "optgroup" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'optgroup') {
+ /* An end tag token whose tag name is "optgroup" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'optgroup'
+ ) {
/* First, if the current node is an option element, and the node
immediately before it in the stack of open elements is an optgroup
element, then act as if an end tag with the tag name "option" had
been seen. */
$elements_in_stack = count($this->stack);
- if($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
- $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
- $this->inSelect(array(
- 'name' => 'option',
- 'type' => HTML5::ENDTAG
- ));
+ if ($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
+ $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup'
+ ) {
+ $this->inSelect(
+ array(
+ 'name' => 'option',
+ 'type' => HTML5::ENDTAG
+ )
+ );
}
/* If the current node is an optgroup element, then pop that node
from the stack of open elements. Otherwise, this is a parse error,
ignore the token. */
- if($this->stack[$elements_in_stack - 1] === 'optgroup') {
+ if ($this->stack[$elements_in_stack - 1] === 'optgroup') {
array_pop($this->stack);
}
- /* An end tag token whose tag name is "option" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'option') {
+ /* An end tag token whose tag name is "option" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'option'
+ ) {
/* If the current node is an option element, then pop that node
from the stack of open elements. Otherwise, this is a parse error,
ignore the token. */
- if(end($this->stack)->nodeName === 'option') {
+ if (end($this->stack)->nodeName === 'option') {
array_pop($this->stack);
}
- /* An end tag whose tag name is "select" */
- } elseif($token['type'] === HTML5::ENDTAG &&
- $token['name'] === 'select') {
+ /* An end tag whose tag name is "select" */
+ } elseif ($token['type'] === HTML5::ENDTAG &&
+ $token['name'] === 'select'
+ ) {
/* If the stack of open elements does not have an element in table
scope with the same tag name as the token, this is a parse error.
Ignore the token. (innerHTML case) */
- if(!$this->elementInScope($token['name'], true)) {
+ if (!$this->elementInScope($token['name'], true)) {
// w/e
- /* Otherwise: */
+ /* Otherwise: */
} else {
/* Pop elements from the stack of open elements until a select
element has been popped from the stack. */
- while(true) {
+ while (true) {
$current = end($this->stack)->nodeName;
array_pop($this->stack);
- if($current === 'select') {
+ if ($current === 'select') {
break;
}
}
@@ -3322,20 +4146,35 @@ class HTML5TreeConstructer {
$this->resetInsertionMode();
}
- /* A start tag whose tag name is "select" */
- } elseif($token['name'] === 'select' &&
- $token['type'] === HTML5::STARTTAG) {
+ /* A start tag whose tag name is "select" */
+ } elseif ($token['name'] === 'select' &&
+ $token['type'] === HTML5::STARTTAG
+ ) {
/* Parse error. Act as if the token had been an end tag with the
tag name "select" instead. */
- $this->inSelect(array(
- 'name' => 'select',
- 'type' => HTML5::ENDTAG
- ));
+ $this->inSelect(
+ array(
+ 'name' => 'select',
+ 'type' => HTML5::ENDTAG
+ )
+ );
- /* An end tag whose tag name is one of: "caption", "table", "tbody",
- "tfoot", "thead", "tr", "td", "th" */
- } elseif(in_array($token['name'], array('caption', 'table', 'tbody',
- 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) {
+ /* An end tag whose tag name is one of: "caption", "table", "tbody",
+ "tfoot", "thead", "tr", "td", "th" */
+ } elseif (in_array(
+ $token['name'],
+ array(
+ 'caption',
+ 'table',
+ 'tbody',
+ 'tfoot',
+ 'thead',
+ 'tr',
+ 'td',
+ 'th'
+ )
+ ) && $token['type'] === HTML5::ENDTAG
+ ) {
/* Parse error. */
// w/e
@@ -3343,43 +4182,47 @@ class HTML5TreeConstructer {
the same tag name as that of the token, then act as if an end tag
with the tag name "select" had been seen, and reprocess the token.
Otherwise, ignore the token. */
- if($this->elementInScope($token['name'], true)) {
- $this->inSelect(array(
- 'name' => 'select',
- 'type' => HTML5::ENDTAG
- ));
+ if ($this->elementInScope($token['name'], true)) {
+ $this->inSelect(
+ array(
+ 'name' => 'select',
+ 'type' => HTML5::ENDTAG
+ )
+ );
$this->mainPhase($token);
}
- /* Anything else */
+ /* Anything else */
} else {
/* Parse error. Ignore the token. */
}
}
- private function afterBody($token) {
+ private function afterBody($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Process the token as it would be processed if the insertion mode
was "in body". */
$this->inBody($token);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the first element in the stack of open
elements (the html element), with the data attribute set to the
data given in the comment token. */
$comment = $this->dom->createComment($token['data']);
$this->stack[0]->appendChild($comment);
- /* An end tag with the tag name "html" */
- } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
+ /* An end tag with the tag name "html" */
+ } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
/* If the parser was originally created in order to handle the
setting of an element's innerHTML attribute, this is a parse error;
ignore the token. (The element will be an html element in this
@@ -3388,7 +4231,7 @@ class HTML5TreeConstructer {
/* Otherwise, switch to the trailing end phase. */
$this->phase = self::END_PHASE;
- /* Anything else */
+ /* Anything else */
} else {
/* Parse error. Set the insertion mode to "in body" and reprocess
the token. */
@@ -3397,34 +4240,38 @@ class HTML5TreeConstructer {
}
}
- private function inFrameset($token) {
+ private function inFrameset($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$this->insertComment($token['data']);
- /* A start tag with the tag name "frameset" */
- } elseif($token['name'] === 'frameset' &&
- $token['type'] === HTML5::STARTTAG) {
+ /* A start tag with the tag name "frameset" */
+ } elseif ($token['name'] === 'frameset' &&
+ $token['type'] === HTML5::STARTTAG
+ ) {
$this->insertElement($token);
- /* An end tag with the tag name "frameset" */
- } elseif($token['name'] === 'frameset' &&
- $token['type'] === HTML5::ENDTAG) {
+ /* An end tag with the tag name "frameset" */
+ } elseif ($token['name'] === 'frameset' &&
+ $token['type'] === HTML5::ENDTAG
+ ) {
/* If the current node is the root html element, then this is a
parse error; ignore the token. (innerHTML case) */
- if(end($this->stack)->nodeName === 'html') {
+ if (end($this->stack)->nodeName === 'html') {
// Ignore
} else {
@@ -3439,103 +4286,113 @@ class HTML5TreeConstructer {
$this->mode = self::AFTR_FRAME;
}
- /* A start tag with the tag name "frame" */
- } elseif($token['name'] === 'frame' &&
- $token['type'] === HTML5::STARTTAG) {
+ /* A start tag with the tag name "frame" */
+ } elseif ($token['name'] === 'frame' &&
+ $token['type'] === HTML5::STARTTAG
+ ) {
/* Insert an HTML element for the token. */
$this->insertElement($token);
/* Immediately pop the current node off the stack of open elements. */
array_pop($this->stack);
- /* A start tag with the tag name "noframes" */
- } elseif($token['name'] === 'noframes' &&
- $token['type'] === HTML5::STARTTAG) {
+ /* A start tag with the tag name "noframes" */
+ } elseif ($token['name'] === 'noframes' &&
+ $token['type'] === HTML5::STARTTAG
+ ) {
/* Process the token as if the insertion mode had been "in body". */
$this->inBody($token);
- /* Anything else */
+ /* Anything else */
} else {
/* Parse error. Ignore the token. */
}
}
- private function afterFrameset($token) {
+ private function afterFrameset($token)
+ {
/* Handle the token as follows: */
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
- if($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ if ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Append the character to the current node. */
$this->insertText($token['data']);
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the current node with the data
attribute set to the data given in the comment token. */
$this->insertComment($token['data']);
- /* An end tag with the tag name "html" */
- } elseif($token['name'] === 'html' &&
- $token['type'] === HTML5::ENDTAG) {
+ /* An end tag with the tag name "html" */
+ } elseif ($token['name'] === 'html' &&
+ $token['type'] === HTML5::ENDTAG
+ ) {
/* Switch to the trailing end phase. */
$this->phase = self::END_PHASE;
- /* A start tag with the tag name "noframes" */
- } elseif($token['name'] === 'noframes' &&
- $token['type'] === HTML5::STARTTAG) {
+ /* A start tag with the tag name "noframes" */
+ } elseif ($token['name'] === 'noframes' &&
+ $token['type'] === HTML5::STARTTAG
+ ) {
/* Process the token as if the insertion mode had been "in body". */
$this->inBody($token);
- /* Anything else */
+ /* Anything else */
} else {
/* Parse error. Ignore the token. */
}
}
- private function trailingEndPhase($token) {
+ private function trailingEndPhase($token)
+ {
/* After the main phase, as each token is emitted from the tokenisation
stage, it must be processed as described in this section. */
/* A DOCTYPE token */
- if($token['type'] === HTML5::DOCTYPE) {
+ if ($token['type'] === HTML5::DOCTYPE) {
// Parse error. Ignore the token.
- /* A comment token */
- } elseif($token['type'] === HTML5::COMMENT) {
+ /* A comment token */
+ } elseif ($token['type'] === HTML5::COMMENT) {
/* Append a Comment node to the Document object with the data
attribute set to the data given in the comment token. */
$comment = $this->dom->createComment($token['data']);
$this->dom->appendChild($comment);
- /* A character token that is one of one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE */
- } elseif($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+ /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE */
+ } elseif ($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
+ ) {
/* Process the token as it would be processed in the main phase. */
$this->mainPhase($token);
- /* A character token that is not one of U+0009 CHARACTER TABULATION,
- U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
- or U+0020 SPACE. Or a start tag token. Or an end tag token. */
- } elseif(($token['type'] === HTML5::CHARACTR &&
- preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
- $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
+ /* A character token that is not one of U+0009 CHARACTER TABULATION,
+ U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+ or U+0020 SPACE. Or a start tag token. Or an end tag token. */
+ } elseif (($token['type'] === HTML5::CHARACTR &&
+ preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+ $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG
+ ) {
/* Parse error. Switch back to the main phase and reprocess the
token. */
$this->phase = self::MAIN_PHASE;
return $this->mainPhase($token);
- /* An end-of-file token */
- } elseif($token['type'] === HTML5::EOF) {
+ /* An end-of-file token */
+ } elseif ($token['type'] === HTML5::EOF) {
/* OMG DONE!! */
}
}
- private function insertElement($token, $append = true, $check = false) {
+ private function insertElement($token, $append = true, $check = false)
+ {
// Proprietary workaround for libxml2's limitations with tag names
if ($check) {
// Slightly modified HTML5 tag-name modification,
@@ -3544,13 +4401,15 @@ class HTML5TreeConstructer {
// Remove leading hyphens and numbers
$token['name'] = ltrim($token['name'], '-0..9');
// In theory, this should ever be needed, but just in case
- if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
+ if ($token['name'] === '') {
+ $token['name'] = 'span';
+ } // arbitrary generic choice
}
-
+
$el = $this->dom->createElement($token['name']);
- foreach($token['attr'] as $attr) {
- if(!$el->hasAttribute($attr['name'])) {
+ foreach ($token['attr'] as $attr) {
+ if (!$el->hasAttribute($attr['name'])) {
$el->setAttribute($attr['name'], $attr['value']);
}
}
@@ -3561,48 +4420,54 @@ class HTML5TreeConstructer {
return $el;
}
- private function insertText($data) {
+ private function insertText($data)
+ {
$text = $this->dom->createTextNode($data);
$this->appendToRealParent($text);
}
- private function insertComment($data) {
+ private function insertComment($data)
+ {
$comment = $this->dom->createComment($data);
$this->appendToRealParent($comment);
}
- private function appendToRealParent($node) {
- if($this->foster_parent === null) {
+ private function appendToRealParent($node)
+ {
+ if ($this->foster_parent === null) {
end($this->stack)->appendChild($node);
- } elseif($this->foster_parent !== null) {
+ } elseif ($this->foster_parent !== null) {
/* If the foster parent element is the parent element of the
last table element in the stack of open elements, then the new
node must be inserted immediately before the last table element
in the stack of open elements in the foster parent element;
otherwise, the new node must be appended to the foster parent
element. */
- for($n = count($this->stack) - 1; $n >= 0; $n--) {
- if($this->stack[$n]->nodeName === 'table' &&
- $this->stack[$n]->parentNode !== null) {
+ for ($n = count($this->stack) - 1; $n >= 0; $n--) {
+ if ($this->stack[$n]->nodeName === 'table' &&
+ $this->stack[$n]->parentNode !== null
+ ) {
$table = $this->stack[$n];
break;
}
}
- if(isset($table) && $this->foster_parent->isSameNode($table->parentNode))
+ if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) {
$this->foster_parent->insertBefore($node, $table);
- else
+ } else {
$this->foster_parent->appendChild($node);
+ }
$this->foster_parent = null;
}
}
- private function elementInScope($el, $table = false) {
- if(is_array($el)) {
- foreach($el as $element) {
- if($this->elementInScope($element, $table)) {
+ private function elementInScope($el, $table = false)
+ {
+ if (is_array($el)) {
+ foreach ($el as $element) {
+ if ($this->elementInScope($element, $table)) {
return true;
}
}
@@ -3612,28 +4477,38 @@ class HTML5TreeConstructer {
$leng = count($this->stack);
- for($n = 0; $n < $leng; $n++) {
+ for ($n = 0; $n < $leng; $n++) {
/* 1. Initialise node to be the current node (the bottommost node of
the stack). */
$node = $this->stack[$leng - 1 - $n];
- if($node->tagName === $el) {
+ if ($node->tagName === $el) {
/* 2. If node is the target node, terminate in a match state. */
return true;
- } elseif($node->tagName === 'table') {
+ } elseif ($node->tagName === 'table') {
/* 3. Otherwise, if node is a table element, terminate in a failure
state. */
return false;
- } elseif($table === true && in_array($node->tagName, array('caption', 'td',
- 'th', 'button', 'marquee', 'object'))) {
+ } elseif ($table === true && in_array(
+ $node->tagName,
+ array(
+ 'caption',
+ 'td',
+ 'th',
+ 'button',
+ 'marquee',
+ 'object'
+ )
+ )
+ ) {
/* 4. Otherwise, if the algorithm is the "has an element in scope"
variant (rather than the "has an element in table scope" variant),
and node is one of the following, terminate in a failure state. */
return false;
- } elseif($node === $node->ownerDocument->documentElement) {
+ } elseif ($node === $node->ownerDocument->documentElement) {
/* 5. Otherwise, if node is an html element (root element), terminate
in a failure state. (This can only happen if the node is the topmost
node of the stack of open elements, and prevents the next step from
@@ -3648,12 +4523,13 @@ class HTML5TreeConstructer {
}
}
- private function reconstructActiveFormattingElements() {
+ private function reconstructActiveFormattingElements()
+ {
/* 1. If there are no entries in the list of active formatting elements,
then there is nothing to reconstruct; stop this algorithm. */
$formatting_elements = count($this->a_formatting);
- if($formatting_elements === 0) {
+ if ($formatting_elements === 0) {
return false;
}
@@ -3665,14 +4541,14 @@ class HTML5TreeConstructer {
formatting elements is a marker, or if it is an element that is in the
stack of open elements, then there is nothing to reconstruct; stop this
algorithm. */
- if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+ if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
return false;
}
- for($a = $formatting_elements - 1; $a >= 0; true) {
+ for ($a = $formatting_elements - 1; $a >= 0; true) {
/* 4. If there are no entries before entry in the list of active
formatting elements, then jump to step 8. */
- if($a === 0) {
+ if ($a === 0) {
$step_seven = false;
break;
}
@@ -3684,15 +4560,15 @@ class HTML5TreeConstructer {
/* 6. If entry is neither a marker nor an element that is also in
thetack of open elements, go to step 4. */
- if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
+ if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
break;
}
}
- while(true) {
+ while (true) {
/* 7. Let entry be the element one later than entry in the list of
active formatting elements. */
- if(isset($step_seven) && $step_seven === true) {
+ if (isset($step_seven) && $step_seven === true) {
$a++;
$entry = $this->a_formatting[$a];
}
@@ -3711,7 +4587,7 @@ class HTML5TreeConstructer {
/* 11. If the entry for clone in the list of active formatting
elements is not the last entry in the list, return to step 7. */
- if(end($this->a_formatting) !== $clone) {
+ if (end($this->a_formatting) !== $clone) {
$step_seven = true;
} else {
break;
@@ -3719,12 +4595,13 @@ class HTML5TreeConstructer {
}
}
- private function clearTheActiveFormattingElementsUpToTheLastMarker() {
+ private function clearTheActiveFormattingElementsUpToTheLastMarker()
+ {
/* When the steps below require the UA to clear the list of active
formatting elements up to the last marker, the UA must perform the
following steps: */
- while(true) {
+ while (true) {
/* 1. Let entry be the last (most recently added) entry in the list
of active formatting elements. */
$entry = end($this->a_formatting);
@@ -3734,13 +4611,14 @@ class HTML5TreeConstructer {
/* 3. If entry was a marker, then stop the algorithm at this point.
The list has been cleared up to the last marker. */
- if($entry === self::MARKER) {
+ if ($entry === self::MARKER) {
break;
}
}
}
- private function generateImpliedEndTags($exclude = array()) {
+ private function generateImpliedEndTags($exclude = array())
+ {
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3749,36 +4627,36 @@ class HTML5TreeConstructer {
$node = end($this->stack);
$elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
- while(in_array(end($this->stack)->nodeName, $elements)) {
+ while (in_array(end($this->stack)->nodeName, $elements)) {
array_pop($this->stack);
}
}
- private function getElementCategory($node) {
+ private function getElementCategory($node)
+ {
$name = $node->tagName;
- if(in_array($name, $this->special))
+ if (in_array($name, $this->special)) {
return self::SPECIAL;
-
- elseif(in_array($name, $this->scoping))
+ } elseif (in_array($name, $this->scoping)) {
return self::SCOPING;
-
- elseif(in_array($name, $this->formatting))
+ } elseif (in_array($name, $this->formatting)) {
return self::FORMATTING;
-
- else
+ } else {
return self::PHRASING;
+ }
}
- private function clearStackToTableContext($elements) {
+ private function clearStackToTableContext($elements)
+ {
/* When the steps above require the UA to clear the stack back to a
table context, it means that the UA must, while the current node is not
a table element or an html element, pop elements from the stack of open
elements. If this causes any elements to be popped from the stack, then
this is a parse error. */
- while(true) {
+ while (true) {
$node = end($this->stack)->nodeName;
- if(in_array($node, $elements)) {
+ if (in_array($node, $elements)) {
break;
} else {
array_pop($this->stack);
@@ -3786,12 +4664,13 @@ class HTML5TreeConstructer {
}
}
- private function resetInsertionMode() {
+ private function resetInsertionMode()
+ {
/* 1. Let last be false. */
$last = false;
$leng = count($this->stack);
- for($n = $leng - 1; $n >= 0; $n--) {
+ for ($n = $leng - 1; $n >= 0; $n--) {
/* 2. Let node be the last node in the stack of open elements. */
$node = $this->stack[$n];
@@ -3799,108 +4678,111 @@ class HTML5TreeConstructer {
set last to true. If the element whose innerHTML attribute is being
set is neither a td element nor a th element, then set node to the
element whose innerHTML attribute is being set. (innerHTML case) */
- if($this->stack[0]->isSameNode($node)) {
+ if ($this->stack[0]->isSameNode($node)) {
$last = true;
}
/* 4. If node is a select element, then switch the insertion mode to
"in select" and abort these steps. (innerHTML case) */
- if($node->nodeName === 'select') {
+ if ($node->nodeName === 'select') {
$this->mode = self::IN_SELECT;
break;
- /* 5. If node is a td or th element, then switch the insertion mode
- to "in cell" and abort these steps. */
- } elseif($node->nodeName === 'td' || $node->nodeName === 'th') {
+ /* 5. If node is a td or th element, then switch the insertion mode
+ to "in cell" and abort these steps. */
+ } elseif ($node->nodeName === 'td' || $node->nodeName === 'th') {
$this->mode = self::IN_CELL;
break;
- /* 6. If node is a tr element, then switch the insertion mode to
- "in row" and abort these steps. */
- } elseif($node->nodeName === 'tr') {
+ /* 6. If node is a tr element, then switch the insertion mode to
+ "in row" and abort these steps. */
+ } elseif ($node->nodeName === 'tr') {
$this->mode = self::IN_ROW;
break;
- /* 7. If node is a tbody, thead, or tfoot element, then switch the
- insertion mode to "in table body" and abort these steps. */
- } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
+ /* 7. If node is a tbody, thead, or tfoot element, then switch the
+ insertion mode to "in table body" and abort these steps. */
+ } elseif (in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) {
$this->mode = self::IN_TBODY;
break;
- /* 8. If node is a caption element, then switch the insertion mode
- to "in caption" and abort these steps. */
- } elseif($node->nodeName === 'caption') {
+ /* 8. If node is a caption element, then switch the insertion mode
+ to "in caption" and abort these steps. */
+ } elseif ($node->nodeName === 'caption') {
$this->mode = self::IN_CAPTION;
break;
- /* 9. If node is a colgroup element, then switch the insertion mode
- to "in column group" and abort these steps. (innerHTML case) */
- } elseif($node->nodeName === 'colgroup') {
+ /* 9. If node is a colgroup element, then switch the insertion mode
+ to "in column group" and abort these steps. (innerHTML case) */
+ } elseif ($node->nodeName === 'colgroup') {
$this->mode = self::IN_CGROUP;
break;
- /* 10. If node is a table element, then switch the insertion mode
- to "in table" and abort these steps. */
- } elseif($node->nodeName === 'table') {
+ /* 10. If node is a table element, then switch the insertion mode
+ to "in table" and abort these steps. */
+ } elseif ($node->nodeName === 'table') {
$this->mode = self::IN_TABLE;
break;
- /* 11. If node is a head element, then switch the insertion mode
- to "in body" ("in body"! not "in head"!) and abort these steps.
- (innerHTML case) */
- } elseif($node->nodeName === 'head') {
+ /* 11. If node is a head element, then switch the insertion mode
+ to "in body" ("in body"! not "in head"!) and abort these steps.
+ (innerHTML case) */
+ } elseif ($node->nodeName === 'head') {
$this->mode = self::IN_BODY;
break;
- /* 12. If node is a body element, then switch the insertion mode to
- "in body" and abort these steps. */
- } elseif($node->nodeName === 'body') {
+ /* 12. If node is a body element, then switch the insertion mode to
+ "in body" and abort these steps. */
+ } elseif ($node->nodeName === 'body') {
$this->mode = self::IN_BODY;
break;
- /* 13. If node is a frameset element, then switch the insertion
- mode to "in frameset" and abort these steps. (innerHTML case) */
- } elseif($node->nodeName === 'frameset') {
+ /* 13. If node is a frameset element, then switch the insertion
+ mode to "in frameset" and abort these steps. (innerHTML case) */
+ } elseif ($node->nodeName === 'frameset') {
$this->mode = self::IN_FRAME;
break;
- /* 14. If node is an html element, then: if the head element
- pointer is null, switch the insertion mode to "before head",
- otherwise, switch the insertion mode to "after head". In either
- case, abort these steps. (innerHTML case) */
- } elseif($node->nodeName === 'html') {
+ /* 14. If node is an html element, then: if the head element
+ pointer is null, switch the insertion mode to "before head",
+ otherwise, switch the insertion mode to "after head". In either
+ case, abort these steps. (innerHTML case) */
+ } elseif ($node->nodeName === 'html') {
$this->mode = ($this->head_pointer === null)
? self::BEFOR_HEAD
: self::AFTER_HEAD;
break;
- /* 15. If last is true, then set the insertion mode to "in body"
- and abort these steps. (innerHTML case) */
- } elseif($last) {
+ /* 15. If last is true, then set the insertion mode to "in body"
+ and abort these steps. (innerHTML case) */
+ } elseif ($last) {
$this->mode = self::IN_BODY;
break;
}
}
}
- private function closeCell() {
+ private function closeCell()
+ {
/* If the stack of open elements has a td or th element in table scope,
then act as if an end tag token with that tag name had been seen. */
- foreach(array('td', 'th') as $cell) {
- if($this->elementInScope($cell, true)) {
- $this->inCell(array(
- 'name' => $cell,
- 'type' => HTML5::ENDTAG
- ));
+ foreach (array('td', 'th') as $cell) {
+ if ($this->elementInScope($cell, true)) {
+ $this->inCell(
+ array(
+ 'name' => $cell,
+ 'type' => HTML5::ENDTAG
+ )
+ );
break;
}
}
}
- public function save() {
+ public function save()
+ {
return $this->dom;
}
}
-?>
diff --git a/library/HTMLPurifier/Node.php b/library/HTMLPurifier/Node.php
new file mode 100644
index 000000000..3995fec9f
--- /dev/null
+++ b/library/HTMLPurifier/Node.php
@@ -0,0 +1,49 @@
+data = $data;
+ $this->line = $line;
+ $this->col = $col;
+ }
+
+ public function toTokenPair() {
+ return array(new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col), null);
+ }
+}
diff --git a/library/HTMLPurifier/Node/Element.php b/library/HTMLPurifier/Node/Element.php
new file mode 100644
index 000000000..6cbf56dad
--- /dev/null
+++ b/library/HTMLPurifier/Node/Element.php
@@ -0,0 +1,59 @@
+ form or the form, i.e.
+ * is it a pair of start/end tokens or an empty token.
+ * @bool
+ */
+ public $empty = false;
+
+ public $endCol = null, $endLine = null, $endArmor = array();
+
+ public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) {
+ $this->name = $name;
+ $this->attr = $attr;
+ $this->line = $line;
+ $this->col = $col;
+ $this->armor = $armor;
+ }
+
+ public function toTokenPair() {
+ // XXX inefficiency here, normalization is not necessary
+ if ($this->empty) {
+ return array(new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor), null);
+ } else {
+ $start = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor);
+ $end = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor);
+ //$end->start = $start;
+ return array($start, $end);
+ }
+ }
+}
+
diff --git a/library/HTMLPurifier/Node/Text.php b/library/HTMLPurifier/Node/Text.php
new file mode 100644
index 000000000..aec916647
--- /dev/null
+++ b/library/HTMLPurifier/Node/Text.php
@@ -0,0 +1,54 @@
+data = $data;
+ $this->is_whitespace = $is_whitespace;
+ $this->line = $line;
+ $this->col = $col;
+ }
+
+ public function toTokenPair() {
+ return array(new HTMLPurifier_Token_Text($this->data, $this->line, $this->col), null);
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/PercentEncoder.php b/library/HTMLPurifier/PercentEncoder.php
index a43c44f4c..18c8bbb00 100644
--- a/library/HTMLPurifier/PercentEncoder.php
+++ b/library/HTMLPurifier/PercentEncoder.php
@@ -13,17 +13,26 @@ class HTMLPurifier_PercentEncoder
/**
* Reserved characters to preserve when using encode().
+ * @type array
*/
protected $preserve = array();
/**
* String of characters that should be preserved while using encode().
+ * @param bool $preserve
*/
- public function __construct($preserve = false) {
+ public function __construct($preserve = false)
+ {
// unreserved letters, ought to const-ify
- for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
- for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
- for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
+ for ($i = 48; $i <= 57; $i++) { // digits
+ $this->preserve[$i] = true;
+ }
+ for ($i = 65; $i <= 90; $i++) { // upper-case
+ $this->preserve[$i] = true;
+ }
+ for ($i = 97; $i <= 122; $i++) { // lower-case
+ $this->preserve[$i] = true;
+ }
$this->preserve[45] = true; // Dash -
$this->preserve[46] = true; // Period .
$this->preserve[95] = true; // Underscore _
@@ -44,13 +53,14 @@ class HTMLPurifier_PercentEncoder
* Assumes that the string has already been normalized, making any
* and all percent escape sequences valid. Percents will not be
* re-escaped, regardless of their status in $preserve
- * @param $string String to be encoded
- * @return Encoded string.
+ * @param string $string String to be encoded
+ * @return string Encoded string.
*/
- public function encode($string) {
+ public function encode($string)
+ {
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
- if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
+ if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) {
$ret .= '%' . sprintf('%02X', $int);
} else {
$ret .= $string[$i];
@@ -64,10 +74,14 @@ class HTMLPurifier_PercentEncoder
* @warning This function is affected by $preserve, even though the
* usual desired behavior is for this not to preserve those
* characters. Be careful when reusing instances of PercentEncoder!
- * @param $string String to normalize
+ * @param string $string String to normalize
+ * @return string
*/
- public function normalize($string) {
- if ($string == '') return '';
+ public function normalize($string)
+ {
+ if ($string == '') {
+ return '';
+ }
$parts = explode('%', $string);
$ret = array_shift($parts);
foreach ($parts as $part) {
@@ -92,7 +106,6 @@ class HTMLPurifier_PercentEncoder
}
return $ret;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Printer.php b/library/HTMLPurifier/Printer.php
index e7eb82e83..549e4cea1 100644
--- a/library/HTMLPurifier/Printer.php
+++ b/library/HTMLPurifier/Printer.php
@@ -7,25 +7,30 @@ class HTMLPurifier_Printer
{
/**
- * Instance of HTMLPurifier_Generator for HTML generation convenience funcs
+ * For HTML generation convenience funcs.
+ * @type HTMLPurifier_Generator
*/
protected $generator;
/**
- * Instance of HTMLPurifier_Config, for easy access
+ * For easy access.
+ * @type HTMLPurifier_Config
*/
protected $config;
/**
* Initialize $generator.
*/
- public function __construct() {
+ public function __construct()
+ {
}
/**
* Give generator necessary configuration if possible
+ * @param HTMLPurifier_Config $config
*/
- public function prepareGenerator($config) {
+ public function prepareGenerator($config)
+ {
$all = $config->getAll();
$context = new HTMLPurifier_Context();
$this->generator = new HTMLPurifier_Generator($config, $context);
@@ -39,45 +44,62 @@ class HTMLPurifier_Printer
/**
* Returns a start tag
- * @param $tag Tag name
- * @param $attr Attribute array
+ * @param string $tag Tag name
+ * @param array $attr Attribute array
+ * @return string
*/
- protected function start($tag, $attr = array()) {
+ protected function start($tag, $attr = array())
+ {
return $this->generator->generateFromToken(
- new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
- );
+ new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
+ );
}
/**
- * Returns an end teg
- * @param $tag Tag name
+ * Returns an end tag
+ * @param string $tag Tag name
+ * @return string
*/
- protected function end($tag) {
+ protected function end($tag)
+ {
return $this->generator->generateFromToken(
- new HTMLPurifier_Token_End($tag)
- );
+ new HTMLPurifier_Token_End($tag)
+ );
}
/**
* Prints a complete element with content inside
- * @param $tag Tag name
- * @param $contents Element contents
- * @param $attr Tag attributes
- * @param $escape Bool whether or not to escape contents
+ * @param string $tag Tag name
+ * @param string $contents Element contents
+ * @param array $attr Tag attributes
+ * @param bool $escape whether or not to escape contents
+ * @return string
*/
- protected function element($tag, $contents, $attr = array(), $escape = true) {
+ protected function element($tag, $contents, $attr = array(), $escape = true)
+ {
return $this->start($tag, $attr) .
- ($escape ? $this->escape($contents) : $contents) .
- $this->end($tag);
+ ($escape ? $this->escape($contents) : $contents) .
+ $this->end($tag);
}
- protected function elementEmpty($tag, $attr = array()) {
+ /**
+ * @param string $tag
+ * @param array $attr
+ * @return string
+ */
+ protected function elementEmpty($tag, $attr = array())
+ {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Empty($tag, $attr)
);
}
- protected function text($text) {
+ /**
+ * @param string $text
+ * @return string
+ */
+ protected function text($text)
+ {
return $this->generator->generateFromToken(
new HTMLPurifier_Token_Text($text)
);
@@ -85,24 +107,29 @@ class HTMLPurifier_Printer
/**
* Prints a simple key/value row in a table.
- * @param $name Key
- * @param $value Value
+ * @param string $name Key
+ * @param mixed $value Value
+ * @return string
*/
- protected function row($name, $value) {
- if (is_bool($value)) $value = $value ? 'On' : 'Off';
+ protected function row($name, $value)
+ {
+ if (is_bool($value)) {
+ $value = $value ? 'On' : 'Off';
+ }
return
$this->start('tr') . "\n" .
- $this->element('th', $name) . "\n" .
- $this->element('td', $value) . "\n" .
- $this->end('tr')
- ;
+ $this->element('th', $name) . "\n" .
+ $this->element('td', $value) . "\n" .
+ $this->end('tr');
}
/**
* Escapes a string for HTML output.
- * @param $string String to escape
+ * @param string $string String to escape
+ * @return string
*/
- protected function escape($string) {
+ protected function escape($string)
+ {
$string = HTMLPurifier_Encoder::cleanUTF8($string);
$string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
return $string;
@@ -110,32 +137,46 @@ class HTMLPurifier_Printer
/**
* Takes a list of strings and turns them into a single list
- * @param $array List of strings
- * @param $polite Bool whether or not to add an end before the last
+ * @param string[] $array List of strings
+ * @param bool $polite Bool whether or not to add an end before the last
+ * @return string
*/
- protected function listify($array, $polite = false) {
- if (empty($array)) return 'None';
+ protected function listify($array, $polite = false)
+ {
+ if (empty($array)) {
+ return 'None';
+ }
$ret = '';
$i = count($array);
foreach ($array as $value) {
$i--;
$ret .= $value;
- if ($i > 0 && !($polite && $i == 1)) $ret .= ', ';
- if ($polite && $i == 1) $ret .= 'and ';
+ if ($i > 0 && !($polite && $i == 1)) {
+ $ret .= ', ';
+ }
+ if ($polite && $i == 1) {
+ $ret .= 'and ';
+ }
}
return $ret;
}
/**
* Retrieves the class of an object without prefixes, as well as metadata
- * @param $obj Object to determine class of
- * @param $prefix Further prefix to remove
+ * @param object $obj Object to determine class of
+ * @param string $sec_prefix Further prefix to remove
+ * @return string
*/
- protected function getClass($obj, $sec_prefix = '') {
+ protected function getClass($obj, $sec_prefix = '')
+ {
static $five = null;
- if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
+ if ($five === null) {
+ $five = version_compare(PHP_VERSION, '5', '>=');
+ }
$prefix = 'HTMLPurifier_' . $sec_prefix;
- if (!$five) $prefix = strtolower($prefix);
+ if (!$five) {
+ $prefix = strtolower($prefix);
+ }
$class = str_replace($prefix, '', get_class($obj));
$lclass = strtolower($class);
$class .= '(';
@@ -164,13 +205,14 @@ class HTMLPurifier_Printer
break;
case 'css_importantdecorator':
$class .= $this->getClass($obj->def, $sec_prefix);
- if ($obj->allow) $class .= ', !important';
+ if ($obj->allow) {
+ $class .= ', !important';
+ }
break;
}
$class .= ')';
return $class;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Printer/CSSDefinition.php b/library/HTMLPurifier/Printer/CSSDefinition.php
index 81f986590..29505fe12 100644
--- a/library/HTMLPurifier/Printer/CSSDefinition.php
+++ b/library/HTMLPurifier/Printer/CSSDefinition.php
@@ -2,10 +2,17 @@
class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer
{
-
+ /**
+ * @type HTMLPurifier_CSSDefinition
+ */
protected $def;
- public function render($config) {
+ /**
+ * @param HTMLPurifier_Config $config
+ * @return string
+ */
+ public function render($config)
+ {
$this->def = $config->getCSSDefinition();
$ret = '';
@@ -32,7 +39,6 @@ class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer
return $ret;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Printer/ConfigForm.php b/library/HTMLPurifier/Printer/ConfigForm.php
index 02aa65689..36100ce73 100644
--- a/library/HTMLPurifier/Printer/ConfigForm.php
+++ b/library/HTMLPurifier/Printer/ConfigForm.php
@@ -7,17 +7,20 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
{
/**
- * Printers for specific fields
+ * Printers for specific fields.
+ * @type HTMLPurifier_Printer[]
*/
protected $fields = array();
/**
- * Documentation URL, can have fragment tagged on end
+ * Documentation URL, can have fragment tagged on end.
+ * @type string
*/
protected $docURL;
/**
- * Name of form element to stuff config in
+ * Name of form element to stuff config in.
+ * @type string
*/
protected $name;
@@ -25,24 +28,27 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
* Whether or not to compress directive names, clipping them off
* after a certain amount of letters. False to disable or integer letters
* before clipping.
+ * @type bool
*/
protected $compress = false;
/**
- * @param $name Form element name for directives to be stuffed into
- * @param $doc_url String documentation URL, will have fragment tagged on
- * @param $compress Integer max length before compressing a directive name, set to false to turn off
+ * @param string $name Form element name for directives to be stuffed into
+ * @param string $doc_url String documentation URL, will have fragment tagged on
+ * @param bool $compress Integer max length before compressing a directive name, set to false to turn off
*/
public function __construct(
- $name, $doc_url = null, $compress = false
+ $name,
+ $doc_url = null,
+ $compress = false
) {
parent::__construct();
$this->docURL = $doc_url;
- $this->name = $name;
+ $this->name = $name;
$this->compress = $compress;
// initialize sub-printers
- $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default();
- $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool();
+ $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default();
+ $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool();
}
/**
@@ -50,32 +56,42 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
* @param $cols Integer columns of textarea, null to use default
* @param $rows Integer rows of textarea, null to use default
*/
- public function setTextareaDimensions($cols = null, $rows = null) {
- if ($cols) $this->fields['default']->cols = $cols;
- if ($rows) $this->fields['default']->rows = $rows;
+ public function setTextareaDimensions($cols = null, $rows = null)
+ {
+ if ($cols) {
+ $this->fields['default']->cols = $cols;
+ }
+ if ($rows) {
+ $this->fields['default']->rows = $rows;
+ }
}
/**
* Retrieves styling, in case it is not accessible by webserver
*/
- public static function getCSS() {
+ public static function getCSS()
+ {
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
}
/**
* Retrieves JavaScript, in case it is not accessible by webserver
*/
- public static function getJavaScript() {
+ public static function getJavaScript()
+ {
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
}
/**
* Returns HTML output for a configuration form
- * @param $config Configuration object of current form state, or an array
+ * @param HTMLPurifier_Config|array $config Configuration object of current form state, or an array
* where [0] has an HTML namespace and [1] is being rendered.
- * @param $allowed Optional namespace(s) and directives to restrict form to.
+ * @param array|bool $allowed Optional namespace(s) and directives to restrict form to.
+ * @param bool $render_controls
+ * @return string
*/
- public function render($config, $allowed = true, $render_controls = true) {
+ public function render($config, $allowed = true, $render_controls = true)
+ {
if (is_array($config) && isset($config[0])) {
$gen_config = $config[0];
$config = $config[1];
@@ -91,29 +107,29 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
$all = array();
foreach ($allowed as $key) {
list($ns, $directive) = $key;
- $all[$ns][$directive] = $config->get($ns .'.'. $directive);
+ $all[$ns][$directive] = $config->get($ns . '.' . $directive);
}
$ret = '';
$ret .= $this->start('table', array('class' => 'hp-config'));
$ret .= $this->start('thead');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive'));
- $ret .= $this->element('th', 'Value', array('class' => 'hp-value'));
+ $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive'));
+ $ret .= $this->element('th', 'Value', array('class' => 'hp-value'));
$ret .= $this->end('tr');
$ret .= $this->end('thead');
foreach ($all as $ns => $directives) {
$ret .= $this->renderNamespace($ns, $directives);
}
if ($render_controls) {
- $ret .= $this->start('tbody');
- $ret .= $this->start('tr');
- $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls'));
- $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit'));
- $ret .= '[
Reset]';
- $ret .= $this->end('td');
- $ret .= $this->end('tr');
- $ret .= $this->end('tbody');
+ $ret .= $this->start('tbody');
+ $ret .= $this->start('tr');
+ $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls'));
+ $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit'));
+ $ret .= '[
Reset]';
+ $ret .= $this->end('td');
+ $ret .= $this->end('tr');
+ $ret .= $this->end('tbody');
}
$ret .= $this->end('table');
return $ret;
@@ -122,13 +138,15 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
/**
* Renders a single namespace
* @param $ns String namespace name
- * @param $directive Associative array of directives to values
+ * @param array $directives array of directives to values
+ * @return string
*/
- protected function renderNamespace($ns, $directives) {
+ protected function renderNamespace($ns, $directives)
+ {
$ret = '';
$ret .= $this->start('tbody', array('class' => 'namespace'));
$ret .= $this->start('tr');
- $ret .= $this->element('th', $ns, array('colspan' => 2));
+ $ret .= $this->element('th', $ns, array('colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->end('tbody');
$ret .= $this->start('tbody');
@@ -139,40 +157,44 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
$url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL);
$ret .= $this->start('a', array('href' => $url));
}
- $attr = array('for' => "{$this->name}:$ns.$directive");
+ $attr = array('for' => "{$this->name}:$ns.$directive");
- // crop directive name if it's too long
- if (!$this->compress || (strlen($directive) < $this->compress)) {
- $directive_disp = $directive;
- } else {
- $directive_disp = substr($directive, 0, $this->compress - 2) . '...';
- $attr['title'] = $directive;
- }
+ // crop directive name if it's too long
+ if (!$this->compress || (strlen($directive) < $this->compress)) {
+ $directive_disp = $directive;
+ } else {
+ $directive_disp = substr($directive, 0, $this->compress - 2) . '...';
+ $attr['title'] = $directive;
+ }
- $ret .= $this->element(
- 'label',
- $directive_disp,
- // component printers must create an element with this id
- $attr
- );
- if ($this->docURL) $ret .= $this->end('a');
+ $ret .= $this->element(
+ 'label',
+ $directive_disp,
+ // component printers must create an element with this id
+ $attr
+ );
+ if ($this->docURL) {
+ $ret .= $this->end('a');
+ }
$ret .= $this->end('th');
$ret .= $this->start('td');
- $def = $this->config->def->info["$ns.$directive"];
- if (is_int($def)) {
- $allow_null = $def < 0;
- $type = abs($def);
- } else {
- $type = $def->type;
- $allow_null = isset($def->allow_null);
- }
- if (!isset($this->fields[$type])) $type = 0; // default
- $type_obj = $this->fields[$type];
- if ($allow_null) {
- $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj);
- }
- $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config));
+ $def = $this->config->def->info["$ns.$directive"];
+ if (is_int($def)) {
+ $allow_null = $def < 0;
+ $type = abs($def);
+ } else {
+ $type = $def->type;
+ $allow_null = isset($def->allow_null);
+ }
+ if (!isset($this->fields[$type])) {
+ $type = 0;
+ } // default
+ $type_obj = $this->fields[$type];
+ if ($allow_null) {
+ $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj);
+ }
+ $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config));
$ret .= $this->end('td');
$ret .= $this->end('tr');
}
@@ -185,19 +207,33 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
/**
* Printer decorator for directives that accept null
*/
-class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer {
+class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer
+{
/**
* Printer being decorated
+ * @type HTMLPurifier_Printer
*/
protected $obj;
+
/**
- * @param $obj Printer to decorate
+ * @param HTMLPurifier_Printer $obj Printer to decorate
*/
- public function __construct($obj) {
+ public function __construct($obj)
+ {
parent::__construct();
$this->obj = $obj;
}
- public function render($ns, $directive, $value, $name, $config) {
+
+ /**
+ * @param string $ns
+ * @param string $directive
+ * @param string $value
+ * @param string $name
+ * @param HTMLPurifier_Config|array $config
+ * @return string
+ */
+ public function render($ns, $directive, $value, $name, $config)
+ {
if (is_array($config) && isset($config[0])) {
$gen_config = $config[0];
$config = $config[1];
@@ -215,15 +251,19 @@ class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer
'type' => 'checkbox',
'value' => '1',
'class' => 'null-toggle',
- 'name' => "$name"."[Null_$ns.$directive]",
+ 'name' => "$name" . "[Null_$ns.$directive]",
'id' => "$name:Null_$ns.$directive",
'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!!
);
if ($this->obj instanceof HTMLPurifier_Printer_ConfigForm_bool) {
// modify inline javascript slightly
- $attr['onclick'] = "toggleWriteability('$name:Yes_$ns.$directive',checked);toggleWriteability('$name:No_$ns.$directive',checked)";
+ $attr['onclick'] =
+ "toggleWriteability('$name:Yes_$ns.$directive',checked);" .
+ "toggleWriteability('$name:No_$ns.$directive',checked)";
+ }
+ if ($value === null) {
+ $attr['checked'] = 'checked';
}
- if ($value === null) $attr['checked'] = 'checked';
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->text(' or ');
$ret .= $this->elementEmpty('br');
@@ -235,10 +275,28 @@ class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer
/**
* Swiss-army knife configuration form field printer
*/
-class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
+class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer
+{
+ /**
+ * @type int
+ */
public $cols = 18;
+
+ /**
+ * @type int
+ */
public $rows = 5;
- public function render($ns, $directive, $value, $name, $config) {
+
+ /**
+ * @param string $ns
+ * @param string $directive
+ * @param string $value
+ * @param string $name
+ * @param HTMLPurifier_Config|array $config
+ * @return string
+ */
+ public function render($ns, $directive, $value, $name, $config)
+ {
if (is_array($config) && isset($config[0])) {
$gen_config = $config[0];
$config = $config[1];
@@ -262,6 +320,7 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
foreach ($array as $val => $b) {
$value[] = $val;
}
+ //TODO does this need a break?
case HTMLPurifier_VarParser::ALIST:
$value = implode(PHP_EOL, $value);
break;
@@ -281,25 +340,27 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
$value = serialize($value);
}
$attr = array(
- 'name' => "$name"."[$ns.$directive]",
+ 'name' => "$name" . "[$ns.$directive]",
'id' => "$name:$ns.$directive"
);
- if ($value === null) $attr['disabled'] = 'disabled';
+ if ($value === null) {
+ $attr['disabled'] = 'disabled';
+ }
if (isset($def->allowed)) {
$ret .= $this->start('select', $attr);
foreach ($def->allowed as $val => $b) {
$attr = array();
- if ($value == $val) $attr['selected'] = 'selected';
+ if ($value == $val) {
+ $attr['selected'] = 'selected';
+ }
$ret .= $this->element('option', $val, $attr);
}
$ret .= $this->end('select');
- } elseif (
- $type === HTMLPurifier_VarParser::TEXT ||
- $type === HTMLPurifier_VarParser::ITEXT ||
- $type === HTMLPurifier_VarParser::ALIST ||
- $type === HTMLPurifier_VarParser::HASH ||
- $type === HTMLPurifier_VarParser::LOOKUP
- ) {
+ } elseif ($type === HTMLPurifier_VarParser::TEXT ||
+ $type === HTMLPurifier_VarParser::ITEXT ||
+ $type === HTMLPurifier_VarParser::ALIST ||
+ $type === HTMLPurifier_VarParser::HASH ||
+ $type === HTMLPurifier_VarParser::LOOKUP) {
$attr['cols'] = $this->cols;
$attr['rows'] = $this->rows;
$ret .= $this->start('textarea', $attr);
@@ -317,8 +378,18 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
/**
* Bool form field printer
*/
-class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer {
- public function render($ns, $directive, $value, $name, $config) {
+class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer
+{
+ /**
+ * @param string $ns
+ * @param string $directive
+ * @param string $value
+ * @param string $name
+ * @param HTMLPurifier_Config|array $config
+ * @return string
+ */
+ public function render($ns, $directive, $value, $name, $config)
+ {
if (is_array($config) && isset($config[0])) {
$gen_config = $config[0];
$config = $config[1];
@@ -336,12 +407,16 @@ class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer {
$attr = array(
'type' => 'radio',
- 'name' => "$name"."[$ns.$directive]",
+ 'name' => "$name" . "[$ns.$directive]",
'id' => "$name:Yes_$ns.$directive",
'value' => '1'
);
- if ($value === true) $attr['checked'] = 'checked';
- if ($value === null) $attr['disabled'] = 'disabled';
+ if ($value === true) {
+ $attr['checked'] = 'checked';
+ }
+ if ($value === null) {
+ $attr['disabled'] = 'disabled';
+ }
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->start('label', array('for' => "$name:No_$ns.$directive"));
@@ -351,12 +426,16 @@ class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer {
$attr = array(
'type' => 'radio',
- 'name' => "$name"."[$ns.$directive]",
+ 'name' => "$name" . "[$ns.$directive]",
'id' => "$name:No_$ns.$directive",
'value' => '0'
);
- if ($value === false) $attr['checked'] = 'checked';
- if ($value === null) $attr['disabled'] = 'disabled';
+ if ($value === false) {
+ $attr['checked'] = 'checked';
+ }
+ if ($value === null) {
+ $attr['disabled'] = 'disabled';
+ }
$ret .= $this->elementEmpty('input', $attr);
$ret .= $this->end('div');
diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php
index 8a8f126b8..5f2f2f8a7 100644
--- a/library/HTMLPurifier/Printer/HTMLDefinition.php
+++ b/library/HTMLPurifier/Printer/HTMLDefinition.php
@@ -4,11 +4,16 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{
/**
- * Instance of HTMLPurifier_HTMLDefinition, for easy access
+ * @type HTMLPurifier_HTMLDefinition, for easy access
*/
protected $def;
- public function render($config) {
+ /**
+ * @param HTMLPurifier_Config $config
+ * @return string
+ */
+ public function render($config)
+ {
$ret = '';
$this->config =& $config;
@@ -28,8 +33,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Renders the Doctype table
+ * @return string
*/
- protected function renderDoctype() {
+ protected function renderDoctype()
+ {
$doctype = $this->def->doctype;
$ret = '';
$ret .= $this->start('table');
@@ -45,8 +52,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Renders environment table, which is miscellaneous info
+ * @return string
*/
- protected function renderEnvironment() {
+ protected function renderEnvironment()
+ {
$def = $this->def;
$ret = '';
@@ -59,28 +68,28 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->row('Block wrap name', $def->info_block_wrapper);
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Global attributes');
- $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
+ $ret .= $this->element('th', 'Global attributes');
+ $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr), null, 0);
$ret .= $this->end('tr');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Tag transforms');
- $list = array();
- foreach ($def->info_tag_transform as $old => $new) {
- $new = $this->getClass($new, 'TagTransform_');
- $list[] = "<$old> with $new";
- }
- $ret .= $this->element('td', $this->listify($list));
+ $ret .= $this->element('th', 'Tag transforms');
+ $list = array();
+ foreach ($def->info_tag_transform as $old => $new) {
+ $new = $this->getClass($new, 'TagTransform_');
+ $list[] = "<$old> with $new";
+ }
+ $ret .= $this->element('td', $this->listify($list));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Pre-AttrTransform');
- $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
+ $ret .= $this->element('th', 'Pre-AttrTransform');
+ $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Post-AttrTransform');
- $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
+ $ret .= $this->element('th', 'Post-AttrTransform');
+ $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
$ret .= $this->end('tr');
$ret .= $this->end('table');
@@ -89,8 +98,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Renders the Content Sets table
+ * @return string
*/
- protected function renderContentSets() {
+ protected function renderContentSets()
+ {
$ret = '';
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Content Sets');
@@ -106,8 +117,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Renders the Elements ($info) table
+ * @return string
*/
- protected function renderInfo() {
+ protected function renderInfo()
+ {
$ret = '';
$ret .= $this->start('table');
$ret .= $this->element('caption', 'Elements ($info)');
@@ -118,39 +131,39 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
$ret .= $this->end('tr');
foreach ($this->def->info as $name => $def) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
+ $ret .= $this->element('th', "<$name>", array('class' => 'heavy', 'colspan' => 2));
$ret .= $this->end('tr');
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Inline content');
- $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
+ $ret .= $this->element('th', 'Inline content');
+ $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No');
$ret .= $this->end('tr');
if (!empty($def->excludes)) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Excludes');
- $ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
+ $ret .= $this->element('th', 'Excludes');
+ $ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
$ret .= $this->end('tr');
}
if (!empty($def->attr_transform_pre)) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Pre-AttrTransform');
- $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
+ $ret .= $this->element('th', 'Pre-AttrTransform');
+ $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
$ret .= $this->end('tr');
}
if (!empty($def->attr_transform_post)) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Post-AttrTransform');
- $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
+ $ret .= $this->element('th', 'Post-AttrTransform');
+ $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
$ret .= $this->end('tr');
}
if (!empty($def->auto_close)) {
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Auto closed by');
- $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
+ $ret .= $this->element('th', 'Auto closed by');
+ $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
$ret .= $this->end('tr');
}
$ret .= $this->start('tr');
- $ret .= $this->element('th', 'Allowed attributes');
- $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0);
+ $ret .= $this->element('th', 'Allowed attributes');
+ $ret .= $this->element('td', $this->listifyAttr($def->attr), array(), 0);
$ret .= $this->end('tr');
if (!empty($def->required_attr)) {
@@ -165,64 +178,94 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Renders a row describing the allowed children of an element
- * @param $def HTMLPurifier_ChildDef of pertinent element
+ * @param HTMLPurifier_ChildDef $def HTMLPurifier_ChildDef of pertinent element
+ * @return string
*/
- protected function renderChildren($def) {
+ protected function renderChildren($def)
+ {
$context = new HTMLPurifier_Context();
$ret = '';
$ret .= $this->start('tr');
+ $elements = array();
+ $attr = array();
+ if (isset($def->elements)) {
+ if ($def->type == 'strictblockquote') {
+ $def->validateChildren(array(), $this->config, $context);
+ }
+ $elements = $def->elements;
+ }
+ if ($def->type == 'chameleon') {
+ $attr['rowspan'] = 2;
+ } elseif ($def->type == 'empty') {
$elements = array();
- $attr = array();
- if (isset($def->elements)) {
- if ($def->type == 'strictblockquote') {
- $def->validateChildren(array(), $this->config, $context);
- }
- $elements = $def->elements;
- }
- if ($def->type == 'chameleon') {
- $attr['rowspan'] = 2;
- } elseif ($def->type == 'empty') {
- $elements = array();
- } elseif ($def->type == 'table') {
- $elements = array_flip(array('col', 'caption', 'colgroup', 'thead',
- 'tfoot', 'tbody', 'tr'));
- }
- $ret .= $this->element('th', 'Allowed children', $attr);
+ } elseif ($def->type == 'table') {
+ $elements = array_flip(
+ array(
+ 'col',
+ 'caption',
+ 'colgroup',
+ 'thead',
+ 'tfoot',
+ 'tbody',
+ 'tr'
+ )
+ );
+ }
+ $ret .= $this->element('th', 'Allowed children', $attr);
- if ($def->type == 'chameleon') {
+ if ($def->type == 'chameleon') {
- $ret .= $this->element('td',
- '
Block: ' .
- $this->escape($this->listifyTagLookup($def->block->elements)),0,0);
- $ret .= $this->end('tr');
- $ret .= $this->start('tr');
- $ret .= $this->element('td',
- '
Inline: ' .
- $this->escape($this->listifyTagLookup($def->inline->elements)),0,0);
+ $ret .= $this->element(
+ 'td',
+ '
Block: ' .
+ $this->escape($this->listifyTagLookup($def->block->elements)),
+ null,
+ 0
+ );
+ $ret .= $this->end('tr');
+ $ret .= $this->start('tr');
+ $ret .= $this->element(
+ 'td',
+ '
Inline: ' .
+ $this->escape($this->listifyTagLookup($def->inline->elements)),
+ null,
+ 0
+ );
- } elseif ($def->type == 'custom') {
+ } elseif ($def->type == 'custom') {
- $ret .= $this->element('td', '
'.ucfirst($def->type).': ' .
- $def->dtd_regex);
+ $ret .= $this->element(
+ 'td',
+ '
' . ucfirst($def->type) . ': ' .
+ $def->dtd_regex
+ );
- } else {
- $ret .= $this->element('td',
- '
'.ucfirst($def->type).': ' .
- $this->escape($this->listifyTagLookup($elements)),0,0);
- }
+ } else {
+ $ret .= $this->element(
+ 'td',
+ '
' . ucfirst($def->type) . ': ' .
+ $this->escape($this->listifyTagLookup($elements)),
+ null,
+ 0
+ );
+ }
$ret .= $this->end('tr');
return $ret;
}
/**
* Listifies a tag lookup table.
- * @param $array Tag lookup array in form of array('tagname' => true)
+ * @param array $array Tag lookup array in form of array('tagname' => true)
+ * @return string
*/
- protected function listifyTagLookup($array) {
+ protected function listifyTagLookup($array)
+ {
ksort($array);
$list = array();
foreach ($array as $name => $discard) {
- if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
+ if ($name !== '#PCDATA' && !isset($this->def->info[$name])) {
+ continue;
+ }
$list[] = $name;
}
return $this->listify($list);
@@ -230,13 +273,15 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Listifies a list of objects by retrieving class names and internal state
- * @param $array List of objects
+ * @param array $array List of objects
+ * @return string
* @todo Also add information about internal state
*/
- protected function listifyObjectList($array) {
+ protected function listifyObjectList($array)
+ {
ksort($array);
$list = array();
- foreach ($array as $discard => $obj) {
+ foreach ($array as $obj) {
$list[] = $this->getClass($obj, 'AttrTransform_');
}
return $this->listify($list);
@@ -244,13 +289,17 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Listifies a hash of attributes to AttrDef classes
- * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
+ * @param array $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
+ * @return string
*/
- protected function listifyAttr($array) {
+ protected function listifyAttr($array)
+ {
ksort($array);
$list = array();
foreach ($array as $name => $obj) {
- if ($obj === false) continue;
+ if ($obj === false) {
+ continue;
+ }
$list[] = "$name =
" . $this->getClass($obj, 'AttrDef_') . '';
}
return $this->listify($list);
@@ -258,15 +307,18 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
/**
* Creates a heavy header row
+ * @param string $text
+ * @param int $num
+ * @return string
*/
- protected function heavyHeader($text, $num = 1) {
+ protected function heavyHeader($text, $num = 1)
+ {
$ret = '';
$ret .= $this->start('tr');
$ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy'));
$ret .= $this->end('tr');
return $ret;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/PropertyList.php b/library/HTMLPurifier/PropertyList.php
index 2b99fb7bc..189348fd9 100644
--- a/library/HTMLPurifier/PropertyList.php
+++ b/library/HTMLPurifier/PropertyList.php
@@ -6,61 +6,93 @@
class HTMLPurifier_PropertyList
{
/**
- * Internal data-structure for properties
+ * Internal data-structure for properties.
+ * @type array
*/
protected $data = array();
/**
- * Parent plist
+ * Parent plist.
+ * @type HTMLPurifier_PropertyList
*/
protected $parent;
+ /**
+ * Cache.
+ * @type array
+ */
protected $cache;
- public function __construct($parent = null) {
+ /**
+ * @param HTMLPurifier_PropertyList $parent Parent plist
+ */
+ public function __construct($parent = null)
+ {
$this->parent = $parent;
}
/**
* Recursively retrieves the value for a key
+ * @param string $name
+ * @throws HTMLPurifier_Exception
*/
- public function get($name) {
- if ($this->has($name)) return $this->data[$name];
+ public function get($name)
+ {
+ if ($this->has($name)) {
+ return $this->data[$name];
+ }
// possible performance bottleneck, convert to iterative if necessary
- if ($this->parent) return $this->parent->get($name);
+ if ($this->parent) {
+ return $this->parent->get($name);
+ }
throw new HTMLPurifier_Exception("Key '$name' not found");
}
/**
* Sets the value of a key, for this plist
+ * @param string $name
+ * @param mixed $value
*/
- public function set($name, $value) {
+ public function set($name, $value)
+ {
$this->data[$name] = $value;
}
/**
* Returns true if a given key exists
+ * @param string $name
+ * @return bool
*/
- public function has($name) {
+ public function has($name)
+ {
return array_key_exists($name, $this->data);
}
/**
* Resets a value to the value of it's parent, usually the default. If
* no value is specified, the entire plist is reset.
+ * @param string $name
*/
- public function reset($name = null) {
- if ($name == null) $this->data = array();
- else unset($this->data[$name]);
+ public function reset($name = null)
+ {
+ if ($name == null) {
+ $this->data = array();
+ } else {
+ unset($this->data[$name]);
+ }
}
/**
* Squashes this property list and all of its property lists into a single
* array, and returns the array. This value is cached by default.
- * @param $force If true, ignores the cache and regenerates the array.
+ * @param bool $force If true, ignores the cache and regenerates the array.
+ * @return array
*/
- public function squash($force = false) {
- if ($this->cache !== null && !$force) return $this->cache;
+ public function squash($force = false)
+ {
+ if ($this->cache !== null && !$force) {
+ return $this->cache;
+ }
if ($this->parent) {
return $this->cache = array_merge($this->parent->squash($force), $this->data);
} else {
@@ -70,15 +102,19 @@ class HTMLPurifier_PropertyList
/**
* Returns the parent plist.
+ * @return HTMLPurifier_PropertyList
*/
- public function getParent() {
+ public function getParent()
+ {
return $this->parent;
}
/**
* Sets the parent plist.
+ * @param HTMLPurifier_PropertyList $plist Parent plist
*/
- public function setParent($plist) {
+ public function setParent($plist)
+ {
$this->parent = $plist;
}
}
diff --git a/library/HTMLPurifier/PropertyListIterator.php b/library/HTMLPurifier/PropertyListIterator.php
index 8f250443e..15b330ea3 100644
--- a/library/HTMLPurifier/PropertyListIterator.php
+++ b/library/HTMLPurifier/PropertyListIterator.php
@@ -6,27 +6,37 @@
class HTMLPurifier_PropertyListIterator extends FilterIterator
{
+ /**
+ * @type int
+ */
protected $l;
+ /**
+ * @type string
+ */
protected $filter;
/**
- * @param $data Array of data to iterate over
- * @param $filter Optional prefix to only allow values of
+ * @param Iterator $iterator Array of data to iterate over
+ * @param string $filter Optional prefix to only allow values of
*/
- public function __construct(Iterator $iterator, $filter = null) {
+ public function __construct(Iterator $iterator, $filter = null)
+ {
parent::__construct($iterator);
$this->l = strlen($filter);
$this->filter = $filter;
}
- public function accept() {
+ /**
+ * @return bool
+ */
+ public function accept()
+ {
$key = $this->getInnerIterator()->key();
- if( strncmp($key, $this->filter, $this->l) !== 0 ) {
+ if (strncmp($key, $this->filter, $this->l) !== 0) {
return false;
}
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Queue.php b/library/HTMLPurifier/Queue.php
new file mode 100644
index 000000000..f58db9042
--- /dev/null
+++ b/library/HTMLPurifier/Queue.php
@@ -0,0 +1,56 @@
+input = $input;
+ $this->output = array();
+ }
+
+ /**
+ * Shifts an element off the front of the queue.
+ */
+ public function shift() {
+ if (empty($this->output)) {
+ $this->output = array_reverse($this->input);
+ $this->input = array();
+ }
+ if (empty($this->output)) {
+ return NULL;
+ }
+ return array_pop($this->output);
+ }
+
+ /**
+ * Pushes an element onto the front of the queue.
+ */
+ public function push($x) {
+ array_push($this->input, $x);
+ }
+
+ /**
+ * Checks if it's empty.
+ */
+ public function isEmpty() {
+ return empty($this->input) && empty($this->output);
+ }
+}
diff --git a/library/HTMLPurifier/Strategy.php b/library/HTMLPurifier/Strategy.php
index 246286521..e1ff3b72d 100644
--- a/library/HTMLPurifier/Strategy.php
+++ b/library/HTMLPurifier/Strategy.php
@@ -15,12 +15,12 @@ abstract class HTMLPurifier_Strategy
/**
* Executes the strategy on the tokens.
*
- * @param $tokens Array of HTMLPurifier_Token objects to be operated on.
- * @param $config Configuration options
- * @returns Processed array of token objects.
+ * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token objects to be operated on.
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_Token[] Processed array of token objects.
*/
abstract public function execute($tokens, $config, $context);
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/Composite.php b/library/HTMLPurifier/Strategy/Composite.php
index 816490b79..d7d35ce7d 100644
--- a/library/HTMLPurifier/Strategy/Composite.php
+++ b/library/HTMLPurifier/Strategy/Composite.php
@@ -8,18 +8,23 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
/**
* List of strategies to run tokens through.
+ * @type HTMLPurifier_Strategy[]
*/
protected $strategies = array();
- abstract public function __construct();
-
- public function execute($tokens, $config, $context) {
+ /**
+ * @param HTMLPurifier_Token[] $tokens
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_Token[]
+ */
+ public function execute($tokens, $config, $context)
+ {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context);
}
return $tokens;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/Core.php b/library/HTMLPurifier/Strategy/Core.php
index d90e15860..4414c17d6 100644
--- a/library/HTMLPurifier/Strategy/Core.php
+++ b/library/HTMLPurifier/Strategy/Core.php
@@ -5,14 +5,13 @@
*/
class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
{
-
- public function __construct() {
+ public function __construct()
+ {
$this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements();
$this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed();
$this->strategies[] = new HTMLPurifier_Strategy_FixNesting();
$this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes();
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index f81802391..6fa673db9 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -10,12 +10,12 @@
* document type definitions, such as the chameleon nature of ins/del
* tags and global child exclusions.
*
- * The first major objective of this strategy is to iterate through all the
- * nodes (not tokens) of the list of tokens and determine whether or not
- * their children conform to the element's definition. If they do not, the
- * child definition may optionally supply an amended list of elements that
- * is valid or require that the entire node be deleted (and the previous
- * node rescanned).
+ * The first major objective of this strategy is to iterate through all
+ * the nodes and determine whether or not their children conform to the
+ * element's definition. If they do not, the child definition may
+ * optionally supply an amended list of elements that is valid or
+ * require that the entire node be deleted (and the previous node
+ * rescanned).
*
* The second objective is to ensure that explicitly excluded elements of
* an element do not appear in its children. Code that accomplishes this
@@ -25,24 +25,33 @@
* @note Whether or not unrecognized children are silently dropped or
* translated into text depends on the child definitions.
*
- * @todo Enable nodes to be bubbled out of the structure.
+ * @todo Enable nodes to be bubbled out of the structure. This is
+ * easier with our new algorithm.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{
- public function execute($tokens, $config, $context) {
+ /**
+ * @param HTMLPurifier_Token[] $tokens
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array|HTMLPurifier_Token[]
+ */
+ public function execute($tokens, $config, $context)
+ {
+
//####################################################################//
// Pre-processing
+ // O(n) pass to convert to a tree, so that we can efficiently
+ // refer to substrings
+ $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context);
+
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
- // insert implicit "parent" node, will be removed at end.
- // DEFINITION CALL
- $parent_name = $definition->info_parent;
- array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
- $tokens[] = new HTMLPurifier_Token_End($parent_name);
+ $excludes_enabled = !$config->get('Core.DisableExcludes');
// setup the context variable 'IsInline', for chameleon processing
// is 'false' when we are not inline, 'true' when it must always
@@ -57,272 +66,116 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
//####################################################################//
// Loop initialization
- // stack that contains the indexes of all parents,
- // $stack[count($stack)-1] being the current parent
- $stack = array();
-
// stack that contains all elements that are excluded
// it is organized by parent elements, similar to $stack,
// but it is only populated when an element with exclusions is
// processed, i.e. there won't be empty exclusions.
- $exclude_stack = array();
+ $exclude_stack = array($definition->info_parent_def->excludes);
// variable that contains the start token while we are processing
// nodes. This enables error reporting to do its job
- $start_token = false;
- $context->register('CurrentToken', $start_token);
+ $node = $top_node;
+ // dummy token
+ list($token, $d) = $node->toTokenPair();
+ $context->register('CurrentNode', $node);
+ $context->register('CurrentToken', $token);
//####################################################################//
// Loop
- // iterate through all start nodes. Determining the start node
- // is complicated so it has been omitted from the loop construct
- for ($i = 0, $size = count($tokens) ; $i < $size; ) {
+ // We need to implement a post-order traversal iteratively, to
+ // avoid running into stack space limits. This is pretty tricky
+ // to reason about, so we just manually stack-ify the recursive
+ // variant:
+ //
+ // function f($node) {
+ // foreach ($node->children as $child) {
+ // f($child);
+ // }
+ // validate($node);
+ // }
+ //
+ // Thus, we will represent a stack frame as array($node,
+ // $is_inline, stack of children)
+ // e.g. array_reverse($node->children) - already processed
+ // children.
- //################################################################//
- // Gather information on children
+ $parent_def = $definition->info_parent_def;
+ $stack = array(
+ array($top_node,
+ $parent_def->descendants_are_inline,
+ $parent_def->excludes, // exclusions
+ 0)
+ );
- // child token accumulator
- $child_tokens = array();
-
- // scroll to the end of this node, report number, and collect
- // all children
- for ($j = $i, $depth = 0; ; $j++) {
- if ($tokens[$j] instanceof HTMLPurifier_Token_Start) {
- $depth++;
- // skip token assignment on first iteration, this is the
- // token we currently are on
- if ($depth == 1) continue;
- } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) {
- $depth--;
- // skip token assignment on last iteration, this is the
- // end token of the token we're currently on
- if ($depth == 0) break;
+ while (!empty($stack)) {
+ list($node, $is_inline, $excludes, $ix) = array_pop($stack);
+ // recursive call
+ $go = false;
+ $def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name];
+ while (isset($node->children[$ix])) {
+ $child = $node->children[$ix++];
+ if ($child instanceof HTMLPurifier_Node_Element) {
+ $go = true;
+ $stack[] = array($node, $is_inline, $excludes, $ix);
+ $stack[] = array($child,
+ // ToDo: I don't think it matters if it's def or
+ // child_def, but double check this...
+ $is_inline || $def->descendants_are_inline,
+ empty($def->excludes) ? $excludes
+ : array_merge($excludes, $def->excludes),
+ 0);
+ break;
}
- $child_tokens[] = $tokens[$j];
- }
-
- // $i is index of start token
- // $j is index of end token
-
- $start_token = $tokens[$i]; // to make token available via CurrentToken
-
- //################################################################//
- // Gather information on parent
-
- // calculate parent information
- if ($count = count($stack)) {
- $parent_index = $stack[$count-1];
- $parent_name = $tokens[$parent_index]->name;
- if ($parent_index == 0) {
- $parent_def = $definition->info_parent_def;
+ };
+ if ($go) continue;
+ list($token, $d) = $node->toTokenPair();
+ // base case
+ if ($excludes_enabled && isset($excludes[$node->name])) {
+ $node->dead = true;
+ if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
+ } else {
+ // XXX I suppose it would be slightly more efficient to
+ // avoid the allocation here and have children
+ // strategies handle it
+ $children = array();
+ foreach ($node->children as $child) {
+ if (!$child->dead) $children[] = $child;
+ }
+ $result = $def->child->validateChildren($children, $config, $context);
+ if ($result === true) {
+ // nop
+ $node->children = $children;
+ } elseif ($result === false) {
+ $node->dead = true;
+ if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
} else {
- $parent_def = $definition->info[$parent_name];
- }
- } else {
- // processing as if the parent were the "root" node
- // unknown info, it won't be used anyway, in the future,
- // we may want to enforce one element only (this is
- // necessary for HTML Purifier to clean entire documents
- $parent_index = $parent_name = $parent_def = null;
- }
-
- // calculate context
- if ($is_inline === false) {
- // check if conditions make it inline
- if (!empty($parent_def) && $parent_def->descendants_are_inline) {
- $is_inline = $count - 1;
- }
- } else {
- // check if we're out of inline
- if ($count === $is_inline) {
- $is_inline = false;
- }
- }
-
- //################################################################//
- // Determine whether element is explicitly excluded SGML-style
-
- // determine whether or not element is excluded by checking all
- // parent exclusions. The array should not be very large, two
- // elements at most.
- $excluded = false;
- if (!empty($exclude_stack)) {
- foreach ($exclude_stack as $lookup) {
- if (isset($lookup[$tokens[$i]->name])) {
- $excluded = true;
- // no need to continue processing
- break;
+ $node->children = $result;
+ if ($e) {
+ // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators
+ if (empty($result) && !empty($children)) {
+ $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
+ } else if ($result != $children) {
+ $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
+ }
}
}
}
-
- //################################################################//
- // Perform child validation
-
- if ($excluded) {
- // there is an exclusion, remove the entire node
- $result = false;
- $excludes = array(); // not used, but good to initialize anyway
- } else {
- // DEFINITION CALL
- if ($i === 0) {
- // special processing for the first node
- $def = $definition->info_parent_def;
- } else {
- $def = $definition->info[$tokens[$i]->name];
-
- }
-
- if (!empty($def->child)) {
- // have DTD child def validate children
- $result = $def->child->validateChildren(
- $child_tokens, $config, $context);
- } else {
- // weird, no child definition, get rid of everything
- $result = false;
- }
-
- // determine whether or not this element has any exclusions
- $excludes = $def->excludes;
- }
-
- // $result is now a bool or array
-
- //################################################################//
- // Process result by interpreting $result
-
- if ($result === true || $child_tokens === $result) {
- // leave the node as is
-
- // register start token as a parental node start
- $stack[] = $i;
-
- // register exclusions if there are any
- if (!empty($excludes)) $exclude_stack[] = $excludes;
-
- // move cursor to next possible start node
- $i++;
-
- } elseif($result === false) {
- // remove entire node
-
- if ($e) {
- if ($excluded) {
- $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
- } else {
- $e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
- }
- }
-
- // calculate length of inner tokens and current tokens
- $length = $j - $i + 1;
-
- // perform removal
- array_splice($tokens, $i, $length);
-
- // update size
- $size -= $length;
-
- // there is no start token to register,
- // current node is now the next possible start node
- // unless it turns out that we need to do a double-check
-
- // this is a rought heuristic that covers 100% of HTML's
- // cases and 99% of all other cases. A child definition
- // that would be tricked by this would be something like:
- // ( | a b c) where it's all or nothing. Fortunately,
- // our current implementation claims that that case would
- // not allow empty, even if it did
- if (!$parent_def->child->allow_empty) {
- // we need to do a double-check
- $i = $parent_index;
- array_pop($stack);
- }
-
- // PROJECTED OPTIMIZATION: Process all children elements before
- // reprocessing parent node.
-
- } else {
- // replace node with $result
-
- // calculate length of inner tokens
- $length = $j - $i - 1;
-
- if ($e) {
- if (empty($result) && $length) {
- $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
- } else {
- $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
- }
- }
-
- // perform replacement
- array_splice($tokens, $i + 1, $length, $result);
-
- // update size
- $size -= $length;
- $size += count($result);
-
- // register start token as a parental node start
- $stack[] = $i;
-
- // register exclusions if there are any
- if (!empty($excludes)) $exclude_stack[] = $excludes;
-
- // move cursor to next possible start node
- $i++;
-
- }
-
- //################################################################//
- // Scroll to next start node
-
- // We assume, at this point, that $i is the index of the token
- // that is the first possible new start point for a node.
-
- // Test if the token indeed is a start tag, if not, move forward
- // and test again.
- $size = count($tokens);
- while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) {
- if ($tokens[$i] instanceof HTMLPurifier_Token_End) {
- // pop a token index off the stack if we ended a node
- array_pop($stack);
- // pop an exclusion lookup off exclusion stack if
- // we ended node and that node had exclusions
- if ($i == 0 || $i == $size - 1) {
- // use specialized var if it's the super-parent
- $s_excludes = $definition->info_parent_def->excludes;
- } else {
- $s_excludes = $definition->info[$tokens[$i]->name]->excludes;
- }
- if ($s_excludes) {
- array_pop($exclude_stack);
- }
- }
- $i++;
- }
-
}
//####################################################################//
// Post-processing
- // remove implicit parent tokens at the beginning and end
- array_shift($tokens);
- array_pop($tokens);
-
// remove context variables
$context->destroy('IsInline');
+ $context->destroy('CurrentNode');
$context->destroy('CurrentToken');
//####################################################################//
// Return
- return $tokens;
-
+ return HTMLPurifier_Arborize::flatten($node, $config, $context);
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php
index c73658400..e389e0011 100644
--- a/library/HTMLPurifier/Strategy/MakeWellFormed.php
+++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php
@@ -2,66 +2,97 @@
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
+ *
+ * Specification of the armor attributes this strategy uses:
+ *
+ * - MakeWellFormed_TagClosedError: This armor field is used to
+ * suppress tag closed errors for certain tokens [TagClosedSuppress],
+ * in particular, if a tag was generated automatically by HTML
+ * Purifier, we may rely on our infrastructure to close it for us
+ * and shouldn't report an error to the user [TagClosedAuto].
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
/**
* Array stream of tokens being processed.
+ * @type HTMLPurifier_Token[]
*/
protected $tokens;
/**
- * Current index in $tokens.
+ * Current token.
+ * @type HTMLPurifier_Token
*/
- protected $t;
+ protected $token;
+
+ /**
+ * Zipper managing the true state.
+ * @type HTMLPurifier_Zipper
+ */
+ protected $zipper;
/**
* Current nesting of elements.
+ * @type array
*/
protected $stack;
/**
* Injectors active in this stream processing.
+ * @type HTMLPurifier_Injector[]
*/
protected $injectors;
/**
* Current instance of HTMLPurifier_Config.
+ * @type HTMLPurifier_Config
*/
protected $config;
/**
* Current instance of HTMLPurifier_Context.
+ * @type HTMLPurifier_Context
*/
protected $context;
- public function execute($tokens, $config, $context) {
-
+ /**
+ * @param HTMLPurifier_Token[] $tokens
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_Token[]
+ * @throws HTMLPurifier_Exception
+ */
+ public function execute($tokens, $config, $context)
+ {
$definition = $config->getHTMLDefinition();
// local variables
$generator = new HTMLPurifier_Generator($config, $context);
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
+ // used for autoclose early abortion
+ $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
$e = $context->get('ErrorCollector', true);
- $t = false; // token index
$i = false; // injector index
- $token = false; // the current token
- $reprocess = false; // whether or not to reprocess the same token
+ list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
+ if ($token === NULL) {
+ return array();
+ }
+ $reprocess = false; // whether or not to reprocess the same token
$stack = array();
// member variables
- $this->stack =& $stack;
- $this->t =& $t;
- $this->tokens =& $tokens;
- $this->config = $config;
+ $this->stack =& $stack;
+ $this->tokens =& $tokens;
+ $this->token =& $token;
+ $this->zipper =& $zipper;
+ $this->config = $config;
$this->context = $context;
// context variables
$context->register('CurrentNesting', $stack);
- $context->register('InputIndex', $t);
- $context->register('InputTokens', $tokens);
- $context->register('CurrentToken', $token);
+ $context->register('InputZipper', $zipper);
+ $context->register('CurrentToken', $token);
// -- begin INJECTOR --
@@ -73,9 +104,13 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
unset($injectors['Custom']); // special case
foreach ($injectors as $injector => $b) {
// XXX: Fix with a legitimate lookup table of enabled filters
- if (strpos($injector, '.') !== false) continue;
+ if (strpos($injector, '.') !== false) {
+ continue;
+ }
$injector = "HTMLPurifier_Injector_$injector";
- if (!$b) continue;
+ if (!$b) {
+ continue;
+ }
$this->injectors[] = new $injector;
}
foreach ($def_injectors as $injector) {
@@ -83,7 +118,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->injectors[] = $injector;
}
foreach ($custom_injectors as $injector) {
- if (!$injector) continue;
+ if (!$injector) {
+ continue;
+ }
if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector;
@@ -95,14 +132,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// variables for performance reasons
foreach ($this->injectors as $ix => $injector) {
$error = $injector->prepare($config, $context);
- if (!$error) continue;
+ if (!$error) {
+ continue;
+ }
array_splice($this->injectors, $ix, 1); // rm the injector
trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
}
// -- end INJECTOR --
- // a note on punting:
+ // a note on reprocessing:
// In order to reduce code duplication, whenever some code needs
// to make HTML changes in order to make things "correct", the
// new HTML gets sent through the purifier, regardless of its
@@ -111,70 +150,75 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// punt ($reprocess = true; continue;) and it does that for us.
// isset is in loop because $tokens size changes during loop exec
- for (
- $t = 0;
- $t == 0 || isset($tokens[$t - 1]);
- // only increment if we don't need to reprocess
- $reprocess ? $reprocess = false : $t++
- ) {
+ for (;;
+ // only increment if we don't need to reprocess
+ $reprocess ? $reprocess = false : $token = $zipper->next($token)) {
// check for a rewind
- if (is_int($i) && $i >= 0) {
+ if (is_int($i)) {
// possibility: disable rewinding if the current token has a
// rewind set on it already. This would offer protection from
// infinite loop, but might hinder some advanced rewinding.
- $rewind_to = $this->injectors[$i]->getRewind();
- if (is_int($rewind_to) && $rewind_to < $t) {
- if ($rewind_to < 0) $rewind_to = 0;
- while ($t > $rewind_to) {
- $t--;
- $prev = $tokens[$t];
+ $rewind_offset = $this->injectors[$i]->getRewindOffset();
+ if (is_int($rewind_offset)) {
+ for ($j = 0; $j < $rewind_offset; $j++) {
+ if (empty($zipper->front)) break;
+ $token = $zipper->prev($token);
// indicate that other injectors should not process this token,
// but we need to reprocess it
- unset($prev->skip[$i]);
- $prev->rewind = $i;
- if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
- elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
+ unset($token->skip[$i]);
+ $token->rewind = $i;
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ array_pop($this->stack);
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $this->stack[] = $token->start;
+ }
}
}
$i = false;
}
// handle case of document end
- if (!isset($tokens[$t])) {
+ if ($token === NULL) {
// kill processing if stack is empty
- if (empty($this->stack)) break;
+ if (empty($this->stack)) {
+ break;
+ }
// peek
$top_nesting = array_pop($this->stack);
$this->stack[] = $top_nesting;
- // send error
+ // send error [TagClosedSuppress]
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
}
// append, don't splice, since this is the end
- $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);
+ $token = new HTMLPurifier_Token_End($top_nesting->name);
// punt!
$reprocess = true;
continue;
}
- $token = $tokens[$t];
-
- //echo '
'; printTokens($tokens, $t); printTokens($this->stack);
+ //echo '
'; printZipper($zipper, $token);//printTokens($this->stack);
//flush();
// quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) {
if ($token instanceof HTMLPurifier_Token_Text) {
foreach ($this->injectors as $i => $injector) {
- if (isset($token->skip[$i])) continue;
- if ($token->rewind !== null && $token->rewind !== $i) continue;
- $injector->handleText($token);
- $this->processToken($token, $i);
+ if (isset($token->skip[$i])) {
+ continue;
+ }
+ if ($token->rewind !== null && $token->rewind !== $i) {
+ continue;
+ }
+ // XXX fuckup
+ $r = $token;
+ $injector->handleText($r);
+ $token = $this->processToken($r, $i);
$reprocess = true;
break;
}
@@ -193,12 +237,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$ok = false;
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// claims to be a start tag but is empty
- $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
+ $token = new HTMLPurifier_Token_Empty(
+ $token->name,
+ $token->attr,
+ $token->line,
+ $token->col,
+ $token->armor
+ );
$ok = true;
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
- $this->swap(new HTMLPurifier_Token_End($token->name));
- $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
+ // NB: this assignment is required
+ $old_token = $token;
+ $token = new HTMLPurifier_Token_End($token->name);
+ $token = $this->insertBefore(
+ new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
+ );
// punt (since we had to modify the input stream in a non-trivial way)
$reprocess = true;
continue;
@@ -211,56 +265,97 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// ...unless they also have to close their parent
if (!empty($this->stack)) {
+ // Performance note: you might think that it's rather
+ // inefficient, recalculating the autoclose information
+ // for every tag that a token closes (since when we
+ // do an autoclose, we push a new token into the
+ // stream and then /process/ that, before
+ // re-processing this token.) But this is
+ // necessary, because an injector can make an
+ // arbitrary transformations to the autoclosing
+ // tokens we introduce, so things may have changed
+ // in the meantime. Also, doing the inefficient thing is
+ // "easy" to reason about (for certain perverse definitions
+ // of "easy")
+
$parent = array_pop($this->stack);
$this->stack[] = $parent;
+ $parent_def = null;
+ $parent_elements = null;
+ $autoclose = false;
if (isset($definition->info[$parent->name])) {
- $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
- $autoclose = !isset($elements[$token->name]);
- } else {
- $autoclose = false;
+ $parent_def = $definition->info[$parent->name];
+ $parent_elements = $parent_def->child->getAllowedElements($config);
+ $autoclose = !isset($parent_elements[$token->name]);
}
if ($autoclose && $definition->info[$token->name]->wrap) {
- // Check if an element can be wrapped by another
- // element to make it valid in a context (for
+ // Check if an element can be wrapped by another
+ // element to make it valid in a context (for
// example,
needs a - in between)
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$elements = $wrapdef->child->getAllowedElements($config);
- $parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
$newtoken = new HTMLPurifier_Token_Start($wrapname);
- $this->insertBefore($newtoken);
+ $token = $this->insertBefore($newtoken);
$reprocess = true;
continue;
}
}
$carryover = false;
- if ($autoclose && $definition->info[$parent->name]->formatting) {
+ if ($autoclose && $parent_def->formatting) {
$carryover = true;
}
if ($autoclose) {
- // errors need to be updated
- $new_token = new HTMLPurifier_Token_End($parent->name);
- $new_token->start = $parent;
- if ($carryover) {
- $element = clone $parent;
- $element->armor['MakeWellFormed_TagClosedError'] = true;
- $element->carryover = true;
- $this->processToken(array($new_token, $token, $element));
- } else {
- $this->insertBefore($new_token);
- }
- if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
- if (!$carryover) {
- $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
- } else {
- $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
+ // check if this autoclose is doomed to fail
+ // (this rechecks $parent, which his harmless)
+ $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
+ if (!$autoclose_ok) {
+ foreach ($this->stack as $ancestor) {
+ $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
+ if (isset($elements[$token->name])) {
+ $autoclose_ok = true;
+ break;
+ }
+ if ($definition->info[$token->name]->wrap) {
+ $wrapname = $definition->info[$token->name]->wrap;
+ $wrapdef = $definition->info[$wrapname];
+ $wrap_elements = $wrapdef->child->getAllowedElements($config);
+ if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
+ $autoclose_ok = true;
+ break;
+ }
+ }
}
}
+ if ($autoclose_ok) {
+ // errors need to be updated
+ $new_token = new HTMLPurifier_Token_End($parent->name);
+ $new_token->start = $parent;
+ // [TagClosedSuppress]
+ if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
+ if (!$carryover) {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
+ } else {
+ $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
+ }
+ }
+ if ($carryover) {
+ $element = clone $parent;
+ // [TagClosedAuto]
+ $element->armor['MakeWellFormed_TagClosedError'] = true;
+ $element->carryover = true;
+ $token = $this->processToken(array($new_token, $token, $element));
+ } else {
+ $token = $this->insertBefore($new_token);
+ }
+ } else {
+ $token = $this->remove();
+ }
$reprocess = true;
continue;
}
@@ -271,20 +366,26 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($ok) {
foreach ($this->injectors as $i => $injector) {
- if (isset($token->skip[$i])) continue;
- if ($token->rewind !== null && $token->rewind !== $i) continue;
- $injector->handleElement($token);
- $this->processToken($token, $i);
+ if (isset($token->skip[$i])) {
+ continue;
+ }
+ if ($token->rewind !== null && $token->rewind !== $i) {
+ continue;
+ }
+ $r = $token;
+ $injector->handleElement($r);
+ $token = $this->processToken($r, $i);
$reprocess = true;
break;
}
if (!$reprocess) {
// ah, nothing interesting happened; do normal processing
- $this->swap($token);
if ($token instanceof HTMLPurifier_Token_Start) {
$this->stack[] = $token;
} elseif ($token instanceof HTMLPurifier_Token_End) {
- throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
+ throw new HTMLPurifier_Exception(
+ 'Improper handling of end tag in start code; possible error in MakeWellFormed'
+ );
}
}
continue;
@@ -298,13 +399,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// make sure that we have something open
if (empty($this->stack)) {
if ($escape_invalid_tags) {
- if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
- $this->swap(new HTMLPurifier_Token_Text(
- $generator->generateFromToken($token)
- ));
+ if ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
+ }
+ $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
} else {
- $this->remove();
- if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
+ if ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
+ }
+ $token = $this->remove();
}
$reprocess = true;
continue;
@@ -318,10 +421,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($current_parent->name == $token->name) {
$token->start = $current_parent;
foreach ($this->injectors as $i => $injector) {
- if (isset($token->skip[$i])) continue;
- if ($token->rewind !== null && $token->rewind !== $i) continue;
- $injector->handleEnd($token);
- $this->processToken($token, $i);
+ if (isset($token->skip[$i])) {
+ continue;
+ }
+ if ($token->rewind !== null && $token->rewind !== $i) {
+ continue;
+ }
+ $r = $token;
+ $injector->handleEnd($r);
+ $token = $this->processToken($r, $i);
$this->stack[] = $current_parent;
$reprocess = true;
break;
@@ -349,13 +457,15 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// we didn't find the tag, so remove
if ($skipped_tags === false) {
if ($escape_invalid_tags) {
- $this->swap(new HTMLPurifier_Token_Text(
- $generator->generateFromToken($token)
- ));
- if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
+ if ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
+ }
+ $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
} else {
- $this->remove();
- if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
+ if ($e) {
+ $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
+ }
+ $token = $this->remove();
}
$reprocess = true;
continue;
@@ -366,7 +476,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($e) {
for ($j = $c - 1; $j > 0; $j--) {
// notice we exclude $j == 0, i.e. the current ending tag, from
- // the errors...
+ // the errors... [TagClosedSuppress]
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
}
@@ -381,24 +491,24 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$new_token->start = $skipped_tags[$j];
array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
+ // [TagClosedAuto]
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$replace[] = $element;
}
}
- $this->processToken($replace);
+ $token = $this->processToken($replace);
$reprocess = true;
continue;
}
- $context->destroy('CurrentNesting');
- $context->destroy('InputTokens');
- $context->destroy('InputIndex');
$context->destroy('CurrentToken');
+ $context->destroy('CurrentNesting');
+ $context->destroy('InputZipper');
- unset($this->injectors, $this->stack, $this->tokens, $this->t);
- return $tokens;
+ unset($this->injectors, $this->stack, $this->tokens);
+ return $zipper->toArray($token);
}
/**
@@ -417,25 +527,38 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
* If $token is an integer, that number of tokens (with the first token
* being the current one) will be deleted.
*
- * @param $token Token substitution value
- * @param $injector Injector that performed the substitution; default is if
+ * @param HTMLPurifier_Token|array|int|bool $token Token substitution value
+ * @param HTMLPurifier_Injector|int $injector Injector that performed the substitution; default is if
* this is not an injector related operation.
+ * @throws HTMLPurifier_Exception
*/
- protected function processToken($token, $injector = -1) {
-
+ protected function processToken($token, $injector = -1)
+ {
// normalize forms of token
- if (is_object($token)) $token = array(1, $token);
- if (is_int($token)) $token = array($token);
- if ($token === false) $token = array(1);
- if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
- if (!is_int($token[0])) array_unshift($token, 1);
- if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
+ if (is_object($token)) {
+ $token = array(1, $token);
+ }
+ if (is_int($token)) {
+ $token = array($token);
+ }
+ if ($token === false) {
+ $token = array(1);
+ }
+ if (!is_array($token)) {
+ throw new HTMLPurifier_Exception('Invalid token type from injector');
+ }
+ if (!is_int($token[0])) {
+ array_unshift($token, 1);
+ }
+ if ($token[0] === 0) {
+ throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
+ }
// $token is now an array with the following form:
// array(number nodes to delete, new node 1, new node 2, ...)
$delete = array_shift($token);
- $old = array_splice($this->tokens, $this->t, $delete, $token);
+ list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
if ($injector > -1) {
// determine appropriate skips
@@ -446,30 +569,32 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
}
+ return $r;
+
}
/**
- * Inserts a token before the current token. Cursor now points to this token
+ * Inserts a token before the current token. Cursor now points to
+ * this token. You must reprocess after this.
+ * @param HTMLPurifier_Token $token
*/
- private function insertBefore($token) {
- array_splice($this->tokens, $this->t, 0, array($token));
+ private function insertBefore($token)
+ {
+ // NB not $this->zipper->insertBefore(), due to positioning
+ // differences
+ $splice = $this->zipper->splice($this->token, 0, array($token));
+
+ return $splice[1];
}
/**
* Removes current token. Cursor now points to new token occupying previously
- * occupied space.
+ * occupied space. You must reprocess after this.
*/
- private function remove() {
- array_splice($this->tokens, $this->t, 1);
+ private function remove()
+ {
+ return $this->zipper->delete();
}
-
- /**
- * Swap current token with new token. Cursor points to new token (no change).
- */
- private function swap($token) {
- $this->tokens[$this->t] = $token;
- }
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
index cf3a33e40..1a8149ecc 100644
--- a/library/HTMLPurifier/Strategy/RemoveForeignElements.php
+++ b/library/HTMLPurifier/Strategy/RemoveForeignElements.php
@@ -11,19 +11,29 @@
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
- public function execute($tokens, $config, $context) {
+ /**
+ * @param HTMLPurifier_Token[] $tokens
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array|HTMLPurifier_Token[]
+ */
+ public function execute($tokens, $config, $context)
+ {
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator($config, $context);
$result = array();
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
- $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
+ $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
+ $comment_lookup = $config->get('HTML.AllowedComments');
+ $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
+ $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
- $hidden_elements = $config->get('Core.HiddenElements');
+ $hidden_elements = $config->get('Core.HiddenElements');
// remove script contents compatibility
if ($remove_script_contents === true) {
@@ -48,34 +58,31 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$e =& $context->get('ErrorCollector');
}
- foreach($tokens as $token) {
+ foreach ($tokens as $token) {
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
continue;
}
}
- if (!empty( $token->is_tag )) {
+ if (!empty($token->is_tag)) {
// DEFINITION CALL
// before any processing, try to transform the element
- if (
- isset($definition->info_tag_transform[$token->name])
- ) {
+ if (isset($definition->info_tag_transform[$token->name])) {
$original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
- info_tag_transform[$token->name]->
- transform($token, $config, $context);
- if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
+ info_tag_transform[$token->name]->transform($token, $config, $context);
+ if ($e) {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
+ }
}
if (isset($definition->info[$token->name])) {
-
// mostly everything's good, but
// we need to make sure required attributes are in order
- if (
- ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
+ if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
@@ -88,7 +95,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
}
if (!$ok) {
- if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
+ if ($e) {
+ $e->send(
+ E_ERROR,
+ 'Strategy_RemoveForeignElements: Missing required attribute',
+ $name
+ );
+ }
continue;
}
$token->armor['ValidateAttributes'] = true;
@@ -102,7 +115,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML representation and insert in
- if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
+ if ($e) {
+ $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
+ }
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token)
);
@@ -117,9 +132,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
$remove_until = false;
}
- if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
+ if ($e) {
+ $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
+ }
} else {
- if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
+ if ($e) {
+ $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
+ }
}
continue;
}
@@ -128,26 +147,46 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
- } elseif ($trusted) {
- // keep, but perform comment cleaning
+ } elseif ($trusted || $check_comments) {
+ // always cleanup comments
+ $trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
- $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
+ $trailing_hyphen = true;
}
}
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
- if ($e && !$found_double_hyphen) {
- $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
- }
- $found_double_hyphen = true; // prevent double-erroring
+ $found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
+ if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
+ ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
+ // OK good
+ if ($e) {
+ if ($trailing_hyphen) {
+ $e->send(
+ E_NOTICE,
+ 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
+ );
+ }
+ if ($found_double_hyphen) {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
+ }
+ }
+ } else {
+ if ($e) {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+ }
+ continue;
+ }
} else {
// strip comments
- if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+ if ($e) {
+ $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+ }
continue;
}
} elseif ($token instanceof HTMLPurifier_Token_Text) {
@@ -160,12 +199,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// we removed tokens until the end, throw error
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
}
-
$context->destroy('CurrentToken');
-
return $result;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php
index c3328a9d4..fbb3d27c8 100644
--- a/library/HTMLPurifier/Strategy/ValidateAttributes.php
+++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php
@@ -7,8 +7,14 @@
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{
- public function execute($tokens, $config, $context) {
-
+ /**
+ * @param HTMLPurifier_Token[] $tokens
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_Token[]
+ */
+ public function execute($tokens, $config, $context)
+ {
// setup validator
$validator = new HTMLPurifier_AttrValidator();
@@ -19,21 +25,21 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// only process tokens that have attributes,
// namely start and empty tags
- if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) continue;
+ if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) {
+ continue;
+ }
// skip tokens that are armored
- if (!empty($token->armor['ValidateAttributes'])) continue;
+ if (!empty($token->armor['ValidateAttributes'])) {
+ continue;
+ }
// note that we have no facilities here for removing tokens
$validator->validateToken($token, $config, $context);
-
- $tokens[$key] = $token; // for PHP 4
}
$context->destroy('CurrentToken');
-
return $tokens;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/StringHash.php b/library/HTMLPurifier/StringHash.php
index 62085c5c2..c07370197 100644
--- a/library/HTMLPurifier/StringHash.php
+++ b/library/HTMLPurifier/StringHash.php
@@ -10,28 +10,36 @@
*/
class HTMLPurifier_StringHash extends ArrayObject
{
+ /**
+ * @type array
+ */
protected $accessed = array();
/**
* Retrieves a value, and logs the access.
+ * @param mixed $index
+ * @return mixed
*/
- public function offsetGet($index) {
+ public function offsetGet($index)
+ {
$this->accessed[$index] = true;
return parent::offsetGet($index);
}
/**
* Returns a lookup array of all array indexes that have been accessed.
- * @return Array in form array($index => true).
+ * @return array in form array($index => true).
*/
- public function getAccessed() {
+ public function getAccessed()
+ {
return $this->accessed;
}
/**
* Resets the access array.
*/
- public function resetAccessed() {
+ public function resetAccessed()
+ {
$this->accessed = array();
}
}
diff --git a/library/HTMLPurifier/StringHashParser.php b/library/HTMLPurifier/StringHashParser.php
index f3e70c712..7c73f8083 100644
--- a/library/HTMLPurifier/StringHashParser.php
+++ b/library/HTMLPurifier/StringHashParser.php
@@ -28,15 +28,25 @@
class HTMLPurifier_StringHashParser
{
+ /**
+ * @type string
+ */
public $default = 'ID';
/**
* Parses a file that contains a single string-hash.
+ * @param string $file
+ * @return array
*/
- public function parseFile($file) {
- if (!file_exists($file)) return false;
+ public function parseFile($file)
+ {
+ if (!file_exists($file)) {
+ return false;
+ }
$fh = fopen($file, 'r');
- if (!$fh) return false;
+ if (!$fh) {
+ return false;
+ }
$ret = $this->parseHandle($fh);
fclose($fh);
return $ret;
@@ -44,12 +54,19 @@ class HTMLPurifier_StringHashParser
/**
* Parses a file that contains multiple string-hashes delimited by '----'
+ * @param string $file
+ * @return array
*/
- public function parseMultiFile($file) {
- if (!file_exists($file)) return false;
+ public function parseMultiFile($file)
+ {
+ if (!file_exists($file)) {
+ return false;
+ }
$ret = array();
$fh = fopen($file, 'r');
- if (!$fh) return false;
+ if (!$fh) {
+ return false;
+ }
while (!feof($fh)) {
$ret[] = $this->parseHandle($fh);
}
@@ -62,26 +79,36 @@ class HTMLPurifier_StringHashParser
* @note While it's possible to simulate in-memory parsing by using
* custom stream wrappers, if such a use-case arises we should
* factor out the file handle into its own class.
- * @param $fh File handle with pointer at start of valid string-hash
+ * @param resource $fh File handle with pointer at start of valid string-hash
* block.
+ * @return array
*/
- protected function parseHandle($fh) {
+ protected function parseHandle($fh)
+ {
$state = false;
$single = false;
$ret = array();
do {
$line = fgets($fh);
- if ($line === false) break;
+ if ($line === false) {
+ break;
+ }
$line = rtrim($line, "\n\r");
- if (!$state && $line === '') continue;
- if ($line === '----') break;
+ if (!$state && $line === '') {
+ continue;
+ }
+ if ($line === '----') {
+ break;
+ }
if (strncmp('--#', $line, 3) === 0) {
// Comment
continue;
} elseif (strncmp('--', $line, 2) === 0) {
// Multiline declaration
$state = trim($line, '- ');
- if (!isset($ret[$state])) $ret[$state] = '';
+ if (!isset($ret[$state])) {
+ $ret[$state] = '';
+ }
continue;
} elseif (!$state) {
$single = true;
@@ -104,7 +131,6 @@ class HTMLPurifier_StringHashParser
} while (!feof($fh));
return $ret;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/TagTransform.php b/library/HTMLPurifier/TagTransform.php
index 210a44721..7b8d83343 100644
--- a/library/HTMLPurifier/TagTransform.php
+++ b/library/HTMLPurifier/TagTransform.php
@@ -8,14 +8,15 @@ abstract class HTMLPurifier_TagTransform
/**
* Tag name to transform the tag to.
+ * @type string
*/
public $transform_to;
/**
* Transforms the obsolete tag into the valid tag.
- * @param $tag Tag to be transformed.
- * @param $config Mandatory HTMLPurifier_Config object
- * @param $context Mandatory HTMLPurifier_Context object
+ * @param HTMLPurifier_Token_Tag $tag Tag to be transformed.
+ * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object
+ * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
*/
abstract public function transform($tag, $config, $context);
@@ -23,14 +24,14 @@ abstract class HTMLPurifier_TagTransform
* Prepends CSS properties to the style attribute, creating the
* attribute if it doesn't exist.
* @warning Copied over from AttrTransform, be sure to keep in sync
- * @param $attr Attribute array to process (passed by reference)
- * @param $css CSS to prepend
+ * @param array $attr Attribute array to process (passed by reference)
+ * @param string $css CSS to prepend
*/
- protected function prependCSS(&$attr, $css) {
+ protected function prependCSS(&$attr, $css)
+ {
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
$attr['style'] = $css . $attr['style'];
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/TagTransform/Font.php b/library/HTMLPurifier/TagTransform/Font.php
index ed2463786..7853d90bc 100644
--- a/library/HTMLPurifier/TagTransform/Font.php
+++ b/library/HTMLPurifier/TagTransform/Font.php
@@ -17,9 +17,14 @@
*/
class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
{
-
+ /**
+ * @type string
+ */
public $transform_to = 'span';
+ /**
+ * @type array
+ */
protected $_size_lookup = array(
'0' => 'xx-small',
'1' => 'xx-small',
@@ -37,8 +42,14 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
'+4' => '300%'
);
- public function transform($tag, $config, $context) {
-
+ /**
+ * @param HTMLPurifier_Token_Tag $tag
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_Token_End|string
+ */
+ public function transform($tag, $config, $context)
+ {
if ($tag instanceof HTMLPurifier_Token_End) {
$new_tag = clone $tag;
$new_tag->name = $this->transform_to;
@@ -63,17 +74,25 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
// handle size transform
if (isset($attr['size'])) {
// normalize large numbers
- if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
- $size = (int) $attr['size'];
- if ($size < -2) $attr['size'] = '-2';
- if ($size > 4) $attr['size'] = '+4';
- } else {
- $size = (int) $attr['size'];
- if ($size > 7) $attr['size'] = '7';
+ if ($attr['size'] !== '') {
+ if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
+ $size = (int)$attr['size'];
+ if ($size < -2) {
+ $attr['size'] = '-2';
+ }
+ if ($size > 4) {
+ $attr['size'] = '+4';
+ }
+ } else {
+ $size = (int)$attr['size'];
+ if ($size > 7) {
+ $attr['size'] = '7';
+ }
+ }
}
if (isset($this->_size_lookup[$attr['size']])) {
$prepend_style .= 'font-size:' .
- $this->_size_lookup[$attr['size']] . ';';
+ $this->_size_lookup[$attr['size']] . ';';
}
unset($attr['size']);
}
@@ -89,7 +108,6 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
$new_tag->attr = $attr;
return $new_tag;
-
}
}
diff --git a/library/HTMLPurifier/TagTransform/Simple.php b/library/HTMLPurifier/TagTransform/Simple.php
index 0e36130f2..71bf10b91 100644
--- a/library/HTMLPurifier/TagTransform/Simple.php
+++ b/library/HTMLPurifier/TagTransform/Simple.php
@@ -7,19 +7,29 @@
*/
class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
{
-
+ /**
+ * @type string
+ */
protected $style;
/**
- * @param $transform_to Tag name to transform to.
- * @param $style CSS style to add to the tag
+ * @param string $transform_to Tag name to transform to.
+ * @param string $style CSS style to add to the tag
*/
- public function __construct($transform_to, $style = null) {
+ public function __construct($transform_to, $style = null)
+ {
$this->transform_to = $transform_to;
$this->style = $style;
}
- public function transform($tag, $config, $context) {
+ /**
+ * @param HTMLPurifier_Token_Tag $tag
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return string
+ */
+ public function transform($tag, $config, $context)
+ {
$new_tag = clone $tag;
$new_tag->name = $this->transform_to;
if (!is_null($this->style) &&
@@ -29,7 +39,6 @@ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
}
return $new_tag;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php
index 7900e6cb1..85b85e072 100644
--- a/library/HTMLPurifier/Token.php
+++ b/library/HTMLPurifier/Token.php
@@ -3,55 +3,98 @@
/**
* Abstract base token class that all others inherit from.
*/
-class HTMLPurifier_Token {
- public $line; /**< Line number node was on in source document. Null if unknown. */
- public $col; /**< Column of line node was on in source document. Null if unknown. */
+abstract class HTMLPurifier_Token
+{
+ /**
+ * Line number node was on in source document. Null if unknown.
+ * @type int
+ */
+ public $line;
+
+ /**
+ * Column of line node was on in source document. Null if unknown.
+ * @type int
+ */
+ public $col;
/**
* Lookup array of processing that this token is exempt from.
* Currently, valid values are "ValidateAttributes" and
* "MakeWellFormed_TagClosedError"
+ * @type array
*/
public $armor = array();
/**
* Used during MakeWellFormed.
+ * @type
*/
public $skip;
+
+ /**
+ * @type
+ */
public $rewind;
+
+ /**
+ * @type
+ */
public $carryover;
- public function __get($n) {
- if ($n === 'type') {
- trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
- switch (get_class($this)) {
- case 'HTMLPurifier_Token_Start': return 'start';
- case 'HTMLPurifier_Token_Empty': return 'empty';
- case 'HTMLPurifier_Token_End': return 'end';
- case 'HTMLPurifier_Token_Text': return 'text';
- case 'HTMLPurifier_Token_Comment': return 'comment';
- default: return null;
+ /**
+ * @param string $n
+ * @return null|string
+ */
+ public function __get($n)
+ {
+ if ($n === 'type') {
+ trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE);
+ switch (get_class($this)) {
+ case 'HTMLPurifier_Token_Start':
+ return 'start';
+ case 'HTMLPurifier_Token_Empty':
+ return 'empty';
+ case 'HTMLPurifier_Token_End':
+ return 'end';
+ case 'HTMLPurifier_Token_Text':
+ return 'text';
+ case 'HTMLPurifier_Token_Comment':
+ return 'comment';
+ default:
+ return null;
+ }
}
- }
}
/**
* Sets the position of the token in the source document.
+ * @param int $l
+ * @param int $c
*/
- public function position($l = null, $c = null) {
+ public function position($l = null, $c = null)
+ {
$this->line = $l;
- $this->col = $c;
+ $this->col = $c;
}
/**
* Convenience function for DirectLex settings line/col position.
+ * @param int $l
+ * @param int $c
*/
- public function rawPosition($l, $c) {
- if ($c === -1) $l++;
+ public function rawPosition($l, $c)
+ {
+ if ($c === -1) {
+ $l++;
+ }
$this->line = $l;
- $this->col = $c;
+ $this->col = $c;
}
+ /**
+ * Converts a token into its corresponding node.
+ */
+ abstract public function toNode();
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Comment.php b/library/HTMLPurifier/Token/Comment.php
index dc6bdcabb..23453c705 100644
--- a/library/HTMLPurifier/Token/Comment.php
+++ b/library/HTMLPurifier/Token/Comment.php
@@ -5,17 +5,33 @@
*/
class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
{
- public $data; /**< Character data within comment. */
+ /**
+ * Character data within comment.
+ * @type string
+ */
+ public $data;
+
+ /**
+ * @type bool
+ */
public $is_whitespace = true;
+
/**
* Transparent constructor.
*
- * @param $data String comment data.
+ * @param string $data String comment data.
+ * @param int $line
+ * @param int $col
*/
- public function __construct($data, $line = null, $col = null) {
+ public function __construct($data, $line = null, $col = null)
+ {
$this->data = $data;
$this->line = $line;
- $this->col = $col;
+ $this->col = $col;
+ }
+
+ public function toNode() {
+ return new HTMLPurifier_Node_Comment($this->data, $this->line, $this->col);
}
}
diff --git a/library/HTMLPurifier/Token/Empty.php b/library/HTMLPurifier/Token/Empty.php
index 2a82b47ad..78a95f555 100644
--- a/library/HTMLPurifier/Token/Empty.php
+++ b/library/HTMLPurifier/Token/Empty.php
@@ -5,7 +5,11 @@
*/
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
-
+ public function toNode() {
+ $n = parent::toNode();
+ $n->empty = true;
+ return $n;
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/End.php b/library/HTMLPurifier/Token/End.php
index 353e79daf..59b38fdc5 100644
--- a/library/HTMLPurifier/Token/End.php
+++ b/library/HTMLPurifier/Token/End.php
@@ -10,10 +10,15 @@
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
/**
- * Token that started this node. Added by MakeWellFormed. Please
- * do not edit this!
+ * Token that started this node.
+ * Added by MakeWellFormed. Please do not edit this!
+ * @type HTMLPurifier_Token
*/
public $start;
+
+ public function toNode() {
+ throw new Exception("HTMLPurifier_Token_End->toNode not supported!");
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Start.php b/library/HTMLPurifier/Token/Start.php
index e0e14fc62..019f317ad 100644
--- a/library/HTMLPurifier/Token/Start.php
+++ b/library/HTMLPurifier/Token/Start.php
@@ -5,7 +5,6 @@
*/
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Token/Tag.php b/library/HTMLPurifier/Token/Tag.php
index 798be028e..d643fa64e 100644
--- a/library/HTMLPurifier/Token/Tag.php
+++ b/library/HTMLPurifier/Token/Tag.php
@@ -3,13 +3,14 @@
/**
* Abstract class of a tag token (start, end or empty), and its behavior.
*/
-class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
+abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
{
/**
* Static bool marker that indicates the class is a tag.
*
* This allows us to check objects with !empty($obj->is_tag)
* without having to use a function call is_a().
+ * @type bool
*/
public $is_tag = true;
@@ -19,21 +20,27 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
* @note Strictly speaking, XML tags are case sensitive, so we shouldn't
* be lower-casing them, but these tokens cater to HTML tags, which are
* insensitive.
+ * @type string
*/
public $name;
/**
* Associative array of the tag's attributes.
+ * @type array
*/
public $attr = array();
/**
* Non-overloaded constructor, which lower-cases passed tag name.
*
- * @param $name String name.
- * @param $attr Associative array of attributes.
+ * @param string $name String name.
+ * @param array $attr Associative array of attributes.
+ * @param int $line
+ * @param int $col
+ * @param array $armor
*/
- public function __construct($name, $attr = array(), $line = null, $col = null) {
+ public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array())
+ {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
@@ -49,7 +56,12 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
}
$this->attr = $attr;
$this->line = $line;
- $this->col = $col;
+ $this->col = $col;
+ $this->armor = $armor;
+ }
+
+ public function toNode() {
+ return new HTMLPurifier_Node_Element($this->name, $this->attr, $this->line, $this->col, $this->armor);
}
}
diff --git a/library/HTMLPurifier/Token/Text.php b/library/HTMLPurifier/Token/Text.php
index 82efd823d..f26a1c211 100644
--- a/library/HTMLPurifier/Token/Text.php
+++ b/library/HTMLPurifier/Token/Text.php
@@ -12,22 +12,42 @@
class HTMLPurifier_Token_Text extends HTMLPurifier_Token
{
- public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */
- public $data; /**< Parsed character data of text. */
- public $is_whitespace; /**< Bool indicating if node is whitespace. */
+ /**
+ * @type string
+ */
+ public $name = '#PCDATA';
+ /**< PCDATA tag name compatible with DTD. */
+
+ /**
+ * @type string
+ */
+ public $data;
+ /**< Parsed character data of text. */
+
+ /**
+ * @type bool
+ */
+ public $is_whitespace;
+
+ /**< Bool indicating if node is whitespace. */
/**
* Constructor, accepts data and determines if it is whitespace.
- *
- * @param $data String parsed character data.
+ * @param string $data String parsed character data.
+ * @param int $line
+ * @param int $col
*/
- public function __construct($data, $line = null, $col = null) {
+ public function __construct($data, $line = null, $col = null)
+ {
$this->data = $data;
$this->is_whitespace = ctype_space($data);
$this->line = $line;
- $this->col = $col;
+ $this->col = $col;
}
+ public function toNode() {
+ return new HTMLPurifier_Node_Text($this->data, $this->is_whitespace, $this->line, $this->col);
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/TokenFactory.php b/library/HTMLPurifier/TokenFactory.php
index 7cf48fb41..dea2446b9 100644
--- a/library/HTMLPurifier/TokenFactory.php
+++ b/library/HTMLPurifier/TokenFactory.php
@@ -13,32 +13,53 @@
*/
class HTMLPurifier_TokenFactory
{
+ // p stands for prototype
/**
- * Prototypes that will be cloned.
- * @private
+ * @type HTMLPurifier_Token_Start
*/
- // p stands for prototype
- private $p_start, $p_end, $p_empty, $p_text, $p_comment;
+ private $p_start;
+
+ /**
+ * @type HTMLPurifier_Token_End
+ */
+ private $p_end;
+
+ /**
+ * @type HTMLPurifier_Token_Empty
+ */
+ private $p_empty;
+
+ /**
+ * @type HTMLPurifier_Token_Text
+ */
+ private $p_text;
+
+ /**
+ * @type HTMLPurifier_Token_Comment
+ */
+ private $p_comment;
/**
* Generates blank prototypes for cloning.
*/
- public function __construct() {
- $this->p_start = new HTMLPurifier_Token_Start('', array());
- $this->p_end = new HTMLPurifier_Token_End('');
- $this->p_empty = new HTMLPurifier_Token_Empty('', array());
- $this->p_text = new HTMLPurifier_Token_Text('');
- $this->p_comment= new HTMLPurifier_Token_Comment('');
+ public function __construct()
+ {
+ $this->p_start = new HTMLPurifier_Token_Start('', array());
+ $this->p_end = new HTMLPurifier_Token_End('');
+ $this->p_empty = new HTMLPurifier_Token_Empty('', array());
+ $this->p_text = new HTMLPurifier_Token_Text('');
+ $this->p_comment = new HTMLPurifier_Token_Comment('');
}
/**
* Creates a HTMLPurifier_Token_Start.
- * @param $name Tag name
- * @param $attr Associative array of attributes
- * @return Generated HTMLPurifier_Token_Start
+ * @param string $name Tag name
+ * @param array $attr Associative array of attributes
+ * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
*/
- public function createStart($name, $attr = array()) {
+ public function createStart($name, $attr = array())
+ {
$p = clone $this->p_start;
$p->__construct($name, $attr);
return $p;
@@ -46,10 +67,11 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_End.
- * @param $name Tag name
- * @return Generated HTMLPurifier_Token_End
+ * @param string $name Tag name
+ * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
*/
- public function createEnd($name) {
+ public function createEnd($name)
+ {
$p = clone $this->p_end;
$p->__construct($name);
return $p;
@@ -57,11 +79,12 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Empty.
- * @param $name Tag name
- * @param $attr Associative array of attributes
- * @return Generated HTMLPurifier_Token_Empty
+ * @param string $name Tag name
+ * @param array $attr Associative array of attributes
+ * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
*/
- public function createEmpty($name, $attr = array()) {
+ public function createEmpty($name, $attr = array())
+ {
$p = clone $this->p_empty;
$p->__construct($name, $attr);
return $p;
@@ -69,10 +92,11 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Text.
- * @param $data Data of text token
- * @return Generated HTMLPurifier_Token_Text
+ * @param string $data Data of text token
+ * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
*/
- public function createText($data) {
+ public function createText($data)
+ {
$p = clone $this->p_text;
$p->__construct($data);
return $p;
@@ -80,15 +104,15 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Comment.
- * @param $data Data of comment token
- * @return Generated HTMLPurifier_Token_Comment
+ * @param string $data Data of comment token
+ * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
*/
- public function createComment($data) {
+ public function createComment($data)
+ {
$p = clone $this->p_comment;
$p->__construct($data);
return $p;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php
index 8b50d0d18..a5e7ae298 100644
--- a/library/HTMLPurifier/URI.php
+++ b/library/HTMLPurifier/URI.php
@@ -10,17 +10,57 @@
*/
class HTMLPurifier_URI
{
-
- public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
+ /**
+ * @type string
+ */
+ public $scheme;
/**
+ * @type string
+ */
+ public $userinfo;
+
+ /**
+ * @type string
+ */
+ public $host;
+
+ /**
+ * @type int
+ */
+ public $port;
+
+ /**
+ * @type string
+ */
+ public $path;
+
+ /**
+ * @type string
+ */
+ public $query;
+
+ /**
+ * @type string
+ */
+ public $fragment;
+
+ /**
+ * @param string $scheme
+ * @param string $userinfo
+ * @param string $host
+ * @param int $port
+ * @param string $path
+ * @param string $query
+ * @param string $fragment
* @note Automatically normalizes scheme and port
*/
- public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
+ public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
+ {
$this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
$this->userinfo = $userinfo;
$this->host = $host;
- $this->port = is_null($port) ? $port : (int) $port;
+ $this->port = is_null($port) ? $port : (int)$port;
$this->path = $path;
$this->query = $query;
$this->fragment = $fragment;
@@ -28,19 +68,22 @@ class HTMLPurifier_URI
/**
* Retrieves a scheme object corresponding to the URI's scheme/default
- * @param $config Instance of HTMLPurifier_Config
- * @param $context Instance of HTMLPurifier_Context
- * @return Scheme object appropriate for validating this URI
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI
*/
- public function getSchemeObj($config, $context) {
+ public function getSchemeObj($config, $context)
+ {
$registry = HTMLPurifier_URISchemeRegistry::instance();
if ($this->scheme !== null) {
$scheme_obj = $registry->getScheme($this->scheme, $config, $context);
- if (!$scheme_obj) return false; // invalid scheme, clean it out
+ if (!$scheme_obj) {
+ return false;
+ } // invalid scheme, clean it out
} else {
// no scheme: retrieve the default one
$def = $config->getDefinition('URI');
- $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
+ $scheme_obj = $def->getDefaultScheme($config, $context);
if (!$scheme_obj) {
// something funky happened to the default scheme object
trigger_error(
@@ -56,30 +99,39 @@ class HTMLPurifier_URI
/**
* Generic validation method applicable for all schemes. May modify
* this URI in order to get it into a compliant form.
- * @param $config Instance of HTMLPurifier_Config
- * @param $context Instance of HTMLPurifier_Context
- * @return True if validation/filtering succeeds, false if failure
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool True if validation/filtering succeeds, false if failure
*/
- public function validate($config, $context) {
-
+ public function validate($config, $context)
+ {
// ABNF definitions from RFC 3986
$chars_sub_delims = '!$&\'()*+,;=';
$chars_gen_delims = ':/?#[]@';
$chars_pchar = $chars_sub_delims . ':@';
- // validate scheme (MUST BE FIRST!)
- if (!is_null($this->scheme) && is_null($this->host)) {
- $def = $config->getDefinition('URI');
- if ($def->defaultScheme === $this->scheme) {
- $this->scheme = null;
- }
- }
-
// validate host
if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host();
$this->host = $host_def->validate($this->host, $config, $context);
- if ($this->host === false) $this->host = null;
+ if ($this->host === false) {
+ $this->host = null;
+ }
+ }
+
+ // validate scheme
+ // NOTE: It's not appropriate to check whether or not this
+ // scheme is in our registry, since a URIFilter may convert a
+ // URI that we don't allow into one we do. So instead, we just
+ // check if the scheme can be dropped because there is no host
+ // and it is our default scheme.
+ if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
+ // support for relative paths is pretty abysmal when the
+ // scheme is present, so axe it when possible
+ $def = $config->getDefinition('URI');
+ if ($def->defaultScheme === $this->scheme) {
+ $this->scheme = null;
+ }
}
// validate username
@@ -90,38 +142,55 @@ class HTMLPurifier_URI
// validate port
if (!is_null($this->port)) {
- if ($this->port < 1 || $this->port > 65535) $this->port = null;
+ if ($this->port < 1 || $this->port > 65535) {
+ $this->port = null;
+ }
}
// validate path
- $path_parts = array();
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
- if (!is_null($this->host)) {
+ if (!is_null($this->host)) { // this catches $this->host === ''
// path-abempty (hier and relative)
+ // http://www.example.com/my/path
+ // //www.example.com/my/path (looks odd, but works, and
+ // recognized by most browsers)
+ // (this set is valid or invalid on a scheme by scheme
+ // basis, so we'll deal with it later)
+ // file:///my/path
+ // ///my/path
$this->path = $segments_encoder->encode($this->path);
- } elseif ($this->path !== '' && $this->path[0] === '/') {
- // path-absolute (hier and relative)
- if (strlen($this->path) >= 2 && $this->path[1] === '/') {
- // This shouldn't ever happen!
- $this->path = '';
- } else {
+ } elseif ($this->path !== '') {
+ if ($this->path[0] === '/') {
+ // path-absolute (hier and relative)
+ // http:/my/path
+ // /my/path
+ if (strlen($this->path) >= 2 && $this->path[1] === '/') {
+ // This could happen if both the host gets stripped
+ // out
+ // http://my/path
+ // //my/path
+ $this->path = '';
+ } else {
+ $this->path = $segments_encoder->encode($this->path);
+ }
+ } elseif (!is_null($this->scheme)) {
+ // path-rootless (hier)
+ // http:my/path
+ // Short circuit evaluation means we don't need to check nz
$this->path = $segments_encoder->encode($this->path);
- }
- } elseif (!is_null($this->scheme) && $this->path !== '') {
- // path-rootless (hier)
- // Short circuit evaluation means we don't need to check nz
- $this->path = $segments_encoder->encode($this->path);
- } elseif (is_null($this->scheme) && $this->path !== '') {
- // path-noscheme (relative)
- // (once again, not checking nz)
- $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
- $c = strpos($this->path, '/');
- if ($c !== false) {
- $this->path =
- $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
- $segments_encoder->encode(substr($this->path, $c));
} else {
- $this->path = $segment_nc_encoder->encode($this->path);
+ // path-noscheme (relative)
+ // my/path
+ // (once again, not checking nz)
+ $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
+ $c = strpos($this->path, '/');
+ if ($c !== false) {
+ $this->path =
+ $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
+ $segments_encoder->encode(substr($this->path, $c));
+ } else {
+ $this->path = $segment_nc_encoder->encode($this->path);
+ }
}
} else {
// path-empty (hier and relative)
@@ -138,36 +207,108 @@ class HTMLPurifier_URI
if (!is_null($this->fragment)) {
$this->fragment = $qf_encoder->encode($this->fragment);
}
-
return true;
-
}
/**
* Convert URI back to string
- * @return String URI appropriate for output
+ * @return string URI appropriate for output
*/
- public function toString() {
+ public function toString()
+ {
// reconstruct authority
$authority = null;
+ // there is a rendering difference between a null authority
+ // (http:foo-bar) and an empty string authority
+ // (http:///foo-bar).
if (!is_null($this->host)) {
$authority = '';
- if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
+ if (!is_null($this->userinfo)) {
+ $authority .= $this->userinfo . '@';
+ }
$authority .= $this->host;
- if(!is_null($this->port)) $authority .= ':' . $this->port;
+ if (!is_null($this->port)) {
+ $authority .= ':' . $this->port;
+ }
}
- // reconstruct the result
+ // Reconstruct the result
+ // One might wonder about parsing quirks from browsers after
+ // this reconstruction. Unfortunately, parsing behavior depends
+ // on what *scheme* was employed (file:///foo is handled *very*
+ // differently than http:///foo), so unfortunately we have to
+ // defer to the schemes to do the right thing.
$result = '';
- if (!is_null($this->scheme)) $result .= $this->scheme . ':';
- if (!is_null($authority)) $result .= '//' . $authority;
+ if (!is_null($this->scheme)) {
+ $result .= $this->scheme . ':';
+ }
+ if (!is_null($authority)) {
+ $result .= '//' . $authority;
+ }
$result .= $this->path;
- if (!is_null($this->query)) $result .= '?' . $this->query;
- if (!is_null($this->fragment)) $result .= '#' . $this->fragment;
+ if (!is_null($this->query)) {
+ $result .= '?' . $this->query;
+ }
+ if (!is_null($this->fragment)) {
+ $result .= '#' . $this->fragment;
+ }
return $result;
}
+ /**
+ * Returns true if this URL might be considered a 'local' URL given
+ * the current context. This is true when the host is null, or
+ * when it matches the host supplied to the configuration.
+ *
+ * Note that this does not do any scheme checking, so it is mostly
+ * only appropriate for metadata that doesn't care about protocol
+ * security. isBenign is probably what you actually want.
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function isLocal($config, $context)
+ {
+ if ($this->host === null) {
+ return true;
+ }
+ $uri_def = $config->getDefinition('URI');
+ if ($uri_def->host === $this->host) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if this URL should be considered a 'benign' URL,
+ * that is:
+ *
+ * - It is a local URL (isLocal), and
+ * - It has a equal or better level of security
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function isBenign($config, $context)
+ {
+ if (!$this->isLocal($config, $context)) {
+ return false;
+ }
+
+ $scheme_obj = $this->getSchemeObj($config, $context);
+ if (!$scheme_obj) {
+ return false;
+ } // conservative approach
+
+ $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
+ if ($current_scheme_obj->secure) {
+ if (!$scheme_obj->secure) {
+ return false;
+ }
+ }
+ return true;
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php
index ea2b8fe24..e0bd8bcca 100644
--- a/library/HTMLPurifier/URIDefinition.php
+++ b/library/HTMLPurifier/URIDefinition.php
@@ -23,19 +23,24 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
*/
public $defaultScheme;
- public function __construct() {
+ public function __construct()
+ {
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
+ $this->registerFilter(new HTMLPurifier_URIFilter_DisableResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
+ $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
$this->registerFilter(new HTMLPurifier_URIFilter_Munge());
}
- public function registerFilter($filter) {
+ public function registerFilter($filter)
+ {
$this->registeredFilters[$filter->name] = $filter;
}
- public function addFilter($filter, $config) {
+ public function addFilter($filter, $config)
+ {
$r = $filter->prepare($config);
if ($r === false) return; // null is ok, for backwards compat
if ($filter->post) {
@@ -45,22 +50,29 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
}
}
- protected function doSetup($config) {
+ protected function doSetup($config)
+ {
$this->setupMemberVariables($config);
$this->setupFilters($config);
}
- protected function setupFilters($config) {
+ protected function setupFilters($config)
+ {
foreach ($this->registeredFilters as $name => $filter) {
- $conf = $config->get('URI.' . $name);
- if ($conf !== false && $conf !== null) {
+ if ($filter->always_load) {
$this->addFilter($filter, $config);
+ } else {
+ $conf = $config->get('URI.' . $name);
+ if ($conf !== false && $conf !== null) {
+ $this->addFilter($filter, $config);
+ }
}
}
unset($this->registeredFilters);
}
- protected function setupMemberVariables($config) {
+ protected function setupMemberVariables($config)
+ {
$this->host = $config->get('URI.Host');
$base_uri = $config->get('URI.Base');
if (!is_null($base_uri)) {
@@ -72,7 +84,13 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
}
- public function filter(&$uri, $config, $context) {
+ public function getDefaultScheme($config, $context)
+ {
+ return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
+ }
+
+ public function filter(&$uri, $config, $context)
+ {
foreach ($this->filters as $name => $f) {
$result = $f->filter($uri, $config, $context);
if (!$result) return false;
@@ -80,7 +98,8 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
return true;
}
- public function postFilter(&$uri, $config, $context) {
+ public function postFilter(&$uri, $config, $context)
+ {
foreach ($this->postFilters as $name => $f) {
$result = $f->filter($uri, $config, $context);
if (!$result) return false;
diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php
index c116f93df..09724e9f4 100644
--- a/library/HTMLPurifier/URIFilter.php
+++ b/library/HTMLPurifier/URIFilter.php
@@ -4,7 +4,21 @@
* Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI filter object,
- * including transformation or blacklisting.
+ * including transformation or blacklisting. A filter named Foo
+ * must have a corresponding configuration directive %URI.Foo,
+ * unless always_load is specified to be true.
+ *
+ * The following contexts may be available while URIFilters are being
+ * processed:
+ *
+ * - EmbeddedURI: true if URI is an embedded resource that will
+ * be loaded automatically on page load
+ * - CurrentToken: a reference to the token that is currently
+ * being processed
+ * - CurrentAttr: the name of the attribute that is currently being
+ * processed
+ * - CurrentCSSProperty: the name of the CSS property that is
+ * currently being processed (if applicable)
*
* @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, you
@@ -15,31 +29,46 @@ abstract class HTMLPurifier_URIFilter
{
/**
- * Unique identifier of filter
+ * Unique identifier of filter.
+ * @type string
*/
public $name;
/**
* True if this filter should be run after scheme validation.
+ * @type bool
*/
public $post = false;
/**
- * Performs initialization for the filter
+ * True if this filter should always be loaded.
+ * This permits a filter to be named Foo without the corresponding
+ * %URI.Foo directive existing.
+ * @type bool
*/
- public function prepare($config) {return true;}
+ public $always_load = false;
+
+ /**
+ * Performs initialization for the filter. If the filter returns
+ * false, this means that it shouldn't be considered active.
+ * @param HTMLPurifier_Config $config
+ * @return bool
+ */
+ public function prepare($config)
+ {
+ return true;
+ }
/**
* Filter a URI object
- * @param $uri Reference to URI object variable
- * @param $config Instance of HTMLPurifier_Config
- * @param $context Instance of HTMLPurifier_Context
+ * @param HTMLPurifier_URI $uri Reference to URI object variable
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
* @return bool Whether or not to continue processing: false indicates
* URL is no good, true indicates continue processing. Note that
* all changes are committed directly on the URI object
*/
abstract public function filter(&$uri, $config, $context);
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIFilter/DisableExternal.php b/library/HTMLPurifier/URIFilter/DisableExternal.php
index d8a39a501..ced1b1376 100644
--- a/library/HTMLPurifier/URIFilter/DisableExternal.php
+++ b/library/HTMLPurifier/URIFilter/DisableExternal.php
@@ -2,19 +2,50 @@
class HTMLPurifier_URIFilter_DisableExternal extends HTMLPurifier_URIFilter
{
+ /**
+ * @type string
+ */
public $name = 'DisableExternal';
+
+ /**
+ * @type array
+ */
protected $ourHostParts = false;
- public function prepare($config) {
+
+ /**
+ * @param HTMLPurifier_Config $config
+ * @return void
+ */
+ public function prepare($config)
+ {
$our_host = $config->getDefinition('URI')->host;
- if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host));
+ if ($our_host !== null) {
+ $this->ourHostParts = array_reverse(explode('.', $our_host));
+ }
}
- public function filter(&$uri, $config, $context) {
- if (is_null($uri->host)) return true;
- if ($this->ourHostParts === false) return false;
+
+ /**
+ * @param HTMLPurifier_URI $uri Reference
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ if (is_null($uri->host)) {
+ return true;
+ }
+ if ($this->ourHostParts === false) {
+ return false;
+ }
$host_parts = array_reverse(explode('.', $uri->host));
foreach ($this->ourHostParts as $i => $x) {
- if (!isset($host_parts[$i])) return false;
- if ($host_parts[$i] != $this->ourHostParts[$i]) return false;
+ if (!isset($host_parts[$i])) {
+ return false;
+ }
+ if ($host_parts[$i] != $this->ourHostParts[$i]) {
+ return false;
+ }
}
return true;
}
diff --git a/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/library/HTMLPurifier/URIFilter/DisableExternalResources.php
index 881abc43c..c6562169e 100644
--- a/library/HTMLPurifier/URIFilter/DisableExternalResources.php
+++ b/library/HTMLPurifier/URIFilter/DisableExternalResources.php
@@ -2,9 +2,22 @@
class HTMLPurifier_URIFilter_DisableExternalResources extends HTMLPurifier_URIFilter_DisableExternal
{
+ /**
+ * @type string
+ */
public $name = 'DisableExternalResources';
- public function filter(&$uri, $config, $context) {
- if (!$context->get('EmbeddedURI', true)) return true;
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ if (!$context->get('EmbeddedURI', true)) {
+ return true;
+ }
return parent::filter($uri, $config, $context);
}
}
diff --git a/library/HTMLPurifier/URIFilter/DisableResources.php b/library/HTMLPurifier/URIFilter/DisableResources.php
new file mode 100644
index 000000000..d5c412c44
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/DisableResources.php
@@ -0,0 +1,22 @@
+get('EmbeddedURI', true);
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIFilter/HostBlacklist.php b/library/HTMLPurifier/URIFilter/HostBlacklist.php
index 045aa0992..a6645c17e 100644
--- a/library/HTMLPurifier/URIFilter/HostBlacklist.php
+++ b/library/HTMLPurifier/URIFilter/HostBlacklist.php
@@ -1,15 +1,40 @@
blacklist = $config->get('URI.HostBlacklist');
return true;
}
- public function filter(&$uri, $config, $context) {
- foreach($this->blacklist as $blacklisted_host_fragment) {
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ foreach ($this->blacklist as $blacklisted_host_fragment) {
if (strpos($uri->host, $blacklisted_host_fragment) !== false) {
return false;
}
diff --git a/library/HTMLPurifier/URIFilter/MakeAbsolute.php b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
index f46ab2630..c507bbff8 100644
--- a/library/HTMLPurifier/URIFilter/MakeAbsolute.php
+++ b/library/HTMLPurifier/URIFilter/MakeAbsolute.php
@@ -4,14 +4,35 @@
class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
{
+ /**
+ * @type string
+ */
public $name = 'MakeAbsolute';
+
+ /**
+ * @type
+ */
protected $base;
+
+ /**
+ * @type array
+ */
protected $basePathStack = array();
- public function prepare($config) {
+
+ /**
+ * @param HTMLPurifier_Config $config
+ * @return bool
+ */
+ public function prepare($config)
+ {
$def = $config->getDefinition('URI');
$this->base = $def->base;
if (is_null($this->base)) {
- trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING);
+ trigger_error(
+ 'URI.MakeAbsolute is being ignored due to lack of ' .
+ 'value for URI.Base configuration',
+ E_USER_WARNING
+ );
return false;
}
$this->base->fragment = null; // fragment is invalid for base URI
@@ -21,19 +42,29 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
$this->basePathStack = $stack;
return true;
}
- public function filter(&$uri, $config, $context) {
- if (is_null($this->base)) return true; // abort early
- if (
- $uri->path === '' && is_null($uri->scheme) &&
- is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)
- ) {
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ if (is_null($this->base)) {
+ return true;
+ } // abort early
+ if ($uri->path === '' && is_null($uri->scheme) &&
+ is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment)) {
// reference to current document
$uri = clone $this->base;
return true;
}
if (!is_null($uri->scheme)) {
// absolute URI already: don't change
- if (!is_null($uri->host)) return true;
+ if (!is_null($uri->host)) {
+ return true;
+ }
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) {
// scheme not recognized
@@ -66,22 +97,33 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
}
// re-combine
$uri->scheme = $this->base->scheme;
- if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo;
- if (is_null($uri->host)) $uri->host = $this->base->host;
- if (is_null($uri->port)) $uri->port = $this->base->port;
+ if (is_null($uri->userinfo)) {
+ $uri->userinfo = $this->base->userinfo;
+ }
+ if (is_null($uri->host)) {
+ $uri->host = $this->base->host;
+ }
+ if (is_null($uri->port)) {
+ $uri->port = $this->base->port;
+ }
return true;
}
/**
* Resolve dots and double-dots in a path stack
+ * @param array $stack
+ * @return array
*/
- private function _collapseStack($stack) {
+ private function _collapseStack($stack)
+ {
$result = array();
$is_folder = false;
for ($i = 0; isset($stack[$i]); $i++) {
$is_folder = false;
// absorb an internally duplicated slash
- if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue;
+ if ($stack[$i] == '' && $i && isset($stack[$i + 1])) {
+ continue;
+ }
if ($stack[$i] == '..') {
if (!empty($result)) {
$segment = array_pop($result);
@@ -106,7 +148,9 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
}
$result[] = $stack[$i];
}
- if ($is_folder) $result[] = '';
+ if ($is_folder) {
+ $result[] = '';
+ }
return $result;
}
}
diff --git a/library/HTMLPurifier/URIFilter/Munge.php b/library/HTMLPurifier/URIFilter/Munge.php
index efa10a645..6e03315a1 100644
--- a/library/HTMLPurifier/URIFilter/Munge.php
+++ b/library/HTMLPurifier/URIFilter/Munge.php
@@ -2,31 +2,79 @@
class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
{
+ /**
+ * @type string
+ */
public $name = 'Munge';
- public $post = true;
- private $target, $parser, $doEmbed, $secretKey;
+ /**
+ * @type bool
+ */
+ public $post = true;
+
+ /**
+ * @type string
+ */
+ private $target;
+
+ /**
+ * @type HTMLPurifier_URIParser
+ */
+ private $parser;
+
+ /**
+ * @type bool
+ */
+ private $doEmbed;
+
+ /**
+ * @type string
+ */
+ private $secretKey;
+
+ /**
+ * @type array
+ */
protected $replace = array();
- public function prepare($config) {
- $this->target = $config->get('URI.' . $this->name);
- $this->parser = new HTMLPurifier_URIParser();
- $this->doEmbed = $config->get('URI.MungeResources');
+ /**
+ * @param HTMLPurifier_Config $config
+ * @return bool
+ */
+ public function prepare($config)
+ {
+ $this->target = $config->get('URI.' . $this->name);
+ $this->parser = new HTMLPurifier_URIParser();
+ $this->doEmbed = $config->get('URI.MungeResources');
$this->secretKey = $config->get('URI.MungeSecretKey');
+ if ($this->secretKey && !function_exists('hash_hmac')) {
+ throw new Exception("Cannot use %URI.MungeSecretKey without hash_hmac support.");
+ }
return true;
}
- public function filter(&$uri, $config, $context) {
- if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true;
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ if ($context->get('EmbeddedURI', true) && !$this->doEmbed) {
+ return true;
+ }
$scheme_obj = $uri->getSchemeObj($config, $context);
- if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
- if (is_null($uri->host) || empty($scheme_obj->browsable)) {
+ if (!$scheme_obj) {
return true;
- }
- // don't redirect if target host is our host
- if ($uri->host === $config->getDefinition('URI')->host) {
+ } // ignore unknown schemes, maybe another postfilter did it
+ if (!$scheme_obj->browsable) {
return true;
- }
+ } // ignore non-browseable schemes, since we can't munge those in a reasonable way
+ if ($uri->isBenign($config, $context)) {
+ return true;
+ } // don't redirect if a benign URL
$this->makeReplace($uri, $config, $context);
$this->replace = array_map('rawurlencode', $this->replace);
@@ -35,12 +83,20 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
$new_uri = $this->parser->parse($new_uri);
// don't redirect if the target host is the same as the
// starting host
- if ($uri->host === $new_uri->host) return true;
+ if ($uri->host === $new_uri->host) {
+ return true;
+ }
$uri = $new_uri; // overwrite
return true;
}
- protected function makeReplace($uri, $config, $context) {
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ */
+ protected function makeReplace($uri, $config, $context)
+ {
$string = $uri->toString();
// always available
$this->replace['%s'] = $string;
@@ -50,9 +106,10 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
$this->replace['%m'] = $context->get('CurrentAttr', true);
$this->replace['%p'] = $context->get('CurrentCSSProperty', true);
// not always available
- if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string);
+ if ($this->secretKey) {
+ $this->replace['%t'] = hash_hmac("sha256", $string, $this->secretKey);
+ }
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIFilter/SafeIframe.php b/library/HTMLPurifier/URIFilter/SafeIframe.php
new file mode 100644
index 000000000..f609c47a3
--- /dev/null
+++ b/library/HTMLPurifier/URIFilter/SafeIframe.php
@@ -0,0 +1,68 @@
+regexp = $config->get('URI.SafeIframeRegexp');
+ return true;
+ }
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function filter(&$uri, $config, $context)
+ {
+ // check if filter not applicable
+ if (!$config->get('HTML.SafeIframe')) {
+ return true;
+ }
+ // check if the filter should actually trigger
+ if (!$context->get('EmbeddedURI', true)) {
+ return true;
+ }
+ $token = $context->get('CurrentToken', true);
+ if (!($token && $token->name == 'iframe')) {
+ return true;
+ }
+ // check if we actually have some whitelists enabled
+ if ($this->regexp === null) {
+ return false;
+ }
+ // actually check the whitelists
+ return preg_match($this->regexp, $uri->toString());
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIParser.php b/library/HTMLPurifier/URIParser.php
index 7179e4ab8..0e7381a07 100644
--- a/library/HTMLPurifier/URIParser.php
+++ b/library/HTMLPurifier/URIParser.php
@@ -12,7 +12,8 @@ class HTMLPurifier_URIParser
*/
protected $percentEncoder;
- public function __construct() {
+ public function __construct()
+ {
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
}
@@ -22,15 +23,15 @@ class HTMLPurifier_URIParser
* @return HTMLPurifier_URI representation of URI. This representation has
* not been validated yet and may not conform to RFC.
*/
- public function parse($uri) {
-
+ public function parse($uri)
+ {
$uri = $this->percentEncoder->normalize($uri);
// Regexp is as per Appendix B.
// Note that ["<>] are an addition to the RFC's recommended
// characters, because they represent external delimeters.
$r_URI = '!'.
- '(([^:/?#"<>]+):)?'. // 2. Scheme
+ '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
'(//([^/?#"<>]*))?'. // 4. Authority
'([^?#"<>]*)'. // 5. Path
'(\?([^#"<>]*))?'. // 7. Query
diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php
index 039710fd1..fe9e82cf2 100644
--- a/library/HTMLPurifier/URIScheme.php
+++ b/library/HTMLPurifier/URIScheme.php
@@ -3,40 +3,100 @@
/**
* Validator for the components of a URI for a specific scheme
*/
-class HTMLPurifier_URIScheme
+abstract class HTMLPurifier_URIScheme
{
/**
- * Scheme's default port (integer)
+ * Scheme's default port (integer). If an explicit port number is
+ * specified that coincides with the default port, it will be
+ * elided.
+ * @type int
*/
public $default_port = null;
/**
- * Whether or not URIs of this schem are locatable by a browser
+ * Whether or not URIs of this scheme are locatable by a browser
* http and ftp are accessible, while mailto and news are not.
+ * @type bool
*/
public $browsable = false;
+ /**
+ * Whether or not data transmitted over this scheme is encrypted.
+ * https is secure, http is not.
+ * @type bool
+ */
+ public $secure = false;
+
/**
* Whether or not the URI always uses , resolves edge cases
* with making relative URIs absolute
+ * @type bool
*/
public $hierarchical = false;
/**
- * Validates the components of a URI
- * @note This implementation should be called by children if they define
- * a default port, as it does port processing.
- * @param $uri Instance of HTMLPurifier_URI
- * @param $config HTMLPurifier_Config object
- * @param $context HTMLPurifier_Context object
- * @return Bool success or failure
+ * Whether or not the URI may omit a hostname when the scheme is
+ * explicitly specified, ala file:///path/to/file. As of writing,
+ * 'file' is the only scheme that browsers support his properly.
+ * @type bool
*/
- public function validate(&$uri, $config, $context) {
- if ($this->default_port == $uri->port) $uri->port = null;
- return true;
- }
+ public $may_omit_host = false;
+ /**
+ * Validates the components of a URI for a specific scheme.
+ * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool success or failure
+ */
+ abstract public function doValidate(&$uri, $config, $context);
+
+ /**
+ * Public interface for validating components of a URI. Performs a
+ * bunch of default actions. Don't overload this method.
+ * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool success or failure
+ */
+ public function validate(&$uri, $config, $context)
+ {
+ if ($this->default_port == $uri->port) {
+ $uri->port = null;
+ }
+ // kludge: browsers do funny things when the scheme but not the
+ // authority is set
+ if (!$this->may_omit_host &&
+ // if the scheme is present, a missing host is always in error
+ (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) ||
+ // if the scheme is not present, a *blank* host is in error,
+ // since this translates into '///path' which most browsers
+ // interpret as being 'http://path'.
+ (is_null($uri->scheme) && $uri->host === '')
+ ) {
+ do {
+ if (is_null($uri->scheme)) {
+ if (substr($uri->path, 0, 2) != '//') {
+ $uri->host = null;
+ break;
+ }
+ // URI is '////path', so we cannot nullify the
+ // host to preserve semantics. Try expanding the
+ // hostname instead (fall through)
+ }
+ // first see if we can manually insert a hostname
+ $host = $config->get('URI.Host');
+ if (!is_null($host)) {
+ $uri->host = $host;
+ } else {
+ // we can't do anything sensible, reject the URL.
+ return false;
+ }
+ } while (false);
+ }
+ return $this->doValidate($uri, $config, $context);
+ }
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/data.php b/library/HTMLPurifier/URIScheme/data.php
index b7f1989cb..6ebca4984 100644
--- a/library/HTMLPurifier/URIScheme/data.php
+++ b/library/HTMLPurifier/URIScheme/data.php
@@ -3,18 +3,38 @@
/**
* Implements data: URI for base64 encoded images supported by GD.
*/
-class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type bool
+ */
public $browsable = true;
+
+ /**
+ * @type array
+ */
public $allowed_types = array(
// you better write validation code for other types if you
// decide to allow them
'image/jpeg' => true,
'image/gif' => true,
'image/png' => true,
- );
+ );
+ // this is actually irrelevant since we only write out the path
+ // component
+ /**
+ * @type bool
+ */
+ public $may_omit_host = true;
- public function validate(&$uri, $config, $context) {
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
$result = explode(',', $uri->path, 2);
$is_base64 = false;
$charset = null;
@@ -23,7 +43,7 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
list($metadata, $data) = $result;
// do some legwork on the metadata
$metas = explode(';', $metadata);
- while(!empty($metas)) {
+ while (!empty($metas)) {
$cur = array_shift($metas);
if ($cur == 'base64') {
$is_base64 = true;
@@ -32,10 +52,14 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
if (substr($cur, 0, 8) == 'charset=') {
// doesn't match if there are arbitrary spaces, but
// whatever dude
- if ($charset !== null) continue; // garbage
+ if ($charset !== null) {
+ continue;
+ } // garbage
$charset = substr($cur, 8); // not used
} else {
- if ($content_type !== null) continue; // garbage
+ if ($content_type !== null) {
+ continue;
+ } // garbage
$content_type = $cur;
}
}
@@ -61,11 +85,15 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
file_put_contents($file, $raw_data);
if (function_exists('exif_imagetype')) {
$image_code = exif_imagetype($file);
+ unlink($file);
} elseif (function_exists('getimagesize')) {
set_error_handler(array($this, 'muteErrorHandler'));
$info = getimagesize($file);
restore_error_handler();
- if ($info == false) return false;
+ unlink($file);
+ if ($info == false) {
+ return false;
+ }
$image_code = $info[2];
} else {
trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
@@ -74,7 +102,9 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
if ($real_content_type != $content_type) {
// we're nice guys; if the content type is something else we
// support, change it over
- if (empty($this->allowed_types[$real_content_type])) return false;
+ if (empty($this->allowed_types[$real_content_type])) {
+ return false;
+ }
$content_type = $real_content_type;
}
// ok, it's kosher, rewrite what we need
@@ -87,7 +117,11 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {
return true;
}
- public function muteErrorHandler($errno, $errstr) {}
-
+ /**
+ * @param int $errno
+ * @param string $errstr
+ */
+ public function muteErrorHandler($errno, $errstr)
+ {
+ }
}
-
diff --git a/library/HTMLPurifier/URIScheme/file.php b/library/HTMLPurifier/URIScheme/file.php
new file mode 100644
index 000000000..215be4ba8
--- /dev/null
+++ b/library/HTMLPurifier/URIScheme/file.php
@@ -0,0 +1,44 @@
+userinfo = null;
+ // file:// makes no provisions for accessing the resource
+ $uri->port = null;
+ // While it seems to work on Firefox, the querystring has
+ // no possible effect and is thus stripped.
+ $uri->query = null;
+ return true;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/ftp.php b/library/HTMLPurifier/URIScheme/ftp.php
index 5849bf7ff..1eb43ee5c 100644
--- a/library/HTMLPurifier/URIScheme/ftp.php
+++ b/library/HTMLPurifier/URIScheme/ftp.php
@@ -3,15 +3,32 @@
/**
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
*/
-class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type int
+ */
public $default_port = 21;
+
+ /**
+ * @type bool
+ */
public $browsable = true; // usually
+
+ /**
+ * @type bool
+ */
public $hierarchical = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
- $uri->query = null;
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
+ $uri->query = null;
// typecode check
$semicolon_pos = strrpos($uri->path, ';'); // reverse
@@ -34,10 +51,8 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
$uri->path = str_replace(';', '%3B', $uri->path);
$uri->path .= $type_ret;
}
-
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/http.php b/library/HTMLPurifier/URIScheme/http.php
index b097a31d6..ce69ec438 100644
--- a/library/HTMLPurifier/URIScheme/http.php
+++ b/library/HTMLPurifier/URIScheme/http.php
@@ -3,18 +3,34 @@
/**
* Validates http (HyperText Transfer Protocol) as defined by RFC 2616
*/
-class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type int
+ */
public $default_port = 80;
+
+ /**
+ * @type bool
+ */
public $browsable = true;
+
+ /**
+ * @type bool
+ */
public $hierarchical = true;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
$uri->userinfo = null;
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/https.php b/library/HTMLPurifier/URIScheme/https.php
index 29e380919..0e96882db 100644
--- a/library/HTMLPurifier/URIScheme/https.php
+++ b/library/HTMLPurifier/URIScheme/https.php
@@ -3,10 +3,16 @@
/**
* Validates https (Secure HTTP) according to http scheme.
*/
-class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
-
+class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http
+{
+ /**
+ * @type int
+ */
public $default_port = 443;
-
+ /**
+ * @type bool
+ */
+ public $secure = true;
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/mailto.php b/library/HTMLPurifier/URIScheme/mailto.php
index c1e2cd5aa..c3a6b602a 100644
--- a/library/HTMLPurifier/URIScheme/mailto.php
+++ b/library/HTMLPurifier/URIScheme/mailto.php
@@ -9,19 +9,32 @@
* @todo Filter allowed query parameters
*/
-class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type bool
+ */
public $browsable = false;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ /**
+ * @type bool
+ */
+ public $may_omit_host = true;
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
$uri->userinfo = null;
$uri->host = null;
$uri->port = null;
// we need to validate path against RFC 2368's addr-spec
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/news.php b/library/HTMLPurifier/URIScheme/news.php
index f5f54f4f5..7490927d6 100644
--- a/library/HTMLPurifier/URIScheme/news.php
+++ b/library/HTMLPurifier/URIScheme/news.php
@@ -3,20 +3,33 @@
/**
* Validates news (Usenet) as defined by generic RFC 1738
*/
-class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type bool
+ */
public $browsable = false;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ /**
+ * @type bool
+ */
+ public $may_omit_host = true;
+
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
$uri->userinfo = null;
- $uri->host = null;
- $uri->port = null;
- $uri->query = null;
+ $uri->host = null;
+ $uri->port = null;
+ $uri->query = null;
// typecode check needed on path
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URIScheme/nntp.php b/library/HTMLPurifier/URIScheme/nntp.php
index 5bf93ea78..f211d715e 100644
--- a/library/HTMLPurifier/URIScheme/nntp.php
+++ b/library/HTMLPurifier/URIScheme/nntp.php
@@ -3,18 +3,30 @@
/**
* Validates nntp (Network News Transfer Protocol) as defined by generic RFC 1738
*/
-class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
-
+class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme
+{
+ /**
+ * @type int
+ */
public $default_port = 119;
+
+ /**
+ * @type bool
+ */
public $browsable = false;
- public function validate(&$uri, $config, $context) {
- parent::validate($uri, $config, $context);
+ /**
+ * @param HTMLPurifier_URI $uri
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return bool
+ */
+ public function doValidate(&$uri, $config, $context)
+ {
$uri->userinfo = null;
- $uri->query = null;
+ $uri->query = null;
return true;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/URISchemeRegistry.php b/library/HTMLPurifier/URISchemeRegistry.php
index 576bf7b6d..4ac8a0b76 100644
--- a/library/HTMLPurifier/URISchemeRegistry.php
+++ b/library/HTMLPurifier/URISchemeRegistry.php
@@ -8,12 +8,14 @@ class HTMLPurifier_URISchemeRegistry
/**
* Retrieve sole instance of the registry.
- * @param $prototype Optional prototype to overload sole instance with,
+ * @param HTMLPurifier_URISchemeRegistry $prototype Optional prototype to overload sole instance with,
* or bool true to reset to default registry.
+ * @return HTMLPurifier_URISchemeRegistry
* @note Pass a registry object $prototype with a compatible interface and
* the function will copy it and return it all further times.
*/
- public static function instance($prototype = null) {
+ public static function instance($prototype = null)
+ {
static $instance = null;
if ($prototype !== null) {
$instance = $prototype;
@@ -25,17 +27,22 @@ class HTMLPurifier_URISchemeRegistry
/**
* Cache of retrieved schemes.
+ * @type HTMLPurifier_URIScheme[]
*/
protected $schemes = array();
/**
* Retrieves a scheme validator object
- * @param $scheme String scheme name like http or mailto
- * @param $config HTMLPurifier_Config object
- * @param $config HTMLPurifier_Context object
+ * @param string $scheme String scheme name like http or mailto
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return HTMLPurifier_URIScheme
*/
- public function getScheme($scheme, $config, $context) {
- if (!$config) $config = HTMLPurifier_Config::createDefault();
+ public function getScheme($scheme, $config, $context)
+ {
+ if (!$config) {
+ $config = HTMLPurifier_Config::createDefault();
+ }
// important, otherwise attacker could include arbitrary file
$allowed_schemes = $config->get('URI.AllowedSchemes');
@@ -45,24 +52,30 @@ class HTMLPurifier_URISchemeRegistry
return;
}
- if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
- if (!isset($allowed_schemes[$scheme])) return;
+ if (isset($this->schemes[$scheme])) {
+ return $this->schemes[$scheme];
+ }
+ if (!isset($allowed_schemes[$scheme])) {
+ return;
+ }
$class = 'HTMLPurifier_URIScheme_' . $scheme;
- if (!class_exists($class)) return;
+ if (!class_exists($class)) {
+ return;
+ }
$this->schemes[$scheme] = new $class();
return $this->schemes[$scheme];
}
/**
* Registers a custom scheme to the cache, bypassing reflection.
- * @param $scheme Scheme name
- * @param $scheme_obj HTMLPurifier_URIScheme object
+ * @param string $scheme Scheme name
+ * @param HTMLPurifier_URIScheme $scheme_obj
*/
- public function register($scheme, $scheme_obj) {
+ public function register($scheme, $scheme_obj)
+ {
$this->schemes[$scheme] = $scheme_obj;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/UnitConverter.php b/library/HTMLPurifier/UnitConverter.php
index 545d42622..166f3bf30 100644
--- a/library/HTMLPurifier/UnitConverter.php
+++ b/library/HTMLPurifier/UnitConverter.php
@@ -37,20 +37,24 @@ class HTMLPurifier_UnitConverter
/**
* Minimum bcmath precision for output.
+ * @type int
*/
protected $outputPrecision;
/**
* Bcmath precision for internal calculations.
+ * @type int
*/
protected $internalPrecision;
/**
- * Whether or not BCMath is available
+ * Whether or not BCMath is available.
+ * @type bool
*/
private $bcmath;
- public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
+ public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false)
+ {
$this->outputPrecision = $output_precision;
$this->internalPrecision = $internal_precision;
$this->bcmath = !$force_no_bcmath && function_exists('bcmul');
@@ -63,6 +67,7 @@ class HTMLPurifier_UnitConverter
* it before passing it here!
* @param string $to_unit
* Unit to convert to.
+ * @return HTMLPurifier_Length|bool
* @note
* About precision: This conversion function pays very special
* attention to the incoming precision of values and attempts
@@ -74,11 +79,13 @@ class HTMLPurifier_UnitConverter
* and this causes some decimals to be excluded, those
* decimals will be added on.
*/
- public function convert($length, $to_unit) {
+ public function convert($length, $to_unit)
+ {
+ if (!$length->isValid()) {
+ return false;
+ }
- if (!$length->isValid()) return false;
-
- $n = $length->getN();
+ $n = $length->getN();
$unit = $length->getUnit();
if ($n === '0' || $unit === false) {
@@ -87,21 +94,29 @@ class HTMLPurifier_UnitConverter
$state = $dest_state = false;
foreach (self::$units as $k => $x) {
- if (isset($x[$unit])) $state = $k;
- if (isset($x[$to_unit])) $dest_state = $k;
+ if (isset($x[$unit])) {
+ $state = $k;
+ }
+ if (isset($x[$to_unit])) {
+ $dest_state = $k;
+ }
+ }
+ if (!$state || !$dest_state) {
+ return false;
}
- if (!$state || !$dest_state) return false;
// Some calculations about the initial precision of the number;
// this will be useful when we need to do final rounding.
$sigfigs = $this->getSigFigs($n);
- if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision;
+ if ($sigfigs < $this->outputPrecision) {
+ $sigfigs = $this->outputPrecision;
+ }
// BCMath's internal precision deals only with decimals. Use
// our default if the initial number has no decimals, or increase
// it by how ever many decimals, thus, the number of guard digits
// will always be greater than or equal to internalPrecision.
- $log = (int) floor(log(abs($n), 10));
+ $log = (int)floor(log(abs($n), 10));
$cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision
for ($i = 0; $i < 2; $i++) {
@@ -152,14 +167,18 @@ class HTMLPurifier_UnitConverter
}
// Post-condition: $unit == $to_unit
- if ($unit !== $to_unit) return false;
+ if ($unit !== $to_unit) {
+ return false;
+ }
// Useful for debugging:
//echo "
n";
//echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n
\n";
$n = $this->round($n, $sigfigs);
- if (strpos($n, '.') !== false) $n = rtrim($n, '0');
+ if (strpos($n, '.') !== false) {
+ $n = rtrim($n, '0');
+ }
$n = rtrim($n, '.');
return new HTMLPurifier_Length($n, $unit);
@@ -170,53 +189,84 @@ class HTMLPurifier_UnitConverter
* @param string $n Decimal number
* @return int number of sigfigs
*/
- public function getSigFigs($n) {
+ public function getSigFigs($n)
+ {
$n = ltrim($n, '0+-');
$dp = strpos($n, '.'); // decimal position
if ($dp === false) {
$sigfigs = strlen(rtrim($n, '0'));
} else {
$sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
- if ($dp !== 0) $sigfigs--;
+ if ($dp !== 0) {
+ $sigfigs--;
+ }
}
return $sigfigs;
}
/**
* Adds two numbers, using arbitrary precision when available.
+ * @param string $s1
+ * @param string $s2
+ * @param int $scale
+ * @return string
*/
- private function add($s1, $s2, $scale) {
- if ($this->bcmath) return bcadd($s1, $s2, $scale);
- else return $this->scale($s1 + $s2, $scale);
+ private function add($s1, $s2, $scale)
+ {
+ if ($this->bcmath) {
+ return bcadd($s1, $s2, $scale);
+ } else {
+ return $this->scale((float)$s1 + (float)$s2, $scale);
+ }
}
/**
* Multiples two numbers, using arbitrary precision when available.
+ * @param string $s1
+ * @param string $s2
+ * @param int $scale
+ * @return string
*/
- private function mul($s1, $s2, $scale) {
- if ($this->bcmath) return bcmul($s1, $s2, $scale);
- else return $this->scale($s1 * $s2, $scale);
+ private function mul($s1, $s2, $scale)
+ {
+ if ($this->bcmath) {
+ return bcmul($s1, $s2, $scale);
+ } else {
+ return $this->scale((float)$s1 * (float)$s2, $scale);
+ }
}
/**
* Divides two numbers, using arbitrary precision when available.
+ * @param string $s1
+ * @param string $s2
+ * @param int $scale
+ * @return string
*/
- private function div($s1, $s2, $scale) {
- if ($this->bcmath) return bcdiv($s1, $s2, $scale);
- else return $this->scale($s1 / $s2, $scale);
+ private function div($s1, $s2, $scale)
+ {
+ if ($this->bcmath) {
+ return bcdiv($s1, $s2, $scale);
+ } else {
+ return $this->scale((float)$s1 / (float)$s2, $scale);
+ }
}
/**
* Rounds a number according to the number of sigfigs it should have,
* using arbitrary precision when available.
+ * @param float $n
+ * @param int $sigfigs
+ * @return string
*/
- private function round($n, $sigfigs) {
- $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
+ private function round($n, $sigfigs)
+ {
+ $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
$rp = $sigfigs - $new_log - 1; // Number of decimal places needed
$neg = $n < 0 ? '-' : ''; // Negative sign
if ($this->bcmath) {
if ($rp >= 0) {
- $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
+ $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
$n = bcdiv($n, '1', $rp);
} else {
// This algorithm partially depends on the standardized
@@ -232,23 +282,26 @@ class HTMLPurifier_UnitConverter
/**
* Scales a float to $scale digits right of decimal point, like BCMath.
+ * @param float $r
+ * @param int $scale
+ * @return string
*/
- private function scale($r, $scale) {
+ private function scale($r, $scale)
+ {
if ($scale < 0) {
// The f sprintf type doesn't support negative numbers, so we
// need to cludge things manually. First get the string.
- $r = sprintf('%.0f', (float) $r);
+ $r = sprintf('%.0f', (float)$r);
// Due to floating point precision loss, $r will more than likely
// look something like 4652999999999.9234. We grab one more digit
// than we need to precise from $r and then use that to round
// appropriately.
- $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1);
+ $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1);
// Now we return it, truncating the zero that was rounded off.
return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
}
- return sprintf('%.' . $scale . 'f', (float) $r);
+ return sprintf('%.' . $scale . 'f', (float)$r);
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/VarParser.php b/library/HTMLPurifier/VarParser.php
index 68e72ae86..50cba6910 100644
--- a/library/HTMLPurifier/VarParser.php
+++ b/library/HTMLPurifier/VarParser.php
@@ -7,58 +7,59 @@
class HTMLPurifier_VarParser
{
- const STRING = 1;
- const ISTRING = 2;
- const TEXT = 3;
- const ITEXT = 4;
- const INT = 5;
- const FLOAT = 6;
- const BOOL = 7;
- const LOOKUP = 8;
- const ALIST = 9;
- const HASH = 10;
- const MIXED = 11;
+ const STRING = 1;
+ const ISTRING = 2;
+ const TEXT = 3;
+ const ITEXT = 4;
+ const INT = 5;
+ const FLOAT = 6;
+ const BOOL = 7;
+ const LOOKUP = 8;
+ const ALIST = 9;
+ const HASH = 10;
+ const MIXED = 11;
/**
* Lookup table of allowed types. Mainly for backwards compatibility, but
* also convenient for transforming string type names to the integer constants.
*/
- static public $types = array(
- 'string' => self::STRING,
- 'istring' => self::ISTRING,
- 'text' => self::TEXT,
- 'itext' => self::ITEXT,
- 'int' => self::INT,
- 'float' => self::FLOAT,
- 'bool' => self::BOOL,
- 'lookup' => self::LOOKUP,
- 'list' => self::ALIST,
- 'hash' => self::HASH,
- 'mixed' => self::MIXED
+ public static $types = array(
+ 'string' => self::STRING,
+ 'istring' => self::ISTRING,
+ 'text' => self::TEXT,
+ 'itext' => self::ITEXT,
+ 'int' => self::INT,
+ 'float' => self::FLOAT,
+ 'bool' => self::BOOL,
+ 'lookup' => self::LOOKUP,
+ 'list' => self::ALIST,
+ 'hash' => self::HASH,
+ 'mixed' => self::MIXED
);
/**
* Lookup table of types that are string, and can have aliases or
* allowed value lists.
*/
- static public $stringTypes = array(
- self::STRING => true,
- self::ISTRING => true,
- self::TEXT => true,
- self::ITEXT => true,
+ public static $stringTypes = array(
+ self::STRING => true,
+ self::ISTRING => true,
+ self::TEXT => true,
+ self::ITEXT => true,
);
/**
- * Validate a variable according to type. Throws
- * HTMLPurifier_VarParserException if invalid.
+ * Validate a variable according to type.
* It may return NULL as a valid type if $allow_null is true.
*
- * @param $var Variable to validate
- * @param $type Type of variable, see HTMLPurifier_VarParser->types
- * @param $allow_null Whether or not to permit null as a value
- * @return Validated and type-coerced variable
+ * @param mixed $var Variable to validate
+ * @param int $type Type of variable, see HTMLPurifier_VarParser->types
+ * @param bool $allow_null Whether or not to permit null as a value
+ * @return string Validated and type-coerced variable
+ * @throws HTMLPurifier_VarParserException
*/
- final public function parse($var, $type, $allow_null = false) {
+ final public function parse($var, $type, $allow_null = false)
+ {
if (is_string($type)) {
if (!isset(HTMLPurifier_VarParser::$types[$type])) {
throw new HTMLPurifier_VarParserException("Invalid type '$type'");
@@ -67,7 +68,9 @@ class HTMLPurifier_VarParser
}
}
$var = $this->parseImplementation($var, $type, $allow_null);
- if ($allow_null && $var === null) return null;
+ if ($allow_null && $var === null) {
+ return null;
+ }
// These are basic checks, to make sure nothing horribly wrong
// happened in our implementations.
switch ($type) {
@@ -75,27 +78,45 @@ class HTMLPurifier_VarParser
case (self::ISTRING):
case (self::TEXT):
case (self::ITEXT):
- if (!is_string($var)) break;
- if ($type == self::ISTRING || $type == self::ITEXT) $var = strtolower($var);
+ if (!is_string($var)) {
+ break;
+ }
+ if ($type == self::ISTRING || $type == self::ITEXT) {
+ $var = strtolower($var);
+ }
return $var;
case (self::INT):
- if (!is_int($var)) break;
+ if (!is_int($var)) {
+ break;
+ }
return $var;
case (self::FLOAT):
- if (!is_float($var)) break;
+ if (!is_float($var)) {
+ break;
+ }
return $var;
case (self::BOOL):
- if (!is_bool($var)) break;
+ if (!is_bool($var)) {
+ break;
+ }
return $var;
case (self::LOOKUP):
case (self::ALIST):
case (self::HASH):
- if (!is_array($var)) break;
+ if (!is_array($var)) {
+ break;
+ }
if ($type === self::LOOKUP) {
- foreach ($var as $k) if ($k !== true) $this->error('Lookup table contains value other than true');
+ foreach ($var as $k) {
+ if ($k !== true) {
+ $this->error('Lookup table contains value other than true');
+ }
+ }
} elseif ($type === self::ALIST) {
$keys = array_keys($var);
- if (array_keys($keys) !== $keys) $this->error('Indices for list are not uniform');
+ if (array_keys($keys) !== $keys) {
+ $this->error('Indices for list are not uniform');
+ }
}
return $var;
case (self::MIXED):
@@ -107,17 +128,24 @@ class HTMLPurifier_VarParser
}
/**
- * Actually implements the parsing. Base implementation is to not
+ * Actually implements the parsing. Base implementation does not
* do anything to $var. Subclasses should overload this!
+ * @param mixed $var
+ * @param int $type
+ * @param bool $allow_null
+ * @return string
*/
- protected function parseImplementation($var, $type, $allow_null) {
+ protected function parseImplementation($var, $type, $allow_null)
+ {
return $var;
}
/**
* Throws an exception.
+ * @throws HTMLPurifier_VarParserException
*/
- protected function error($msg) {
+ protected function error($msg)
+ {
throw new HTMLPurifier_VarParserException($msg);
}
@@ -126,29 +154,45 @@ class HTMLPurifier_VarParser
* @note This should not ever be called. It would be called if we
* extend the allowed values of HTMLPurifier_VarParser without
* updating subclasses.
+ * @param string $class
+ * @param int $type
+ * @throws HTMLPurifier_Exception
*/
- protected function errorInconsistent($class, $type) {
- throw new HTMLPurifier_Exception("Inconsistency in $class: ".HTMLPurifier_VarParser::getTypeName($type)." not implemented");
+ protected function errorInconsistent($class, $type)
+ {
+ throw new HTMLPurifier_Exception(
+ "Inconsistency in $class: " . HTMLPurifier_VarParser::getTypeName($type) .
+ " not implemented"
+ );
}
/**
* Generic error for if a type didn't work.
+ * @param mixed $var
+ * @param int $type
*/
- protected function errorGeneric($var, $type) {
+ protected function errorGeneric($var, $type)
+ {
$vtype = gettype($var);
- $this->error("Expected type ".HTMLPurifier_VarParser::getTypeName($type).", got $vtype");
+ $this->error("Expected type " . HTMLPurifier_VarParser::getTypeName($type) . ", got $vtype");
}
- static public function getTypeName($type) {
+ /**
+ * @param int $type
+ * @return string
+ */
+ public static function getTypeName($type)
+ {
static $lookup;
if (!$lookup) {
// Lazy load the alternative lookup table
$lookup = array_flip(HTMLPurifier_VarParser::$types);
}
- if (!isset($lookup[$type])) return 'unknown';
+ if (!isset($lookup[$type])) {
+ return 'unknown';
+ }
return $lookup[$type];
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/VarParser/Flexible.php b/library/HTMLPurifier/VarParser/Flexible.php
index c954250e9..b15016c5b 100644
--- a/library/HTMLPurifier/VarParser/Flexible.php
+++ b/library/HTMLPurifier/VarParser/Flexible.php
@@ -7,28 +7,41 @@
*/
class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
{
-
- protected function parseImplementation($var, $type, $allow_null) {
- if ($allow_null && $var === null) return null;
+ /**
+ * @param mixed $var
+ * @param int $type
+ * @param bool $allow_null
+ * @return array|bool|float|int|mixed|null|string
+ * @throws HTMLPurifier_VarParserException
+ */
+ protected function parseImplementation($var, $type, $allow_null)
+ {
+ if ($allow_null && $var === null) {
+ return null;
+ }
switch ($type) {
// Note: if code "breaks" from the switch, it triggers a generic
// exception to be thrown. Specific errors can be specifically
// done here.
- case self::MIXED :
- case self::ISTRING :
- case self::STRING :
- case self::TEXT :
- case self::ITEXT :
+ case self::MIXED:
+ case self::ISTRING:
+ case self::STRING:
+ case self::TEXT:
+ case self::ITEXT:
return $var;
- case self::INT :
- if (is_string($var) && ctype_digit($var)) $var = (int) $var;
+ case self::INT:
+ if (is_string($var) && ctype_digit($var)) {
+ $var = (int)$var;
+ }
return $var;
- case self::FLOAT :
- if ((is_string($var) && is_numeric($var)) || is_int($var)) $var = (float) $var;
+ case self::FLOAT:
+ if ((is_string($var) && is_numeric($var)) || is_int($var)) {
+ $var = (float)$var;
+ }
return $var;
- case self::BOOL :
+ case self::BOOL:
if (is_int($var) && ($var === 0 || $var === 1)) {
- $var = (bool) $var;
+ $var = (bool)$var;
} elseif (is_string($var)) {
if ($var == 'on' || $var == 'true' || $var == '1') {
$var = true;
@@ -39,48 +52,70 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
}
}
return $var;
- case self::ALIST :
- case self::HASH :
- case self::LOOKUP :
+ case self::ALIST:
+ case self::HASH:
+ case self::LOOKUP:
if (is_string($var)) {
// special case: technically, this is an array with
// a single empty string item, but having an empty
// array is more intuitive
- if ($var == '') return array();
+ if ($var == '') {
+ return array();
+ }
if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
// simplistic string to array method that only works
// for simple lists of tag names or alphanumeric characters
- $var = explode(',',$var);
+ $var = explode(',', $var);
} else {
$var = preg_split('/(,|[\n\r]+)/', $var);
}
// remove spaces
- foreach ($var as $i => $j) $var[$i] = trim($j);
+ foreach ($var as $i => $j) {
+ $var[$i] = trim($j);
+ }
if ($type === self::HASH) {
// key:value,key2:value2
$nvar = array();
foreach ($var as $keypair) {
$c = explode(':', $keypair, 2);
- if (!isset($c[1])) continue;
- $nvar[$c[0]] = $c[1];
+ if (!isset($c[1])) {
+ continue;
+ }
+ $nvar[trim($c[0])] = trim($c[1]);
}
$var = $nvar;
}
}
- if (!is_array($var)) break;
+ if (!is_array($var)) {
+ break;
+ }
$keys = array_keys($var);
if ($keys === array_keys($keys)) {
- if ($type == self::ALIST) return $var;
- elseif ($type == self::LOOKUP) {
+ if ($type == self::ALIST) {
+ return $var;
+ } elseif ($type == self::LOOKUP) {
$new = array();
foreach ($var as $key) {
$new[$key] = true;
}
return $new;
- } else break;
+ } else {
+ break;
+ }
+ }
+ if ($type === self::ALIST) {
+ trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
+ return array_values($var);
}
if ($type === self::LOOKUP) {
foreach ($var as $key => $value) {
+ if ($value !== true) {
+ trigger_error(
+ "Lookup array has non-true value at key '$key'; " .
+ "maybe your input array was not indexed numerically",
+ E_USER_WARNING
+ );
+ }
$var[$key] = true;
}
}
@@ -90,7 +125,6 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
}
$this->errorGeneric($var, $type);
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/VarParser/Native.php b/library/HTMLPurifier/VarParser/Native.php
index b02a6de54..f11c318ef 100644
--- a/library/HTMLPurifier/VarParser/Native.php
+++ b/library/HTMLPurifier/VarParser/Native.php
@@ -8,11 +8,24 @@
class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
{
- protected function parseImplementation($var, $type, $allow_null) {
+ /**
+ * @param mixed $var
+ * @param int $type
+ * @param bool $allow_null
+ * @return null|string
+ */
+ protected function parseImplementation($var, $type, $allow_null)
+ {
return $this->evalExpression($var);
}
- protected function evalExpression($expr) {
+ /**
+ * @param string $expr
+ * @return mixed
+ * @throws HTMLPurifier_VarParserException
+ */
+ protected function evalExpression($expr)
+ {
$var = null;
$result = eval("\$var = $expr;");
if ($result === false) {
@@ -20,7 +33,6 @@ class HTMLPurifier_VarParser_Native extends HTMLPurifier_VarParser
}
return $var;
}
-
}
// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Zipper.php b/library/HTMLPurifier/Zipper.php
new file mode 100644
index 000000000..6e21ea070
--- /dev/null
+++ b/library/HTMLPurifier/Zipper.php
@@ -0,0 +1,157 @@
+front = $front;
+ $this->back = $back;
+ }
+
+ /**
+ * Creates a zipper from an array, with a hole in the
+ * 0-index position.
+ * @param Array to zipper-ify.
+ * @return Tuple of zipper and element of first position.
+ */
+ static public function fromArray($array) {
+ $z = new self(array(), array_reverse($array));
+ $t = $z->delete(); // delete the "dummy hole"
+ return array($z, $t);
+ }
+
+ /**
+ * Convert zipper back into a normal array, optionally filling in
+ * the hole with a value. (Usually you should supply a $t, unless you
+ * are at the end of the array.)
+ */
+ public function toArray($t = NULL) {
+ $a = $this->front;
+ if ($t !== NULL) $a[] = $t;
+ for ($i = count($this->back)-1; $i >= 0; $i--) {
+ $a[] = $this->back[$i];
+ }
+ return $a;
+ }
+
+ /**
+ * Move hole to the next element.
+ * @param $t Element to fill hole with
+ * @return Original contents of new hole.
+ */
+ public function next($t) {
+ if ($t !== NULL) array_push($this->front, $t);
+ return empty($this->back) ? NULL : array_pop($this->back);
+ }
+
+ /**
+ * Iterated hole advancement.
+ * @param $t Element to fill hole with
+ * @param $i How many forward to advance hole
+ * @return Original contents of new hole, i away
+ */
+ public function advance($t, $n) {
+ for ($i = 0; $i < $n; $i++) {
+ $t = $this->next($t);
+ }
+ return $t;
+ }
+
+ /**
+ * Move hole to the previous element
+ * @param $t Element to fill hole with
+ * @return Original contents of new hole.
+ */
+ public function prev($t) {
+ if ($t !== NULL) array_push($this->back, $t);
+ return empty($this->front) ? NULL : array_pop($this->front);
+ }
+
+ /**
+ * Delete contents of current hole, shifting hole to
+ * next element.
+ * @return Original contents of new hole.
+ */
+ public function delete() {
+ return empty($this->back) ? NULL : array_pop($this->back);
+ }
+
+ /**
+ * Returns true if we are at the end of the list.
+ * @return bool
+ */
+ public function done() {
+ return empty($this->back);
+ }
+
+ /**
+ * Insert element before hole.
+ * @param Element to insert
+ */
+ public function insertBefore($t) {
+ if ($t !== NULL) array_push($this->front, $t);
+ }
+
+ /**
+ * Insert element after hole.
+ * @param Element to insert
+ */
+ public function insertAfter($t) {
+ if ($t !== NULL) array_push($this->back, $t);
+ }
+
+ /**
+ * Splice in multiple elements at hole. Functional specification
+ * in terms of array_splice:
+ *
+ * $arr1 = $arr;
+ * $old1 = array_splice($arr1, $i, $delete, $replacement);
+ *
+ * list($z, $t) = HTMLPurifier_Zipper::fromArray($arr);
+ * $t = $z->advance($t, $i);
+ * list($old2, $t) = $z->splice($t, $delete, $replacement);
+ * $arr2 = $z->toArray($t);
+ *
+ * assert($old1 === $old2);
+ * assert($arr1 === $arr2);
+ *
+ * NB: the absolute index location after this operation is
+ * *unchanged!*
+ *
+ * @param Current contents of hole.
+ */
+ public function splice($t, $delete, $replacement) {
+ // delete
+ $old = array();
+ $r = $t;
+ for ($i = $delete; $i > 0; $i--) {
+ $old[] = $r;
+ $r = $this->delete();
+ }
+ // insert
+ for ($i = count($replacement)-1; $i >= 0; $i--) {
+ $this->insertAfter($r);
+ $r = $replacement[$i];
+ }
+ return array($old, $r);
+ }
+}