htmlpurifier update - compatibility issue with language library autoloader
This commit is contained in:
		| @@ -11,49 +11,64 @@ class HTMLPurifier_Generator | ||||
| { | ||||
|  | ||||
|     /** | ||||
|      * Whether or not generator should produce XML output | ||||
|      * Whether or not generator should produce XML output. | ||||
|      * @type bool | ||||
|      */ | ||||
|     private $_xhtml = true; | ||||
|  | ||||
|     /** | ||||
|      * :HACK: Whether or not generator should comment the insides of <script> tags | ||||
|      * :HACK: Whether or not generator should comment the insides of <script> tags. | ||||
|      * @type bool | ||||
|      */ | ||||
|     private $_scriptFix = false; | ||||
|  | ||||
|     /** | ||||
|      * Cache of HTMLDefinition during HTML output to determine whether or | ||||
|      * not attributes should be minimized. | ||||
|      * @type HTMLPurifier_HTMLDefinition | ||||
|      */ | ||||
|     private $_def; | ||||
|  | ||||
|     /** | ||||
|      * Cache of %Output.SortAttr | ||||
|      * Cache of %Output.SortAttr. | ||||
|      * @type bool | ||||
|      */ | ||||
|     private $_sortAttr; | ||||
|  | ||||
|     /** | ||||
|      * Cache of %Output.FlashCompat | ||||
|      * Cache of %Output.FlashCompat. | ||||
|      * @type bool | ||||
|      */ | ||||
|     private $_flashCompat; | ||||
|  | ||||
|     /** | ||||
|      * Cache of %Output.FixInnerHTML. | ||||
|      * @type bool | ||||
|      */ | ||||
|     private $_innerHTMLFix; | ||||
|  | ||||
|     /** | ||||
|      * Stack for keeping track of object information when outputting IE | ||||
|      * compatibility code. | ||||
|      * @type array | ||||
|      */ | ||||
|     private $_flashStack = array(); | ||||
|  | ||||
|     /** | ||||
|      * Configuration for the generator | ||||
|      * @type HTMLPurifier_Config | ||||
|      */ | ||||
|     protected $config; | ||||
|  | ||||
|     /** | ||||
|      * @param $config Instance of HTMLPurifier_Config | ||||
|      * @param $context Instance of HTMLPurifier_Context | ||||
|      * @param HTMLPurifier_Config $config | ||||
|      * @param HTMLPurifier_Context $context | ||||
|      */ | ||||
|     public function __construct($config, $context) { | ||||
|     public function __construct($config, $context) | ||||
|     { | ||||
|         $this->config = $config; | ||||
|         $this->_scriptFix = $config->get('Output.CommentScriptContents'); | ||||
|         $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); | ||||
|         $this->_sortAttr = $config->get('Output.SortAttr'); | ||||
|         $this->_flashCompat = $config->get('Output.FlashCompat'); | ||||
|         $this->_def = $config->getHTMLDefinition(); | ||||
| @@ -62,12 +77,14 @@ class HTMLPurifier_Generator | ||||
|  | ||||
|     /** | ||||
|      * Generates HTML from an array of tokens. | ||||
|      * @param $tokens Array of HTMLPurifier_Token | ||||
|      * @param $config HTMLPurifier_Config object | ||||
|      * @return Generated HTML | ||||
|      * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token | ||||
|      * @return string Generated HTML | ||||
|      */ | ||||
|     public function generateFromTokens($tokens) { | ||||
|         if (!$tokens) return ''; | ||||
|     public function generateFromTokens($tokens) | ||||
|     { | ||||
|         if (!$tokens) { | ||||
|             return ''; | ||||
|         } | ||||
|  | ||||
|         // Basic algorithm | ||||
|         $html = ''; | ||||
| @@ -86,30 +103,41 @@ class HTMLPurifier_Generator | ||||
|         // Tidy cleanup | ||||
|         if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { | ||||
|             $tidy = new Tidy; | ||||
|             $tidy->parseString($html, array( | ||||
|                'indent'=> true, | ||||
|                'output-xhtml' => $this->_xhtml, | ||||
|                'show-body-only' => true, | ||||
|                'indent-spaces' => 2, | ||||
|                'wrap' => 68, | ||||
|             ), 'utf8'); | ||||
|             $tidy->parseString( | ||||
|                 $html, | ||||
|                 array( | ||||
|                    'indent'=> true, | ||||
|                    'output-xhtml' => $this->_xhtml, | ||||
|                    'show-body-only' => true, | ||||
|                    'indent-spaces' => 2, | ||||
|                    'wrap' => 68, | ||||
|                 ), | ||||
|                 'utf8' | ||||
|             ); | ||||
|             $tidy->cleanRepair(); | ||||
|             $html = (string) $tidy; // explicit cast necessary | ||||
|         } | ||||
|  | ||||
|         // Normalize newlines to system defined value | ||||
|         $nl = $this->config->get('Output.Newline'); | ||||
|         if ($nl === null) $nl = PHP_EOL; | ||||
|         if ($nl !== "\n") $html = str_replace("\n", $nl, $html); | ||||
|         if ($this->config->get('Core.NormalizeNewlines')) { | ||||
|             $nl = $this->config->get('Output.Newline'); | ||||
|             if ($nl === null) { | ||||
|                 $nl = PHP_EOL; | ||||
|             } | ||||
|             if ($nl !== "\n") { | ||||
|                 $html = str_replace("\n", $nl, $html); | ||||
|             } | ||||
|         } | ||||
|         return $html; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Generates HTML from a single token. | ||||
|      * @param $token HTMLPurifier_Token object. | ||||
|      * @return Generated HTML | ||||
|      * @param HTMLPurifier_Token $token HTMLPurifier_Token object. | ||||
|      * @return string Generated HTML | ||||
|      */ | ||||
|     public function generateFromToken($token) { | ||||
|     public function generateFromToken($token) | ||||
|     { | ||||
|         if (!$token instanceof HTMLPurifier_Token) { | ||||
|             trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); | ||||
|             return ''; | ||||
| @@ -130,19 +158,7 @@ class HTMLPurifier_Generator | ||||
|             $_extra = ''; | ||||
|             if ($this->_flashCompat) { | ||||
|                 if ($token->name == "object" && !empty($this->_flashStack)) { | ||||
|                     $flash = array_pop($this->_flashStack); | ||||
|                     $compat_token = new HTMLPurifier_Token_Empty("embed"); | ||||
|                     foreach ($flash->attr as $name => $val) { | ||||
|                         if ($name == "classid") continue; | ||||
|                         if ($name == "type") continue; | ||||
|                         if ($name == "data") $name = "src"; | ||||
|                         $compat_token->attr[$name] = $val; | ||||
|                     } | ||||
|                     foreach ($flash->param as $name => $val) { | ||||
|                         if ($name == "movie") $name = "src"; | ||||
|                         $compat_token->attr[$name] = $val; | ||||
|                     } | ||||
|                     $_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->"; | ||||
|                     // doesn't do anything for now | ||||
|                 } | ||||
|             } | ||||
|             return $_extra . '</' . $token->name . '>'; | ||||
| @@ -169,11 +185,16 @@ class HTMLPurifier_Generator | ||||
|  | ||||
|     /** | ||||
|      * Special case processor for the contents of script tags | ||||
|      * @param HTMLPurifier_Token $token HTMLPurifier_Token object. | ||||
|      * @return string | ||||
|      * @warning This runs into problems if there's already a literal | ||||
|      *          --> somewhere inside the script contents. | ||||
|      */ | ||||
|     public function generateScriptFromToken($token) { | ||||
|         if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); | ||||
|     public function generateScriptFromToken($token) | ||||
|     { | ||||
|         if (!$token instanceof HTMLPurifier_Token_Text) { | ||||
|             return $this->generateFromToken($token); | ||||
|         } | ||||
|         // Thanks <http://lachy.id.au/log/2005/05/script-comments> | ||||
|         $data = preg_replace('#//\s*$#', '', $token->data); | ||||
|         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; | ||||
| @@ -182,24 +203,60 @@ class HTMLPurifier_Generator | ||||
|     /** | ||||
|      * Generates attribute declarations from attribute array. | ||||
|      * @note This does not include the leading or trailing space. | ||||
|      * @param $assoc_array_of_attributes Attribute array | ||||
|      * @param $element Name of element attributes are for, used to check | ||||
|      * @param array $assoc_array_of_attributes Attribute array | ||||
|      * @param string $element Name of element attributes are for, used to check | ||||
|      *        attribute minimization. | ||||
|      * @return Generate HTML fragment for insertion. | ||||
|      * @return string Generated HTML fragment for insertion. | ||||
|      */ | ||||
|     public function generateAttributes($assoc_array_of_attributes, $element = false) { | ||||
|     public function generateAttributes($assoc_array_of_attributes, $element = '') | ||||
|     { | ||||
|         $html = ''; | ||||
|         if ($this->_sortAttr) ksort($assoc_array_of_attributes); | ||||
|         if ($this->_sortAttr) { | ||||
|             ksort($assoc_array_of_attributes); | ||||
|         } | ||||
|         foreach ($assoc_array_of_attributes as $key => $value) { | ||||
|             if (!$this->_xhtml) { | ||||
|                 // Remove namespaced attributes | ||||
|                 if (strpos($key, ':') !== false) continue; | ||||
|                 if (strpos($key, ':') !== false) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 // Check if we should minimize the attribute: val="val" -> val | ||||
|                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { | ||||
|                     $html .= $key . ' '; | ||||
|                     continue; | ||||
|                 } | ||||
|             } | ||||
|             // Workaround for Internet Explorer innerHTML bug. | ||||
|             // Essentially, Internet Explorer, when calculating | ||||
|             // innerHTML, omits quotes if there are no instances of | ||||
|             // angled brackets, quotes or spaces.  However, when parsing | ||||
|             // HTML (for example, when you assign to innerHTML), it | ||||
|             // treats backticks as quotes.  Thus, | ||||
|             //      <img alt="``" /> | ||||
|             // becomes | ||||
|             //      <img alt=`` /> | ||||
|             // becomes | ||||
|             //      <img alt='' /> | ||||
|             // Fortunately, all we need to do is trigger an appropriate | ||||
|             // quoting style, which we do by adding an extra space. | ||||
|             // This also is consistent with the W3C spec, which states | ||||
|             // that user agents may ignore leading or trailing | ||||
|             // whitespace (in fact, most don't, at least for attributes | ||||
|             // like alt, but an extra space at the end is barely | ||||
|             // noticeable).  Still, we have a configuration knob for | ||||
|             // this, since this transformation is not necessary if you | ||||
|             // don't process user input with innerHTML or you don't plan | ||||
|             // on supporting Internet Explorer. | ||||
|             if ($this->_innerHTMLFix) { | ||||
|                 if (strpos($value, '`') !== false) { | ||||
|                     // check if correct quoting style would not already be | ||||
|                     // triggered | ||||
|                     if (strcspn($value, '"\' <>') === strlen($value)) { | ||||
|                         // protect! | ||||
|                         $value .= ' '; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             $html .= $key.'="'.$this->escape($value).'" '; | ||||
|         } | ||||
|         return rtrim($html); | ||||
| @@ -210,15 +267,20 @@ class HTMLPurifier_Generator | ||||
|      * @todo This really ought to be protected, but until we have a facility | ||||
|      *       for properly generating HTML here w/o using tokens, it stays | ||||
|      *       public. | ||||
|      * @param $string String data to escape for HTML. | ||||
|      * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is | ||||
|      * @param string $string String data to escape for HTML. | ||||
|      * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is | ||||
|      *               permissible for non-attribute output. | ||||
|      * @return String escaped data. | ||||
|      * @return string escaped data. | ||||
|      */ | ||||
|     public function escape($string, $quote = ENT_COMPAT) { | ||||
|     public function escape($string, $quote = null) | ||||
|     { | ||||
|         // Workaround for APC bug on Mac Leopard reported by sidepodcast | ||||
|         // http://htmlpurifier.org/phorum/read.php?3,4823,4846 | ||||
|         if ($quote === null) { | ||||
|             $quote = ENT_COMPAT; | ||||
|         } | ||||
|         return htmlspecialchars($string, $quote, 'UTF-8'); | ||||
|     } | ||||
|  | ||||
| } | ||||
|  | ||||
| // vim: et sw=4 sts=4 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user