hubzilla issue #901 - unicode characters in urls tripping up url regexes - these regexes have been modified to accept unicode "letters" which may preclude emojis and control sequences and symbols in url links; but should suffice for most legal URLs containing language context "text" glyphs.
This commit is contained in:
parent
766a7fb1b4
commit
9b19e40a74
@ -50,7 +50,7 @@ function profile_activity($changed, $value) {
|
||||
|
||||
if($t == 1 && strlen($value)) {
|
||||
// if it's a url, the HTML quotes will mess it up, so link it and don't try and zidify it because we don't know what it points to.
|
||||
$value = preg_replace_callback("/([^\]\='".'"'."]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\+\,]+)/ism", 'red_zrl_callback', $value);
|
||||
$value = preg_replace_callback("/([^\]\='".'"'."]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\+\,]+)/ismu", 'red_zrl_callback', $value);
|
||||
// take out the bookmark indicator
|
||||
if(substr($value,0,2) === '#^')
|
||||
$value = str_replace('#^','',$value);
|
||||
|
@ -838,13 +838,13 @@ function bbcode($Text, $preserve_nl = false, $tryoembed = true, $cache = false)
|
||||
|
||||
// Perform URL Search
|
||||
|
||||
$urlchars = '[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]';
|
||||
$urlchars = '[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]';
|
||||
|
||||
if (strpos($Text,'http') !== false) {
|
||||
if($tryoembed) {
|
||||
$Text = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ism", 'tryoembed', $Text);
|
||||
$Text = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ismu", 'tryoembed', $Text);
|
||||
}
|
||||
$Text = preg_replace("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ism", '$1<a href="$2" target="_blank" rel="nofollow noopener">$2</a>', $Text);
|
||||
$Text = preg_replace("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/$urlchars+)/ismu", '$1<a href="$2" target="_blank" rel="nofollow noopener">$2</a>', $Text);
|
||||
}
|
||||
|
||||
if (strpos($Text,'[/share]') !== false) {
|
||||
|
@ -75,10 +75,10 @@ function markdown_to_bb($s, $use_zrl = false, $options = []) {
|
||||
// Convert everything that looks like a link to a link
|
||||
if($use_zrl) {
|
||||
$s = str_replace(['[img', '/img]'], ['[zmg', '/zmg]'], $s);
|
||||
$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ism", '$1[zrl=$2$3]$2$3[/zrl]',$s);
|
||||
$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ismu", '$1[zrl=$2$3]$2$3[/zrl]',$s);
|
||||
}
|
||||
else {
|
||||
$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ism", '$1[url=$2$3]$2$3[/url]',$s);
|
||||
$s = preg_replace("/([^\]\=]|^)(https?\:\/\/)([a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,\@\(\)]+)/ismu", '$1[url=$2$3]$2$3[/url]',$s);
|
||||
}
|
||||
|
||||
// remove duplicate adjacent code tags
|
||||
|
@ -819,7 +819,7 @@ function get_tags($s) {
|
||||
// added ; to single word tags to allow emojis and other unicode character constructs in bbcode
|
||||
// (this would actually be &#xnnnnn; but the ampersand will have been escaped to & by the time we see it.)
|
||||
|
||||
if(preg_match_all('/(?<![a-zA-Z0-9=\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/',$s,$match)) {
|
||||
if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?])(@[^ \x0D\x0A,:?\[]+ [^ \x0D\x0A@,:?\[]+)/u',$s,$match)) {
|
||||
foreach($match[1] as $mtch) {
|
||||
if(substr($mtch,-1,1) === '.')
|
||||
$ret[] = substr($mtch,0,-1);
|
||||
@ -831,7 +831,7 @@ function get_tags($s) {
|
||||
// Otherwise pull out single word tags. These can be @nickname, @first_last
|
||||
// and #hash tags.
|
||||
|
||||
if(preg_match_all('/(?<![a-zA-Z0-9=\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/',$s,$match)) {
|
||||
if(preg_match_all('/(?<![a-zA-Z0-9=\pL\/\?\;])([@#\!][^ \x0D\x0A,;:?\[]+)/u',$s,$match)) {
|
||||
foreach($match[1] as $mtch) {
|
||||
if(substr($mtch,-1,1) === '.')
|
||||
$mtch = substr($mtch,0,-1);
|
||||
@ -1052,7 +1052,7 @@ function searchbox($s,$id='search-box',$url='/search',$save = false) {
|
||||
* @return string
|
||||
*/
|
||||
function linkify($s, $me = false) {
|
||||
$s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s);
|
||||
$s = preg_replace("/(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\_\@\~\#\'\%\$\!\+\,\@]*)/u", (($me) ? ' <a href="$1" rel="me" >$1</a>' : ' <a href="$1" >$1</a>'), $s);
|
||||
$s = preg_replace("/\<(.*?)(src|href)=(.*?)\&\;(.*?)\>/ism",'<$1$2=$3&$4>',$s);
|
||||
|
||||
return($s);
|
||||
@ -3099,10 +3099,10 @@ function cleanup_bbcode($body) {
|
||||
$body = preg_replace_callback('/\[zrl(.*?)\[\/(zrl)\]/ism','\red_escape_codeblock',$body);
|
||||
|
||||
|
||||
$body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
|
||||
+\,\(\)]+)/ism", '\nakedoembed', $body);
|
||||
$body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
|
||||
+\,\(\)]+)/ism", '\red_zrl_callback', $body);
|
||||
$body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
|
||||
+\,\(\)]+)/ismu", '\nakedoembed', $body);
|
||||
$body = preg_replace_callback("/([^\]\='".'"'."\/]|^|\#\^)(https?\:\/\/[a-zA-Z0-9\pL\:\/\-\?\&\;\.\=\@\_\~\#\%\$\!\\
|
||||
+\,\(\)]+)/ismu", '\red_zrl_callback', $body);
|
||||
|
||||
$body = preg_replace_callback('/\[\$b64zrl(.*?)\[\/(zrl)\]/ism','\red_unescape_codeblock',$body);
|
||||
$body = preg_replace_callback('/\[\$b64url(.*?)\[\/(url)\]/ism','\red_unescape_codeblock',$body);
|
||||
|
Reference in New Issue
Block a user