243 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			243 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| require_once('library/HTML5/Parser.php');
 | |
| 
 | |
if(! function_exists('scrape_dfrn')) {
/**
 * Fetch a page and collect DFRN link relations plus hCard profile fields.
 *
 * Keys that may be present in the result (only when found in the page):
 *   'feed_atom' - href of <link rel="alternate" type="application/atom+xml">
 *   'dfrn-*'    - href of every <link> whose rel starts with "dfrn-"
 *   'nick'      - local part of the acct: URI in a <link rel="lrdd"> href
 *   'fn'        - text of an element with class "fn" inside a "vcard" element
 *   'photo'     - src attribute of an element with class "photo" inside a "vcard"
 *   'key'       - text of an element with class "key" inside a "vcard"
 *
 * @param string $url address handed to fetch_url()
 * @return array associative array as described above; empty when nothing was
 *               fetched, the response is an Atom/RSS feed, or parsing failed
 */
function scrape_dfrn($url) {

	$a = get_app();

	$ret = array();

	logger('scrape_dfrn: url=' . $url);

	$s = fetch_url($url);

	if(! $s)
		return $ret;

	$headers = $a->get_curl_headers();
	logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG);

	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) {
			// must be the variable $ret: a bare `ret` is an undefined constant
			// (fatal Error on PHP 8, the string 'ret' on older versions)
			return $ret;
		}
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get DFRN link elements

	$items = $dom->getElementsByTagName('link');

	foreach($items as $item) {
		$x = $item->getAttribute('rel');
		if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
			$ret['feed_atom'] = $item->getAttribute('href');
		if(substr($x,0,5) == "dfrn-")
			$ret[$x] = $item->getAttribute('href');
		if($x === 'lrdd') {
			// the href is URL-encoded; the nickname is whatever sits between "acct:" and "@"
			$decoded = urldecode($item->getAttribute('href'));
			if(preg_match('/acct:([^@]*)@/',$decoded,$matches))
				$ret['nick'] = $matches[1];
		}
	}

	// Pull out hCard profile elements

	$items = $dom->getElementsByTagName('*');
	foreach($items as $item) {
		if(! attribute_contains($item->getAttribute('class'), 'vcard'))
			continue;

		$level2 = $item->getElementsByTagName('*');
		foreach($level2 as $x) {
			$class = $x->getAttribute('class');
			if(attribute_contains($class,'fn'))
				$ret['fn'] = $x->textContent;
			if(attribute_contains($class,'photo'))
				$ret['photo'] = $x->getAttribute('src');
			if(attribute_contains($class,'key'))
				$ret['key'] = $x->textContent;
		}
	}

	return $ret;
}}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
if(! function_exists('validate_dfrn')) {
/**
 * Count the required DFRN fields that are missing from a scrape result.
 *
 * Each of 'key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify' and
 * 'dfrn-poll' is tested with x($a, <field>); every field for which that
 * test is falsy adds one to the count.
 *
 * @param array $a scrape result to check
 * @return int number of failed checks (0 means all five passed)
 */
function validate_dfrn($a) {
	$required = array('key', 'dfrn-request', 'dfrn-confirm', 'dfrn-notify', 'dfrn-poll');

	$missing = 0;
	foreach($required as $field) {
		if(x($a,$field))
			continue;
		$missing ++;
	}

	return $missing;
}}
 | |
| 
 | |
if(! function_exists('scrape_meta')) {
/**
 * Fetch a page and collect DFRN values published as <meta> tags.
 *
 * For every <meta> element whose name attribute starts with "dfrn-", the
 * result maps that name to the element's content attribute.
 *
 * @param string $url address handed to fetch_url()
 * @return array name => content pairs; empty when nothing was fetched, the
 *               response is an Atom/RSS feed, or parsing failed
 */
function scrape_meta($url) {

	$a = get_app();

	$ret = array();

	logger('scrape_meta: url=' . $url);

	$s = fetch_url($url);

	if(! $s)
		return $ret;

	$headers = $a->get_curl_headers();
	logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG);

	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) {
			// must be the variable $ret: a bare `ret` is an undefined constant
			// (fatal Error on PHP 8, the string 'ret' on older versions)
			return $ret;
		}
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get DFRN meta elements

	$items = $dom->getElementsByTagName('meta');

	foreach($items as $item) {
		$x = $item->getAttribute('name');
		if(substr($x,0,5) == "dfrn-")
			$ret[$x] = $item->getAttribute('content');
	}

	return $ret;
}}
 | |
| 
 | |
| 
 | |
if(! function_exists('scrape_vcard')) {
/**
 * Fetch a page and collect hCard profile fields.
 *
 * Looks at every descendant of each element whose class contains "vcard":
 *   'fn'    - text of an element with class "fn"
 *   'photo' - src attribute of an element with class "photo" or "avatar"
 *   'nick'  - text of an element with class "nickname" or "uid"
 * When several elements match, the last one in document order wins.
 *
 * @param string $url address handed to fetch_url()
 * @return array fields found; empty when nothing was fetched, the response
 *               is an Atom/RSS feed, or parsing failed
 */
function scrape_vcard($url) {

	$a = get_app();

	$ret = array();

	logger('scrape_vcard: url=' . $url);

	$s = fetch_url($url);

	if(! $s)
		return $ret;

	$headers = $a->get_curl_headers();

	foreach(explode("\n",$headers) as $line) {
		// don't try and run feeds through the html5 parser
		if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) {
			// must be the variable $ret: a bare `ret` is an undefined constant
			// (fatal Error on PHP 8, the string 'ret' on older versions)
			return $ret;
		}
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// Pull out hCard profile elements

	$items = $dom->getElementsByTagName('*');
	foreach($items as $item) {
		if(! attribute_contains($item->getAttribute('class'), 'vcard'))
			continue;

		$level2 = $item->getElementsByTagName('*');
		foreach($level2 as $x) {
			$class = $x->getAttribute('class');
			if(attribute_contains($class,'fn'))
				$ret['fn'] = $x->textContent;
			if((attribute_contains($class,'photo'))
				|| (attribute_contains($class,'avatar')))
				$ret['photo'] = $x->getAttribute('src');
			if((attribute_contains($class,'nickname'))
				|| (attribute_contains($class,'uid')))
				$ret['nick'] = $x->textContent;
		}
	}

	return $ret;
}}
 | |
| 
 | |
| 
 | |
if(! function_exists('scrape_feed')) {
/**
 * Work out the Atom and/or RSS feed addresses for a URL.
 *
 * If the response carries a content-type header and either that header or
 * the body identifies it as a feed, the URL itself is returned as the feed
 * ('feed_atom' is checked before 'feed_rss'). Otherwise the body is parsed
 * as HTML and <link rel="alternate"> elements of type application/atom+xml
 * and application/rss+xml supply 'feed_atom' and 'feed_rss'.
 *
 * @param string $url address handed to fetch_url()
 * @return array may contain 'feed_atom' and/or 'feed_rss'; empty when nothing
 *               was fetched, parsing failed, or no feed was advertised
 */
function scrape_feed($url) {

	$a = get_app();

	$ret = array();
	$s = fetch_url($url);

	if(! $s)
		return $ret;

	$headers = $a->get_curl_headers();
	logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG);

	foreach(explode("\n",$headers) as $line) {
		if(! stristr($line,'content-type:'))
			continue;

		// the body is sniffed as well as the header value
		if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) {
			$ret['feed_atom'] = $url;
			return $ret;
		}
		if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
			$ret['feed_rss'] = $url;
			// must be the variable $ret: a bare `ret` is an undefined constant,
			// which would discard the feed_rss entry stored just above
			return $ret;
		}
	}

	$dom = HTML5_Parser::parse($s);

	if(! $dom)
		return $ret;

	// get Atom and RSS link elements

	$items = $dom->getElementsByTagName('link');

	foreach($items as $item) {
		if($item->getAttribute('rel') !== 'alternate')
			continue;

		$type = $item->getAttribute('type');
		if($type === 'application/atom+xml')
			$ret['feed_atom'] = $item->getAttribute('href');
		if($type === 'application/rss+xml')
			$ret['feed_rss'] = $item->getAttribute('href');
	}

	return $ret;
}}