<?php
// $Id: auto_discovery.class.php,v 1.1.1.1 2009/03/08 20:14:39 ohwada Exp $

//=========================================================
// weblog updated ping
// 2009-03-08 K.OHWADA
//=========================================================

//---------------------------------------------------------
// RSS auto-discovery
// http://diveintomark.org/archives/2002/05/30/rss_autodiscovery
//---------------------------------------------------------

//=========================================================
// class auto_discovery
//=========================================================
class auto_discovery
{
// messages collected by set_error(), oldest first
	var $_errors = array();

// snoopy: value copied to Snoopy::read_timeout before each fetch
	var $_snoopy_read_timeout = 10;

//---------------------------------------------------------
// constructor
//---------------------------------------------------------
// Declared before the old-style constructor so that runtimes which know
// __construct() use it and do not complain about the PHP4-style one.
function __construct()
{
	// dummy
}

// PHP4-style constructor, kept so existing callers/subclasses that
// invoke it by name keep working.
function auto_discovery()
{
	// dummy
}

//---------------------------------------------------------
// discover XML link
//
// Fetches the HTML page at $url_html and returns the URL of one feed
// announced by its <link> tags.
//
// $url_html : URL of the HTML page to inspect
// $sel      : preferred format: 'atom', 'rss' or 'rdf'.
//             When the preferred format is absent (or $sel is anything
//             else) the first available of atom, rss, rdf is used.
// return    : feed URL string, or false when the page cannot be fetched
//             or announces no feed (see get_errors())
//---------------------------------------------------------
function discover($url_html, $sel='')
{
	$html_text = $this->snoopy_fetch($url_html);
	if ( empty($html_text) ) {
		return false;
	}

	list($url_rdf, $url_rss, $url_atom) = $this->find_link_xml($html_text, $url_html);

// listed in fallback order of preference
	$found = array(
		'atom' => $url_atom,
		'rss'  => $url_rss,
		'rdf'  => $url_rdf,
	);

// the explicitly requested format wins when it is available
	if ( is_string($sel) && isset($found[$sel]) && $found[$sel] ) {
		return $found[$sel];
	}

	foreach ($found as $url) {
		if ( $url ) {
			return $url;
		}
	}

	$this->set_error( "not find" );
	return false;
}

//---------------------------------------------------------
// find RDF/RSS/ATOM link in HTML
// <link rel="alternate" type="application/rdf+xml"  title="RDF"  href="xxx" />
// <link rel="alternate" type="application/rss+xml"  title="RSS"  href="xxx" />
// <link rel="alternate" type="application/atom+xml" title="ATOM" href="xxx" />
//
// $html_text : HTML source to scan
// $url_html  : URL the HTML came from; when given, the hrefs found are
//              converted to full URLs with relative_to_full_url()
// return     : array( rdf url, rss url, atom url ); each entry is ''
//              when no such link exists. The first link of each type wins.
//---------------------------------------------------------
function find_link_xml( $html_text, $url_html='' )
{
	$found = array(
		'application/rdf+xml'  => '',
		'application/rss+xml'  => '',
		'application/atom+xml' => '',
	);

// collect the attribute text of every <link> tag
	preg_match_all('/<link\s+(.*?)\s*\/?>/si', $html_text, $match);

	foreach ($match[1] as $link_tag)
	{
		$attr = $this->_parse_link_attributes($link_tag);

		if ( !isset($attr['rel']) )   continue;
		if ( !isset($attr['type']) )  continue;
		if ( !isset($attr['href']) )  continue;

	// rel is a space separated list of tokens, e.g. "alternate feed"
		$rel_arr = preg_split('/\s+/', strtolower($attr['rel']), -1, PREG_SPLIT_NO_EMPTY);
		if ( !in_array('alternate', $rel_arr, true) )  continue;

		$type = strtolower( trim($attr['type']) );
		$href = trim( $attr['href'] );

	// keep only the first link of each known type
		if ( isset($found[$type]) && empty($found[$type]) ) {
			$found[$type] = $href;
		}
	}

	$href_rdf  = $found['application/rdf+xml'];
	$href_rss  = $found['application/rss+xml'];
	$href_atom = $found['application/atom+xml'];

	if ($url_html) {
		$href_rdf  = $this->relative_to_full_url($href_rdf,  $url_html);
		$href_rss  = $this->relative_to_full_url($href_rss,  $url_html);
		$href_atom = $this->relative_to_full_url($href_atom, $url_html);
	}

	return array($href_rdf, $href_rss, $href_atom);
}

//---------------------------------------------------------
// parse the attribute text of one tag (private helper)
//
// $tag_text : text between "<link " and ">"
// return    : array( lower-cased attribute name => value without quotes )
//
// Values may be double quoted, single quoted or unquoted, may contain
// white space when quoted, and "=" may be surrounded by white space.
// When an attribute is repeated the last occurrence wins.
//---------------------------------------------------------
function _parse_link_attributes( $tag_text )
{
	$attr_arr = array();
	$pattern  = '/([^\s=\/"\'<>]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/s';

	if ( !preg_match_all($pattern, $tag_text, $set_arr, PREG_SET_ORDER) ) {
		return $attr_arr;
	}

	foreach ($set_arr as $set)
	{
	// [2] double quoted, [3] single quoted, [4] unquoted;
	// groups after the one that matched are absent from the set
		if ( isset($set[4]) ) {
			$value = $set[4];
		} elseif ( isset($set[3]) ) {
			$value = $set[3];
		} elseif ( isset($set[2]) ) {
			$value = $set[2];
		} else {
			$value = '';
		}
		$attr_arr[ strtolower($set[1]) ] = $value;
	}

	return $attr_arr;
}

//---------------------------------------------------------
// convert a link found in a page to a full URL
//
// $url_rel  : link as written in the page (may already be a full URL)
// $url_html : URL of the page the link was found in
// return    : full URL, or '' when $url_rel is empty
//
//   "scheme:..."  returned unchanged
//   "//host/..."  gets the scheme of $url_html
//   "/path"       gets the scheme and host of $url_html
//   "path"        is appended to the directory of $url_html.
//                 The last path segment of $url_html is dropped only
//                 when it looks like a file name (contains a dot);
//                 "http://host/blog" is treated like "http://host/blog/".
//
// Any query string or fragment of $url_html is ignored.
// "." and ".." segments are not normalized.
//---------------------------------------------------------
function relative_to_full_url($url_rel, $url_html)
{
	if ( empty($url_rel) )  return '';

// maybe full url: starts with a scheme
	if ( preg_match('~^[a-z][a-z0-9+.\-]*:~i', $url_rel) ) {
		return $url_rel;
	}

// split the page url into "scheme://host" and path
	$scheme = 'http';
	$root   = '';
	if ( preg_match('~^([a-z][a-z0-9+.\-]*)://([^/?#]*)([^?#]*)~i', $url_html, $match) ) {
		$scheme = $match[1];
		$root   = $scheme.'://'.$match[2];
		$path   = $match[3];
	} else {
	// page url without scheme: treat all of it as a path
		$path = preg_replace('/[?#].*$/s', '', $url_html);
	}

// start from "//": only the scheme is missing
	if ( strncmp($url_rel, '//', 2) == 0 ) {
		return $scheme.':'.$url_rel;
	}

// start from "/"
	if ( strncmp($url_rel, '/', 1) == 0 ) {
		return $root.$url_rel;
	}

// relative to the directory of the page
	$segment_arr = explode('/', $path);
	$last = array_pop($segment_arr);
	if ( ($last !== '') && (strpos($last, '.') === false) ) {
	// no dot: assume a directory written without trailing "/"
		$segment_arr[] = $last;
	}

	return $root.implode('/', $segment_arr).'/'.$url_rel;
}

//---------------------------------------------------------
// error
//---------------------------------------------------------
// append one message to the error list
function set_error($err)
{
	$this->_errors[] = $err;
}

// return all messages recorded so far
function get_errors()
{
	return $this->_errors;
}

//---------------------------------------------------------
// snoopy class
//
// Fetches $url with Snoopy, using $_snoopy_read_timeout as read timeout.
// return : the fetched content, or false when the Snoopy class is not
//          loaded, the fetch fails or the content is empty
//          (an error is recorded in each case)
//---------------------------------------------------------
function snoopy_fetch($url)
{
// the Snoopy class is expected to be loaded by the caller
	if ( !class_exists('Snoopy') ) {
		$this->set_error( "Snoopy class is not loaded" );
		return false;
	}

	$res = null;
	$snoopy = new Snoopy();
	$snoopy->read_timeout = $this->_snoopy_read_timeout;
	$ret = $snoopy->fetch($url);
	if ( $ret ) {
		$res = $snoopy->results;
	}
	if ( !$res ) {
		$this->set_error( $snoopy->error );
		return false;
	}
	return $res;
}

//----- class end -----
}

?>