<?php
// $Id: class.atom_parser.php,v 1.5 2005/11/24 00:41:26 ohwada Exp $

// 2005-11-23 K.OHWADA
// REQ 3226: parse ATOM 1.0

// 2005-01-20 K.OHWADA
// add get_raw_content()
// add mode_arrange

// 2004-11-28 K.OHWADA
// use rss_atom_parser_base

// 2004-10-24 K.OHWADA
// add function find_encoding(), convert_encode_to_utf8()

// 2004-08-30 K.OHWADA
// use parse_w3cdtf()
// add $atom_parent_num
// add convert_array_from_utf8(), convert_from_utf8()

//=========================================================
// class for ATOM Parser 
// for PHP generally
// 2004-08-01 K.OHWADA
//=========================================================

// The Atom Syndication Format 0.3
// http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html

// Dublin Core Metadata Element Set
// http://dublincore.org/documents/dces/

//=========================================================
// global function
//=========================================================
//  $atom_feed
//  $atom_entrys
//  $atom_entry_num
//  $atom_parent
//  $atom_parent_num
//  $atom_uris

//---------------------------------------------------------
// start element handler
//---------------------------------------------------------
// Expat start-element callback.
//
// $parser : the XML parser resource (unused)
// $name   : element name as delivered by the namespace-aware parser,
//           i.e. "NAMESPACE-URI:LOCALNAME"; compared here in upper case
// $attrs  : associative array of attributes (upper-case keys)
//
// Side effects (all through globals):
//  - maintains the element stack $atom_parent / $atom_parent_num
//  - rebuilds a small HTML subset (br, a, img) into the current
//    entry's 'content' while inside ENTRY > CONTENT
//  - stores LINK href and CATEGORY term into $atom_feed or the
//    current entry of $atom_entrys
function atom_start_element($parser, $name, $attrs)
{
	global $atom_parent, $atom_parent_num, $atom_uris;
	global $atom_feed, $atom_entrys, $atom_entry_num;

	$parent = $atom_parent[$atom_parent_num];

	$parent_num_prev = $atom_parent_num - 1;
	if ($parent_num_prev < 0)  $parent_num_prev = 0;
	$parent_prev = $atom_parent[$parent_num_prev];

// The namespace URI itself contains ':', so only the LAST segment is the
// local name; everything before it is re-joined as the URI.
// explode() replaces split(), which was removed in PHP 7.
	$name_ns = explode(':', $name);
	$name_wk = array_pop($name_ns);
	$uri1    = implode(':', $name_ns);

	$name_low = strtolower( $name_wk );

// true when the element belongs to a namespace declared so far
	$flag = in_array($uri1, $atom_uris, true);

// FEED : root element, reset the stack
	if ( $name_wk == 'FEED' )
	{
		$atom_parent_num = 0;
		$atom_parent[0]  = $name_wk;
		return;
	}

// CONTENT : child markup of ENTRY > CONTENT is flattened to HTML text
	if (($parent_prev == 'ENTRY')&&($parent == 'CONTENT'))
	{
		$data = '';

		if (($name_wk == 'P')||($name_wk == 'BR'))
		{
			$data = "<br>\n";
		}
		elseif ($name_wk == 'A')
		{
// attribute values come from the remote feed: escape before embedding
			$href   = '';
			$target = '';
			if ( isset($attrs['HREF']) )
			{
				$href = htmlspecialchars($attrs['HREF'], ENT_QUOTES);
			}
			if ( isset($attrs['TARGET']) )
			{
				$target = 'target="'.htmlspecialchars($attrs['TARGET'], ENT_QUOTES).'" ';
			}

			$data = "<a href=\"$href\" $target >";
		}
		elseif ($name_wk == 'IMG')
		{
			$src    = '';
			$width  = '';
			$height = '';
			$border = 0;
			if ( isset($attrs['SRC']) )
			{
				$src = htmlspecialchars($attrs['SRC'], ENT_QUOTES);
			}
			if ( isset($attrs['BORDER']) )
			{
				$border = htmlspecialchars($attrs['BORDER'], ENT_QUOTES);
			}
			if ( isset($attrs['WIDTH']) )
			{
				$width = 'width="'.htmlspecialchars($attrs['WIDTH'], ENT_QUOTES).'" ';
			}
			if ( isset($attrs['HEIGHT']) )
			{
// was emitted as the misspelled attribute "hight"
				$height = 'height="'.htmlspecialchars($attrs['HEIGHT'], ENT_QUOTES).'" ';
			}

			$data = "<img src=\"$src\" border=\"$border\" $width $height >";
		}

// make sure the key exists before appending (no undefined-index notice)
		if ( !isset($atom_entrys[$atom_entry_num]['content']) )
		{
			$atom_entrys[$atom_entry_num]['content'] = '';
		}
		$atom_entrys[$atom_entry_num]['content'] .= $data;

// elements inside CONTENT are never pushed on the stack
		return;
	}

// LINK
	if ( $name_wk == 'LINK' )
	{
		$rel  = '';
		$href = '';
		if ( isset($attrs['REL']) )   $rel  = strtolower($attrs['REL']);
		if ( isset($attrs['HREF']) )  $href = $attrs['HREF'];

// --- ATOM 1.0 ---
// RFC 4287: a link without "rel" must be interpreted as rel="alternate"
		if ( $rel == '' )  $rel = 'alternate';

		if ( $parent == 'FEED' )
		{
			$atom_feed[$name_low.'_'.$rel] = $href;
		}
		elseif (($parent == 'ENTRY') && ($rel == 'alternate'))
		{
			$atom_entrys[$atom_entry_num][$name_low] = $href;
		}

// only "alternate" links are pushed on the element stack
		if ( $rel != 'alternate' )  return;
	}

// --- ATOM 1.0 ---
// CATEGORY : the value is carried by the "term" attribute
	if ( $name_wk == 'CATEGORY' )
	{
		$term = '';
		if ( isset($attrs['TERM']) )  $term = $attrs['TERM'];

		if ( $parent == 'FEED' )
		{
			$atom_feed[$name_low] = $term;
		}
		elseif ( $parent == 'ENTRY' )
		{
			$atom_entrys[$atom_entry_num][$name_low] = $term;
		}
	}

// increment parent : push elements of a declared namespace, or of none
	if ( $flag || empty($uri1) )
	{
		$atom_parent_num ++;
		$atom_parent[$atom_parent_num] = $name_wk;
	}

}

//---------------------------------------------------------
// end element handler
//---------------------------------------------------------
// Expat end-element callback.
//
// $parser : the XML parser resource (unused)
// $name   : element name as "NAMESPACE-URI:LOCALNAME"
//
// Closes an <a> that was re-emitted into the entry content, pops the
// element stack when the closing element matches the stack top, and
// advances $atom_entry_num when an ENTRY is closed.
function atom_end_element($parser, $name)
{
	global $atom_parent, $atom_parent_num, $atom_entry_num, $atom_entrys, $atom_uris;

	$parent = $atom_parent[$atom_parent_num];

	$parent_num_prev = $atom_parent_num - 1;
	if ($parent_num_prev < 0)  $parent_num_prev = 0;
	$parent_prev = $atom_parent[$parent_num_prev];

// The last ':'-separated segment is the local name, the rest is the URI.
// explode() replaces split(), which was removed in PHP 7.
	$name_ns = explode(':', $name);
	$name_wk = array_pop($name_ns);
	$uri1    = implode(':', $name_ns);

// true when the element belongs to a namespace declared so far
	$flag = in_array($uri1, $atom_uris, true);

// CONTENT
	if (($parent_prev == 'ENTRY')&&($parent == 'CONTENT'))
	{
		if ($name_wk == 'A')
		{
// make sure the key exists before appending (no undefined-index notice)
			if ( !isset($atom_entrys[$atom_entry_num]['content']) )
			{
				$atom_entrys[$atom_entry_num]['content'] = '';
			}
			$atom_entrys[$atom_entry_num]['content'] .= "</a>";
		}

// child markup of CONTENT was never pushed, so nothing to pop
		if ($name_wk != 'CONTENT')
		{
			return;
		}
	}

// decrement parent : only when this element is the one on top of the stack
	if (( $flag || empty($uri1) )&&( $parent == $name_wk ))
	{
		$atom_parent_num --;
		if ($atom_parent_num < 0)  $atom_parent_num = 0;

		if ($name_wk == 'ENTRY')
		{
			$atom_entry_num ++;
		}
	}

}

//---------------------------------------------------------
// character data handler
//---------------------------------------------------------
// Expat character-data callback.
//
// $parser : the XML parser resource (unused)
// $data   : a chunk of text; it is trimmed and APPENDED to the stored
//           value, because the parser may deliver one text node in
//           several chunks
//
// The destination is chosen from the element stack:
//   FEED > ENTRY > AUTHOR > x  ->  $atom_entrys[n]['author_x']
//   FEED > ENTRY > ...    > x  ->  $atom_entrys[n]['x']
//   FEED > AUTHOR         > x  ->  $atom_feed['author_x']
//   FEED > ...            > x  ->  $atom_feed['x']
// where x is the lower-cased name of the current element.
// Elements that are not in the lists below are ignored.
function atom_character_data($parser, $data)
{
	global $atom_parent, $atom_parent_num, $atom_feed, $atom_entrys, $atom_entry_num;

// children of AUTHOR (URI is atom 1.0, URL is atom 0.3)
	static $author_names = array('NAME', 'EMAIL', 'URI', 'URL');

// entry level : atom 0.3, atom 1.0 and dc:xxx
	static $entry_names = array(
		'TITLE', 'SUMMARY', 'SUBJECT', 'ID', 'CONTENT',
		'UPDATED', 'PUBLISHED', 'CATEGORY', 'RIGHTS', 'SOURCE',
		'MODIFIED', 'ISSUED', 'CREATED',
		'CREATOR', 'DESCRIPTION', 'PUBLISHER', 'CONTRIBUTOR', 'DATE',
		'TYPE', 'FORMAT', 'IDENTIFIER', 'LANGUAGE', 'RELATION', 'COVERAGE',
	);

// feed level : atom 0.3, atom 1.0 and dc:xxx
	static $feed_names = array(
		'TITLE', 'ID', 'GENERATOR',
		'RIGHTS', 'UPDATED', 'SUBTITLE', 'CATEGORY', 'ICON', 'LOGO', 'SOURCE',
		'COPYRIGHT', 'MODIFIED', 'TAGLINE', 'INFO',
		'CREATOR', 'SUBJECT', 'DESCRIPTION', 'PUBLISHER', 'CONTRIBUTOR', 'DATE',
		'TYPE', 'FORMAT', 'IDENTIFIER', 'LANGUAGE', 'RELATION', 'COVERAGE',
	);

	$parent_0 = isset($atom_parent[0]) ? $atom_parent[0] : '';
	$parent_1 = isset($atom_parent[1]) ? $atom_parent[1] : '';
	$parent_2 = isset($atom_parent[2]) ? $atom_parent[2] : '';

	$current     = $atom_parent[$atom_parent_num];
	$current_low = strtolower( $current );
	$data        = trim($data);

// ignore everything outside of <feed>
	if ($parent_0 != 'FEED')  return;

// ENTRY
	if ($parent_1 == 'ENTRY')
	{
		if ($parent_2 == 'AUTHOR')
		{
// ENTRY AUTHOR
// EMAIL used to be missing here (a second 'URL' label stood in its
// place), so the e-mail of an entry author was dropped
			if ( !in_array($current, $author_names, true) )  return;
			$key = 'author_'.$current_low;
		}
		else
		{
// ENTRY others (LINK is handled in atom_start_element())
			if ( !in_array($current, $entry_names, true) )  return;
			$key = $current_low;
		}

		if ( isset( $atom_entrys[$atom_entry_num][$key] ) )
		{
			$atom_entrys[$atom_entry_num][$key] .= $data;
		}
		else
		{
			$atom_entrys[$atom_entry_num][$key] = $data;
		}
		return;
	}

	if ($parent_1 == 'AUTHOR')
	{
// FEED AUTHOR
		if ( !in_array($current, $author_names, true) )  return;
		$key = 'author_'.$current_low;
	}
	else
	{
// FEED others (LINK is handled in atom_start_element())
		if ( !in_array($current, $feed_names, true) )  return;
		$key = $current_low;
	}

	if ( isset( $atom_feed[$key] ) )
	{
		$atom_feed[$key] .= $data;
	}
	else
	{
		$atom_feed[$key] = $data;
	}

}

//---------------------------------------------------------
// start namespace handler
//---------------------------------------------------------
// Namespace-declaration start callback.
// Records the declared namespace URI, upper-cased, at the end of the
// global $atom_uris list; the prefix is not used.
function atom_ns_start($parser, $prefix, $uri)
{
	global $atom_uris;

	$uri_upper = strtoupper($uri);
	array_push($atom_uris, $uri_upper);
}

//---------------------------------------------------------
// end namespace handler
//---------------------------------------------------------
// Namespace-declaration end callback.
// Drops the most recently recorded URI from the global $atom_uris list;
// neither argument is inspected.
function atom_ns_end($parser, $prefix)
{
	global $atom_uris;

	array_pop( $atom_uris );
}

//=========================================================
// class atom_parser
//=========================================================
// Parses an ATOM 0.3 / 1.0 document with the expat callbacks defined
// above (atom_start_element() etc.) and exposes the result, which those
// callbacks collect in the globals $atom_feed and $atom_entrys.
// Helpers called through $this but not defined in this class
// (get_summary, get_content, format_timestamp, parse_w3cdtf,
// prepare_strip_tags, mode_arrange) are presumably provided by
// rss_atom_parser_base -- TODO confirm.
class atom_parser extends rss_atom_parser_base
{
// message of the last failed parse(), '' when the last parse succeeded
	var $parse_error;

//---------------------------------------------------------
// constructor
//---------------------------------------------------------
function atom_parser()
{
	rss_atom_parser_base::rss_atom_parser_base();
}

// returns the shared instance, creating it on the first call
function &getInstance()
{
	static $instance;
	if (!isset($instance)) 
	{
		$instance = new atom_parser();
	}

	return $instance;
}

//---------------------------------------------------------
// get_feed
// returns the feed-level array of the last parse();
// 'link' is filled from 'link_alternate' when it is not set
// (false when neither exists)
//---------------------------------------------------------
function get_feed()
{
	global $atom_feed;

// --- ATOM 1.0 ---
	$atom_feed['link'] = $this->get_link( $atom_feed );

	return $atom_feed;
}

//---------------------------------------------------------
// get_entrys
// returns the list of entry arrays of the last parse()
//---------------------------------------------------------
function get_entrys()
{
	global $atom_entrys;
	return $atom_entrys;
}

//---------------------------------------------------------
// get_parse_error
//---------------------------------------------------------
function get_parse_error()
{
	return $this->parse_error;
}

//---------------------------------------------------------
// parse
// $data  : the complete XML document as one string
// return : true on success; false on failure, the reason is then
//          available from get_parse_error()
//---------------------------------------------------------
function parse($data)
{
	global $atom_entry_num,$atom_parent_num,$atom_parent,$atom_feed,$atom_entrys,$atom_uris;

// global : reset the state shared with the callbacks
	$atom_feed       = array();
	$atom_entrys     = array();
	$atom_entry_num  = 0;
	$atom_parent     = array();
	$atom_parent[0]  = '';
	$atom_parent_num = 0;
	$atom_uris       = array();

	$this->parse_error = '';

	$xml_parser = xml_parser_create_ns("UTF-8");
	xml_set_element_handler($xml_parser, "atom_start_element", "atom_end_element");
	xml_set_character_data_handler($xml_parser, "atom_character_data");
	xml_set_start_namespace_decl_handler($xml_parser, "atom_ns_start");
	xml_set_end_namespace_decl_handler($xml_parser, "atom_ns_end");

// The third argument of xml_parse() is the "is final chunk" flag, not a
// length. The whole document is passed at once, so it is true.
// (It used to be sizeof($data), which raises an error on a string
// in current PHP versions.)
	if (!xml_parse($xml_parser, $data, true))
	{
		$line  = xml_get_current_line_number($xml_parser);
		$error = xml_error_string(xml_get_error_code($xml_parser));

		if ($line == 1)
		{
			$this->parse_error = 'XML error at line 1, check URL';
		}
		else
		{
			$this->parse_error = sprintf('XML error: %s at line %d', $error, $line );
		}

		xml_parser_free($xml_parser);
		return false;
	}

	xml_parser_free($xml_parser);

// well-formed XML, but no feed-level data was collected
	if (empty($atom_feed))
	{
		$this->parse_error = 'not ATOM format';
		return false;
	}

	return true;
}

//=========================================================
// output option
//=========================================================

//---------------------------------------------------------
// arrange_entrys
// $entrys     : list of entry arrays indexed 0 .. count-1
// $site_title : stored as 'site_title' in every entry when not empty
// $site_url   : stored as 'site_url' in every entry when not empty
// return      : the entries with the derived fields added,
//               or false when $entrys is empty
//---------------------------------------------------------
function arrange_entrys($entrys, $site_title='', $site_url='')
{
	$count = count($entrys);
	if ($count <= 0)  return false;

	for ($i=0; $i<$count; $i++)
	{
		$entry = $entrys[$i];

		$entrys[$i]['summary']  = $this->get_summary( $entry );

// save
// an entry without <id> gets an empty entry_id instead of a notice
		$entrys[$i]['entry_id'] = isset($entry['id']) ? $entry['id'] : '';

		$entrys[$i]['time_created']  = $this->get_unixtime( $entry, "created" );

		if ($site_title)
		{	$entrys[$i]['site_title'] = $site_title;	}

		if ($site_url)
		{	$entrys[$i]['site_url'] = $site_url;	}

// view
		if ($this->mode_arrange)
		{
			$entrys[$i]['content'] = $this->get_content( $entry );
		}
// save
		else
		{
			$entrys[$i]['content'] = $this->get_raw_content( $entry );
		}

// --- ATOM 1.0 ---
// the published time is preferred, the updated time is the fallback
		$published_unix = $this->get_published_unix( $entry );
		$updated_unix   = $this->get_updated_unix( $entry );

		if ($published_unix)
		{
			$unixtime = $published_unix;
		}
		else
		{
			$unixtime = $updated_unix;
		}

		$entrys[$i]['unixtime'] = $unixtime;
		$entrys[$i]['date']     = $this->format_timestamp($unixtime, "s");
		$entrys[$i]['time_issued']   = $published_unix;
		$entrys[$i]['time_modified'] = $updated_unix;

		$link = $this->get_link( $entry );
		$entrys[$i]['link'] = $link;
		$entrys[$i]['url']  = $link;

	}

	return $entrys;
}

//---------------------------------------------------------
// ATOM 1.0
//---------------------------------------------------------

// timestamp of 'updated' (or 'modified'), 0 when the entry has neither
function get_updated_unix( $entry )
{
	$unix = $this->get_updated($entry);

	if ($unix)
	{
		return $this->get_unixtime_w3cdtf( $unix );
	}

	return 0;
}

// timestamp of 'published' (or 'issued'), 0 when the entry has neither
function get_published_unix( $entry )
{
	$unix = $this->get_published($entry);

	if ($unix)
	{
		return $this->get_unixtime_w3cdtf( $unix );
	}

	return 0;
}

// 'updated' (atom 1.0), else 'modified' (atom 0.3), else false
function get_updated( $entry )
{
	if ( isset($entry['updated']) )
	{
		return $entry['updated'];
	}
	elseif ( isset($entry['modified']) )
	{
		return $entry['modified'];
	}
	return false;
}

// 'published' (atom 1.0), else 'issued' (atom 0.3), else false
function get_published( $entry )
{
	if ( isset($entry['published']) )
	{
		return $entry['published'];
	}
	elseif ( isset($entry['issued']) )
	{
		return  $entry['issued'];
	}
	return false;
}

// returns the 'timestamp' element of the parse_w3cdtf() result
function get_unixtime_w3cdtf($datetime)
{
	$time_arr = $this->parse_w3cdtf($datetime);
	$unixtime = $time_arr['timestamp'];
	return $unixtime;
}

// 'link', else 'link_alternate', else false
function get_link( $entry )
{
	if ( isset($entry['link']) )
	{
		return $entry['link'];
	}
	elseif ( isset($entry['link_alternate']) )
	{
		return $entry['link_alternate'];
	}

	return false;
}

//---------------------------------------------------------
// get_unixtime
// timestamp of $entry[$key], 0 when the key is missing or empty
//---------------------------------------------------------
function get_unixtime($entry, $key='issued')
{
	if ( !isset($entry[$key]) )  return 0;

	$datetime = $entry[$key];

	if ( empty($datetime) ) return 0;

	$time_arr = $this->parse_w3cdtf($datetime);
	$unixtime = $time_arr['timestamp'];

	return $unixtime;
}

//---------------------------------------------------------
// get_raw_content
// 'content', else 'summary', else ''
//---------------------------------------------------------
function get_raw_content($entry)
{
	$content = '';
	if ( isset($entry['content']) )
	{
		$content = $entry['content'];
	}
	elseif ( isset($entry['summary']) )
	{
		$content = $entry['summary'];
	}

	return $content;
}

//---------------------------------------------------------
// get_raw_summary
// 'summary', else 'content', else ''; passed through
// prepare_strip_tags() and strip_tags()
//---------------------------------------------------------
function get_raw_summary($entry)
{
	$summary= '';
	if ( isset($entry['summary']) )
	{
		$summary = $entry['summary'];
	}
	elseif ( isset($entry['content']) )
	{
		$summary = $entry['content'];
	}

	$summary = $this->prepare_strip_tags( $summary );
	$summary = strip_tags( $summary );

	return $summary;
}

//=========================================================
// print option
//=========================================================

//---------------------------------------------------------
// get_channel : alias of get_feed()
//---------------------------------------------------------
function get_channel()
{
	return $this->get_feed();
}

//---------------------------------------------------------
// get_items : alias of get_entrys()
//---------------------------------------------------------
function get_items()
{
	return $this->get_entrys();
}


// --- class end ---
}

?>
