[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [syndication] PHP tool for parsing RSS1.0?
- To: <syndication@yahoogroups.com>
- Subject: Re: [syndication] PHP tool for parsing RSS1.0?
- From: kellan <kellan@protest.net>
- Date: Thu, 30 May 2002 20:31:12 -0700 (PDT)
- In-reply-to: <Pine.LNX.4.33.0205302023190.4513-100000@riseup.net>
Well I asked here first, and got some responses (if not the ones I was
looking for), so here is the solution I came up with.
It's a simple expat-based PHP parser for RSS 1.0. It's designed for parsing
conforming RSS files, and will happily accept non-standard fields.
It also includes support for namespaces. Still on the to-do list is
built-in parsing of the W3CDTF in date fields into epoch seconds for more
'PHP-like' handling of dates and times. (I'm writing this for people who
want to use the RSS-based event feeds from protest.net.)
Hope this is useful to someone else.
kellan
--
Soon there will only be McDonald's, Starbucks, and PHP
-anonymous
kellan@protest.net
<?php
/**
 * Minimal expat-based parser for RSS 1.0 (RDF) feeds.
 *
 * Constructing the object reads and parses the file; afterwards the parsed
 * data is available in the public $channel and $items properties.
 * Un-prefixed fields are stored as $array['field']; fields with a namespace
 * prefix other than "rdf" are stored one level down, keyed by the prefix,
 * e.g. $item['dc']['date']. Non-standard fields are accepted as-is.
 */
class RSS {
    # expat parser handle; only populated while the constructor is parsing
    public $parser;

    # current_item - fields of the item currently being parsed
    # items        - collection of parsed items
    # channel      - hash of channel fields
    #
    public $current_item = array();
    public $items = array();
    public $channel = array();

    # stack of enclosing container elements, innermost first
    public $parent_field = array('RDF');
    # local name / namespace prefix of the element whose text is being read
    public $current_field = '';
    public $current_namespace = false;

    /**
     * Read $filename and parse it into $channel and $items.
     *
     * Failures do not throw: an unreadable file or malformed XML raises an
     * E_USER_WARNING and leaves whatever was parsed up to that point.
     *
     * @param string $filename path (or URL wrapper) of the RSS 1.0 document
     */
    public function __construct ($filename) {
        $data = file_get_contents($filename);
        if ($data === false) {
            trigger_error("RSS: unable to read '$filename'", E_USER_WARNING);
            return;
        }

        $this->parser = xml_parser_create();

        # bind handlers as array callables; this replaces xml_set_object()
        # with call-time pass-by-reference, which no longer compiles
        xml_set_element_handler(
            $this->parser,
            array($this, 'start_element'),
            array($this, 'end_element')
        );
        xml_set_character_data_handler($this->parser, array($this, 'cdata'));

        # is_final = true so that a truncated document is reported as an error
        if (!xml_parse($this->parser, $data, true)) {
            trigger_error(
                sprintf(
                    "RSS: XML error in '%s': %s at line %d",
                    $filename,
                    xml_error_string(xml_get_error_code($this->parser)),
                    xml_get_current_line_number($this->parser)
                ),
                E_USER_WARNING
            );
        }

        # the parser is a resource before PHP 8 and an object afterwards;
        # only the resource form needs an explicit free
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }
        # drop the handle so the parser <-> object reference cycle is broken
        $this->parser = null;
    }

    /**
     * Split a (case-folded) qualified element name into prefix and local name.
     *
     * @param string $element raw element name as delivered by expat
     * @return array array(prefix-or-false, lower-cased local name)
     */
    private function split_name ($element) {
        $element = strtolower($element);
        $colon = strpos($element, ':');
        # a colon in first position is not treated as a namespace separator
        if ($colon !== false && $colon > 0) {
            return explode(':', $element, 2);
        }
        return array(false, $element);
    }

    /**
     * expat start-tag handler: records the current field and namespace and
     * pushes channel/items/item onto the container stack.
     *
     * @param mixed  $p       parser handle (unused)
     * @param string $element element name, possibly prefixed
     * @param array  $attrs   element attributes (unused)
     */
    public function start_element ($p, $element, $attrs) {
        list($namespace, $element) = $this->split_name($element);

        $this->current_field = $element;
        # the rdf prefix is structural and is not used as a storage key
        if ($namespace && $namespace !== 'rdf') {
            $this->current_namespace = $namespace;
        }

        if ($element === 'channel' || $element === 'items' || $element === 'item') {
            array_unshift($this->parent_field, $element);
        }
    }

    /**
     * expat end-tag handler: pops the container stack, stores a finished
     * item, and clears the current field and namespace.
     *
     * @param mixed  $p       parser handle (unused)
     * @param string $element element name, possibly prefixed
     */
    public function end_element ($p, $element) {
        # compare on the local name, exactly as start_element does, so that
        # every push is matched by a pop
        list(, $element) = $this->split_name($element);

        if ($element === 'item') {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            array_shift($this->parent_field);
        }
        elseif ($element === 'channel' || $element === 'items') {
            array_shift($this->parent_field);
        }

        $this->current_field = '';
        $this->current_namespace = false;
    }

    /**
     * expat character-data handler: appends text to the current field of the
     * enclosing channel or item. expat may deliver one text node in several
     * chunks, hence the append.
     *
     * @param mixed  $p    parser handle (unused)
     * @param string $text chunk of character data
     */
    public function cdata ($p, $text) {
        $parent = isset($this->parent_field[0]) ? $this->parent_field[0] : '';

        # skip text between tags, and text directly inside a container
        # element (item, channel, items) before its first child
        if ($this->current_field === '' || $parent == $this->current_field) {
            return;
        }

        if ($parent === 'channel') {
            $this->append_text($this->channel, $text);
        }
        elseif ($parent === 'item') {
            $this->append_text($this->current_item, $text);
        }
    }

    /**
     * Append $text to the slot for the current field (and namespace) in
     * $target, creating the slot on first use.
     *
     * @param array  $target channel or item hash, modified in place
     * @param string $text   chunk of character data
     */
    private function append_text (&$target, $text) {
        $field = $this->current_field;
        $ns = $this->current_namespace;

        if ($ns) {
            # a plain field already occupies this key; do not clobber it
            if (isset($target[$ns]) && !is_array($target[$ns])) {
                return;
            }
            if (!isset($target[$ns][$field])) {
                $target[$ns][$field] = '';
            }
            $target[$ns][$field] .= $text;
            return;
        }

        # a namespace hash already occupies this key; do not clobber it
        if (isset($target[$field]) && is_array($target[$field])) {
            return;
        }
        if (!isset($target[$field])) {
            $target[$field] = '';
        }
        $target[$field] .= $text;
    }

    # debugging functions
    #

    /**
     * Echo all parsed items as an HTML ordered list.
     */
    public function show_list () {
        echo "<ol>\n";
        foreach ($this->items as $item) {
            echo "<li>";
            $this->show_item($item);
            echo "</li>\n";
        }
        echo "</ol>";
    }

    /**
     * Echo the channel fields as an HTML list.
     */
    public function show_channel () {
        echo "channel:<br>";
        $this->show_fields($this->channel);
    }

    /**
     * Echo one item (title followed by all of its fields) as HTML.
     *
     * @param array $item one entry of $this->items
     */
    public function show_item ($item) {
        $title = '';
        if (isset($item['title']) && !is_array($item['title'])) {
            $title = $item['title'];
        }
        echo "item: ", $this->escape($title);
        $this->show_fields($item);
    }

    /**
     * Echo a field hash as an HTML list; namespaced fields (nested arrays)
     * are rendered as a sub-list under their prefix.
     *
     * @param array $fields channel or item hash
     */
    private function show_fields ($fields) {
        echo "<ul>";
        foreach ($fields as $key => $value) {
            if (is_array($value)) {
                echo "<li><b>", $this->escape($key), "</b>";
                echo "<ul>";
                foreach ($value as $ns_key => $ns_value) {
                    echo "<li>", $this->escape($ns_key), ": ",
                        $this->escape($ns_value), "</li>";
                }
                echo "</ul></li>";
            }
            else {
                echo "<li> ", $this->escape($key), ": ",
                    $this->escape($value), "</li>";
            }
        }
        echo "</ul>";
    }

    /**
     * HTML-escape feed-supplied text before echoing it; feed content is
     * untrusted input.
     *
     * @param mixed $text value to escape
     * @return string escaped text
     */
    private function escape ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
    }
} # end class RSS
?>