############################################################################## # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Library General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Library General Public License for more details. # # You should have received a copy of the GNU Library General Public # License along with this library; if not, write to the # Free Software Foundation, Inc., 59 Temple Place - Suite 330, # Boston, MA 02111-1307, USA. # # Jabber # Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ # ############################################################################## package XML::Stream::Parser; =head1 NAME XML::Stream::Parser - SAX XML Parser for XML Streams =head1 SYNOPSIS Light weight XML parser that builds XML::Parser::Tree objects from the incoming stream and passes them to a function to tell whoever is using it that there are new packets. =head1 DESCRIPTION This module provides a very light weight parser =head1 METHODS =head1 EXAMPLES =head1 AUTHOR By Ryan Eatmon in January of 2001 for http://jabber.org/ =head1 COPYRIGHT This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut use strict; use vars qw( $VERSION ); $VERSION = "1.22"; sub new { my $self = { }; bless($self); my %args; while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } $self->{PARSING} = 0; $self->{DOC} = 0; $self->{XML} = ""; $self->{CNAME} = (); $self->{CURR} = 0; $args{nonblocking} = 0 unless exists($args{nonblocking}); $self->{NONBLOCKING} = delete($args{nonblocking}); $self->{DEBUGTIME} = 0; $self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime}); $self->{DEBUGLEVEL} = 0; $self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel}); $self->{DEBUGFILE} = ""; if (exists($args{debugfh}) && ($args{debugfh} ne "")) { $self->{DEBUGFILE} = $args{debugfh}; $self->{DEBUG} = 1; } if ((exists($args{debugfh}) && ($args{debugfh} eq "")) || (exists($args{debug}) && ($args{debug} ne ""))) { $self->{DEBUG} = 1; if (lc($args{debug}) eq "stdout") { $self->{DEBUGFILE} = new FileHandle(">&STDERR"); $self->{DEBUGFILE}->autoflush(1); } else { if (-e $args{debug}) { if (-w $args{debug}) { $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); $self->{DEBUGFILE}->autoflush(1); } else { print "WARNING: debug file ($args{debug}) is not writable by you\n"; print " No debug information being saved.\n"; $self->{DEBUG} = 0; } } else { $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); if (defined($self->{DEBUGFILE})) { $self->{DEBUGFILE}->autoflush(1); } else { print "WARNING: debug file ($args{debug}) does not exist \n"; print " and is not writable by you.\n"; print " No debug information being saved.\n"; $self->{DEBUG} = 0; } } } } $self->{SID} = exists($args{sid}) ? $args{sid} : "__xmlstream__:sid"; $self->{STYLE} = (exists($args{style}) ? lc($args{style}) : "tree"); $self->{DTD} = (exists($args{dtd}) ? lc($args{dtd}) : 0); if ($self->{STYLE} eq "tree") { $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Tree::_handle_element(@_); }; $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Tree::_handle_close(@_); }; $self->{HANDLER}->{characters} = sub{ &XML::Stream::Tree::_handle_cdata(@_); }; } elsif ($self->{STYLE} eq "node") { $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Node::_handle_element(@_); }; $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Node::_handle_close(@_); }; $self->{HANDLER}->{characters} = sub{ &XML::Stream::Node::_handle_cdata(@_); }; } $self->setHandlers(%{$args{handlers}}); $self->{XMLONHOLD} = ""; return $self; } ########################################################################### # # debug - prints the arguments to the debug log if debug is turned on. # ########################################################################### sub debug { return if ($_[1] > $_[0]->{DEBUGLEVEL}); my $self = shift; my ($limit,@args) = @_; return if ($self->{DEBUGFILE} eq ""); my $fh = $self->{DEBUGFILE}; if ($self->{DEBUGTIME} == 1) { my ($sec,$min,$hour) = localtime(time); print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec); } print $fh "XML::Stream::Parser: $self->{STYLE}: @args\n"; } sub setSID { my $self = shift; my $sid = shift; $self->{SID} = $sid; } sub getSID { my $self = shift; return $self->{SID}; } sub setHandlers { my $self = shift; my (%handlers) = @_; foreach my $handler (keys(%handlers)) { $self->{HANDLER}->{$handler} = $handlers{$handler}; } } sub parse { my $self = shift; my $xml = shift; return unless defined($xml); return if ($xml eq ""); if ($self->{XMLONHOLD} ne "") { $self->{XML} = $self->{XMLONHOLD}; $self->{XMLONHOLD} = ""; } # XXX change this to not use regex? while($xml =~ s/<\!--.*?-->//gs) {} $self->{XML} .= $xml; return if ($self->{PARSING} == 1); $self->{PARSING} = 1; if(!$self->{DOC} == 1) { my $start = index($self->{XML},"<"); if ((substr($self->{XML},$start,3) eq "{XML},$start,3) eq "{XML},"?>"); if ($close == -1) { $self->{PARSING} = 0; return; } $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); } &{$self->{HANDLER}->{startDocument}}($self); $self->{DOC} = 1; } while(1) { if (length($self->{XML}) == 0) { $self->{PARSING} = 0; return $self->returnData(0); } my $eclose = -1; $eclose = index($self->{XML},"{CNAME}->[$self->{CURR}].">") if ($#{$self->{CNAME}} > -1); if ($eclose == 0) { $self->{XML} = substr($self->{XML},length($self->{CNAME}->[$self->{CURR}])+3,length($self->{XML})-length($self->{CNAME}->[$self->{CURR}])-3); $self->{PARSING} = 0 if ($self->{NONBLOCKING} == 1); &{$self->{HANDLER}->{endElement}}($self,$self->{CNAME}->[$self->{CURR}]); $self->{PARSING} = 1 if ($self->{NONBLOCKING} == 1); $self->{CURR}--; if ($self->{CURR} == 0) { $self->{DOC} = 0; $self->{PARSING} = 0; &{$self->{HANDLER}->{endDocument}}($self); return $self->returnData(0); } next; } my $estart = index($self->{XML},"<"); my $cdatastart = index($self->{XML},"{XML},">"); if ($close == -1) { $self->{PARSING} = 0; return $self->returnData(0); } my $empty = (substr($self->{XML},$close-1,1) eq "/"); my $starttag = substr($self->{XML},1,$close-($empty ? 2 : 1)); my $nextspace = index($starttag," "); my $attribs; my $name; if ($nextspace != -1) { $name = substr($starttag,0,$nextspace); $attribs = substr($starttag,$nextspace+1,length($starttag)-$nextspace-1); } else { $name = $starttag; } my %attribs = $self->attribution($attribs); if (($self->{DTD} == 1) && (exists($attribs{xmlns}))) { } &{$self->{HANDLER}->{startElement}}($self,$name,%attribs); if($empty == 1) { &{$self->{HANDLER}->{endElement}}($self,$name); } else { $self->{CURR}++; $self->{CNAME}->[$self->{CURR}] = $name; } $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); next; } if ($cdatastart == 0) { my $cdataclose = index($self->{XML},"]]>"); if ($cdataclose == -1) { $self->{PARSING} = 0; return $self->returnData(0); } &{$self->{HANDLER}->{characters}}($self,substr($self->{XML},9,$cdataclose-9)); $self->{XML} = substr($self->{XML},$cdataclose+3,length($self->{XML})-$cdataclose-3); next; } if ($estart == -1) { $self->{XMLONHOLD} = $self->{XML}; $self->{XML} = ""; } elsif (($cdatastart == -1) || ($cdatastart > $estart)) { &{$self->{HANDLER}->{characters}}($self,$self->entityCheck(substr($self->{XML},0,$estart))); $self->{XML} = substr($self->{XML},$estart,length($self->{XML})-$estart); } } } sub attribution { my $self = shift; my $str = shift; $str = "" unless defined($str); my %attribs; while(1) { my $eq = index($str,"="); if((length($str) == 0) || ($eq == -1)) { return %attribs; } my $ids; my $id; my $id1 = index($str,"\'"); my $id2 = index($str,"\""); return %attribs if ($id1 == -1 && $id2 == -1); if((($id1 < $id2) && ($id1 != -1)) || ($id2 == -1)) { $ids = $id1; $id = "\'"; } if((($id2 < $id1) && ($id2 != -1)) || ($id1 == -1)) { $ids = $id2; $id = "\""; } my $nextid = index($str,$id,$ids+1); my $val = substr($str,$ids+1,$nextid-$ids-1); my $key = substr($str,0,$eq); while($key =~ s/\s//) {} $attribs{$key} = $self->entityCheck($val); $str = substr($str,$nextid+1,length($str)-$nextid-1); } return %attribs; } sub entityCheck { my $self = shift; my $str = shift; while($str =~ s/\<\;/\/) {} while($str =~ s/\"\;/\"/) {} while($str =~ s/\&apos\;/\'/) {} while($str =~ s/\&\;/\&/) {} return $str; } sub parsefile { my $self = shift; my $fileName = shift; open(FILE,"<",$fileName); my $file; while() { $file .= $_; } $self->parse($file); close(FILE); return $self->returnData(); } sub returnData { my $self = shift; my $clearData = shift; $clearData = 1 unless defined($clearData); my $sid = $self->{SID}; if ($self->{STYLE} eq "tree") { return unless exists($self->{SIDS}->{$sid}->{tree}); my @tree = @{$self->{SIDS}->{$sid}->{tree}}; delete($self->{SIDS}->{$sid}->{tree}) if ($clearData == 1); return ( \@tree ); } if ($self->{STYLE} eq "node") { return unless exists($self->{SIDS}->{$sid}->{node}); my $node = $self->{SIDS}->{$sid}->{node}->[0]; delete($self->{SIDS}->{$sid}->{node}) if ($clearData == 1); return $node; } } sub startDocument { my $self = shift; } sub endDocument { my $self = shift; } sub startElement { my $self = shift; my ($sax, $tag, %att) = @_; return unless ($self->{DOC} == 1); if ($self->{STYLE} eq "debug") { print "$self->{DEBUGHEADER} \\\\ (",join(" ",%att),")\n"; $self->{DEBUGHEADER} .= $tag." "; } else { my @NEW; if($#{$self->{TREE}} < 0) { push @{$self->{TREE}}, $tag; } else { push @{ $self->{TREE}[ $#{$self->{TREE}}]}, $tag; } push @NEW, \%att; push @{$self->{TREE}}, \@NEW; } } sub characters { my $self = shift; my ($sax, $cdata) = @_; return unless ($self->{DOC} == 1); if ($self->{STYLE} eq "debug") { my $str = $cdata; $str =~ s/\n/\#10\;/g; print "$self->{DEBUGHEADER} || $str\n"; } else { return if ($#{$self->{TREE}} == -1); my $pos = $#{$self->{TREE}}; if ($pos > 0 && $self->{TREE}[$pos - 1] eq "0") { $self->{TREE}[$pos - 1] .= $cdata; } else { push @{$self->{TREE}[$#{$self->{TREE}}]}, 0; push @{$self->{TREE}[$#{$self->{TREE}}]}, $cdata; } } } sub endElement { my $self = shift; my ($sax, $tag) = @_; return unless ($self->{DOC} == 1); if ($self->{STYLE} eq "debug") { $self->{DEBUGHEADER} =~ s/\S+\ $//; print "$self->{DEBUGHEADER} //\n"; } else { my $CLOSED = pop @{$self->{TREE}}; if($#{$self->{TREE}} < 1) { push @{$self->{TREE}}, $CLOSED; if($self->{TREE}->[0] eq "stream:error") { $self->{STREAMERROR} = $self->{TREE}[1]->[2]; } } else { push @{$self->{TREE}[$#{$self->{TREE}}]}, $CLOSED; } } } 1;