1 | ############################################################################## |
---|
2 | # |
---|
3 | # This library is free software; you can redistribute it and/or |
---|
4 | # modify it under the terms of the GNU Library General Public |
---|
5 | # License as published by the Free Software Foundation; either |
---|
6 | # version 2 of the License, or (at your option) any later version. |
---|
7 | # |
---|
8 | # This library is distributed in the hope that it will be useful, |
---|
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
11 | # Library General Public License for more details. |
---|
12 | # |
---|
13 | # You should have received a copy of the GNU Library General Public |
---|
14 | # License along with this library; if not, write to the |
---|
15 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
---|
16 | # Boston, MA 02111-1307, USA. |
---|
17 | # |
---|
18 | # Jabber |
---|
19 | # Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/ |
---|
20 | # |
---|
21 | ############################################################################## |
---|
22 | |
---|
23 | package XML::Stream::Parser; |
---|
24 | |
---|
25 | =head1 NAME |
---|
26 | |
---|
27 | XML::Stream::Parser - SAX XML Parser for XML Streams |
---|
28 | |
---|
29 | =head1 SYNOPSIS |
---|
30 | |
---|
31 | Light weight XML parser that builds XML::Parser::Tree objects from the |
---|
32 | incoming stream and passes them to a function to tell whoever is using |
---|
33 | it that there are new packets. |
---|
34 | |
---|
35 | =head1 DESCRIPTION |
---|
36 | |
---|
37 | This module provides a very light weight parser |
---|
38 | |
---|
39 | =head1 METHODS |
---|
40 | |
---|
41 | =head1 EXAMPLES |
---|
42 | |
---|
43 | =head1 AUTHOR |
---|
44 | |
---|
45 | By Ryan Eatmon in January of 2001 for http://jabber.org/ |
---|
46 | |
---|
47 | =head1 COPYRIGHT |
---|
48 | |
---|
49 | This module is free software; you can redistribute it and/or modify |
---|
50 | it under the same terms as Perl itself. |
---|
51 | |
---|
52 | =cut |
---|
53 | |
---|
54 | use strict; |
---|
55 | use vars qw( $VERSION ); |
---|
56 | |
---|
57 | $VERSION = "1.22"; |
---|
58 | |
---|
59 | sub new |
---|
60 | { |
---|
61 | my $self = { }; |
---|
62 | |
---|
63 | bless($self); |
---|
64 | |
---|
65 | my %args; |
---|
66 | while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); } |
---|
67 | |
---|
68 | $self->{PARSING} = 0; |
---|
69 | $self->{DOC} = 0; |
---|
70 | $self->{XML} = ""; |
---|
71 | $self->{CNAME} = (); |
---|
72 | $self->{CURR} = 0; |
---|
73 | |
---|
74 | $args{nonblocking} = 0 unless exists($args{nonblocking}); |
---|
75 | |
---|
76 | $self->{NONBLOCKING} = delete($args{nonblocking}); |
---|
77 | |
---|
78 | $self->{DEBUGTIME} = 0; |
---|
79 | $self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime}); |
---|
80 | |
---|
81 | $self->{DEBUGLEVEL} = 0; |
---|
82 | $self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel}); |
---|
83 | |
---|
84 | $self->{DEBUGFILE} = ""; |
---|
85 | |
---|
86 | if (exists($args{debugfh}) && ($args{debugfh} ne "")) |
---|
87 | { |
---|
88 | $self->{DEBUGFILE} = $args{debugfh}; |
---|
89 | $self->{DEBUG} = 1; |
---|
90 | } |
---|
91 | |
---|
92 | if ((exists($args{debugfh}) && ($args{debugfh} eq "")) || |
---|
93 | (exists($args{debug}) && ($args{debug} ne ""))) |
---|
94 | { |
---|
95 | $self->{DEBUG} = 1; |
---|
96 | if (lc($args{debug}) eq "stdout") |
---|
97 | { |
---|
98 | $self->{DEBUGFILE} = new FileHandle(">&STDERR"); |
---|
99 | $self->{DEBUGFILE}->autoflush(1); |
---|
100 | } |
---|
101 | else |
---|
102 | { |
---|
103 | if (-e $args{debug}) |
---|
104 | { |
---|
105 | if (-w $args{debug}) |
---|
106 | { |
---|
107 | $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); |
---|
108 | $self->{DEBUGFILE}->autoflush(1); |
---|
109 | } |
---|
110 | else |
---|
111 | { |
---|
112 | print "WARNING: debug file ($args{debug}) is not writable by you\n"; |
---|
113 | print " No debug information being saved.\n"; |
---|
114 | $self->{DEBUG} = 0; |
---|
115 | } |
---|
116 | } |
---|
117 | else |
---|
118 | { |
---|
119 | $self->{DEBUGFILE} = new FileHandle(">$args{debug}"); |
---|
120 | if (defined($self->{DEBUGFILE})) |
---|
121 | { |
---|
122 | $self->{DEBUGFILE}->autoflush(1); |
---|
123 | } |
---|
124 | else |
---|
125 | { |
---|
126 | print "WARNING: debug file ($args{debug}) does not exist \n"; |
---|
127 | print " and is not writable by you.\n"; |
---|
128 | print " No debug information being saved.\n"; |
---|
129 | $self->{DEBUG} = 0; |
---|
130 | } |
---|
131 | } |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | $self->{SID} = exists($args{sid}) ? $args{sid} : "__xmlstream__:sid"; |
---|
136 | |
---|
137 | $self->{STYLE} = (exists($args{style}) ? lc($args{style}) : "tree"); |
---|
138 | $self->{DTD} = (exists($args{dtd}) ? lc($args{dtd}) : 0); |
---|
139 | |
---|
140 | if ($self->{STYLE} eq "tree") |
---|
141 | { |
---|
142 | $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; |
---|
143 | $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; |
---|
144 | $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Tree::_handle_element(@_); }; |
---|
145 | $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Tree::_handle_close(@_); }; |
---|
146 | $self->{HANDLER}->{characters} = sub{ &XML::Stream::Tree::_handle_cdata(@_); }; |
---|
147 | } |
---|
148 | elsif ($self->{STYLE} eq "node") |
---|
149 | { |
---|
150 | $self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); }; |
---|
151 | $self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); }; |
---|
152 | $self->{HANDLER}->{startElement} = sub{ &XML::Stream::Node::_handle_element(@_); }; |
---|
153 | $self->{HANDLER}->{endElement} = sub{ &XML::Stream::Node::_handle_close(@_); }; |
---|
154 | $self->{HANDLER}->{characters} = sub{ &XML::Stream::Node::_handle_cdata(@_); }; |
---|
155 | } |
---|
156 | $self->setHandlers(%{$args{handlers}}); |
---|
157 | |
---|
158 | $self->{XMLONHOLD} = ""; |
---|
159 | |
---|
160 | return $self; |
---|
161 | } |
---|
162 | |
---|
163 | |
---|
164 | ########################################################################### |
---|
165 | # |
---|
166 | # debug - prints the arguments to the debug log if debug is turned on. |
---|
167 | # |
---|
168 | ########################################################################### |
---|
169 | sub debug |
---|
170 | { |
---|
171 | return if ($_[1] > $_[0]->{DEBUGLEVEL}); |
---|
172 | my $self = shift; |
---|
173 | my ($limit,@args) = @_; |
---|
174 | return if ($self->{DEBUGFILE} eq ""); |
---|
175 | my $fh = $self->{DEBUGFILE}; |
---|
176 | if ($self->{DEBUGTIME} == 1) |
---|
177 | { |
---|
178 | my ($sec,$min,$hour) = localtime(time); |
---|
179 | print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec); |
---|
180 | } |
---|
181 | print $fh "XML::Stream::Parser: $self->{STYLE}: @args\n"; |
---|
182 | } |
---|
183 | |
---|
184 | |
---|
185 | sub setSID |
---|
186 | { |
---|
187 | my $self = shift; |
---|
188 | my $sid = shift; |
---|
189 | $self->{SID} = $sid; |
---|
190 | } |
---|
191 | |
---|
192 | |
---|
193 | sub getSID |
---|
194 | { |
---|
195 | my $self = shift; |
---|
196 | return $self->{SID}; |
---|
197 | } |
---|
198 | |
---|
199 | |
---|
200 | sub setHandlers |
---|
201 | { |
---|
202 | my $self = shift; |
---|
203 | my (%handlers) = @_; |
---|
204 | |
---|
205 | foreach my $handler (keys(%handlers)) |
---|
206 | { |
---|
207 | $self->{HANDLER}->{$handler} = $handlers{$handler}; |
---|
208 | } |
---|
209 | } |
---|
210 | |
---|
211 | |
---|
212 | sub parse |
---|
213 | { |
---|
214 | my $self = shift; |
---|
215 | my $xml = shift; |
---|
216 | |
---|
217 | return unless defined($xml); |
---|
218 | return if ($xml eq ""); |
---|
219 | |
---|
220 | if ($self->{XMLONHOLD} ne "") |
---|
221 | { |
---|
222 | $self->{XML} = $self->{XMLONHOLD}; |
---|
223 | $self->{XMLONHOLD} = ""; |
---|
224 | } |
---|
225 | |
---|
226 | # XXX change this to not use regex? |
---|
227 | while($xml =~ s/<\!--.*?-->//gs) {} |
---|
228 | |
---|
229 | $self->{XML} .= $xml; |
---|
230 | |
---|
231 | return if ($self->{PARSING} == 1); |
---|
232 | |
---|
233 | $self->{PARSING} = 1; |
---|
234 | |
---|
235 | if(!$self->{DOC} == 1) |
---|
236 | { |
---|
237 | my $start = index($self->{XML},"<"); |
---|
238 | |
---|
239 | if ((substr($self->{XML},$start,3) eq "<?x") || |
---|
240 | (substr($self->{XML},$start,3) eq "<?X")) |
---|
241 | { |
---|
242 | my $close = index($self->{XML},"?>"); |
---|
243 | if ($close == -1) |
---|
244 | { |
---|
245 | $self->{PARSING} = 0; |
---|
246 | return; |
---|
247 | } |
---|
248 | $self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2); |
---|
249 | } |
---|
250 | |
---|
251 | &{$self->{HANDLER}->{startDocument}}($self); |
---|
252 | $self->{DOC} = 1; |
---|
253 | } |
---|
254 | |
---|
255 | while(1) |
---|
256 | { |
---|
257 | if (length($self->{XML}) == 0) |
---|
258 | { |
---|
259 | $self->{PARSING} = 0; |
---|
260 | return $self->returnData(0); |
---|
261 | } |
---|
262 | my $eclose = -1; |
---|
263 | $eclose = index($self->{XML},"</".$self->{CNAME}->[$self->{CURR}].">") |
---|
264 | if ($#{$self->{CNAME}} > -1); |
---|
265 | |
---|
266 | if ($eclose == 0) |
---|
267 | { |
---|
268 | $self->{XML} = substr($self->{XML},length($self->{CNAME}->[$self->{CURR}])+3,length($self->{XML})-length($self->{CNAME}->[$self->{CURR}])-3); |
---|
269 | |
---|
270 | $self->{PARSING} = 0 if ($self->{NONBLOCKING} == 1); |
---|
271 | &{$self->{HANDLER}->{endElement}}($self,$self->{CNAME}->[$self->{CURR}]); |
---|
272 | $self->{PARSING} = 1 if ($self->{NONBLOCKING} == 1); |
---|
273 | |
---|
274 | $self->{CURR}--; |
---|
275 | if ($self->{CURR} == 0) |
---|
276 | { |
---|
277 | $self->{DOC} = 0; |
---|
278 | $self->{PARSING} = 0; |
---|
279 | &{$self->{HANDLER}->{endDocument}}($self); |
---|
280 | return $self->returnData(0); |
---|
281 | } |
---|
282 | next; |
---|
283 | } |
---|
284 | |
---|
285 | my $estart = index($self->{XML},"<"); |
---|
286 | my $cdatastart = index($self->{XML},"<![CDATA["); |
---|
287 | if (($estart == 0) && ($cdatastart != 0)) |
---|
288 | { |
---|
289 | my $close = index($self->{XML},">"); |
---|
290 | if ($close == -1) |
---|
291 | { |
---|
292 | $self->{PARSING} = 0; |
---|
293 | return $self->returnData(0); |
---|
294 | } |
---|
295 | my $empty = (substr($self->{XML},$close-1,1) eq "/"); |
---|
296 | my $starttag = substr($self->{XML},1,$close-($empty ? 2 : 1)); |
---|
297 | my $nextspace = index($starttag," "); |
---|
298 | my $attribs; |
---|
299 | my $name; |
---|
300 | if ($nextspace != -1) |
---|
301 | { |
---|
302 | $name = substr($starttag,0,$nextspace); |
---|
303 | $attribs = substr($starttag,$nextspace+1,length($starttag)-$nextspace-1); |
---|
304 | } |
---|
305 | else |
---|
306 | { |
---|
307 | $name = $starttag; |
---|
308 | } |
---|
309 | |
---|
310 | my %attribs = $self->attribution($attribs); |
---|
311 | if (($self->{DTD} == 1) && (exists($attribs{xmlns}))) |
---|
312 | { |
---|
313 | } |
---|
314 | |
---|
315 | &{$self->{HANDLER}->{startElement}}($self,$name,%attribs); |
---|
316 | |
---|
317 | if($empty == 1) |
---|
318 | { |
---|
319 | &{$self->{HANDLER}->{endElement}}($self,$name); |
---|
320 | } |
---|
321 | else |
---|
322 | { |
---|
323 | $self->{CURR}++; |
---|
324 | $self->{CNAME}->[$self->{CURR}] = $name; |
---|
325 | } |
---|
326 | |
---|
327 | $self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1); |
---|
328 | next; |
---|
329 | } |
---|
330 | |
---|
331 | if ($cdatastart == 0) |
---|
332 | { |
---|
333 | my $cdataclose = index($self->{XML},"]]>"); |
---|
334 | if ($cdataclose == -1) |
---|
335 | { |
---|
336 | $self->{PARSING} = 0; |
---|
337 | return $self->returnData(0); |
---|
338 | } |
---|
339 | |
---|
340 | &{$self->{HANDLER}->{characters}}($self,substr($self->{XML},9,$cdataclose-9)); |
---|
341 | |
---|
342 | $self->{XML} = substr($self->{XML},$cdataclose+3,length($self->{XML})-$cdataclose-3); |
---|
343 | next; |
---|
344 | } |
---|
345 | |
---|
346 | if ($estart == -1) |
---|
347 | { |
---|
348 | $self->{XMLONHOLD} = $self->{XML}; |
---|
349 | $self->{XML} = ""; |
---|
350 | } |
---|
351 | elsif (($cdatastart == -1) || ($cdatastart > $estart)) |
---|
352 | { |
---|
353 | &{$self->{HANDLER}->{characters}}($self,$self->entityCheck(substr($self->{XML},0,$estart))); |
---|
354 | $self->{XML} = substr($self->{XML},$estart,length($self->{XML})-$estart); |
---|
355 | } |
---|
356 | } |
---|
357 | } |
---|
358 | |
---|
359 | |
---|
360 | sub attribution |
---|
361 | { |
---|
362 | my $self = shift; |
---|
363 | my $str = shift; |
---|
364 | |
---|
365 | $str = "" unless defined($str); |
---|
366 | |
---|
367 | my %attribs; |
---|
368 | |
---|
369 | while(1) |
---|
370 | { |
---|
371 | my $eq = index($str,"="); |
---|
372 | if((length($str) == 0) || ($eq == -1)) |
---|
373 | { |
---|
374 | return %attribs; |
---|
375 | } |
---|
376 | |
---|
377 | my $ids; |
---|
378 | my $id; |
---|
379 | my $id1 = index($str,"\'"); |
---|
380 | my $id2 = index($str,"\""); |
---|
381 | |
---|
382 | return %attribs if ($id1 == -1 && $id2 == -1); |
---|
383 | |
---|
384 | if((($id1 < $id2) && ($id1 != -1)) || ($id2 == -1)) |
---|
385 | { |
---|
386 | $ids = $id1; |
---|
387 | $id = "\'"; |
---|
388 | } |
---|
389 | if((($id2 < $id1) && ($id2 != -1)) || ($id1 == -1)) |
---|
390 | { |
---|
391 | $ids = $id2; |
---|
392 | $id = "\""; |
---|
393 | } |
---|
394 | |
---|
395 | my $nextid = index($str,$id,$ids+1); |
---|
396 | my $val = substr($str,$ids+1,$nextid-$ids-1); |
---|
397 | my $key = substr($str,0,$eq); |
---|
398 | |
---|
399 | while($key =~ s/\s//) {} |
---|
400 | |
---|
401 | $attribs{$key} = $self->entityCheck($val); |
---|
402 | $str = substr($str,$nextid+1,length($str)-$nextid-1); |
---|
403 | } |
---|
404 | |
---|
405 | return %attribs; |
---|
406 | } |
---|
407 | |
---|
408 | |
---|
409 | sub entityCheck |
---|
410 | { |
---|
411 | my $self = shift; |
---|
412 | my $str = shift; |
---|
413 | |
---|
414 | while($str =~ s/\<\;/\</) {} |
---|
415 | while($str =~ s/\>\;/\>/) {} |
---|
416 | while($str =~ s/\"\;/\"/) {} |
---|
417 | while($str =~ s/\&apos\;/\'/) {} |
---|
418 | while($str =~ s/\&\;/\&/) {} |
---|
419 | |
---|
420 | return $str; |
---|
421 | } |
---|
422 | |
---|
423 | |
---|
424 | sub parsefile |
---|
425 | { |
---|
426 | my $self = shift; |
---|
427 | my $fileName = shift; |
---|
428 | |
---|
429 | open(FILE,"<",$fileName); |
---|
430 | my $file; |
---|
431 | while(<FILE>) { $file .= $_; } |
---|
432 | $self->parse($file); |
---|
433 | close(FILE); |
---|
434 | |
---|
435 | return $self->returnData(); |
---|
436 | } |
---|
437 | |
---|
438 | |
---|
439 | sub returnData |
---|
440 | { |
---|
441 | my $self = shift; |
---|
442 | my $clearData = shift; |
---|
443 | $clearData = 1 unless defined($clearData); |
---|
444 | |
---|
445 | my $sid = $self->{SID}; |
---|
446 | |
---|
447 | if ($self->{STYLE} eq "tree") |
---|
448 | { |
---|
449 | return unless exists($self->{SIDS}->{$sid}->{tree}); |
---|
450 | my @tree = @{$self->{SIDS}->{$sid}->{tree}}; |
---|
451 | delete($self->{SIDS}->{$sid}->{tree}) if ($clearData == 1); |
---|
452 | return ( \@tree ); |
---|
453 | } |
---|
454 | if ($self->{STYLE} eq "node") |
---|
455 | { |
---|
456 | return unless exists($self->{SIDS}->{$sid}->{node}); |
---|
457 | my $node = $self->{SIDS}->{$sid}->{node}->[0]; |
---|
458 | delete($self->{SIDS}->{$sid}->{node}) if ($clearData == 1); |
---|
459 | return $node; |
---|
460 | } |
---|
461 | } |
---|
462 | |
---|
463 | |
---|
464 | sub startDocument |
---|
465 | { |
---|
466 | my $self = shift; |
---|
467 | } |
---|
468 | |
---|
469 | |
---|
470 | sub endDocument |
---|
471 | { |
---|
472 | my $self = shift; |
---|
473 | } |
---|
474 | |
---|
475 | |
---|
476 | sub startElement |
---|
477 | { |
---|
478 | my $self = shift; |
---|
479 | my ($sax, $tag, %att) = @_; |
---|
480 | |
---|
481 | return unless ($self->{DOC} == 1); |
---|
482 | |
---|
483 | if ($self->{STYLE} eq "debug") |
---|
484 | { |
---|
485 | print "$self->{DEBUGHEADER} \\\\ (",join(" ",%att),")\n"; |
---|
486 | $self->{DEBUGHEADER} .= $tag." "; |
---|
487 | } |
---|
488 | else |
---|
489 | { |
---|
490 | my @NEW; |
---|
491 | if($#{$self->{TREE}} < 0) |
---|
492 | { |
---|
493 | push @{$self->{TREE}}, $tag; |
---|
494 | } |
---|
495 | else |
---|
496 | { |
---|
497 | push @{ $self->{TREE}[ $#{$self->{TREE}}]}, $tag; |
---|
498 | } |
---|
499 | push @NEW, \%att; |
---|
500 | push @{$self->{TREE}}, \@NEW; |
---|
501 | } |
---|
502 | } |
---|
503 | |
---|
504 | |
---|
505 | sub characters |
---|
506 | { |
---|
507 | my $self = shift; |
---|
508 | my ($sax, $cdata) = @_; |
---|
509 | |
---|
510 | return unless ($self->{DOC} == 1); |
---|
511 | |
---|
512 | if ($self->{STYLE} eq "debug") |
---|
513 | { |
---|
514 | my $str = $cdata; |
---|
515 | $str =~ s/\n/\#10\;/g; |
---|
516 | print "$self->{DEBUGHEADER} || $str\n"; |
---|
517 | } |
---|
518 | else |
---|
519 | { |
---|
520 | return if ($#{$self->{TREE}} == -1); |
---|
521 | |
---|
522 | my $pos = $#{$self->{TREE}}; |
---|
523 | |
---|
524 | if ($pos > 0 && $self->{TREE}[$pos - 1] eq "0") |
---|
525 | { |
---|
526 | $self->{TREE}[$pos - 1] .= $cdata; |
---|
527 | } |
---|
528 | else |
---|
529 | { |
---|
530 | push @{$self->{TREE}[$#{$self->{TREE}}]}, 0; |
---|
531 | push @{$self->{TREE}[$#{$self->{TREE}}]}, $cdata; |
---|
532 | } |
---|
533 | } |
---|
534 | } |
---|
535 | |
---|
536 | |
---|
537 | sub endElement |
---|
538 | { |
---|
539 | my $self = shift; |
---|
540 | my ($sax, $tag) = @_; |
---|
541 | |
---|
542 | return unless ($self->{DOC} == 1); |
---|
543 | |
---|
544 | if ($self->{STYLE} eq "debug") |
---|
545 | { |
---|
546 | $self->{DEBUGHEADER} =~ s/\S+\ $//; |
---|
547 | print "$self->{DEBUGHEADER} //\n"; |
---|
548 | } |
---|
549 | else |
---|
550 | { |
---|
551 | my $CLOSED = pop @{$self->{TREE}}; |
---|
552 | |
---|
553 | if($#{$self->{TREE}} < 1) |
---|
554 | { |
---|
555 | push @{$self->{TREE}}, $CLOSED; |
---|
556 | |
---|
557 | if($self->{TREE}->[0] eq "stream:error") |
---|
558 | { |
---|
559 | $self->{STREAMERROR} = $self->{TREE}[1]->[2]; |
---|
560 | } |
---|
561 | } |
---|
562 | else |
---|
563 | { |
---|
564 | push @{$self->{TREE}[$#{$self->{TREE}}]}, $CLOSED; |
---|
565 | } |
---|
566 | } |
---|
567 | } |
---|
568 | |
---|
569 | |
---|
570 | 1; |
---|