diff options
Diffstat (limited to '')
-rw-r--r-- | share/perl/lib/XML/TreePP.pm | 1228 |
1 files changed, 1228 insertions, 0 deletions
diff --git a/share/perl/lib/XML/TreePP.pm b/share/perl/lib/XML/TreePP.pm new file mode 100644 index 0000000..bd03db7 --- /dev/null +++ b/share/perl/lib/XML/TreePP.pm | |||
@@ -0,0 +1,1228 @@ | |||
1 | =head1 NAME | ||
2 | |||
3 | XML::TreePP -- Pure Perl implementation for parsing/writing xml files | ||
4 | |||
5 | =head1 SYNOPSIS | ||
6 | |||
7 | parse xml file into hash tree | ||
8 | |||
9 | use XML::TreePP; | ||
10 | my $tpp = XML::TreePP->new(); | ||
11 | my $tree = $tpp->parsefile( "index.rdf" ); | ||
12 | print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n"; | ||
13 | print "URL: ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n"; | ||
14 | |||
15 | write xml as string from hash tree | ||
16 | |||
17 | use XML::TreePP; | ||
18 | my $tpp = XML::TreePP->new(); | ||
19 | my $tree = { rss => { channel => { item => [ { | ||
20 | title => "The Perl Directory", | ||
21 | link => "http://www.perl.org/", | ||
22 | }, { | ||
23 | title => "The Comprehensive Perl Archive Network", | ||
24 | link => "http://cpan.perl.org/", | ||
25 | } ] } } }; | ||
26 | my $xml = $tpp->write( $tree ); | ||
27 | print $xml; | ||
28 | |||
29 | get remote xml file with HTTP-GET and parse it into hash tree | ||
30 | |||
31 | use XML::TreePP; | ||
32 | my $tpp = XML::TreePP->new(); | ||
33 | my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" ); | ||
34 | print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n"; | ||
35 | print "URL: ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n"; | ||
36 | |||
37 | get remote xml file with HTTP-POST and parse it into hash tree | ||
38 | |||
39 | use XML::TreePP; | ||
40 | my $tpp = XML::TreePP->new( force_array => [qw( item )] ); | ||
41 | my $cgiurl = "http://search.hatena.ne.jp/keyword"; | ||
42 | my $keyword = "ajax"; | ||
43 | my $cgiquery = "mode=rss2&word=".$keyword; | ||
44 | my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery ); | ||
45 | print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n"; | ||
46 | print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n"; | ||
47 | |||
48 | =head1 DESCRIPTION | ||
49 | |||
50 | XML::TreePP module parses XML file and expands it for a hash tree. | ||
51 | And also generate XML file from a hash tree. | ||
52 | This is a pure Perl implementation. | ||
53 | You can also download XML from remote web server | ||
54 | like XMLHttpRequest object at JavaScript language. | ||
55 | |||
56 | =head1 EXAMPLES | ||
57 | |||
58 | =head2 Parse XML file | ||
59 | |||
60 | Sample XML source: | ||
61 | |||
62 | <?xml version="1.0" encoding="UTF-8"?> | ||
63 | <family name="Kawasaki"> | ||
64 | <father>Yasuhisa</father> | ||
65 | <mother>Chizuko</mother> | ||
66 | <children> | ||
67 | <girl>Shiori</girl> | ||
68 | <boy>Yusuke</boy> | ||
69 | <boy>Kairi</boy> | ||
70 | </children> | ||
71 | </family> | ||
72 | |||
73 | Sample program to read a xml file and dump it: | ||
74 | |||
75 | use XML::TreePP; | ||
76 | use Data::Dumper; | ||
77 | my $tpp = XML::TreePP->new(); | ||
78 | my $tree = $tpp->parsefile( "family.xml" ); | ||
79 | my $text = Dumper( $tree ); | ||
80 | print $text; | ||
81 | |||
82 | Result dumped: | ||
83 | |||
84 | $VAR1 = { | ||
85 | 'family' => { | ||
86 | '-name' => 'Kawasaki', | ||
87 | 'father' => 'Yasuhisa', | ||
88 | 'mother' => 'Chizuko', | ||
89 | 'children' => { | ||
90 | 'girl' => 'Shiori' | ||
91 | 'boy' => [ | ||
92 | 'Yusuke', | ||
93 | 'Kairi' | ||
94 | ], | ||
95 | } | ||
96 | } | ||
97 | }; | ||
98 | |||
99 | Details: | ||
100 | |||
101 | print $tree->{family}->{father}; # the father's given name. | ||
102 | |||
103 | The prefix '-' is added on every attribute's name. | ||
104 | |||
105 | print $tree->{family}->{"-name"}; # the family name of the family | ||
106 | |||
107 | The array is used because the family has two boys. | ||
108 | |||
109 | print $tree->{family}->{children}->{boy}->[1]; # The second boy's name | ||
110 | print $tree->{family}->{children}->{girl}; # The girl's name | ||
111 | |||
112 | =head2 Text node and attributes: | ||
113 | |||
114 | If a element has both of a text node and attributes | ||
115 | or both of a text node and other child nodes, | ||
116 | value of a text node is moved to C<#text> like child nodes. | ||
117 | |||
118 | use XML::TreePP; | ||
119 | use Data::Dumper; | ||
120 | my $tpp = XML::TreePP->new(); | ||
121 | my $source = '<span class="author">Kawasaki Yusuke</span>'; | ||
122 | my $tree = $tpp->parse( $source ); | ||
123 | my $text = Dumper( $tree ); | ||
124 | print $text; | ||
125 | |||
126 | The result dumped is following: | ||
127 | |||
128 | $VAR1 = { | ||
129 | 'span' => { | ||
130 | '-class' => 'author', | ||
131 | '#text' => 'Kawasaki Yusuke' | ||
132 | } | ||
133 | }; | ||
134 | |||
135 | The special node name of C<#text> is used because this elements | ||
136 | has attribute(s) in addition to the text node. | ||
137 | See also L</text_node_key> option. | ||
138 | |||
139 | =head1 METHODS | ||
140 | |||
141 | =head2 new | ||
142 | |||
143 | This constructor method returns a new XML::TreePP object with C<%options>. | ||
144 | |||
145 | $tpp = XML::TreePP->new( %options ); | ||
146 | |||
147 | =head2 set | ||
148 | |||
149 | This method sets a option value for C<option_name>. | ||
150 | If C<$option_value> is not defined, its option is deleted. | ||
151 | |||
152 | $tpp->set( option_name => $option_value ); | ||
153 | |||
154 | See OPTIONS section below for details. | ||
155 | |||
156 | =head2 get | ||
157 | |||
158 | This method returns a current option value for C<option_name>. | ||
159 | |||
160 | $tpp->get( 'option_name' ); | ||
161 | |||
162 | =head2 parse | ||
163 | |||
164 | This method reads XML source and returns a hash tree converted. | ||
165 | The first argument is a scalar or a reference to a scalar. | ||
166 | |||
167 | $tree = $tpp->parse( $source ); | ||
168 | |||
169 | =head2 parsefile | ||
170 | |||
171 | This method reads a XML file and returns a hash tree converted. | ||
172 | The first argument is a filename. | ||
173 | |||
174 | $tree = $tpp->parsefile( $file ); | ||
175 | |||
176 | =head2 parsehttp | ||
177 | |||
178 | This method receives a XML file from a remote server via HTTP and | ||
179 | returns a hash tree converted. | ||
180 | |||
181 | $tree = $tpp->parsehttp( $method, $url, $body, $head ); | ||
182 | |||
183 | C<$method> is a method of HTTP connection: GET/POST/PUT/DELETE | ||
184 | C<$url> is an URI of a XML file. | ||
185 | C<$body> is a request body when you use POST method. | ||
186 | C<$head> is a request headers as a hash ref. | ||
187 | L<LWP::UserAgent> module or L<HTTP::Lite> module is required to fetch a file. | ||
188 | |||
189 | ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head ); | ||
190 | |||
191 | In array context, This method returns also raw XML source received | ||
192 | and HTTP response's status code. | ||
193 | |||
194 | =head2 write | ||
195 | |||
196 | This method parses a hash tree and returns a XML source generated. | ||
197 | |||
198 | $source = $tpp->write( $tree, $encode ); | ||
199 | |||
200 | C<$tree> is a reference to a hash tree. | ||
201 | |||
202 | =head2 writefile | ||
203 | |||
204 | This method parses a hash tree and writes a XML source into a file. | ||
205 | |||
206 | $tpp->writefile( $file, $tree, $encode ); | ||
207 | |||
208 | C<$file> is a filename to create. | ||
209 | C<$tree> is a reference to a hash tree. | ||
210 | |||
211 | =head1 OPTIONS FOR PARSING XML | ||
212 | |||
213 | This module accepts option parameters following: | ||
214 | |||
215 | =head2 force_array | ||
216 | |||
217 | This option allows you to specify a list of element names which | ||
218 | should always be forced into an array representation. | ||
219 | |||
220 | $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] ); | ||
221 | |||
222 | The default value is null, it means that context of the elements | ||
223 | will determine to make array or to keep it scalar or hash. | ||
224 | Note that the special wildcard name C<'*'> means all elements. | ||
225 | |||
226 | =head2 force_hash | ||
227 | |||
228 | This option allows you to specify a list of element names which | ||
229 | should always be forced into an hash representation. | ||
230 | |||
231 | $tpp->set( force_hash => [ 'item', 'image' ] ); | ||
232 | |||
233 | The default value is null, it means that context of the elements | ||
234 | will determine to make hash or to keep it scalar as a text node. | ||
235 | See also L</text_node_key> option below. | ||
236 | Note that the special wildcard name C<'*'> means all elements. | ||
237 | |||
238 | =head2 cdata_scalar_ref | ||
239 | |||
240 | This option allows you to convert a cdata section into a reference | ||
241 | for scalar on parsing XML source. | ||
242 | |||
243 | $tpp->set( cdata_scalar_ref => 1 ); | ||
244 | |||
245 | The default value is false, it means that each cdata section is converted into a scalar. | ||
246 | |||
247 | =head2 user_agent | ||
248 | |||
249 | This option allows you to specify a HTTP_USER_AGENT string which | ||
250 | is used by parsehttp() method. | ||
251 | |||
252 | $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' ); | ||
253 | |||
254 | The default string is C<'XML-TreePP/#.##'>, where C<'#.##'> is | ||
255 | substituted with the version number of this library. | ||
256 | |||
257 | =head2 http_lite | ||
258 | |||
259 | This option forces pasrsehttp() method to use a L<HTTP::Lite> instance. | ||
260 | |||
261 | my $http = HTTP::Lite->new(); | ||
262 | $tpp->set( http_lite => $http ); | ||
263 | |||
264 | =head2 lwp_useragent | ||
265 | |||
266 | This option forces pasrsehttp() method to use a L<LWP::UserAgent> instance. | ||
267 | |||
268 | my $ua = LWP::UserAgent->new(); | ||
269 | $ua->timeout( 60 ); | ||
270 | $ua->env_proxy; | ||
271 | $tpp->set( lwp_useragent => $ua ); | ||
272 | |||
273 | You may use this with L<LWP::UserAgent::WithCache>. | ||
274 | |||
275 | =head2 base_class | ||
276 | |||
277 | This blesses class name for each element's hashref. | ||
278 | Each class is named straight as a child class of it parent class. | ||
279 | |||
280 | $tpp->set( base_class => 'MyElement' ); | ||
281 | my $xml = '<root><parent><child key="val">text</child></parent></root>'; | ||
282 | my $tree = $tpp->parse( $xml ); | ||
283 | print ref $tree->{root}->{parent}->{child}, "\n"; | ||
284 | |||
285 | A hash for <child> element above is blessed to C<MyElement::root::parent::child> | ||
286 | class. You may use this with L<Class::Accessor>. | ||
287 | |||
288 | =head2 elem_class | ||
289 | |||
290 | This blesses class name for each element's hashref. | ||
291 | Each class is named horizontally under the direct child of C<MyElement>. | ||
292 | |||
293 | $tpp->set( base_class => 'MyElement' ); | ||
294 | my $xml = '<root><parent><child key="val">text</child></parent></root>'; | ||
295 | my $tree = $tpp->parse( $xml ); | ||
296 | print ref $tree->{root}->{parent}->{child}, "\n"; | ||
297 | |||
298 | A hash for <child> element above is blessed to C<MyElement::child> class. | ||
299 | |||
300 | =head1 OPTIONS FOR WRITING XML | ||
301 | |||
302 | =head2 first_out | ||
303 | |||
304 | This option allows you to specify a list of element/attribute | ||
305 | names which should always appears at first on output XML code. | ||
306 | |||
307 | $tpp->set( first_out => [ 'link', 'title', '-type' ] ); | ||
308 | |||
309 | The default value is null, it means alphabetical order is used. | ||
310 | |||
311 | =head2 last_out | ||
312 | |||
313 | This option allows you to specify a list of element/attribute | ||
314 | names which should always appears at last on output XML code. | ||
315 | |||
316 | $tpp->set( last_out => [ 'items', 'item', 'entry' ] ); | ||
317 | |||
318 | =head2 indent | ||
319 | |||
320 | This makes the output more human readable by indenting appropriately. | ||
321 | |||
322 | $tpp->set( indent => 2 ); | ||
323 | |||
324 | This doesn't strictly follow the XML Document Spec but does looks nice. | ||
325 | |||
326 | =head2 xml_decl | ||
327 | |||
328 | This module generates an XML declaration on writing an XML code per default. | ||
329 | This option forces to change or leave it. | ||
330 | |||
331 | $tpp->set( xml_decl => '' ); | ||
332 | |||
333 | =head2 output_encoding | ||
334 | |||
335 | This option allows you to specify a encoding of xml file generated | ||
336 | by write/writefile methods. | ||
337 | |||
338 | $tpp->set( output_encoding => 'UTF-8' ); | ||
339 | |||
340 | On Perl 5.8.0 and later, you can select it from every | ||
341 | encodings supported by Encode.pm. On Perl 5.6.x and before with | ||
342 | Jcode.pm, you can use C<Shift_JIS>, C<EUC-JP>, C<ISO-2022-JP> and | ||
343 | C<UTF-8>. The default value is C<UTF-8> which is recommended encoding. | ||
344 | |||
345 | =head1 OPTIONS FOR BOTH | ||
346 | |||
347 | =head2 utf8_flag | ||
348 | |||
349 | This makes utf8 flag on for every element's value parsed | ||
350 | and makes it on for an XML code generated as well. | ||
351 | |||
352 | $tpp->set( utf8_flag => 1 ); | ||
353 | |||
354 | Perl 5.8.1 or later is required to use this. | ||
355 | |||
356 | =head2 attr_prefix | ||
357 | |||
358 | This option allows you to specify a prefix character(s) which | ||
359 | is inserted before each attribute names. | ||
360 | |||
361 | $tpp->set( attr_prefix => '@' ); | ||
362 | |||
363 | The default character is C<'-'>. | ||
364 | Or set C<'@'> to access attribute values like E4X, ECMAScript for XML. | ||
365 | Zero-length prefix C<''> is available as well, it means no prefix is added. | ||
366 | |||
367 | =head2 text_node_key | ||
368 | |||
369 | This option allows you to specify a hash key for text nodes. | ||
370 | |||
371 | $tpp->set( text_node_key => '#text' ); | ||
372 | |||
373 | The default key is C<#text>. | ||
374 | |||
375 | =head2 ignore_error | ||
376 | |||
377 | This module calls Carp::croak function on an error per default. | ||
378 | This option makes all errors ignored and just return. | ||
379 | |||
380 | $tpp->set( ignore_error => 1 ); | ||
381 | |||
382 | =head2 use_ixhash | ||
383 | |||
384 | This option keeps the order for each element appeared in XML. | ||
385 | L<Tie::IxHash> module is required. | ||
386 | |||
387 | $tpp->set( use_ixhash => 1 ); | ||
388 | |||
389 | This makes parsing performance slow. | ||
390 | (about 100% slower than default) | ||
391 | |||
392 | =head1 AUTHOR | ||
393 | |||
394 | Yusuke Kawasaki, http://www.kawa.net/ | ||
395 | |||
396 | =head1 COPYRIGHT AND LICENSE | ||
397 | |||
398 | Copyright (c) 2006-2007 Yusuke Kawasaki. All rights reserved. | ||
399 | This program is free software; you can redistribute it and/or | ||
400 | modify it under the same terms as Perl itself. | ||
401 | |||
402 | =cut | ||
403 | |||
404 | package XML::TreePP; | ||
405 | use strict; | ||
406 | use Carp; | ||
407 | use Symbol; | ||
408 | |||
409 | use vars qw( $VERSION ); | ||
410 | $VERSION = '0.32'; | ||
411 | |||
412 | my $XML_ENCODING = 'UTF-8'; | ||
413 | my $INTERNAL_ENCODING = 'UTF-8'; | ||
414 | my $USER_AGENT = 'XML-TreePP/'.$VERSION.' '; | ||
415 | my $ATTR_PREFIX = '-'; | ||
416 | my $TEXT_NODE_KEY = '#text'; | ||
417 | |||
418 | sub new { | ||
419 | my $package = shift; | ||
420 | my $self = {@_}; | ||
421 | bless $self, $package; | ||
422 | $self; | ||
423 | } | ||
424 | |||
425 | sub die { | ||
426 | my $self = shift; | ||
427 | my $mess = shift; | ||
428 | return if $self->{ignore_error}; | ||
429 | Carp::croak $mess; | ||
430 | } | ||
431 | |||
432 | sub warn { | ||
433 | my $self = shift; | ||
434 | my $mess = shift; | ||
435 | return if $self->{ignore_error}; | ||
436 | Carp::carp $mess; | ||
437 | } | ||
438 | |||
439 | sub set { | ||
440 | my $self = shift; | ||
441 | my $key = shift; | ||
442 | my $val = shift; | ||
443 | if ( defined $val ) { | ||
444 | $self->{$key} = $val; | ||
445 | } | ||
446 | else { | ||
447 | delete $self->{$key}; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | sub get { | ||
452 | my $self = shift; | ||
453 | my $key = shift; | ||
454 | $self->{$key} if exists $self->{$key}; | ||
455 | } | ||
456 | |||
457 | sub writefile { | ||
458 | my $self = shift; | ||
459 | my $file = shift; | ||
460 | my $tree = shift or return $self->die( 'Invalid tree' ); | ||
461 | my $encode = shift; | ||
462 | return $self->die( 'Invalid filename' ) unless defined $file; | ||
463 | my $text = $self->write( $tree, $encode ); | ||
464 | if ( $] >= 5.008001 && utf8::is_utf8( $text ) ) { | ||
465 | utf8::encode( $text ); | ||
466 | } | ||
467 | $self->write_raw_xml( $file, $text ); | ||
468 | } | ||
469 | |||
470 | sub write { | ||
471 | my $self = shift; | ||
472 | my $tree = shift or return $self->die( 'Invalid tree' ); | ||
473 | my $from = $self->{internal_encoding} || $INTERNAL_ENCODING; | ||
474 | my $to = shift || $self->{output_encoding} || $XML_ENCODING; | ||
475 | my $decl = $self->{xml_decl}; | ||
476 | $decl = '<?xml version="1.0" encoding="' . $to . '" ?>' unless defined $decl; | ||
477 | |||
478 | local $self->{__first_out}; | ||
479 | if ( exists $self->{first_out} ) { | ||
480 | my $keys = $self->{first_out}; | ||
481 | $keys = [$keys] unless ref $keys; | ||
482 | $self->{__first_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; | ||
483 | } | ||
484 | |||
485 | local $self->{__last_out}; | ||
486 | if ( exists $self->{last_out} ) { | ||
487 | my $keys = $self->{last_out}; | ||
488 | $keys = [$keys] unless ref $keys; | ||
489 | $self->{__last_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; | ||
490 | } | ||
491 | |||
492 | my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; | ||
493 | $tnk = $TEXT_NODE_KEY unless defined $tnk; | ||
494 | local $self->{text_node_key} = $tnk; | ||
495 | |||
496 | my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; | ||
497 | $apre = $ATTR_PREFIX unless defined $apre; | ||
498 | local $self->{__attr_prefix_len} = length($apre); | ||
499 | local $self->{__attr_prefix_rex} = defined $apre ? qr/^\Q$apre\E/s : undef; | ||
500 | |||
501 | local $self->{__indent}; | ||
502 | if ( exists $self->{indent} && $self->{indent} ) { | ||
503 | $self->{__indent} = ' ' x $self->{indent}; | ||
504 | } | ||
505 | |||
506 | my $text = $self->hash_to_xml( undef, $tree ); | ||
507 | if ( $from && $to ) { | ||
508 | my $stat = $self->encode_from_to( \$text, $from, $to ); | ||
509 | return $self->die( "Unsupported encoding: $to" ) unless $stat; | ||
510 | } | ||
511 | |||
512 | return $text if ( $decl eq '' ); | ||
513 | join( "\n", $decl, $text ); | ||
514 | } | ||
515 | |||
516 | sub parsehttp { | ||
517 | my $self = shift; | ||
518 | |||
519 | local $self->{__user_agent}; | ||
520 | if ( exists $self->{user_agent} ) { | ||
521 | my $agent = $self->{user_agent}; | ||
522 | $agent .= $USER_AGENT if ( $agent =~ /\s$/s ); | ||
523 | $self->{__user_agent} = $agent if ( $agent ne '' ); | ||
524 | } else { | ||
525 | $self->{__user_agent} = $USER_AGENT; | ||
526 | } | ||
527 | |||
528 | my $http = $self->{__http_module}; | ||
529 | unless ( $http ) { | ||
530 | $http = $self->find_http_module(@_); | ||
531 | $self->{__http_module} = $http; | ||
532 | } | ||
533 | if ( $http eq 'LWP::UserAgent' ) { | ||
534 | return $self->parsehttp_lwp(@_); | ||
535 | } | ||
536 | elsif ( $http eq 'HTTP::Lite' ) { | ||
537 | return $self->parsehttp_lite(@_); | ||
538 | } | ||
539 | else { | ||
540 | return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); | ||
541 | } | ||
542 | } | ||
543 | |||
544 | sub find_http_module { | ||
545 | my $self = shift || {}; | ||
546 | |||
547 | if ( exists $self->{lwp_useragent} && ref $self->{lwp_useragent} ) { | ||
548 | return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; | ||
549 | return 'LWP::UserAgent' if &load_lwp_useragent(); | ||
550 | return $self->die( "LWP::UserAgent is required: $_[1]" ); | ||
551 | } | ||
552 | |||
553 | if ( exists $self->{http_lite} && ref $self->{http_lite} ) { | ||
554 | return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; | ||
555 | return 'HTTP::Lite' if &load_http_lite(); | ||
556 | return $self->die( "HTTP::Lite is required: $_[1]" ); | ||
557 | } | ||
558 | |||
559 | return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; | ||
560 | return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; | ||
561 | return 'LWP::UserAgent' if &load_lwp_useragent(); | ||
562 | return 'HTTP::Lite' if &load_http_lite(); | ||
563 | return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); | ||
564 | } | ||
565 | |||
566 | sub load_lwp_useragent { | ||
567 | return $LWP::UserAgent::VERSION if defined $LWP::UserAgent::VERSION; | ||
568 | local $@; | ||
569 | eval { require LWP::UserAgent; }; | ||
570 | $LWP::UserAgent::VERSION; | ||
571 | } | ||
572 | |||
573 | sub load_http_lite { | ||
574 | return $HTTP::Lite::VERSION if defined $HTTP::Lite::VERSION; | ||
575 | local $@; | ||
576 | eval { require HTTP::Lite; }; | ||
577 | $HTTP::Lite::VERSION; | ||
578 | } | ||
579 | |||
580 | sub load_tie_ixhash { | ||
581 | return $Tie::IxHash::VERSION if defined $Tie::IxHash::VERSION; | ||
582 | local $@; | ||
583 | eval { require Tie::IxHash; }; | ||
584 | $Tie::IxHash::VERSION; | ||
585 | } | ||
586 | |||
587 | sub parsehttp_lwp { | ||
588 | my $self = shift; | ||
589 | my $method = shift or return $self->die( 'Invalid HTTP method' ); | ||
590 | my $url = shift or return $self->die( 'Invalid URL' ); | ||
591 | my $body = shift; | ||
592 | my $header = shift; | ||
593 | |||
594 | my $ua = $self->{lwp_useragent} if exists $self->{lwp_useragent}; | ||
595 | if ( ! ref $ua ) { | ||
596 | $ua = LWP::UserAgent->new(); | ||
597 | $ua->timeout(10); | ||
598 | $ua->env_proxy(); | ||
599 | $ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent}; | ||
600 | } else { | ||
601 | $ua->agent( $self->{__user_agent} ) if exists $self->{user_agent}; | ||
602 | } | ||
603 | |||
604 | my $req = HTTP::Request->new( $method, $url ); | ||
605 | my $ct = 0; | ||
606 | if ( ref $header ) { | ||
607 | foreach my $field ( sort keys %$header ) { | ||
608 | my $value = $header->{$field}; | ||
609 | $req->header( $field => $value ); | ||
610 | $ct ++ if ( $field =~ /^Content-Type$/i ); | ||
611 | } | ||
612 | } | ||
613 | if ( defined $body && ! $ct ) { | ||
614 | $req->header( 'Content-Type' => 'application/x-www-form-urlencoded' ); | ||
615 | } | ||
616 | $req->content($body) if defined $body; | ||
617 | my $res = $ua->request($req); | ||
618 | my $code = $res->code(); | ||
619 | my $text = $res->content(); | ||
620 | my $tree = $self->parse( \$text ) if $res->is_success(); | ||
621 | wantarray ? ( $tree, $text, $code ) : $tree; | ||
622 | } | ||
623 | |||
624 | sub parsehttp_lite { | ||
625 | my $self = shift; | ||
626 | my $method = shift or return $self->die( 'Invalid HTTP method' ); | ||
627 | my $url = shift or return $self->die( 'Invalid URL' ); | ||
628 | my $body = shift; | ||
629 | my $header = shift; | ||
630 | |||
631 | my $http = HTTP::Lite->new(); | ||
632 | $http->method($method); | ||
633 | my $ua = 0; | ||
634 | if ( ref $header ) { | ||
635 | foreach my $field ( sort keys %$header ) { | ||
636 | my $value = $header->{$field}; | ||
637 | $http->add_req_header( $field, $value ); | ||
638 | $ua ++ if ( $field =~ /^User-Agent$/i ); | ||
639 | } | ||
640 | } | ||
641 | if ( defined $self->{__user_agent} && ! $ua ) { | ||
642 | $http->add_req_header( 'User-Agent', $self->{__user_agent} ); | ||
643 | } | ||
644 | $http->{content} = $body if defined $body; | ||
645 | my $code = $http->request($url) or return; | ||
646 | my $text = $http->body(); | ||
647 | my $tree = $self->parse( \$text ); | ||
648 | wantarray ? ( $tree, $text, $code ) : $tree; | ||
649 | } | ||
650 | |||
651 | sub parsefile { | ||
652 | my $self = shift; | ||
653 | my $file = shift; | ||
654 | return $self->die( 'Invalid filename' ) unless defined $file; | ||
655 | my $text = $self->read_raw_xml($file); | ||
656 | $self->parse( \$text ); | ||
657 | } | ||
658 | |||
659 | sub parse { | ||
660 | my $self = shift; | ||
661 | my $text = ref $_[0] ? ${$_[0]} : $_[0]; | ||
662 | return $self->die( 'Null XML source' ) unless defined $text; | ||
663 | |||
664 | my $from = &xml_decl_encoding(\$text) || $XML_ENCODING; | ||
665 | my $to = $self->{internal_encoding} || $INTERNAL_ENCODING; | ||
666 | if ( $from && $to ) { | ||
667 | my $stat = $self->encode_from_to( \$text, $from, $to ); | ||
668 | return $self->die( "Unsupported encoding: $from" ) unless $stat; | ||
669 | } | ||
670 | |||
671 | local $self->{__force_array}; | ||
672 | local $self->{__force_array_all}; | ||
673 | if ( exists $self->{force_array} ) { | ||
674 | my $force = $self->{force_array}; | ||
675 | $force = [$force] unless ref $force; | ||
676 | $self->{__force_array} = { map { $_ => 1 } @$force }; | ||
677 | $self->{__force_array_all} = $self->{__force_array}->{'*'}; | ||
678 | } | ||
679 | |||
680 | local $self->{__force_hash}; | ||
681 | local $self->{__force_hash_all}; | ||
682 | if ( exists $self->{force_hash} ) { | ||
683 | my $force = $self->{force_hash}; | ||
684 | $force = [$force] unless ref $force; | ||
685 | $self->{__force_hash} = { map { $_ => 1 } @$force }; | ||
686 | $self->{__force_hash_all} = $self->{__force_hash}->{'*'}; | ||
687 | } | ||
688 | |||
689 | my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; | ||
690 | $tnk = $TEXT_NODE_KEY unless defined $tnk; | ||
691 | local $self->{text_node_key} = $tnk; | ||
692 | |||
693 | my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; | ||
694 | $apre = $ATTR_PREFIX unless defined $apre; | ||
695 | local $self->{attr_prefix} = $apre; | ||
696 | |||
697 | if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
698 | return $self->die( "Tie::IxHash is required." ) unless &load_tie_ixhash(); | ||
699 | } | ||
700 | |||
701 | my $flat = $self->xml_to_flat(\$text); | ||
702 | my $class = $self->{base_class} if exists $self->{base_class}; | ||
703 | my $tree = $self->flat_to_tree( $flat, '', $class ); | ||
704 | if ( ref $tree ) { | ||
705 | if ( defined $class ) { | ||
706 | bless( $tree, $class ); | ||
707 | } | ||
708 | elsif ( exists $self->{elem_class} && $self->{elem_class} ) { | ||
709 | bless( $tree, $self->{elem_class} ); | ||
710 | } | ||
711 | } | ||
712 | wantarray ? ( $tree, $text ) : $tree; | ||
713 | } | ||
714 | |||
715 | sub xml_to_flat { | ||
716 | my $self = shift; | ||
717 | my $textref = shift; # reference | ||
718 | my $flat = []; | ||
719 | my $prefix = $self->{attr_prefix}; | ||
720 | my $ixhash = ( exists $self->{use_ixhash} && $self->{use_ixhash} ); | ||
721 | |||
722 | while ( $$textref =~ m{ | ||
723 | ([^<]*) < | ||
724 | (( | ||
725 | \? ([^<>]*) \? | ||
726 | )|( | ||
727 | \!\[CDATA\[(.*?)\]\] | ||
728 | )|( | ||
729 | \!DOCTYPE\s+([^\[\]<>]*(?:\[.*?\]\s*)?) | ||
730 | )|( | ||
731 | \!--(.*?)-- | ||
732 | )|( | ||
733 | ([^\!\?\s<>](?:"[^"]*"|'[^']*'|[^"'<>])*) | ||
734 | )) | ||
735 | > ([^<]*) | ||
736 | }sxg ) { | ||
737 | my ( | ||
738 | $ahead, $match, $typePI, $contPI, $typeCDATA, | ||
739 | $contCDATA, $typeDocT, $contDocT, $typeCmnt, $contCmnt, | ||
740 | $typeElem, $contElem, $follow | ||
741 | ) | ||
742 | = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ); | ||
743 | if ( defined $ahead && $ahead =~ /\S/ ) { | ||
744 | $self->warn( "Invalid string: [$ahead] before <$match>" ); | ||
745 | } | ||
746 | |||
747 | if ($typeElem) { # Element | ||
748 | my $node = {}; | ||
749 | if ( $contElem =~ s#^/## ) { | ||
750 | $node->{endTag}++; | ||
751 | } | ||
752 | elsif ( $contElem =~ s#/$## ) { | ||
753 | $node->{emptyTag}++; | ||
754 | } | ||
755 | else { | ||
756 | $node->{startTag}++; | ||
757 | } | ||
758 | $node->{tagName} = $1 if ( $contElem =~ s#^(\S+)\s*## ); | ||
759 | unless ( $node->{endTag} ) { | ||
760 | my $attr; | ||
761 | while ( $contElem =~ m{ | ||
762 | ([^\s\=\"\']+)=(?:(")(.*?)"|'(.*?)') | ||
763 | }sxg ) { | ||
764 | my $key = $1; | ||
765 | my $val = &xml_unescape( $2 ? $3 : $4 ); | ||
766 | if ( ! ref $attr ) { | ||
767 | $attr = {}; | ||
768 | tie( %$attr, 'Tie::IxHash' ) if $ixhash; | ||
769 | } | ||
770 | $attr->{$prefix.$key} = $val; | ||
771 | } | ||
772 | $node->{attributes} = $attr if ref $attr; | ||
773 | } | ||
774 | push( @$flat, $node ); | ||
775 | } | ||
776 | elsif ($typeCDATA) { ## CDATASection | ||
777 | if ( exists $self->{cdata_scalar_ref} && $self->{cdata_scalar_ref} ) { | ||
778 | push( @$flat, \$contCDATA ); # as reference for scalar | ||
779 | } | ||
780 | else { | ||
781 | push( @$flat, $contCDATA ); # as scalar like text node | ||
782 | } | ||
783 | } | ||
784 | elsif ($typeCmnt) { # Comment (ignore) | ||
785 | } | ||
786 | elsif ($typeDocT) { # DocumentType (ignore) | ||
787 | } | ||
788 | elsif ($typePI) { # ProcessingInstruction (ignore) | ||
789 | } | ||
790 | else { | ||
791 | $self->warn( "Invalid Tag: <$match>" ); | ||
792 | } | ||
793 | if ( $follow =~ /\S/ ) { # text node | ||
794 | my $val = &xml_unescape($follow); | ||
795 | push( @$flat, $val ); | ||
796 | } | ||
797 | } | ||
798 | $flat; | ||
799 | } | ||
800 | |||
801 | sub flat_to_tree { | ||
802 | my $self = shift; | ||
803 | my $source = shift; | ||
804 | my $parent = shift; | ||
805 | my $class = shift; | ||
806 | my $tree = {}; | ||
807 | my $text = []; | ||
808 | |||
809 | if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
810 | tie( %$tree, 'Tie::IxHash' ); | ||
811 | } | ||
812 | |||
813 | while ( scalar @$source ) { | ||
814 | my $node = shift @$source; | ||
815 | if ( !ref $node || UNIVERSAL::isa( $node, "SCALAR" ) ) { | ||
816 | push( @$text, $node ); # cdata or text node | ||
817 | next; | ||
818 | } | ||
819 | my $name = $node->{tagName}; | ||
820 | if ( $node->{endTag} ) { | ||
821 | last if ( $parent eq $name ); | ||
822 | return $self->die( "Invalid tag sequence: <$parent></$name>" ); | ||
823 | } | ||
824 | my $elem = $node->{attributes}; | ||
825 | my $forcehash = $self->{__force_hash_all} || $self->{__force_hash}->{$name}; | ||
826 | my $subclass; | ||
827 | if ( defined $class ) { | ||
828 | my $escname = $name; | ||
829 | $escname =~ s/\W/_/sg; | ||
830 | $subclass = $class.'::'.$escname; | ||
831 | } | ||
832 | if ( $node->{startTag} ) { # recursive call | ||
833 | my $child = $self->flat_to_tree( $source, $name, $subclass ); | ||
834 | next unless defined $child; | ||
835 | my $hasattr = scalar keys %$elem if ref $elem; | ||
836 | if ( UNIVERSAL::isa( $child, "HASH" ) ) { | ||
837 | if ( $hasattr ) { | ||
838 | # some attributes and some child nodes | ||
839 | %$elem = ( %$elem, %$child ); | ||
840 | } | ||
841 | else { | ||
842 | # some child nodes without attributes | ||
843 | $elem = $child; | ||
844 | } | ||
845 | } | ||
846 | else { | ||
847 | if ( $hasattr ) { | ||
848 | # some attributes and text node | ||
849 | $elem->{$self->{text_node_key}} = $child; | ||
850 | } | ||
851 | elsif ( $forcehash ) { | ||
852 | # only text node without attributes | ||
853 | $elem = { $self->{text_node_key} => $child }; | ||
854 | } | ||
855 | else { | ||
856 | # text node without attributes | ||
857 | $elem = $child; | ||
858 | } | ||
859 | } | ||
860 | } | ||
861 | elsif ( $forcehash && ! ref $elem ) { | ||
862 | $elem = {}; | ||
863 | } | ||
864 | # bless to a class by base_class or elem_class | ||
865 | if ( ref $elem && UNIVERSAL::isa( $elem, "HASH" ) ) { | ||
866 | if ( defined $subclass ) { | ||
867 | bless( $elem, $subclass ); | ||
868 | } elsif ( exists $self->{elem_class} && $self->{elem_class} ) { | ||
869 | my $escname = $name; | ||
870 | $escname =~ s/\W/_/sg; | ||
871 | my $elmclass = $self->{elem_class}.'::'.$escname; | ||
872 | bless( $elem, $elmclass ); | ||
873 | } | ||
874 | } | ||
875 | # next unless defined $elem; | ||
876 | $tree->{$name} ||= []; | ||
877 | push( @{ $tree->{$name} }, $elem ); | ||
878 | } | ||
879 | if ( ! $self->{__force_array_all} ) { | ||
880 | foreach my $key ( keys %$tree ) { | ||
881 | next if $self->{__force_array}->{$key}; | ||
882 | next if ( 1 < scalar @{ $tree->{$key} } ); | ||
883 | $tree->{$key} = shift @{ $tree->{$key} }; | ||
884 | } | ||
885 | } | ||
886 | my $haschild = scalar keys %$tree; | ||
887 | if ( scalar @$text ) { | ||
888 | if ( scalar @$text == 1 ) { | ||
889 | # one text node (normal) | ||
890 | $text = shift @$text; | ||
891 | } | ||
892 | elsif ( ! scalar grep {ref $_} @$text ) { | ||
893 | # some text node splitted | ||
894 | $text = join( '', @$text ); | ||
895 | } | ||
896 | else { | ||
897 | # some cdata node | ||
898 | my $join = join( '', map {ref $_ ? $$_ : $_} @$text ); | ||
899 | $text = \$join; | ||
900 | } | ||
901 | if ( $haschild ) { | ||
902 | # some child nodes and also text node | ||
903 | $tree->{$self->{text_node_key}} = $text; | ||
904 | } | ||
905 | else { | ||
906 | # only text node without child nodes | ||
907 | $tree = $text; | ||
908 | } | ||
909 | } | ||
910 | elsif ( ! $haschild ) { | ||
911 | # no child and no text | ||
912 | $tree = ""; | ||
913 | } | ||
914 | $tree; | ||
915 | } | ||
916 | |||
917 | sub hash_to_xml { | ||
918 | my $self = shift; | ||
919 | my $name = shift; | ||
920 | my $hash = shift; | ||
921 | my $out = []; | ||
922 | my $attr = []; | ||
923 | my $allkeys = [ keys %$hash ]; | ||
924 | my $fo = $self->{__first_out} if ref $self->{__first_out}; | ||
925 | my $lo = $self->{__last_out} if ref $self->{__last_out}; | ||
926 | my $firstkeys = [ sort { $fo->{$a} <=> $fo->{$b} } grep { exists $fo->{$_} } @$allkeys ] if ref $fo; | ||
927 | my $lastkeys = [ sort { $lo->{$a} <=> $lo->{$b} } grep { exists $lo->{$_} } @$allkeys ] if ref $lo; | ||
928 | $allkeys = [ grep { ! exists $fo->{$_} } @$allkeys ] if ref $fo; | ||
929 | $allkeys = [ grep { ! exists $lo->{$_} } @$allkeys ] if ref $lo; | ||
930 | unless ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
931 | $allkeys = [ sort @$allkeys ]; | ||
932 | } | ||
933 | my $prelen = $self->{__attr_prefix_len}; | ||
934 | my $pregex = $self->{__attr_prefix_rex}; | ||
935 | |||
936 | foreach my $keys ( $firstkeys, $allkeys, $lastkeys ) { | ||
937 | next unless ref $keys; | ||
938 | my $elemkey = $prelen ? [ grep { $_ !~ $pregex } @$keys ] : $keys; | ||
939 | my $attrkey = $prelen ? [ grep { $_ =~ $pregex } @$keys ] : []; | ||
940 | |||
941 | foreach my $key ( @$elemkey ) { | ||
942 | my $val = $hash->{$key}; | ||
943 | if ( !defined $val ) { | ||
944 | push( @$out, "<$key />" ); | ||
945 | } | ||
946 | elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { | ||
947 | my $child = $self->array_to_xml( $key, $val ); | ||
948 | push( @$out, $child ); | ||
949 | } | ||
950 | elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { | ||
951 | my $child = $self->scalaref_to_cdata( $key, $val ); | ||
952 | push( @$out, $child ); | ||
953 | } | ||
954 | elsif ( ref $val ) { | ||
955 | my $child = $self->hash_to_xml( $key, $val ); | ||
956 | push( @$out, $child ); | ||
957 | } | ||
958 | else { | ||
959 | my $child = $self->scalar_to_xml( $key, $val ); | ||
960 | push( @$out, $child ); | ||
961 | } | ||
962 | } | ||
963 | |||
964 | foreach my $key ( @$attrkey ) { | ||
965 | my $name = substr( $key, $prelen ); | ||
966 | my $val = &xml_escape( $hash->{$key} ); | ||
967 | push( @$attr, ' ' . $name . '="' . $val . '"' ); | ||
968 | } | ||
969 | } | ||
970 | my $jattr = join( '', @$attr ); | ||
971 | |||
972 | if ( defined $name && scalar @$out && ! grep { ! /^</s } @$out ) { | ||
973 | # Use human-friendly white spacing | ||
974 | if ( defined $self->{__indent} ) { | ||
975 | s/^(\s*<)/$self->{__indent}$1/mg foreach @$out; | ||
976 | } | ||
977 | unshift( @$out, "\n" ); | ||
978 | } | ||
979 | |||
980 | my $text = join( '', @$out ); | ||
981 | if ( defined $name ) { | ||
982 | if ( scalar @$out ) { | ||
983 | $text = "<$name$jattr>$text</$name>\n"; | ||
984 | } | ||
985 | else { | ||
986 | $text = "<$name$jattr />\n"; | ||
987 | } | ||
988 | } | ||
989 | $text; | ||
990 | } | ||
991 | |||
992 | sub array_to_xml { | ||
993 | my $self = shift; | ||
994 | my $name = shift; | ||
995 | my $array = shift; | ||
996 | my $out = []; | ||
997 | foreach my $val (@$array) { | ||
998 | if ( !defined $val ) { | ||
999 | push( @$out, "<$name />\n" ); | ||
1000 | } | ||
1001 | elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { | ||
1002 | my $child = $self->array_to_xml( $name, $val ); | ||
1003 | push( @$out, $child ); | ||
1004 | } | ||
1005 | elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { | ||
1006 | my $child = $self->scalaref_to_cdata( $name, $val ); | ||
1007 | push( @$out, $child ); | ||
1008 | } | ||
1009 | elsif ( ref $val ) { | ||
1010 | my $child = $self->hash_to_xml( $name, $val ); | ||
1011 | push( @$out, $child ); | ||
1012 | } | ||
1013 | else { | ||
1014 | my $child = $self->scalar_to_xml( $name, $val ); | ||
1015 | push( @$out, $child ); | ||
1016 | } | ||
1017 | } | ||
1018 | |||
1019 | my $text = join( '', @$out ); | ||
1020 | $text; | ||
1021 | } | ||
1022 | |||
1023 | sub scalaref_to_cdata { | ||
1024 | my $self = shift; | ||
1025 | my $name = shift; | ||
1026 | my $ref = shift; | ||
1027 | my $data = defined $$ref ? $$ref : ''; | ||
1028 | $data =~ s#(]])(>)#$1]]><![CDATA[$2#g; | ||
1029 | #my $text = '<![CDATA[' . $data . ']]>'; | ||
1030 | my $text = $data; | ||
1031 | $text = "<$name>$text</$name>\n" if ( $name ne $self->{text_node_key} ); | ||
1032 | $text; | ||
1033 | } | ||
1034 | |||
1035 | sub scalar_to_xml { | ||
1036 | my $self = shift; | ||
1037 | my $name = shift; | ||
1038 | my $scalar = shift; | ||
1039 | my $copy = $scalar; | ||
1040 | my $text = &xml_escape($copy); | ||
1041 | $text = "<$name>$text</$name>\n" if ( $name ne $self->{text_node_key} ); | ||
1042 | $text; | ||
1043 | } | ||
1044 | |||
1045 | sub write_raw_xml { | ||
1046 | my $self = shift; | ||
1047 | my $file = shift; | ||
1048 | my $fh = Symbol::gensym(); | ||
1049 | open( $fh, ">$file" ) or return $self->die( "$! - $file" ); | ||
1050 | print $fh @_; | ||
1051 | close($fh); | ||
1052 | } | ||
1053 | |||
1054 | sub read_raw_xml { | ||
1055 | my $self = shift; | ||
1056 | my $file = shift; | ||
1057 | my $fh = Symbol::gensym(); | ||
1058 | open( $fh, $file ) or return $self->die( "$! - $file" ); | ||
1059 | local $/ = undef; | ||
1060 | my $text = <$fh>; | ||
1061 | close($fh); | ||
1062 | $text; | ||
1063 | } | ||
1064 | |||
1065 | sub xml_decl_encoding { | ||
1066 | my $textref = shift; | ||
1067 | return unless defined $$textref; | ||
1068 | my $args = ( $$textref =~ /^\s*<\?xml(\s+\S.*)\?>/s )[0] or return; | ||
1069 | my $getcode = ( $args =~ /\s+encoding=(".*?"|'.*?')/ )[0] or return; | ||
1070 | $getcode =~ s/^['"]//; | ||
1071 | $getcode =~ s/['"]$//; | ||
1072 | $getcode; | ||
1073 | } | ||
1074 | |||
1075 | sub encode_from_to { | ||
1076 | my $self = shift; | ||
1077 | my $txtref = shift or return; | ||
1078 | my $from = shift or return; | ||
1079 | my $to = shift or return; | ||
1080 | |||
1081 | unless ( defined $Encode::EUCJPMS::VERSION ) { | ||
1082 | $from = 'EUC-JP' if ( $from =~ /\beuc-?jp-?(win|ms)$/i ); | ||
1083 | $to = 'EUC-JP' if ( $to =~ /\beuc-?jp-?(win|ms)$/i ); | ||
1084 | } | ||
1085 | |||
1086 | my $setflag = $self->{utf8_flag} if exists $self->{utf8_flag}; | ||
1087 | if ( $] < 5.008001 && $setflag ) { | ||
1088 | return $self->die( "Perl 5.8.1 is required for utf8_flag: $]" ); | ||
1089 | } | ||
1090 | |||
1091 | if ( $] >= 5.008 ) { | ||
1092 | &load_encode(); | ||
1093 | my $check = ( $Encode::VERSION < 2.13 ) ? 0x400 : Encode::FB_XMLCREF(); | ||
1094 | if ( $] >= 5.008001 && utf8::is_utf8( $$txtref ) ) { | ||
1095 | if ( $to =~ /^utf-?8$/i ) { | ||
1096 | # skip | ||
1097 | } else { | ||
1098 | $$txtref = Encode::encode( $to, $$txtref, $check ); | ||
1099 | } | ||
1100 | } else { | ||
1101 | $$txtref = Encode::decode( $from, $$txtref ); | ||
1102 | if ( $to =~ /^utf-?8$/i && $setflag ) { | ||
1103 | # skip | ||
1104 | } else { | ||
1105 | $$txtref = Encode::encode( $to, $$txtref, $check ); | ||
1106 | } | ||
1107 | } | ||
1108 | } | ||
1109 | elsif ( ( uc($from) eq 'ISO-8859-1' | ||
1110 | || uc($from) eq 'US-ASCII' | ||
1111 | || uc($from) eq 'LATIN-1' ) && uc($to) eq 'UTF-8' ) { | ||
1112 | &latin1_to_utf8($txtref); | ||
1113 | } | ||
1114 | else { | ||
1115 | my $jfrom = &get_jcode_name($from); | ||
1116 | my $jto = &get_jcode_name($to); | ||
1117 | return $to if ( uc($jfrom) eq uc($jto) ); | ||
1118 | if ( $jfrom && $jto ) { | ||
1119 | &load_jcode(); | ||
1120 | if ( defined $Jcode::VERSION ) { | ||
1121 | Jcode::convert( $txtref, $jto, $jfrom ); | ||
1122 | } | ||
1123 | else { | ||
1124 | return $self->die( "Jcode.pm is required: $from to $to" ); | ||
1125 | } | ||
1126 | } | ||
1127 | else { | ||
1128 | return $self->die( "Encode.pm is required: $from to $to" ); | ||
1129 | } | ||
1130 | } | ||
1131 | $to; | ||
1132 | } | ||
1133 | |||
1134 | sub load_jcode { | ||
1135 | return if defined $Jcode::VERSION; | ||
1136 | local $@; | ||
1137 | eval { require Jcode; }; | ||
1138 | } | ||
1139 | |||
1140 | sub load_encode { | ||
1141 | return if defined $Encode::VERSION; | ||
1142 | local $@; | ||
1143 | eval { require Encode; }; | ||
1144 | } | ||
1145 | |||
1146 | sub latin1_to_utf8 { | ||
1147 | my $strref = shift; | ||
1148 | $$strref =~ s{ | ||
1149 | ([\x80-\xFF]) | ||
1150 | }{ | ||
1151 | pack( 'C2' => 0xC0|(ord($1)>>6),0x80|(ord($1)&0x3F) ) | ||
1152 | }exg; | ||
1153 | } | ||
1154 | |||
1155 | sub get_jcode_name { | ||
1156 | my $src = shift; | ||
1157 | my $dst; | ||
1158 | if ( $src =~ /^utf-?8$/i ) { | ||
1159 | $dst = 'utf8'; | ||
1160 | } | ||
1161 | elsif ( $src =~ /^euc.*jp(-?(win|ms))?$/i ) { | ||
1162 | $dst = 'euc'; | ||
1163 | } | ||
1164 | elsif ( $src =~ /^(shift.*jis|cp932|windows-31j)$/i ) { | ||
1165 | $dst = 'sjis'; | ||
1166 | } | ||
1167 | elsif ( $src =~ /^iso-2022-jp/ ) { | ||
1168 | $dst = 'jis'; | ||
1169 | } | ||
1170 | $dst; | ||
1171 | } | ||
1172 | |||
1173 | sub xml_escape { | ||
1174 | my $str = shift; | ||
1175 | return '' unless defined $str; | ||
1176 | # except for TAB(\x09),CR(\x0D),LF(\x0A) | ||
1177 | $str =~ s{ | ||
1178 | ([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]) | ||
1179 | }{ | ||
1180 | sprintf( '&#%d;', ord($1) ); | ||
1181 | }gex; | ||
1182 | $str =~ s/&(?!#(\d+;|x[\dA-Fa-f]+;))/&/g; | ||
1183 | $str =~ s/</</g; | ||
1184 | $str =~ s/>/>/g; | ||
1185 | $str =~ s/'/'/g; | ||
1186 | $str =~ s/"/"/g; | ||
1187 | $str; | ||
1188 | } | ||
1189 | |||
1190 | sub xml_unescape { | ||
1191 | my $str = shift; | ||
1192 | my $map = {qw( quot " lt < gt > apos ' amp & )}; | ||
1193 | $str =~ s{ | ||
1194 | (&(?:\#(\d+)|\#x([0-9a-fA-F]+)|(quot|lt|gt|apos|amp));) | ||
1195 | }{ | ||
1196 | $4 ? $map->{$4} : &char_deref($1,$2,$3); | ||
1197 | }gex; | ||
1198 | $str; | ||
1199 | } | ||
1200 | |||
1201 | sub char_deref { | ||
1202 | my( $str, $dec, $hex ) = @_; | ||
1203 | if ( defined $dec ) { | ||
1204 | return &code_to_utf8( $dec ) if ( $dec < 256 ); | ||
1205 | } | ||
1206 | elsif ( defined $hex ) { | ||
1207 | my $num = hex($hex); | ||
1208 | return &code_to_utf8( $num ) if ( $num < 256 ); | ||
1209 | } | ||
1210 | return $str; | ||
1211 | } | ||
1212 | |||
1213 | sub code_to_utf8 { | ||
1214 | my $code = shift; | ||
1215 | if ( $code < 128 ) { | ||
1216 | return pack( C => $code ); | ||
1217 | } | ||
1218 | elsif ( $code < 256 ) { | ||
1219 | return pack( C2 => 0xC0|($code>>6), 0x80|($code&0x3F)); | ||
1220 | } | ||
1221 | elsif ( $code < 65536 ) { | ||
1222 | return pack( C3 => 0xC0|($code>>12), 0x80|(($code>>6)&0x3F), 0x80|($code&0x3F)); | ||
1223 | } | ||
1224 | return shift if scalar @_; # default value | ||
1225 | sprintf( '&#x%04X;', $code ); | ||
1226 | } | ||
1227 | |||
1228 | 1; | ||