=head1 NAME XML::TreePP -- Pure Perl implementation for parsing/writing xml files =head1 SYNOPSIS parse xml file into hash tree use XML::TreePP; my $tpp = XML::TreePP->new(); my $tree = $tpp->parsefile( "index.rdf" ); print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n"; print "URL: ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n"; write xml as string from hash tree use XML::TreePP; my $tpp = XML::TreePP->new(); my $tree = { rss => { channel => { item => [ { title => "The Perl Directory", link => "http://www.perl.org/", }, { title => "The Comprehensive Perl Archive Network", link => "http://cpan.perl.org/", } ] } } }; my $xml = $tpp->write( $tree ); print $xml; get remote xml file with HTTP-GET and parse it into hash tree use XML::TreePP; my $tpp = XML::TreePP->new(); my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" ); print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n"; print "URL: ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n"; get remote xml file with HTTP-POST and parse it into hash tree use XML::TreePP; my $tpp = XML::TreePP->new( force_array => [qw( item )] ); my $cgiurl = "http://search.hatena.ne.jp/keyword"; my $keyword = "ajax"; my $cgiquery = "mode=rss2&word=".$keyword; my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery ); print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n"; print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n"; =head1 DESCRIPTION XML::TreePP module parses XML file and expands it for a hash tree. And also generate XML file from a hash tree. This is a pure Perl implementation. You can also download XML from remote web server like XMLHttpRequest object at JavaScript language. =head1 EXAMPLES =head2 Parse XML file Sample XML source: Yasuhisa Chizuko Shiori Yusuke Kairi Sample program to read a xml file and dump it: use XML::TreePP; use Data::Dumper; my $tpp = XML::TreePP->new(); my $tree = $tpp->parsefile( "family.xml" ); my $text = Dumper( $tree ); print $text; Result dumped: $VAR1 = { 'family' => { '-name' => 'Kawasaki', 'father' => 'Yasuhisa', 'mother' => 'Chizuko', 'children' => { 'girl' => 'Shiori' 'boy' => [ 'Yusuke', 'Kairi' ], } } }; Details: print $tree->{family}->{father}; # the father's given name. The prefix '-' is added on every attribute's name. print $tree->{family}->{"-name"}; # the family name of the family The array is used because the family has two boys. print $tree->{family}->{children}->{boy}->[1]; # The second boy's name print $tree->{family}->{children}->{girl}; # The girl's name =head2 Text node and attributes: If a element has both of a text node and attributes or both of a text node and other child nodes, value of a text node is moved to C<#text> like child nodes. use XML::TreePP; use Data::Dumper; my $tpp = XML::TreePP->new(); my $source = 'Kawasaki Yusuke'; my $tree = $tpp->parse( $source ); my $text = Dumper( $tree ); print $text; The result dumped is following: $VAR1 = { 'span' => { '-class' => 'author', '#text' => 'Kawasaki Yusuke' } }; The special node name of C<#text> is used because this elements has attribute(s) in addition to the text node. See also L option. =head1 METHODS =head2 new This constructor method returns a new XML::TreePP object with C<%options>. $tpp = XML::TreePP->new( %options ); =head2 set This method sets a option value for C. If C<$option_value> is not defined, its option is deleted. $tpp->set( option_name => $option_value ); See OPTIONS section below for details. =head2 get This method returns a current option value for C. $tpp->get( 'option_name' ); =head2 parse This method reads XML source and returns a hash tree converted. The first argument is a scalar or a reference to a scalar. $tree = $tpp->parse( $source ); =head2 parsefile This method reads a XML file and returns a hash tree converted. The first argument is a filename. $tree = $tpp->parsefile( $file ); =head2 parsehttp This method receives a XML file from a remote server via HTTP and returns a hash tree converted. $tree = $tpp->parsehttp( $method, $url, $body, $head ); C<$method> is a method of HTTP connection: GET/POST/PUT/DELETE C<$url> is an URI of a XML file. C<$body> is a request body when you use POST method. C<$head> is a request headers as a hash ref. L module or L module is required to fetch a file. ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head ); In array context, This method returns also raw XML source received and HTTP response's status code. =head2 write This method parses a hash tree and returns a XML source generated. $source = $tpp->write( $tree, $encode ); C<$tree> is a reference to a hash tree. =head2 writefile This method parses a hash tree and writes a XML source into a file. $tpp->writefile( $file, $tree, $encode ); C<$file> is a filename to create. C<$tree> is a reference to a hash tree. =head1 OPTIONS FOR PARSING XML This module accepts option parameters following: =head2 force_array This option allows you to specify a list of element names which should always be forced into an array representation. $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] ); The default value is null, it means that context of the elements will determine to make array or to keep it scalar or hash. Note that the special wildcard name C<'*'> means all elements. =head2 force_hash This option allows you to specify a list of element names which should always be forced into an hash representation. $tpp->set( force_hash => [ 'item', 'image' ] ); The default value is null, it means that context of the elements will determine to make hash or to keep it scalar as a text node. See also L option below. Note that the special wildcard name C<'*'> means all elements. =head2 cdata_scalar_ref This option allows you to convert a cdata section into a reference for scalar on parsing XML source. $tpp->set( cdata_scalar_ref => 1 ); The default value is false, it means that each cdata section is converted into a scalar. =head2 user_agent This option allows you to specify a HTTP_USER_AGENT string which is used by parsehttp() method. $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' ); The default string is C<'XML-TreePP/#.##'>, where C<'#.##'> is substituted with the version number of this library. =head2 http_lite This option forces pasrsehttp() method to use a L instance. my $http = HTTP::Lite->new(); $tpp->set( http_lite => $http ); =head2 lwp_useragent This option forces pasrsehttp() method to use a L instance. my $ua = LWP::UserAgent->new(); $ua->timeout( 60 ); $ua->env_proxy; $tpp->set( lwp_useragent => $ua ); You may use this with L. =head2 base_class This blesses class name for each element's hashref. Each class is named straight as a child class of it parent class. $tpp->set( base_class => 'MyElement' ); my $xml = 'text'; my $tree = $tpp->parse( $xml ); print ref $tree->{root}->{parent}->{child}, "\n"; A hash for element above is blessed to C class. You may use this with L. =head2 elem_class This blesses class name for each element's hashref. Each class is named horizontally under the direct child of C. $tpp->set( base_class => 'MyElement' ); my $xml = 'text'; my $tree = $tpp->parse( $xml ); print ref $tree->{root}->{parent}->{child}, "\n"; A hash for element above is blessed to C class. =head1 OPTIONS FOR WRITING XML =head2 first_out This option allows you to specify a list of element/attribute names which should always appears at first on output XML code. $tpp->set( first_out => [ 'link', 'title', '-type' ] ); The default value is null, it means alphabetical order is used. =head2 last_out This option allows you to specify a list of element/attribute names which should always appears at last on output XML code. $tpp->set( last_out => [ 'items', 'item', 'entry' ] ); =head2 indent This makes the output more human readable by indenting appropriately. $tpp->set( indent => 2 ); This doesn't strictly follow the XML Document Spec but does looks nice. =head2 xml_decl This module generates an XML declaration on writing an XML code per default. This option forces to change or leave it. $tpp->set( xml_decl => '' ); =head2 output_encoding This option allows you to specify a encoding of xml file generated by write/writefile methods. $tpp->set( output_encoding => 'UTF-8' ); On Perl 5.8.0 and later, you can select it from every encodings supported by Encode.pm. On Perl 5.6.x and before with Jcode.pm, you can use C, C, C and C. The default value is C which is recommended encoding. =head1 OPTIONS FOR BOTH =head2 utf8_flag This makes utf8 flag on for every element's value parsed and makes it on for an XML code generated as well. $tpp->set( utf8_flag => 1 ); Perl 5.8.1 or later is required to use this. =head2 attr_prefix This option allows you to specify a prefix character(s) which is inserted before each attribute names. $tpp->set( attr_prefix => '@' ); The default character is C<'-'>. Or set C<'@'> to access attribute values like E4X, ECMAScript for XML. Zero-length prefix C<''> is available as well, it means no prefix is added. =head2 text_node_key This option allows you to specify a hash key for text nodes. $tpp->set( text_node_key => '#text' ); The default key is C<#text>. =head2 ignore_error This module calls Carp::croak function on an error per default. This option makes all errors ignored and just return. $tpp->set( ignore_error => 1 ); =head2 use_ixhash This option keeps the order for each element appeared in XML. L module is required. $tpp->set( use_ixhash => 1 ); This makes parsing performance slow. (about 100% slower than default) =head1 AUTHOR Yusuke Kawasaki, http://www.kawa.net/ =head1 COPYRIGHT AND LICENSE Copyright (c) 2006-2007 Yusuke Kawasaki. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut package XML::TreePP; use strict; use Carp; use Symbol; use vars qw( $VERSION ); $VERSION = '0.32'; my $XML_ENCODING = 'UTF-8'; my $INTERNAL_ENCODING = 'UTF-8'; my $USER_AGENT = 'XML-TreePP/'.$VERSION.' '; my $ATTR_PREFIX = '-'; my $TEXT_NODE_KEY = '#text'; sub new { my $package = shift; my $self = {@_}; bless $self, $package; $self; } sub die { my $self = shift; my $mess = shift; return if $self->{ignore_error}; Carp::croak $mess; } sub warn { my $self = shift; my $mess = shift; return if $self->{ignore_error}; Carp::carp $mess; } sub set { my $self = shift; my $key = shift; my $val = shift; if ( defined $val ) { $self->{$key} = $val; } else { delete $self->{$key}; } } sub get { my $self = shift; my $key = shift; $self->{$key} if exists $self->{$key}; } sub writefile { my $self = shift; my $file = shift; my $tree = shift or return $self->die( 'Invalid tree' ); my $encode = shift; return $self->die( 'Invalid filename' ) unless defined $file; my $text = $self->write( $tree, $encode ); if ( $] >= 5.008001 && utf8::is_utf8( $text ) ) { utf8::encode( $text ); } $self->write_raw_xml( $file, $text ); } sub write { my $self = shift; my $tree = shift or return $self->die( 'Invalid tree' ); my $from = $self->{internal_encoding} || $INTERNAL_ENCODING; my $to = shift || $self->{output_encoding} || $XML_ENCODING; my $decl = $self->{xml_decl}; $decl = '' unless defined $decl; local $self->{__first_out}; if ( exists $self->{first_out} ) { my $keys = $self->{first_out}; $keys = [$keys] unless ref $keys; $self->{__first_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; } local $self->{__last_out}; if ( exists $self->{last_out} ) { my $keys = $self->{last_out}; $keys = [$keys] unless ref $keys; $self->{__last_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; } my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; $tnk = $TEXT_NODE_KEY unless defined $tnk; local $self->{text_node_key} = $tnk; my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; $apre = $ATTR_PREFIX unless defined $apre; local $self->{__attr_prefix_len} = length($apre); local $self->{__attr_prefix_rex} = defined $apre ? qr/^\Q$apre\E/s : undef; local $self->{__indent}; if ( exists $self->{indent} && $self->{indent} ) { $self->{__indent} = ' ' x $self->{indent}; } my $text = $self->hash_to_xml( undef, $tree ); if ( $from && $to ) { my $stat = $self->encode_from_to( \$text, $from, $to ); return $self->die( "Unsupported encoding: $to" ) unless $stat; } return $text if ( $decl eq '' ); join( "\n", $decl, $text ); } sub parsehttp { my $self = shift; local $self->{__user_agent}; if ( exists $self->{user_agent} ) { my $agent = $self->{user_agent}; $agent .= $USER_AGENT if ( $agent =~ /\s$/s ); $self->{__user_agent} = $agent if ( $agent ne '' ); } else { $self->{__user_agent} = $USER_AGENT; } my $http = $self->{__http_module}; unless ( $http ) { $http = $self->find_http_module(@_); $self->{__http_module} = $http; } if ( $http eq 'LWP::UserAgent' ) { return $self->parsehttp_lwp(@_); } elsif ( $http eq 'HTTP::Lite' ) { return $self->parsehttp_lite(@_); } else { return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); } } sub find_http_module { my $self = shift || {}; if ( exists $self->{lwp_useragent} && ref $self->{lwp_useragent} ) { return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; return 'LWP::UserAgent' if &load_lwp_useragent(); return $self->die( "LWP::UserAgent is required: $_[1]" ); } if ( exists $self->{http_lite} && ref $self->{http_lite} ) { return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; return 'HTTP::Lite' if &load_http_lite(); return $self->die( "HTTP::Lite is required: $_[1]" ); } return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; return 'LWP::UserAgent' if &load_lwp_useragent(); return 'HTTP::Lite' if &load_http_lite(); return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); } sub load_lwp_useragent { return $LWP::UserAgent::VERSION if defined $LWP::UserAgent::VERSION; local $@; eval { require LWP::UserAgent; }; $LWP::UserAgent::VERSION; } sub load_http_lite { return $HTTP::Lite::VERSION if defined $HTTP::Lite::VERSION; local $@; eval { require HTTP::Lite; }; $HTTP::Lite::VERSION; } sub load_tie_ixhash { return $Tie::IxHash::VERSION if defined $Tie::IxHash::VERSION; local $@; eval { require Tie::IxHash; }; $Tie::IxHash::VERSION; } sub parsehttp_lwp { my $self = shift; my $method = shift or return $self->die( 'Invalid HTTP method' ); my $url = shift or return $self->die( 'Invalid URL' ); my $body = shift; my $header = shift; my $ua = $self->{lwp_useragent} if exists $self->{lwp_useragent}; if ( ! ref $ua ) { $ua = LWP::UserAgent->new(); $ua->timeout(10); $ua->env_proxy(); $ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent}; } else { $ua->agent( $self->{__user_agent} ) if exists $self->{user_agent}; } my $req = HTTP::Request->new( $method, $url ); my $ct = 0; if ( ref $header ) { foreach my $field ( sort keys %$header ) { my $value = $header->{$field}; $req->header( $field => $value ); $ct ++ if ( $field =~ /^Content-Type$/i ); } } if ( defined $body && ! $ct ) { $req->header( 'Content-Type' => 'application/x-www-form-urlencoded' ); } $req->content($body) if defined $body; my $res = $ua->request($req); my $code = $res->code(); my $text = $res->content(); my $tree = $self->parse( \$text ) if $res->is_success(); wantarray ? ( $tree, $text, $code ) : $tree; } sub parsehttp_lite { my $self = shift; my $method = shift or return $self->die( 'Invalid HTTP method' ); my $url = shift or return $self->die( 'Invalid URL' ); my $body = shift; my $header = shift; my $http = HTTP::Lite->new(); $http->method($method); my $ua = 0; if ( ref $header ) { foreach my $field ( sort keys %$header ) { my $value = $header->{$field}; $http->add_req_header( $field, $value ); $ua ++ if ( $field =~ /^User-Agent$/i ); } } if ( defined $self->{__user_agent} && ! $ua ) { $http->add_req_header( 'User-Agent', $self->{__user_agent} ); } $http->{content} = $body if defined $body; my $code = $http->request($url) or return; my $text = $http->body(); my $tree = $self->parse( \$text ); wantarray ? ( $tree, $text, $code ) : $tree; } sub parsefile { my $self = shift; my $file = shift; return $self->die( 'Invalid filename' ) unless defined $file; my $text = $self->read_raw_xml($file); $self->parse( \$text ); } sub parse { my $self = shift; my $text = ref $_[0] ? ${$_[0]} : $_[0]; return $self->die( 'Null XML source' ) unless defined $text; my $from = &xml_decl_encoding(\$text) || $XML_ENCODING; my $to = $self->{internal_encoding} || $INTERNAL_ENCODING; if ( $from && $to ) { my $stat = $self->encode_from_to( \$text, $from, $to ); return $self->die( "Unsupported encoding: $from" ) unless $stat; } local $self->{__force_array}; local $self->{__force_array_all}; if ( exists $self->{force_array} ) { my $force = $self->{force_array}; $force = [$force] unless ref $force; $self->{__force_array} = { map { $_ => 1 } @$force }; $self->{__force_array_all} = $self->{__force_array}->{'*'}; } local $self->{__force_hash}; local $self->{__force_hash_all}; if ( exists $self->{force_hash} ) { my $force = $self->{force_hash}; $force = [$force] unless ref $force; $self->{__force_hash} = { map { $_ => 1 } @$force }; $self->{__force_hash_all} = $self->{__force_hash}->{'*'}; } my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; $tnk = $TEXT_NODE_KEY unless defined $tnk; local $self->{text_node_key} = $tnk; my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; $apre = $ATTR_PREFIX unless defined $apre; local $self->{attr_prefix} = $apre; if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { return $self->die( "Tie::IxHash is required." ) unless &load_tie_ixhash(); } my $flat = $self->xml_to_flat(\$text); my $class = $self->{base_class} if exists $self->{base_class}; my $tree = $self->flat_to_tree( $flat, '', $class ); if ( ref $tree ) { if ( defined $class ) { bless( $tree, $class ); } elsif ( exists $self->{elem_class} && $self->{elem_class} ) { bless( $tree, $self->{elem_class} ); } } wantarray ? ( $tree, $text ) : $tree; } sub xml_to_flat { my $self = shift; my $textref = shift; # reference my $flat = []; my $prefix = $self->{attr_prefix}; my $ixhash = ( exists $self->{use_ixhash} && $self->{use_ixhash} ); while ( $$textref =~ m{ ([^<]*) < (( \? ([^<>]*) \? )|( \!\[CDATA\[(.*?)\]\] )|( \!DOCTYPE\s+([^\[\]<>]*(?:\[.*?\]\s*)?) )|( \!--(.*?)-- )|( ([^\!\?\s<>](?:"[^"]*"|'[^']*'|[^"'<>])*) )) > ([^<]*) }sxg ) { my ( $ahead, $match, $typePI, $contPI, $typeCDATA, $contCDATA, $typeDocT, $contDocT, $typeCmnt, $contCmnt, $typeElem, $contElem, $follow ) = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ); if ( defined $ahead && $ahead =~ /\S/ ) { $self->warn( "Invalid string: [$ahead] before <$match>" ); } if ($typeElem) { # Element my $node = {}; if ( $contElem =~ s#^/## ) { $node->{endTag}++; } elsif ( $contElem =~ s#/$## ) { $node->{emptyTag}++; } else { $node->{startTag}++; } $node->{tagName} = $1 if ( $contElem =~ s#^(\S+)\s*## ); unless ( $node->{endTag} ) { my $attr; while ( $contElem =~ m{ ([^\s\=\"\']+)=(?:(")(.*?)"|'(.*?)') }sxg ) { my $key = $1; my $val = &xml_unescape( $2 ? $3 : $4 ); if ( ! ref $attr ) { $attr = {}; tie( %$attr, 'Tie::IxHash' ) if $ixhash; } $attr->{$prefix.$key} = $val; } $node->{attributes} = $attr if ref $attr; } push( @$flat, $node ); } elsif ($typeCDATA) { ## CDATASection if ( exists $self->{cdata_scalar_ref} && $self->{cdata_scalar_ref} ) { push( @$flat, \$contCDATA ); # as reference for scalar } else { push( @$flat, $contCDATA ); # as scalar like text node } } elsif ($typeCmnt) { # Comment (ignore) } elsif ($typeDocT) { # DocumentType (ignore) } elsif ($typePI) { # ProcessingInstruction (ignore) } else { $self->warn( "Invalid Tag: <$match>" ); } if ( $follow =~ /\S/ ) { # text node my $val = &xml_unescape($follow); push( @$flat, $val ); } } $flat; } sub flat_to_tree { my $self = shift; my $source = shift; my $parent = shift; my $class = shift; my $tree = {}; my $text = []; if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { tie( %$tree, 'Tie::IxHash' ); } while ( scalar @$source ) { my $node = shift @$source; if ( !ref $node || UNIVERSAL::isa( $node, "SCALAR" ) ) { push( @$text, $node ); # cdata or text node next; } my $name = $node->{tagName}; if ( $node->{endTag} ) { last if ( $parent eq $name ); return $self->die( "Invalid tag sequence: <$parent>" ); } my $elem = $node->{attributes}; my $forcehash = $self->{__force_hash_all} || $self->{__force_hash}->{$name}; my $subclass; if ( defined $class ) { my $escname = $name; $escname =~ s/\W/_/sg; $subclass = $class.'::'.$escname; } if ( $node->{startTag} ) { # recursive call my $child = $self->flat_to_tree( $source, $name, $subclass ); next unless defined $child; my $hasattr = scalar keys %$elem if ref $elem; if ( UNIVERSAL::isa( $child, "HASH" ) ) { if ( $hasattr ) { # some attributes and some child nodes %$elem = ( %$elem, %$child ); } else { # some child nodes without attributes $elem = $child; } } else { if ( $hasattr ) { # some attributes and text node $elem->{$self->{text_node_key}} = $child; } elsif ( $forcehash ) { # only text node without attributes $elem = { $self->{text_node_key} => $child }; } else { # text node without attributes $elem = $child; } } } elsif ( $forcehash && ! ref $elem ) { $elem = {}; } # bless to a class by base_class or elem_class if ( ref $elem && UNIVERSAL::isa( $elem, "HASH" ) ) { if ( defined $subclass ) { bless( $elem, $subclass ); } elsif ( exists $self->{elem_class} && $self->{elem_class} ) { my $escname = $name; $escname =~ s/\W/_/sg; my $elmclass = $self->{elem_class}.'::'.$escname; bless( $elem, $elmclass ); } } # next unless defined $elem; $tree->{$name} ||= []; push( @{ $tree->{$name} }, $elem ); } if ( ! $self->{__force_array_all} ) { foreach my $key ( keys %$tree ) { next if $self->{__force_array}->{$key}; next if ( 1 < scalar @{ $tree->{$key} } ); $tree->{$key} = shift @{ $tree->{$key} }; } } my $haschild = scalar keys %$tree; if ( scalar @$text ) { if ( scalar @$text == 1 ) { # one text node (normal) $text = shift @$text; } elsif ( ! scalar grep {ref $_} @$text ) { # some text node splitted $text = join( '', @$text ); } else { # some cdata node my $join = join( '', map {ref $_ ? $$_ : $_} @$text ); $text = \$join; } if ( $haschild ) { # some child nodes and also text node $tree->{$self->{text_node_key}} = $text; } else { # only text node without child nodes $tree = $text; } } elsif ( ! $haschild ) { # no child and no text $tree = ""; } $tree; } sub hash_to_xml { my $self = shift; my $name = shift; my $hash = shift; my $out = []; my $attr = []; my $allkeys = [ keys %$hash ]; my $fo = $self->{__first_out} if ref $self->{__first_out}; my $lo = $self->{__last_out} if ref $self->{__last_out}; my $firstkeys = [ sort { $fo->{$a} <=> $fo->{$b} } grep { exists $fo->{$_} } @$allkeys ] if ref $fo; my $lastkeys = [ sort { $lo->{$a} <=> $lo->{$b} } grep { exists $lo->{$_} } @$allkeys ] if ref $lo; $allkeys = [ grep { ! exists $fo->{$_} } @$allkeys ] if ref $fo; $allkeys = [ grep { ! exists $lo->{$_} } @$allkeys ] if ref $lo; unless ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { $allkeys = [ sort @$allkeys ]; } my $prelen = $self->{__attr_prefix_len}; my $pregex = $self->{__attr_prefix_rex}; foreach my $keys ( $firstkeys, $allkeys, $lastkeys ) { next unless ref $keys; my $elemkey = $prelen ? [ grep { $_ !~ $pregex } @$keys ] : $keys; my $attrkey = $prelen ? [ grep { $_ =~ $pregex } @$keys ] : []; foreach my $key ( @$elemkey ) { my $val = $hash->{$key}; if ( !defined $val ) { push( @$out, "<$key />" ); } elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { my $child = $self->array_to_xml( $key, $val ); push( @$out, $child ); } elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { my $child = $self->scalaref_to_cdata( $key, $val ); push( @$out, $child ); } elsif ( ref $val ) { my $child = $self->hash_to_xml( $key, $val ); push( @$out, $child ); } else { my $child = $self->scalar_to_xml( $key, $val ); push( @$out, $child ); } } foreach my $key ( @$attrkey ) { my $name = substr( $key, $prelen ); my $val = &xml_escape( $hash->{$key} ); push( @$attr, ' ' . $name . '="' . $val . '"' ); } } my $jattr = join( '', @$attr ); if ( defined $name && scalar @$out && ! grep { ! /^{__indent} ) { s/^(\s*<)/$self->{__indent}$1/mg foreach @$out; } unshift( @$out, "\n" ); } my $text = join( '', @$out ); if ( defined $name ) { if ( scalar @$out ) { $text = "<$name$jattr>$text\n"; } else { $text = "<$name$jattr />\n"; } } $text; } sub array_to_xml { my $self = shift; my $name = shift; my $array = shift; my $out = []; foreach my $val (@$array) { if ( !defined $val ) { push( @$out, "<$name />\n" ); } elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { my $child = $self->array_to_xml( $name, $val ); push( @$out, $child ); } elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { my $child = $self->scalaref_to_cdata( $name, $val ); push( @$out, $child ); } elsif ( ref $val ) { my $child = $self->hash_to_xml( $name, $val ); push( @$out, $child ); } else { my $child = $self->scalar_to_xml( $name, $val ); push( @$out, $child ); } } my $text = join( '', @$out ); $text; } sub scalaref_to_cdata { my $self = shift; my $name = shift; my $ref = shift; my $data = defined $$ref ? $$ref : ''; $data =~ s#(]])(>)#$1]]>'; my $text = $data; $text = "<$name>$text\n" if ( $name ne $self->{text_node_key} ); $text; } sub scalar_to_xml { my $self = shift; my $name = shift; my $scalar = shift; my $copy = $scalar; my $text = &xml_escape($copy); $text = "<$name>$text\n" if ( $name ne $self->{text_node_key} ); $text; } sub write_raw_xml { my $self = shift; my $file = shift; my $fh = Symbol::gensym(); open( $fh, ">$file" ) or return $self->die( "$! - $file" ); print $fh @_; close($fh); } sub read_raw_xml { my $self = shift; my $file = shift; my $fh = Symbol::gensym(); open( $fh, $file ) or return $self->die( "$! - $file" ); local $/ = undef; my $text = <$fh>; close($fh); $text; } sub xml_decl_encoding { my $textref = shift; return unless defined $$textref; my $args = ( $$textref =~ /^\s*<\?xml(\s+\S.*)\?>/s )[0] or return; my $getcode = ( $args =~ /\s+encoding=(".*?"|'.*?')/ )[0] or return; $getcode =~ s/^['"]//; $getcode =~ s/['"]$//; $getcode; } sub encode_from_to { my $self = shift; my $txtref = shift or return; my $from = shift or return; my $to = shift or return; unless ( defined $Encode::EUCJPMS::VERSION ) { $from = 'EUC-JP' if ( $from =~ /\beuc-?jp-?(win|ms)$/i ); $to = 'EUC-JP' if ( $to =~ /\beuc-?jp-?(win|ms)$/i ); } my $setflag = $self->{utf8_flag} if exists $self->{utf8_flag}; if ( $] < 5.008001 && $setflag ) { return $self->die( "Perl 5.8.1 is required for utf8_flag: $]" ); } if ( $] >= 5.008 ) { &load_encode(); my $check = ( $Encode::VERSION < 2.13 ) ? 0x400 : Encode::FB_XMLCREF(); if ( $] >= 5.008001 && utf8::is_utf8( $$txtref ) ) { if ( $to =~ /^utf-?8$/i ) { # skip } else { $$txtref = Encode::encode( $to, $$txtref, $check ); } } else { $$txtref = Encode::decode( $from, $$txtref ); if ( $to =~ /^utf-?8$/i && $setflag ) { # skip } else { $$txtref = Encode::encode( $to, $$txtref, $check ); } } } elsif ( ( uc($from) eq 'ISO-8859-1' || uc($from) eq 'US-ASCII' || uc($from) eq 'LATIN-1' ) && uc($to) eq 'UTF-8' ) { &latin1_to_utf8($txtref); } else { my $jfrom = &get_jcode_name($from); my $jto = &get_jcode_name($to); return $to if ( uc($jfrom) eq uc($jto) ); if ( $jfrom && $jto ) { &load_jcode(); if ( defined $Jcode::VERSION ) { Jcode::convert( $txtref, $jto, $jfrom ); } else { return $self->die( "Jcode.pm is required: $from to $to" ); } } else { return $self->die( "Encode.pm is required: $from to $to" ); } } $to; } sub load_jcode { return if defined $Jcode::VERSION; local $@; eval { require Jcode; }; } sub load_encode { return if defined $Encode::VERSION; local $@; eval { require Encode; }; } sub latin1_to_utf8 { my $strref = shift; $$strref =~ s{ ([\x80-\xFF]) }{ pack( 'C2' => 0xC0|(ord($1)>>6),0x80|(ord($1)&0x3F) ) }exg; } sub get_jcode_name { my $src = shift; my $dst; if ( $src =~ /^utf-?8$/i ) { $dst = 'utf8'; } elsif ( $src =~ /^euc.*jp(-?(win|ms))?$/i ) { $dst = 'euc'; } elsif ( $src =~ /^(shift.*jis|cp932|windows-31j)$/i ) { $dst = 'sjis'; } elsif ( $src =~ /^iso-2022-jp/ ) { $dst = 'jis'; } $dst; } sub xml_escape { my $str = shift; return '' unless defined $str; # except for TAB(\x09),CR(\x0D),LF(\x0A) $str =~ s{ ([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]) }{ sprintf( '&#%d;', ord($1) ); }gex; $str =~ s/&(?!#(\d+;|x[\dA-Fa-f]+;))/&/g; $str =~ s//>/g; $str =~ s/'/'/g; $str =~ s/"/"/g; $str; } sub xml_unescape { my $str = shift; my $map = {qw( quot " lt < gt > apos ' amp & )}; $str =~ s{ (&(?:\#(\d+)|\#x([0-9a-fA-F]+)|(quot|lt|gt|apos|amp));) }{ $4 ? $map->{$4} : &char_deref($1,$2,$3); }gex; $str; } sub char_deref { my( $str, $dec, $hex ) = @_; if ( defined $dec ) { return &code_to_utf8( $dec ) if ( $dec < 256 ); } elsif ( defined $hex ) { my $num = hex($hex); return &code_to_utf8( $num ) if ( $num < 256 ); } return $str; } sub code_to_utf8 { my $code = shift; if ( $code < 128 ) { return pack( C => $code ); } elsif ( $code < 256 ) { return pack( C2 => 0xC0|($code>>6), 0x80|($code&0x3F)); } elsif ( $code < 65536 ) { return pack( C3 => 0xC0|($code>>12), 0x80|(($code>>6)&0x3F), 0x80|($code&0x3F)); } return shift if scalar @_; # default value sprintf( '&#x%04X;', $code ); } 1;