diff options
Diffstat (limited to 'share/perl/lib/XML')
-rw-r--r-- | share/perl/lib/XML/RPC.pm | 217 | ||||
-rw-r--r-- | share/perl/lib/XML/Serializer.pm | 163 | ||||
-rw-r--r-- | share/perl/lib/XML/Simple.pm | 3284 | ||||
-rw-r--r-- | share/perl/lib/XML/TreePP.pm | 1228 |
4 files changed, 4892 insertions, 0 deletions
diff --git a/share/perl/lib/XML/RPC.pm b/share/perl/lib/XML/RPC.pm new file mode 100644 index 0000000..2e08867 --- /dev/null +++ b/share/perl/lib/XML/RPC.pm | |||
@@ -0,0 +1,217 @@ | |||
1 | package XML::RPC; | ||
2 | |||
3 | use strict; | ||
4 | use XML::TreePP; | ||
5 | use Data::Dumper; | ||
6 | use vars qw($VERSION $faultCode); | ||
7 | no strict 'refs'; | ||
8 | |||
9 | $VERSION = 0.5; | ||
10 | |||
11 | sub new { | ||
12 | my $package = shift; | ||
13 | my $self = { }; | ||
14 | bless $self, $package; | ||
15 | $self->{url} = shift; | ||
16 | $self->{tpp} = XML::TreePP->new(@_); | ||
17 | return $self; | ||
18 | } | ||
19 | |||
20 | sub call { | ||
21 | my $self = shift; | ||
22 | my ( $methodname, @params ) = @_; | ||
23 | |||
24 | die 'no url' if ( !$self->{url} ); | ||
25 | |||
26 | $faultCode = 0; | ||
27 | my $xml = $self->create_call_xml( $methodname, @params ); | ||
28 | #print STDERR $xml; | ||
29 | my $result = $self->{tpp}->parsehttp( | ||
30 | POST => $self->{url}, | ||
31 | $xml, | ||
32 | { | ||
33 | 'Content-Type' => 'text/xml', | ||
34 | 'User-Agent' => 'XML-RPC/' . $VERSION, | ||
35 | 'Content-Length' => length($xml) | ||
36 | } | ||
37 | ); | ||
38 | |||
39 | my @data = $self->unparse_response($result); | ||
40 | return @data == 1 ? $data[0] : @data; | ||
41 | } | ||
42 | |||
43 | sub receive { | ||
44 | my $self = shift; | ||
45 | my $result = eval { | ||
46 | my $xml = shift || die 'no xml'; | ||
47 | my $handler = shift || die 'no handler'; | ||
48 | my $hash = $self->{tpp}->parse($xml); | ||
49 | my ( $methodname, @params ) = $self->unparse_call($hash); | ||
50 | $self->create_response_xml( $handler->( $methodname, @params ) ); | ||
51 | }; | ||
52 | return $self->create_fault_xml($@) if ($@); | ||
53 | return $result; | ||
54 | |||
55 | } | ||
56 | |||
57 | sub create_fault_xml { | ||
58 | my $self = shift; | ||
59 | my $error = shift; | ||
60 | chomp($error); | ||
61 | return $self->{tpp} | ||
62 | ->write( { methodResponse => { fault => $self->parse( { faultString => $error, faultCode => $faultCode } ) } } ); | ||
63 | } | ||
64 | |||
65 | sub create_call_xml { | ||
66 | my $self = shift; | ||
67 | my ( $methodname, @params ) = @_; | ||
68 | |||
69 | return $self->{tpp}->write( | ||
70 | { | ||
71 | methodCall => { | ||
72 | methodName => $methodname, | ||
73 | params => { param => [ map { $self->parse($_) } @params ] } | ||
74 | } | ||
75 | } | ||
76 | ); | ||
77 | } | ||
78 | |||
79 | sub create_response_xml { | ||
80 | my $self = shift; | ||
81 | my @params = @_; | ||
82 | |||
83 | return $self->{tpp}->write( { methodResponse => { params => { param => [ map { $self->parse($_) } @params ] } } } ); | ||
84 | } | ||
85 | |||
86 | sub parse { | ||
87 | my $self = shift; | ||
88 | my $p = shift; | ||
89 | my $result; | ||
90 | |||
91 | if ( ref($p) eq 'HASH' ) { | ||
92 | $result = $self->parse_struct($p); | ||
93 | } | ||
94 | elsif ( ref($p) eq 'ARRAY' ) { | ||
95 | $result = $self->parse_array($p); | ||
96 | } | ||
97 | else { | ||
98 | $result = $self->parse_scalar($p); | ||
99 | } | ||
100 | |||
101 | return { value => $result }; | ||
102 | } | ||
103 | |||
104 | sub parse_scalar { | ||
105 | my $self = shift; | ||
106 | my $scalar = shift; | ||
107 | local $^W = undef; | ||
108 | |||
109 | if ( ( $scalar =~ m/^[\-+]?\d+$/ ) | ||
110 | && ( abs($scalar) <= ( 0xffffffff >> 1 ) ) ) | ||
111 | { | ||
112 | return { i4 => $scalar }; | ||
113 | } | ||
114 | elsif ( $scalar =~ m/^[\-+]?\d+\.\d+$/ ) { | ||
115 | return { double => $scalar }; | ||
116 | } | ||
117 | else { | ||
118 | return { string => \$scalar }; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | sub parse_struct { | ||
123 | my $self = shift; | ||
124 | my $hash = shift; | ||
125 | my @members; | ||
126 | while ( my ( $k, $v ) = each(%$hash) ) { | ||
127 | push @members, { name => $k, %{ $self->parse($v) } }; | ||
128 | } | ||
129 | return { struct => { member => \@members } }; | ||
130 | } | ||
131 | |||
132 | sub parse_array { | ||
133 | my $self = shift; | ||
134 | my $array = shift; | ||
135 | |||
136 | return { array => { data => { value => [ map { $self->parse($_)->{value} } $self->list($array) ] } } }; | ||
137 | } | ||
138 | |||
139 | sub unparse_response { | ||
140 | my $self = shift; | ||
141 | my $hash = shift; | ||
142 | |||
143 | my $response = $hash->{methodResponse} || die 'no data'; | ||
144 | |||
145 | if ( $response->{fault} ) { | ||
146 | return $self->unparse_value( $response->{fault}->{value} ); | ||
147 | } | ||
148 | else { | ||
149 | return map { $self->unparse_value( $_->{value} ) } $self->list( $response->{params}->{param} ); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | sub unparse_call { | ||
154 | my $self = shift; | ||
155 | my $hash = shift; | ||
156 | |||
157 | my $response = $hash->{methodCall} || die 'no data'; | ||
158 | |||
159 | my $methodname = $response->{methodName}; | ||
160 | my @args = | ||
161 | map { $self->unparse_value( $_->{value} ) } $self->list( $response->{params}->{param} ); | ||
162 | return ( $methodname, @args ); | ||
163 | } | ||
164 | |||
165 | sub unparse_value { | ||
166 | my $self = shift; | ||
167 | my $value = shift; | ||
168 | my $result; | ||
169 | |||
170 | return $value if ( ref($value) ne 'HASH' ); # for unspecified params | ||
171 | if ( $value->{struct} ) { | ||
172 | $result = $self->unparse_struct( $value->{struct} ); | ||
173 | return !%$result | ||
174 | ? undef | ||
175 | : $result; # fix for empty hashrefs from XML::TreePP | ||
176 | } | ||
177 | elsif ( $value->{array} ) { | ||
178 | return $self->unparse_array( $value->{array} ); | ||
179 | } | ||
180 | else { | ||
181 | return $self->unparse_scalar($value); | ||
182 | } | ||
183 | } | ||
184 | |||
185 | sub unparse_scalar { | ||
186 | my $self = shift; | ||
187 | my $scalar = shift; | ||
188 | my ($result) = values(%$scalar); | ||
189 | return ( ref($result) eq 'HASH' && !%$result ) | ||
190 | ? undef | ||
191 | : $result; # fix for empty hashrefs from XML::TreePP | ||
192 | } | ||
193 | |||
194 | sub unparse_struct { | ||
195 | my $self = shift; | ||
196 | my $struct = shift; | ||
197 | |||
198 | return { map { $_->{name} => $self->unparse_value( $_->{value} ) } $self->list( $struct->{member} ) }; | ||
199 | } | ||
200 | |||
201 | sub unparse_array { | ||
202 | my $self = shift; | ||
203 | my $array = shift; | ||
204 | my $data = $array->{data}; | ||
205 | |||
206 | return [ map { $self->unparse_value($_) } $self->list( $data->{value} ) ]; | ||
207 | } | ||
208 | |||
209 | sub list { | ||
210 | my $self = shift; | ||
211 | my $param = shift; | ||
212 | return () if ( !$param ); | ||
213 | return @$param if ( ref($param) eq 'ARRAY' ); | ||
214 | return ($param); | ||
215 | } | ||
216 | |||
217 | 1; | ||
diff --git a/share/perl/lib/XML/Serializer.pm b/share/perl/lib/XML/Serializer.pm new file mode 100644 index 0000000..6e64f17 --- /dev/null +++ b/share/perl/lib/XML/Serializer.pm | |||
@@ -0,0 +1,163 @@ | |||
1 | package XML::Serializer; | ||
2 | |||
3 | use strict; | ||
4 | |||
5 | my $root_element = "root"; | ||
6 | my $indent = " "; | ||
7 | #my $XML_HEADER = << "XMLHEADER"; | ||
8 | #<?xml version="1.0" encoding="__CHARSET__"?> | ||
9 | #<?xml-stylesheet type="text/xsl" href="__XSLT__" ?> | ||
10 | #XMLHEADER | ||
11 | my $XML_HEADER = << "XMLHEADER"; | ||
12 | <?xml version="1.0" encoding="__CHARSET__"?> | ||
13 | XMLHEADER | ||
14 | |||
15 | sub WITH_HEADER { | ||
16 | return 1; | ||
17 | } | ||
18 | |||
19 | sub new { | ||
20 | my ($this, $data, $root_name, $xslt) = @_; | ||
21 | my %fields = ( | ||
22 | _charset => "utf-8", | ||
23 | _data => "", | ||
24 | _output => "", | ||
25 | _root_name => $root_name ? $root_name : "root", | ||
26 | _xslt => $xslt ? $xslt : "" | ||
27 | ); | ||
28 | if (defined $data) { | ||
29 | $fields{_data} = $data; | ||
30 | } | ||
31 | return bless \%fields, $this; | ||
32 | } | ||
33 | |||
34 | sub set_root_name { | ||
35 | my ($this, $root_name) = @_; | ||
36 | $this->{_root_name} = $root_name; | ||
37 | } | ||
38 | |||
39 | sub set_data { | ||
40 | my ($this, $data) = @_; | ||
41 | $this->{_data} = $data; | ||
42 | } | ||
43 | |||
44 | sub set_charset { | ||
45 | my ($this, $charset) = @_; | ||
46 | $this->{_charset} = $charset; | ||
47 | } | ||
48 | |||
49 | sub set_xslt { | ||
50 | my ($this, $xslt) = @_; | ||
51 | $this->{_xslt} = $xslt; | ||
52 | } | ||
53 | |||
54 | sub to_string{ | ||
55 | my ($this, $header) = @_; | ||
56 | if ($header) { | ||
57 | $this->{_output} = &_make_xml_header($this->{_charset}, $this->{_xslt}); | ||
58 | } | ||
59 | $this->{_output} .= &_to_string($this->{_data}, $this->{_root_name}); | ||
60 | } | ||
61 | |||
62 | sub to_formatted{ | ||
63 | my ($this, $header) = @_; | ||
64 | if ($header) { | ||
65 | $this->{_output} = &_make_xml_header($this->{_charset}, $this->{_xslt}); | ||
66 | } | ||
67 | $this->{_output} .= &_to_formatted($this->{_root_name}, $this->{_data}); | ||
68 | } | ||
69 | |||
70 | sub _make_xml_header { | ||
71 | my $header = $XML_HEADER; | ||
72 | $header =~ s/__CHARSET__/$_[0]/; | ||
73 | $header =~ s/__XSLT__/$_[1]/; | ||
74 | return $header; | ||
75 | } | ||
76 | |||
77 | sub _to_string { | ||
78 | my ($obj, $name) = @_; | ||
79 | my $output = ""; | ||
80 | |||
81 | if (ref($obj) eq "HASH") { | ||
82 | my $attr_list = ""; | ||
83 | my $tmp_mid = ""; | ||
84 | foreach (sort keys %$obj) { | ||
85 | if ($_ =~ /^@/) { | ||
86 | $attr_list = &_to_string($_, $obj->{$_}); | ||
87 | } | ||
88 | $tmp_mid .= &_to_string($_, $obj->{$_}); | ||
89 | } | ||
90 | $output = &_start_node($name, $attr_list) . $tmp_mid . &_end_node($name); | ||
91 | } | ||
92 | elsif (ref($obj) eq "ARRAY") { | ||
93 | foreach (@$obj) { | ||
94 | $output .= &_to_string($_, $name); | ||
95 | } | ||
96 | } | ||
97 | else { | ||
98 | if ($_ =~ /^@(.+)$/) { | ||
99 | return "$1=\"$obj\" "; | ||
100 | } else { | ||
101 | $output = &_start_node($name) . $obj . &_end_node($name); | ||
102 | } | ||
103 | } | ||
104 | return $output; | ||
105 | } | ||
106 | |||
107 | sub _to_formatted { | ||
108 | my ($name, $obj, $depth) = @_; | ||
109 | # if (!$obj) { $obj = ""; } | ||
110 | if (!defined($depth)) { $depth = 0; } | ||
111 | my $output = ""; | ||
112 | if (ref($obj) eq "HASH") { | ||
113 | my $attr_list = ""; | ||
114 | my $tmp_mid = ""; | ||
115 | foreach (sort keys %$obj) { | ||
116 | if ($_ =~ /^@/) { | ||
117 | $attr_list = &_to_string($_, $obj->{$_}); | ||
118 | } | ||
119 | $tmp_mid .= &_to_formatted($_, $obj->{$_}, $depth+1); | ||
120 | } | ||
121 | $output = &_start_node($name, $attr_list, $depth) . "\n" . $tmp_mid . &_end_node($name, $depth); | ||
122 | } | ||
123 | elsif (ref($obj) eq "ARRAY") { | ||
124 | foreach (@$obj) { | ||
125 | $output .= &_to_formatted($name, $_, $depth); | ||
126 | } | ||
127 | } | ||
128 | else { | ||
129 | if ($_ =~ /^@(.+)$/) { | ||
130 | #return "$1=\"$obj\" "; | ||
131 | } else { | ||
132 | $output .= &_start_node($name, "", $depth); | ||
133 | $output .= $obj; | ||
134 | $output .= &_end_node($name); | ||
135 | } | ||
136 | } | ||
137 | return $output; | ||
138 | } | ||
139 | |||
140 | sub _start_node { | ||
141 | my $ret = ""; | ||
142 | if (defined $_[2]) { | ||
143 | for(1..$_[2]) { $ret .= $indent; } | ||
144 | } | ||
145 | my $tag = $_[0] ? $_[0] : ""; | ||
146 | my $attr = $_[1] ? $_[1] : ""; | ||
147 | $ret .= "<$tag $attr>"; | ||
148 | return $ret; | ||
149 | } | ||
150 | |||
151 | sub _end_node { | ||
152 | my $ret = ""; | ||
153 | if (defined $_[1]) { | ||
154 | for(1..$_[1]) { $ret .= $indent; } | ||
155 | } | ||
156 | if (defined $_[0]) { | ||
157 | $ret .= "</$_[0]>\n"; | ||
158 | } | ||
159 | return $ret; | ||
160 | } | ||
161 | |||
162 | 1; | ||
163 | |||
diff --git a/share/perl/lib/XML/Simple.pm b/share/perl/lib/XML/Simple.pm new file mode 100644 index 0000000..993669b --- /dev/null +++ b/share/perl/lib/XML/Simple.pm | |||
@@ -0,0 +1,3284 @@ | |||
1 | # $Id: Simple.pm,v 1.1 2008/01/18 09:10:19 ryu Exp $ | ||
2 | |||
3 | package XML::Simple; | ||
4 | |||
5 | =head1 NAME | ||
6 | |||
7 | XML::Simple - Easy API to maintain XML (esp config files) | ||
8 | |||
9 | =head1 SYNOPSIS | ||
10 | |||
11 | use XML::Simple; | ||
12 | |||
13 | my $ref = XMLin([<xml file or string>] [, <options>]); | ||
14 | |||
15 | my $xml = XMLout($hashref [, <options>]); | ||
16 | |||
17 | Or the object oriented way: | ||
18 | |||
19 | require XML::Simple; | ||
20 | |||
21 | my $xs = XML::Simple->new(options); | ||
22 | |||
23 | my $ref = $xs->XMLin([<xml file or string>] [, <options>]); | ||
24 | |||
25 | my $xml = $xs->XMLout($hashref [, <options>]); | ||
26 | |||
27 | (or see L<"SAX SUPPORT"> for 'the SAX way'). | ||
28 | |||
29 | To catch common errors: | ||
30 | |||
31 | use XML::Simple qw(:strict); | ||
32 | |||
33 | (see L<"STRICT MODE"> for more details). | ||
34 | |||
35 | =cut | ||
36 | |||
37 | # See after __END__ for more POD documentation | ||
38 | |||
39 | |||
40 | # Load essentials here, other modules loaded on demand later | ||
41 | |||
42 | use strict; | ||
43 | use Carp; | ||
44 | require Exporter; | ||
45 | |||
46 | |||
47 | ############################################################################## | ||
48 | # Define some constants | ||
49 | # | ||
50 | |||
51 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); | ||
52 | |||
53 | @ISA = qw(Exporter); | ||
54 | @EXPORT = qw(XMLin XMLout); | ||
55 | @EXPORT_OK = qw(xml_in xml_out); | ||
56 | $VERSION = '2.18'; | ||
57 | $PREFERRED_PARSER = undef; | ||
58 | |||
59 | my $StrictMode = 0; | ||
60 | |||
61 | my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr | ||
62 | searchpath forcearray cache suppressempty parseropts | ||
63 | grouptags nsexpand datahandler varattr variables | ||
64 | normalisespace normalizespace valueattr); | ||
65 | |||
66 | my @KnownOptOut = qw(keyattr keeproot contentkey noattr | ||
67 | rootname xmldecl outputfile noescape suppressempty | ||
68 | grouptags nsexpand handler noindent attrindent nosort | ||
69 | valueattr numericescape); | ||
70 | |||
71 | my @DefKeyAttr = qw(name key id); | ||
72 | my $DefRootName = qq(opt); | ||
73 | my $DefContentKey = qq(content); | ||
74 | my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>); | ||
75 | |||
76 | my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; | ||
77 | my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround | ||
78 | |||
79 | |||
80 | ############################################################################## | ||
81 | # Globals for use by caching routines | ||
82 | # | ||
83 | |||
84 | my %MemShareCache = (); | ||
85 | my %MemCopyCache = (); | ||
86 | |||
87 | |||
88 | ############################################################################## | ||
89 | # Wrapper for Exporter - handles ':strict' | ||
90 | # | ||
91 | |||
92 | sub import { | ||
93 | # Handle the :strict tag | ||
94 | |||
95 | $StrictMode = 1 if grep(/^:strict$/, @_); | ||
96 | |||
97 | # Pass everything else to Exporter.pm | ||
98 | |||
99 | @_ = grep(!/^:strict$/, @_); | ||
100 | goto &Exporter::import; | ||
101 | } | ||
102 | |||
103 | |||
104 | ############################################################################## | ||
105 | # Constructor for optional object interface. | ||
106 | # | ||
107 | |||
108 | sub new { | ||
109 | my $class = shift; | ||
110 | |||
111 | if(@_ % 2) { | ||
112 | croak "Default options must be name=>value pairs (odd number supplied)"; | ||
113 | } | ||
114 | |||
115 | my %known_opt; | ||
116 | @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; | ||
117 | |||
118 | my %raw_opt = @_; | ||
119 | my %def_opt; | ||
120 | while(my($key, $val) = each %raw_opt) { | ||
121 | my $lkey = lc($key); | ||
122 | $lkey =~ s/_//g; | ||
123 | croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); | ||
124 | $def_opt{$lkey} = $val; | ||
125 | } | ||
126 | my $self = { def_opt => \%def_opt }; | ||
127 | |||
128 | return(bless($self, $class)); | ||
129 | } | ||
130 | |||
131 | |||
132 | ############################################################################## | ||
133 | # Sub: _get_object() | ||
134 | # | ||
135 | # Helper routine called from XMLin() and XMLout() to create an object if none | ||
136 | # was provided. Note, this routine does mess with the caller's @_ array. | ||
137 | # | ||
138 | |||
139 | sub _get_object { | ||
140 | my $self; | ||
141 | if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { | ||
142 | $self = shift; | ||
143 | } | ||
144 | else { | ||
145 | $self = XML::Simple->new(); | ||
146 | } | ||
147 | |||
148 | return $self; | ||
149 | } | ||
150 | |||
151 | |||
152 | ############################################################################## | ||
153 | # Sub/Method: XMLin() | ||
154 | # | ||
155 | # Exported routine for slurping XML into a hashref - see pod for info. | ||
156 | # | ||
157 | # May be called as object method or as a plain function. | ||
158 | # | ||
159 | # Expects one arg for the source XML, optionally followed by a number of | ||
160 | # name => value option pairs. | ||
161 | # | ||
162 | |||
163 | sub XMLin { | ||
164 | my $self = &_get_object; # note, @_ is passed implicitly | ||
165 | |||
166 | my $target = shift; | ||
167 | |||
168 | |||
169 | # Work out whether to parse a string, a file or a filehandle | ||
170 | |||
171 | if(not defined $target) { | ||
172 | return $self->parse_file(undef, @_); | ||
173 | } | ||
174 | |||
175 | elsif($target eq '-') { | ||
176 | local($/) = undef; | ||
177 | $target = <STDIN>; | ||
178 | return $self->parse_string(\$target, @_); | ||
179 | } | ||
180 | |||
181 | elsif(my $type = ref($target)) { | ||
182 | if($type eq 'SCALAR') { | ||
183 | return $self->parse_string($target, @_); | ||
184 | } | ||
185 | else { | ||
186 | return $self->parse_fh($target, @_); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | elsif($target =~ m{<.*?>}s) { | ||
191 | return $self->parse_string(\$target, @_); | ||
192 | } | ||
193 | |||
194 | else { | ||
195 | return $self->parse_file($target, @_); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | |||
200 | ############################################################################## | ||
201 | # Sub/Method: parse_file() | ||
202 | # | ||
203 | # Same as XMLin, but only parses from a named file. | ||
204 | # | ||
205 | |||
206 | sub parse_file { | ||
207 | my $self = &_get_object; # note, @_ is passed implicitly | ||
208 | |||
209 | my $filename = shift; | ||
210 | |||
211 | $self->handle_options('in', @_); | ||
212 | |||
213 | $filename = $self->default_config_file if not defined $filename; | ||
214 | |||
215 | $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); | ||
216 | |||
217 | # Check cache for previous parse | ||
218 | |||
219 | if($self->{opt}->{cache}) { | ||
220 | foreach my $scheme (@{$self->{opt}->{cache}}) { | ||
221 | my $method = 'cache_read_' . $scheme; | ||
222 | my $opt = $self->$method($filename); | ||
223 | return($opt) if($opt); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | my $ref = $self->build_simple_tree($filename, undef); | ||
228 | |||
229 | if($self->{opt}->{cache}) { | ||
230 | my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; | ||
231 | $self->$method($ref, $filename); | ||
232 | } | ||
233 | |||
234 | return $ref; | ||
235 | } | ||
236 | |||
237 | |||
238 | ############################################################################## | ||
239 | # Sub/Method: parse_fh() | ||
240 | # | ||
241 | # Same as XMLin, but only parses from a filehandle. | ||
242 | # | ||
243 | |||
244 | sub parse_fh { | ||
245 | my $self = &_get_object; # note, @_ is passed implicitly | ||
246 | |||
247 | my $fh = shift; | ||
248 | croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . | ||
249 | " as a filehandle" unless ref $fh; | ||
250 | |||
251 | $self->handle_options('in', @_); | ||
252 | |||
253 | return $self->build_simple_tree(undef, $fh); | ||
254 | } | ||
255 | |||
256 | |||
257 | ############################################################################## | ||
258 | # Sub/Method: parse_string() | ||
259 | # | ||
260 | # Same as XMLin, but only parses from a string or a reference to a string. | ||
261 | # | ||
262 | |||
263 | sub parse_string { | ||
264 | my $self = &_get_object; # note, @_ is passed implicitly | ||
265 | |||
266 | my $string = shift; | ||
267 | |||
268 | $self->handle_options('in', @_); | ||
269 | |||
270 | return $self->build_simple_tree(undef, ref $string ? $string : \$string); | ||
271 | } | ||
272 | |||
273 | |||
274 | ############################################################################## | ||
275 | # Method: default_config_file() | ||
276 | # | ||
277 | # Returns the name of the XML file to parse if no filename (or XML string) | ||
278 | # was provided. | ||
279 | # | ||
280 | |||
281 | sub default_config_file { | ||
282 | my $self = shift; | ||
283 | |||
284 | require File::Basename; | ||
285 | |||
286 | my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); | ||
287 | |||
288 | # Add script directory to searchpath | ||
289 | |||
290 | if($script_dir) { | ||
291 | unshift(@{$self->{opt}->{searchpath}}, $script_dir); | ||
292 | } | ||
293 | |||
294 | return $basename . '.xml'; | ||
295 | } | ||
296 | |||
297 | |||
298 | ############################################################################## | ||
299 | # Method: build_simple_tree() | ||
300 | # | ||
301 | # Builds a 'tree' data structure as provided by XML::Parser and then | ||
302 | # 'simplifies' it as specified by the various options in effect. | ||
303 | # | ||
304 | |||
305 | sub build_simple_tree { | ||
306 | my $self = shift; | ||
307 | |||
308 | my $tree = $self->build_tree(@_); | ||
309 | |||
310 | return $self->{opt}->{keeproot} | ||
311 | ? $self->collapse({}, @$tree) | ||
312 | : $self->collapse(@{$tree->[1]}); | ||
313 | } | ||
314 | |||
315 | |||
316 | ############################################################################## | ||
317 | # Method: build_tree() | ||
318 | # | ||
319 | # This routine will be called if there is no suitable pre-parsed tree in a | ||
320 | # cache. It parses the XML and returns an XML::Parser 'Tree' style data | ||
321 | # structure (summarised in the comments for the collapse() routine below). | ||
322 | # | ||
323 | # XML::Simple requires the services of another module that knows how to parse | ||
324 | # XML. If XML::SAX is installed, the default SAX parser will be used, | ||
325 | # otherwise XML::Parser will be used. | ||
326 | # | ||
327 | # This routine expects to be passed a filename as argument 1 or a 'string' as | ||
328 | # argument 2. The 'string' might be a string of XML (passed by reference to | ||
329 | # save memory) or it might be a reference to an IO::Handle. (This | ||
330 | # non-intuitive mess results in part from the way XML::Parser works but that's | ||
331 | # really no excuse). | ||
332 | # | ||
333 | |||
334 | sub build_tree { | ||
335 | my $self = shift; | ||
336 | my $filename = shift; | ||
337 | my $string = shift; | ||
338 | |||
339 | |||
340 | my $preferred_parser = $PREFERRED_PARSER; | ||
341 | unless(defined($preferred_parser)) { | ||
342 | $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; | ||
343 | } | ||
344 | if($preferred_parser eq 'XML::Parser') { | ||
345 | return($self->build_tree_xml_parser($filename, $string)); | ||
346 | } | ||
347 | |||
348 | eval { require XML::SAX; }; # We didn't need it until now | ||
349 | if($@) { # No XML::SAX - fall back to XML::Parser | ||
350 | if($preferred_parser) { # unless a SAX parser was expressly requested | ||
351 | croak "XMLin() could not load XML::SAX"; | ||
352 | } | ||
353 | return($self->build_tree_xml_parser($filename, $string)); | ||
354 | } | ||
355 | |||
356 | $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); | ||
357 | |||
358 | my $sp = XML::SAX::ParserFactory->parser(Handler => $self); | ||
359 | |||
360 | $self->{nocollapse} = 1; | ||
361 | my($tree); | ||
362 | if($filename) { | ||
363 | $tree = $sp->parse_uri($filename); | ||
364 | } | ||
365 | else { | ||
366 | if(ref($string) && ref($string) ne 'SCALAR') { | ||
367 | $tree = $sp->parse_file($string); | ||
368 | } | ||
369 | else { | ||
370 | $tree = $sp->parse_string($$string); | ||
371 | } | ||
372 | } | ||
373 | |||
374 | return($tree); | ||
375 | } | ||
376 | |||
377 | |||
378 | ############################################################################## | ||
379 | # Method: build_tree_xml_parser() | ||
380 | # | ||
381 | # This routine will be called if XML::SAX is not installed, or if XML::Parser | ||
382 | # was specifically requested. It takes the same arguments as build_tree() and | ||
383 | # returns the same data structure (XML::Parser 'Tree' style). | ||
384 | # | ||
385 | |||
386 | sub build_tree_xml_parser { | ||
387 | my $self = shift; | ||
388 | my $filename = shift; | ||
389 | my $string = shift; | ||
390 | |||
391 | |||
392 | eval { | ||
393 | local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() | ||
394 | require XML::Parser; # We didn't need it until now | ||
395 | }; | ||
396 | if($@) { | ||
397 | croak "XMLin() requires either XML::SAX or XML::Parser"; | ||
398 | } | ||
399 | |||
400 | if($self->{opt}->{nsexpand}) { | ||
401 | carp "'nsexpand' option requires XML::SAX"; | ||
402 | } | ||
403 | |||
404 | my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); | ||
405 | my($tree); | ||
406 | if($filename) { | ||
407 | # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl | ||
408 | local(*XML_FILE); | ||
409 | open(XML_FILE, '<', $filename) || croak qq($filename - $!); | ||
410 | $tree = $xp->parse(*XML_FILE); | ||
411 | close(XML_FILE); | ||
412 | } | ||
413 | else { | ||
414 | $tree = $xp->parse($$string); | ||
415 | } | ||
416 | |||
417 | return($tree); | ||
418 | } | ||
419 | |||
420 | |||
421 | ############################################################################## | ||
422 | # Method: cache_write_storable() | ||
423 | # | ||
424 | # Wrapper routine for invoking Storable::nstore() to cache a parsed data | ||
425 | # structure. | ||
426 | # | ||
427 | |||
428 | sub cache_write_storable { | ||
429 | my($self, $data, $filename) = @_; | ||
430 | |||
431 | my $cachefile = $self->storable_filename($filename); | ||
432 | |||
433 | require Storable; # We didn't need it until now | ||
434 | |||
435 | if ('VMS' eq $^O) { | ||
436 | Storable::nstore($data, $cachefile); | ||
437 | } | ||
438 | else { | ||
439 | # If the following line fails for you, your Storable.pm is old - upgrade | ||
440 | Storable::lock_nstore($data, $cachefile); | ||
441 | } | ||
442 | |||
443 | } | ||
444 | |||
445 | |||
446 | ############################################################################## | ||
447 | # Method: cache_read_storable() | ||
448 | # | ||
449 | # Wrapper routine for invoking Storable::retrieve() to read a cached parsed | ||
450 | # data structure. Only returns cached data if the cache file exists and is | ||
451 | # newer than the source XML file. | ||
452 | # | ||
453 | |||
454 | sub cache_read_storable { | ||
455 | my($self, $filename) = @_; | ||
456 | |||
457 | my $cachefile = $self->storable_filename($filename); | ||
458 | |||
459 | return unless(-r $cachefile); | ||
460 | return unless((stat($cachefile))[9] > (stat($filename))[9]); | ||
461 | |||
462 | require Storable; # We didn't need it until now | ||
463 | |||
464 | if ('VMS' eq $^O) { | ||
465 | return(Storable::retrieve($cachefile)); | ||
466 | } | ||
467 | else { | ||
468 | return(Storable::lock_retrieve($cachefile)); | ||
469 | } | ||
470 | |||
471 | } | ||
472 | |||
473 | |||
474 | ############################################################################## | ||
475 | # Method: storable_filename() | ||
476 | # | ||
477 | # Translates the supplied source XML filename into a filename for the storable | ||
478 | # cached data. A '.stor' suffix is added after stripping an optional '.xml' | ||
479 | # suffix. | ||
480 | # | ||
481 | |||
482 | sub storable_filename { | ||
483 | my($self, $cachefile) = @_; | ||
484 | |||
485 | $cachefile =~ s{(\.xml)?$}{.stor}; | ||
486 | return $cachefile; | ||
487 | } | ||
488 | |||
489 | |||
490 | ############################################################################## | ||
491 | # Method: cache_write_memshare() | ||
492 | # | ||
493 | # Takes the supplied data structure reference and stores it away in a global | ||
494 | # hash structure. | ||
495 | # | ||
496 | |||
497 | sub cache_write_memshare { | ||
498 | my($self, $data, $filename) = @_; | ||
499 | |||
500 | $MemShareCache{$filename} = [time(), $data]; | ||
501 | } | ||
502 | |||
503 | |||
504 | ############################################################################## | ||
505 | # Method: cache_read_memshare() | ||
506 | # | ||
507 | # Takes a filename and looks in a global hash for a cached parsed version. | ||
508 | # | ||
509 | |||
510 | sub cache_read_memshare { | ||
511 | my($self, $filename) = @_; | ||
512 | |||
513 | return unless($MemShareCache{$filename}); | ||
514 | return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); | ||
515 | |||
516 | return($MemShareCache{$filename}->[1]); | ||
517 | |||
518 | } | ||
519 | |||
520 | |||
521 | ############################################################################## | ||
522 | # Method: cache_write_memcopy() | ||
523 | # | ||
524 | # Takes the supplied data structure and stores a copy of it in a global hash | ||
525 | # structure. | ||
526 | # | ||
527 | |||
528 | sub cache_write_memcopy { | ||
529 | my($self, $data, $filename) = @_; | ||
530 | |||
531 | require Storable; # We didn't need it until now | ||
532 | |||
533 | $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; | ||
534 | } | ||
535 | |||
536 | |||
537 | ############################################################################## | ||
538 | # Method: cache_read_memcopy() | ||
539 | # | ||
540 | # Takes a filename and looks in a global hash for a cached parsed version. | ||
541 | # Returns a reference to a copy of that data structure. | ||
542 | # | ||
543 | |||
544 | sub cache_read_memcopy { | ||
545 | my($self, $filename) = @_; | ||
546 | |||
547 | return unless($MemCopyCache{$filename}); | ||
548 | return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); | ||
549 | |||
550 | return(Storable::dclone($MemCopyCache{$filename}->[1])); | ||
551 | |||
552 | } | ||
553 | |||
554 | |||
555 | ############################################################################## | ||
556 | # Sub/Method: XMLout() | ||
557 | # | ||
558 | # Exported routine for 'unslurping' a data structure out to XML. | ||
559 | # | ||
560 | # Expects a reference to a data structure and an optional list of option | ||
561 | # name => value pairs. | ||
562 | # | ||
563 | |||
564 | sub XMLout { | ||
565 | my $self = &_get_object; # note, @_ is passed implicitly | ||
566 | |||
567 | croak "XMLout() requires at least one argument" unless(@_); | ||
568 | my $ref = shift; | ||
569 | |||
570 | $self->handle_options('out', @_); | ||
571 | |||
572 | |||
573 | # If namespace expansion is set, XML::NamespaceSupport is required | ||
574 | |||
575 | if($self->{opt}->{nsexpand}) { | ||
576 | require XML::NamespaceSupport; | ||
577 | $self->{nsup} = XML::NamespaceSupport->new(); | ||
578 | $self->{ns_prefix} = 'aaa'; | ||
579 | } | ||
580 | |||
581 | |||
582 | # Wrap top level arrayref in a hash | ||
583 | |||
584 | if(UNIVERSAL::isa($ref, 'ARRAY')) { | ||
585 | $ref = { anon => $ref }; | ||
586 | } | ||
587 | |||
588 | |||
589 | # Extract rootname from top level hash if keeproot enabled | ||
590 | |||
591 | if($self->{opt}->{keeproot}) { | ||
592 | my(@keys) = keys(%$ref); | ||
593 | if(@keys == 1) { | ||
594 | $ref = $ref->{$keys[0]}; | ||
595 | $self->{opt}->{rootname} = $keys[0]; | ||
596 | } | ||
597 | } | ||
598 | |||
599 | # Ensure there are no top level attributes if we're not adding root elements | ||
600 | |||
601 | elsif($self->{opt}->{rootname} eq '') { | ||
602 | if(UNIVERSAL::isa($ref, 'HASH')) { | ||
603 | my $refsave = $ref; | ||
604 | $ref = {}; | ||
605 | foreach (keys(%$refsave)) { | ||
606 | if(ref($refsave->{$_})) { | ||
607 | $ref->{$_} = $refsave->{$_}; | ||
608 | } | ||
609 | else { | ||
610 | $ref->{$_} = [ $refsave->{$_} ]; | ||
611 | } | ||
612 | } | ||
613 | } | ||
614 | } | ||
615 | |||
616 | |||
617 | # Encode the hashref and write to file if necessary | ||
618 | |||
619 | $self->{_ancestors} = []; | ||
620 | my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); | ||
621 | delete $self->{_ancestors}; | ||
622 | |||
623 | if($self->{opt}->{xmldecl}) { | ||
624 | $xml = $self->{opt}->{xmldecl} . "\n" . $xml; | ||
625 | } | ||
626 | |||
627 | if($self->{opt}->{outputfile}) { | ||
628 | if(ref($self->{opt}->{outputfile})) { | ||
629 | my $fh = $self->{opt}->{outputfile}; | ||
630 | if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { | ||
631 | eval { require IO::Handle; }; | ||
632 | croak $@ if $@; | ||
633 | } | ||
634 | return($fh->print($xml)); | ||
635 | } | ||
636 | else { | ||
637 | local(*OUT); | ||
638 | open(OUT, '>', "$self->{opt}->{outputfile}") || | ||
639 | croak "open($self->{opt}->{outputfile}): $!"; | ||
640 | binmode(OUT, ':utf8') if($] >= 5.008); | ||
641 | print OUT $xml || croak "print: $!"; | ||
642 | close(OUT); | ||
643 | } | ||
644 | } | ||
645 | elsif($self->{opt}->{handler}) { | ||
646 | require XML::SAX; | ||
647 | my $sp = XML::SAX::ParserFactory->parser( | ||
648 | Handler => $self->{opt}->{handler} | ||
649 | ); | ||
650 | return($sp->parse_string($xml)); | ||
651 | } | ||
652 | else { | ||
653 | return($xml); | ||
654 | } | ||
655 | } | ||
656 | |||
657 | |||
658 | ############################################################################## | ||
659 | # Method: handle_options() | ||
660 | # | ||
661 | # Helper routine for both XMLin() and XMLout(). Both routines handle their | ||
662 | # first argument and assume all other args are options handled by this routine. | ||
663 | # Saves a hash of options in $self->{opt}. | ||
664 | # | ||
665 | # If default options were passed to the constructor, they will be retrieved | ||
666 | # here and merged with options supplied to the method call. | ||
667 | # | ||
668 | # First argument should be the string 'in' or the string 'out'. | ||
669 | # | ||
670 | # Remaining arguments should be name=>value pairs. Sets up default values | ||
671 | # for options not supplied. Unrecognised options are a fatal error. | ||
672 | # | ||
673 | |||
674 | sub handle_options { | ||
675 | my $self = shift; | ||
676 | my $dirn = shift; | ||
677 | |||
678 | |||
679 | # Determine valid options based on context | ||
680 | |||
681 | my %known_opt; | ||
682 | if($dirn eq 'in') { | ||
683 | @known_opt{@KnownOptIn} = @KnownOptIn; | ||
684 | } | ||
685 | else { | ||
686 | @known_opt{@KnownOptOut} = @KnownOptOut; | ||
687 | } | ||
688 | |||
689 | |||
690 | # Store supplied options in hashref and weed out invalid ones | ||
691 | |||
692 | if(@_ % 2) { | ||
693 | croak "Options must be name=>value pairs (odd number supplied)"; | ||
694 | } | ||
695 | my %raw_opt = @_; | ||
696 | my $opt = {}; | ||
697 | $self->{opt} = $opt; | ||
698 | |||
699 | while(my($key, $val) = each %raw_opt) { | ||
700 | my $lkey = lc($key); | ||
701 | $lkey =~ s/_//g; | ||
702 | croak "Unrecognised option: $key" unless($known_opt{$lkey}); | ||
703 | $opt->{$lkey} = $val; | ||
704 | } | ||
705 | |||
706 | |||
707 | # Merge in options passed to constructor | ||
708 | |||
709 | foreach (keys(%known_opt)) { | ||
710 | unless(exists($opt->{$_})) { | ||
711 | if(exists($self->{def_opt}->{$_})) { | ||
712 | $opt->{$_} = $self->{def_opt}->{$_}; | ||
713 | } | ||
714 | } | ||
715 | } | ||
716 | |||
717 | |||
718 | # Set sensible defaults if not supplied | ||
719 | |||
720 | if(exists($opt->{rootname})) { | ||
721 | unless(defined($opt->{rootname})) { | ||
722 | $opt->{rootname} = ''; | ||
723 | } | ||
724 | } | ||
725 | else { | ||
726 | $opt->{rootname} = $DefRootName; | ||
727 | } | ||
728 | |||
729 | if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { | ||
730 | $opt->{xmldecl} = $DefXmlDecl; | ||
731 | } | ||
732 | |||
733 | if(exists($opt->{contentkey})) { | ||
734 | if($opt->{contentkey} =~ m{^-(.*)$}) { | ||
735 | $opt->{contentkey} = $1; | ||
736 | $opt->{collapseagain} = 1; | ||
737 | } | ||
738 | } | ||
739 | else { | ||
740 | $opt->{contentkey} = $DefContentKey; | ||
741 | } | ||
742 | |||
743 | unless(exists($opt->{normalisespace})) { | ||
744 | $opt->{normalisespace} = $opt->{normalizespace}; | ||
745 | } | ||
746 | $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); | ||
747 | |||
748 | # Cleanups for values assumed to be arrays later | ||
749 | |||
750 | if($opt->{searchpath}) { | ||
751 | unless(ref($opt->{searchpath})) { | ||
752 | $opt->{searchpath} = [ $opt->{searchpath} ]; | ||
753 | } | ||
754 | } | ||
755 | else { | ||
756 | $opt->{searchpath} = [ ]; | ||
757 | } | ||
758 | |||
759 | if($opt->{cache} and !ref($opt->{cache})) { | ||
760 | $opt->{cache} = [ $opt->{cache} ]; | ||
761 | } | ||
762 | if($opt->{cache}) { | ||
763 | $_ = lc($_) foreach (@{$opt->{cache}}); | ||
764 | foreach my $scheme (@{$opt->{cache}}) { | ||
765 | my $method = 'cache_read_' . $scheme; | ||
766 | croak "Unsupported caching scheme: $scheme" | ||
767 | unless($self->can($method)); | ||
768 | } | ||
769 | } | ||
770 | |||
771 | if(exists($opt->{parseropts})) { | ||
772 | if($^W) { | ||
773 | carp "Warning: " . | ||
774 | "'ParserOpts' is deprecated, contact the author if you need it"; | ||
775 | } | ||
776 | } | ||
777 | else { | ||
778 | $opt->{parseropts} = [ ]; | ||
779 | } | ||
780 | |||
781 | |||
782 | # Special cleanup for {forcearray} which could be regex, arrayref or boolean | ||
783 | # or left to default to 0 | ||
784 | |||
785 | if(exists($opt->{forcearray})) { | ||
786 | if(ref($opt->{forcearray}) eq 'Regexp') { | ||
787 | $opt->{forcearray} = [ $opt->{forcearray} ]; | ||
788 | } | ||
789 | |||
790 | if(ref($opt->{forcearray}) eq 'ARRAY') { | ||
791 | my @force_list = @{$opt->{forcearray}}; | ||
792 | if(@force_list) { | ||
793 | $opt->{forcearray} = {}; | ||
794 | foreach my $tag (@force_list) { | ||
795 | if(ref($tag) eq 'Regexp') { | ||
796 | push @{$opt->{forcearray}->{_regex}}, $tag; | ||
797 | } | ||
798 | else { | ||
799 | $opt->{forcearray}->{$tag} = 1; | ||
800 | } | ||
801 | } | ||
802 | } | ||
803 | else { | ||
804 | $opt->{forcearray} = 0; | ||
805 | } | ||
806 | } | ||
807 | else { | ||
808 | $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); | ||
809 | } | ||
810 | } | ||
811 | else { | ||
812 | if($StrictMode and $dirn eq 'in') { | ||
813 | croak "No value specified for 'ForceArray' option in call to XML$dirn()"; | ||
814 | } | ||
815 | $opt->{forcearray} = 0; | ||
816 | } | ||
817 | |||
818 | |||
819 | # Special cleanup for {keyattr} which could be arrayref or hashref or left | ||
820 | # to default to arrayref | ||
821 | |||
822 | if(exists($opt->{keyattr})) { | ||
823 | if(ref($opt->{keyattr})) { | ||
824 | if(ref($opt->{keyattr}) eq 'HASH') { | ||
825 | |||
826 | # Make a copy so we can mess with it | ||
827 | |||
828 | $opt->{keyattr} = { %{$opt->{keyattr}} }; | ||
829 | |||
830 | |||
831 | # Convert keyattr => { elem => '+attr' } | ||
832 | # to keyattr => { elem => [ 'attr', '+' ] } | ||
833 | |||
834 | foreach my $el (keys(%{$opt->{keyattr}})) { | ||
835 | if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { | ||
836 | $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; | ||
837 | if($StrictMode and $dirn eq 'in') { | ||
838 | next if($opt->{forcearray} == 1); | ||
839 | next if(ref($opt->{forcearray}) eq 'HASH' | ||
840 | and $opt->{forcearray}->{$el}); | ||
841 | croak "<$el> set in KeyAttr but not in ForceArray"; | ||
842 | } | ||
843 | } | ||
844 | else { | ||
845 | delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) | ||
846 | } | ||
847 | } | ||
848 | } | ||
849 | else { | ||
850 | if(@{$opt->{keyattr}} == 0) { | ||
851 | delete($opt->{keyattr}); | ||
852 | } | ||
853 | } | ||
854 | } | ||
855 | else { | ||
856 | $opt->{keyattr} = [ $opt->{keyattr} ]; | ||
857 | } | ||
858 | } | ||
859 | else { | ||
860 | if($StrictMode) { | ||
861 | croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; | ||
862 | } | ||
863 | $opt->{keyattr} = [ @DefKeyAttr ]; | ||
864 | } | ||
865 | |||
866 | |||
867 | # Special cleanup for {valueattr} which could be arrayref or hashref | ||
868 | |||
869 | if(exists($opt->{valueattr})) { | ||
870 | if(ref($opt->{valueattr}) eq 'ARRAY') { | ||
871 | $opt->{valueattrlist} = {}; | ||
872 | $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); | ||
873 | } | ||
874 | } | ||
875 | |||
876 | # make sure there's nothing weird in {grouptags} | ||
877 | |||
878 | if($opt->{grouptags}) { | ||
879 | croak "Illegal value for 'GroupTags' option - expected a hashref" | ||
880 | unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); | ||
881 | |||
882 | while(my($key, $val) = each %{$opt->{grouptags}}) { | ||
883 | next if $key ne $val; | ||
884 | croak "Bad value in GroupTags: '$key' => '$val'"; | ||
885 | } | ||
886 | } | ||
887 | |||
888 | |||
889 | # Check the {variables} option is valid and initialise variables hash | ||
890 | |||
891 | if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { | ||
892 | croak "Illegal value for 'Variables' option - expected a hashref"; | ||
893 | } | ||
894 | |||
895 | if($opt->{variables}) { | ||
896 | $self->{_var_values} = { %{$opt->{variables}} }; | ||
897 | } | ||
898 | elsif($opt->{varattr}) { | ||
899 | $self->{_var_values} = {}; | ||
900 | } | ||
901 | |||
902 | } | ||
903 | |||
904 | |||
905 | ############################################################################## | ||
906 | # Method: find_xml_file() | ||
907 | # | ||
908 | # Helper routine for XMLin(). | ||
909 | # Takes a filename, and a list of directories, attempts to locate the file in | ||
910 | # the directories listed. | ||
911 | # Returns a full pathname on success; croaks on failure. | ||
912 | # | ||
913 | |||
914 | sub find_xml_file { | ||
915 | my $self = shift; | ||
916 | my $file = shift; | ||
917 | my @search_path = @_; | ||
918 | |||
919 | |||
920 | require File::Basename; | ||
921 | require File::Spec; | ||
922 | |||
923 | my($filename, $filedir) = File::Basename::fileparse($file); | ||
924 | |||
925 | if($filename ne $file) { # Ignore searchpath if dir component | ||
926 | return($file) if(-e $file); | ||
927 | } | ||
928 | else { | ||
929 | my($path); | ||
930 | foreach $path (@search_path) { | ||
931 | my $fullpath = File::Spec->catfile($path, $file); | ||
932 | return($fullpath) if(-e $fullpath); | ||
933 | } | ||
934 | } | ||
935 | |||
936 | # If user did not supply a search path, default to current directory | ||
937 | |||
938 | if(!@search_path) { | ||
939 | return($file) if(-e $file); | ||
940 | croak "File does not exist: $file"; | ||
941 | } | ||
942 | |||
943 | croak "Could not find $file in ", join(':', @search_path); | ||
944 | } | ||
945 | |||
946 | |||
947 | ############################################################################## | ||
948 | # Method: collapse() | ||
949 | # | ||
950 | # Helper routine for XMLin(). This routine really comprises the 'smarts' (or | ||
951 | # value add) of this module. | ||
952 | # | ||
953 | # Takes the parse tree that XML::Parser produced from the supplied XML and | ||
954 | # recurses through it 'collapsing' unnecessary levels of indirection (nested | ||
955 | # arrays etc) to produce a data structure that is easier to work with. | ||
956 | # | ||
957 | # Elements in the original parser tree are represented as an element name | ||
958 | # followed by an arrayref. The first element of the array is a hashref | ||
959 | # containing the attributes. The rest of the array contains a list of any | ||
960 | # nested elements as name+arrayref pairs: | ||
961 | # | ||
962 | # <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] | ||
963 | # | ||
964 | # The special element name '0' (zero) flags text content. | ||
965 | # | ||
966 | # This routine cuts down the noise by discarding any text content consisting of | ||
967 | # only whitespace and then moves the nested elements into the attribute hash | ||
968 | # using the name of the nested element as the hash key and the collapsed | ||
969 | # version of the nested element as the value. Multiple nested elements with | ||
970 | # the same name will initially be represented as an arrayref, but this may be | ||
971 | # 'folded' into a hashref depending on the value of the keyattr option. | ||
972 | # | ||
973 | |||
974 | sub collapse { | ||
975 | my $self = shift; | ||
976 | |||
977 | |||
978 | # Start with the hash of attributes | ||
979 | |||
980 | my $attr = shift; | ||
981 | if($self->{opt}->{noattr}) { # Discard if 'noattr' set | ||
982 | $attr = {}; | ||
983 | } | ||
984 | elsif($self->{opt}->{normalisespace} == 2) { | ||
985 | while(my($key, $value) = each %$attr) { | ||
986 | $attr->{$key} = $self->normalise_space($value) | ||
987 | } | ||
988 | } | ||
989 | |||
990 | |||
991 | # Do variable substitutions | ||
992 | |||
993 | if(my $var = $self->{_var_values}) { | ||
994 | while(my($key, $val) = each(%$attr)) { | ||
995 | $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; | ||
996 | $attr->{$key} = $val; | ||
997 | } | ||
998 | } | ||
999 | |||
1000 | |||
1001 | # Roll up 'value' attributes (but only if no nested elements) | ||
1002 | |||
1003 | if(!@_ and keys %$attr == 1) { | ||
1004 | my($k) = keys %$attr; | ||
1005 | if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { | ||
1006 | return $attr->{$k}; | ||
1007 | } | ||
1008 | } | ||
1009 | |||
1010 | |||
1011 | # Add any nested elements | ||
1012 | |||
1013 | my($key, $val); | ||
1014 | while(@_) { | ||
1015 | $key = shift; | ||
1016 | $val = shift; | ||
1017 | |||
1018 | if(ref($val)) { | ||
1019 | $val = $self->collapse(@$val); | ||
1020 | next if(!defined($val) and $self->{opt}->{suppressempty}); | ||
1021 | } | ||
1022 | elsif($key eq '0') { | ||
1023 | next if($val =~ m{^\s*$}s); # Skip all whitespace content | ||
1024 | |||
1025 | $val = $self->normalise_space($val) | ||
1026 | if($self->{opt}->{normalisespace} == 2); | ||
1027 | |||
1028 | # do variable substitutions | ||
1029 | |||
1030 | if(my $var = $self->{_var_values}) { | ||
1031 | $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; | ||
1032 | } | ||
1033 | |||
1034 | |||
1035 | # look for variable definitions | ||
1036 | |||
1037 | if(my $var = $self->{opt}->{varattr}) { | ||
1038 | if(exists $attr->{$var}) { | ||
1039 | $self->set_var($attr->{$var}, $val); | ||
1040 | } | ||
1041 | } | ||
1042 | |||
1043 | |||
1044 | # Collapse text content in element with no attributes to a string | ||
1045 | |||
1046 | if(!%$attr and !@_) { | ||
1047 | return($self->{opt}->{forcecontent} ? | ||
1048 | { $self->{opt}->{contentkey} => $val } : $val | ||
1049 | ); | ||
1050 | } | ||
1051 | $key = $self->{opt}->{contentkey}; | ||
1052 | } | ||
1053 | |||
1054 | |||
1055 | # Combine duplicate attributes into arrayref if required | ||
1056 | |||
1057 | if(exists($attr->{$key})) { | ||
1058 | if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { | ||
1059 | push(@{$attr->{$key}}, $val); | ||
1060 | } | ||
1061 | else { | ||
1062 | $attr->{$key} = [ $attr->{$key}, $val ]; | ||
1063 | } | ||
1064 | } | ||
1065 | elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | ||
1066 | $attr->{$key} = [ $val ]; | ||
1067 | } | ||
1068 | else { | ||
1069 | if( $key ne $self->{opt}->{contentkey} | ||
1070 | and ( | ||
1071 | ($self->{opt}->{forcearray} == 1) | ||
1072 | or ( | ||
1073 | (ref($self->{opt}->{forcearray}) eq 'HASH') | ||
1074 | and ( | ||
1075 | $self->{opt}->{forcearray}->{$key} | ||
1076 | or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) | ||
1077 | ) | ||
1078 | ) | ||
1079 | ) | ||
1080 | ) { | ||
1081 | $attr->{$key} = [ $val ]; | ||
1082 | } | ||
1083 | else { | ||
1084 | $attr->{$key} = $val; | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1088 | } | ||
1089 | |||
1090 | |||
1091 | # Turn arrayrefs into hashrefs if key fields present | ||
1092 | |||
1093 | if($self->{opt}->{keyattr}) { | ||
1094 | while(($key,$val) = each %$attr) { | ||
1095 | if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | ||
1096 | $attr->{$key} = $self->array_to_hash($key, $val); | ||
1097 | } | ||
1098 | } | ||
1099 | } | ||
1100 | |||
1101 | |||
1102 | # disintermediate grouped tags | ||
1103 | |||
1104 | if($self->{opt}->{grouptags}) { | ||
1105 | while(my($key, $val) = each(%$attr)) { | ||
1106 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | ||
1107 | next unless(exists($self->{opt}->{grouptags}->{$key})); | ||
1108 | |||
1109 | my($child_key, $child_val) = %$val; | ||
1110 | |||
1111 | if($self->{opt}->{grouptags}->{$key} eq $child_key) { | ||
1112 | $attr->{$key}= $child_val; | ||
1113 | } | ||
1114 | } | ||
1115 | } | ||
1116 | |||
1117 | |||
1118 | # Fold hashes containing a single anonymous array up into just the array | ||
1119 | |||
1120 | my $count = scalar keys %$attr; | ||
1121 | if($count == 1 | ||
1122 | and exists $attr->{anon} | ||
1123 | and UNIVERSAL::isa($attr->{anon}, 'ARRAY') | ||
1124 | ) { | ||
1125 | return($attr->{anon}); | ||
1126 | } | ||
1127 | |||
1128 | |||
1129 | # Do the right thing if hash is empty, otherwise just return it | ||
1130 | |||
1131 | if(!%$attr and exists($self->{opt}->{suppressempty})) { | ||
1132 | if(defined($self->{opt}->{suppressempty}) and | ||
1133 | $self->{opt}->{suppressempty} eq '') { | ||
1134 | return(''); | ||
1135 | } | ||
1136 | return(undef); | ||
1137 | } | ||
1138 | |||
1139 | |||
1140 | # Roll up named elements with named nested 'value' attributes | ||
1141 | |||
1142 | if($self->{opt}->{valueattr}) { | ||
1143 | while(my($key, $val) = each(%$attr)) { | ||
1144 | next unless($self->{opt}->{valueattr}->{$key}); | ||
1145 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | ||
1146 | my($k) = keys %$val; | ||
1147 | next unless($k eq $self->{opt}->{valueattr}->{$key}); | ||
1148 | $attr->{$key} = $val->{$k}; | ||
1149 | } | ||
1150 | } | ||
1151 | |||
1152 | return($attr) | ||
1153 | |||
1154 | } | ||
1155 | |||
1156 | |||
1157 | ############################################################################## | ||
1158 | # Method: set_var() | ||
1159 | # | ||
1160 | # Called when a variable definition is encountered in the XML. (A variable | ||
1161 | # definition looks like <element attrname="name">value</element> where attrname | ||
1162 | # matches the varattr setting). | ||
1163 | # | ||
1164 | |||
1165 | sub set_var { | ||
1166 | my($self, $name, $value) = @_; | ||
1167 | |||
1168 | $self->{_var_values}->{$name} = $value; | ||
1169 | } | ||
1170 | |||
1171 | |||
1172 | ############################################################################## | ||
1173 | # Method: get_var() | ||
1174 | # | ||
1175 | # Called during variable substitution to get the value for the named variable. | ||
1176 | # | ||
1177 | |||
1178 | sub get_var { | ||
1179 | my($self, $name) = @_; | ||
1180 | |||
1181 | my $value = $self->{_var_values}->{$name}; | ||
1182 | return $value if(defined($value)); | ||
1183 | |||
1184 | return '${' . $name . '}'; | ||
1185 | } | ||
1186 | |||
1187 | |||
1188 | ############################################################################## | ||
1189 | # Method: normalise_space() | ||
1190 | # | ||
1191 | # Strips leading and trailing whitespace and collapses sequences of whitespace | ||
1192 | # characters to a single space. | ||
1193 | # | ||
1194 | |||
1195 | sub normalise_space { | ||
1196 | my($self, $text) = @_; | ||
1197 | |||
1198 | $text =~ s/^\s+//s; | ||
1199 | $text =~ s/\s+$//s; | ||
1200 | $text =~ s/\s\s+/ /sg; | ||
1201 | |||
1202 | return $text; | ||
1203 | } | ||
1204 | |||
1205 | |||
1206 | ############################################################################## | ||
1207 | # Method: array_to_hash() | ||
1208 | # | ||
1209 | # Helper routine for collapse(). | ||
1210 | # Attempts to 'fold' an array of hashes into an hash of hashes. Returns a | ||
1211 | # reference to the hash on success or the original array if folding is | ||
1212 | # not possible. Behaviour is controlled by 'keyattr' option. | ||
1213 | # | ||
1214 | |||
1215 | sub array_to_hash { | ||
1216 | my $self = shift; | ||
1217 | my $name = shift; | ||
1218 | my $arrayref = shift; | ||
1219 | |||
1220 | my $hashref = $self->new_hashref; | ||
1221 | |||
1222 | my($i, $key, $val, $flag); | ||
1223 | |||
1224 | |||
1225 | # Handle keyattr => { .... } | ||
1226 | |||
1227 | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | ||
1228 | return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); | ||
1229 | ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; | ||
1230 | for($i = 0; $i < @$arrayref; $i++) { | ||
1231 | if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and | ||
1232 | exists($arrayref->[$i]->{$key}) | ||
1233 | ) { | ||
1234 | $val = $arrayref->[$i]->{$key}; | ||
1235 | if(ref($val)) { | ||
1236 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); | ||
1237 | return($arrayref); | ||
1238 | } | ||
1239 | $val = $self->normalise_space($val) | ||
1240 | if($self->{opt}->{normalisespace} == 1); | ||
1241 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | ||
1242 | if(exists($hashref->{$val})); | ||
1243 | $hashref->{$val} = { %{$arrayref->[$i]} }; | ||
1244 | $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); | ||
1245 | delete $hashref->{$val}->{$key} unless($flag eq '+'); | ||
1246 | } | ||
1247 | else { | ||
1248 | $self->die_or_warn("<$name> element has no '$key' key attribute"); | ||
1249 | return($arrayref); | ||
1250 | } | ||
1251 | } | ||
1252 | } | ||
1253 | |||
1254 | |||
1255 | # Or assume keyattr => [ .... ] | ||
1256 | |||
1257 | else { | ||
1258 | my $default_keys = | ||
1259 | join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); | ||
1260 | |||
1261 | ELEMENT: for($i = 0; $i < @$arrayref; $i++) { | ||
1262 | return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); | ||
1263 | |||
1264 | foreach $key (@{$self->{opt}->{keyattr}}) { | ||
1265 | if(defined($arrayref->[$i]->{$key})) { | ||
1266 | $val = $arrayref->[$i]->{$key}; | ||
1267 | if(ref($val)) { | ||
1268 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") | ||
1269 | if not $default_keys; | ||
1270 | return($arrayref); | ||
1271 | } | ||
1272 | $val = $self->normalise_space($val) | ||
1273 | if($self->{opt}->{normalisespace} == 1); | ||
1274 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | ||
1275 | if(exists($hashref->{$val})); | ||
1276 | $hashref->{$val} = { %{$arrayref->[$i]} }; | ||
1277 | delete $hashref->{$val}->{$key}; | ||
1278 | next ELEMENT; | ||
1279 | } | ||
1280 | } | ||
1281 | |||
1282 | return($arrayref); # No keyfield matched | ||
1283 | } | ||
1284 | } | ||
1285 | |||
1286 | # collapse any hashes which now only have a 'content' key | ||
1287 | |||
1288 | if($self->{opt}->{collapseagain}) { | ||
1289 | $hashref = $self->collapse_content($hashref); | ||
1290 | } | ||
1291 | |||
1292 | return($hashref); | ||
1293 | } | ||
1294 | |||
1295 | |||
1296 | ############################################################################## | ||
1297 | # Method: die_or_warn() | ||
1298 | # | ||
1299 | # Takes a diagnostic message and does one of three things: | ||
1300 | # 1. dies if strict mode is enabled | ||
1301 | # 2. warns if warnings are enabled but strict mode is not | ||
1302 | # 3. ignores message and resturns silently if neither strict mode nor warnings | ||
1303 | # are enabled | ||
1304 | # | ||
1305 | |||
1306 | sub die_or_warn { | ||
1307 | my $self = shift; | ||
1308 | my $msg = shift; | ||
1309 | |||
1310 | croak $msg if($StrictMode); | ||
1311 | carp "Warning: $msg" if($^W); | ||
1312 | } | ||
1313 | |||
1314 | |||
1315 | ############################################################################## | ||
1316 | # Method: new_hashref() | ||
1317 | # | ||
1318 | # This is a hook routine for overriding in a sub-class. Some people believe | ||
1319 | # that using Tie::IxHash here will solve order-loss problems. | ||
1320 | # | ||
1321 | |||
1322 | sub new_hashref { | ||
1323 | my $self = shift; | ||
1324 | |||
1325 | return { @_ }; | ||
1326 | } | ||
1327 | |||
1328 | |||
1329 | ############################################################################## | ||
1330 | # Method: collapse_content() | ||
1331 | # | ||
1332 | # Helper routine for array_to_hash | ||
1333 | # | ||
1334 | # Arguments expected are: | ||
1335 | # - an XML::Simple object | ||
1336 | # - a hasref | ||
1337 | # the hashref is a former array, turned into a hash by array_to_hash because | ||
1338 | # of the presence of key attributes | ||
1339 | # at this point collapse_content avoids over-complicated structures like | ||
1340 | # dir => { libexecdir => { content => '$exec_prefix/libexec' }, | ||
1341 | # localstatedir => { content => '$prefix' }, | ||
1342 | # } | ||
1343 | # into | ||
1344 | # dir => { libexecdir => '$exec_prefix/libexec', | ||
1345 | # localstatedir => '$prefix', | ||
1346 | # } | ||
1347 | |||
1348 | sub collapse_content { | ||
1349 | my $self = shift; | ||
1350 | my $hashref = shift; | ||
1351 | |||
1352 | my $contentkey = $self->{opt}->{contentkey}; | ||
1353 | |||
1354 | # first go through the values,checking that they are fit to collapse | ||
1355 | foreach my $val (values %$hashref) { | ||
1356 | return $hashref unless ( (ref($val) eq 'HASH') | ||
1357 | and (keys %$val == 1) | ||
1358 | and (exists $val->{$contentkey}) | ||
1359 | ); | ||
1360 | } | ||
1361 | |||
1362 | # now collapse them | ||
1363 | foreach my $key (keys %$hashref) { | ||
1364 | $hashref->{$key}= $hashref->{$key}->{$contentkey}; | ||
1365 | } | ||
1366 | |||
1367 | return $hashref; | ||
1368 | } | ||
1369 | |||
1370 | |||
1371 | ############################################################################## | ||
1372 | # Method: value_to_xml() | ||
1373 | # | ||
1374 | # Helper routine for XMLout() - recurses through a data structure building up | ||
1375 | # and returning an XML representation of that structure as a string. | ||
1376 | # | ||
1377 | # Arguments expected are: | ||
1378 | # - the data structure to be encoded (usually a reference) | ||
1379 | # - the XML tag name to use for this item | ||
1380 | # - a string of spaces for use as the current indent level | ||
1381 | # | ||
1382 | |||
1383 | sub value_to_xml { | ||
1384 | my $self = shift;; | ||
1385 | |||
1386 | |||
1387 | # Grab the other arguments | ||
1388 | |||
1389 | my($ref, $name, $indent) = @_; | ||
1390 | |||
1391 | my $named = (defined($name) and $name ne '' ? 1 : 0); | ||
1392 | |||
1393 | my $nl = "\n"; | ||
1394 | |||
1395 | my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! | ||
1396 | if($self->{opt}->{noindent}) { | ||
1397 | $indent = ''; | ||
1398 | $nl = ''; | ||
1399 | } | ||
1400 | |||
1401 | |||
1402 | # Convert to XML | ||
1403 | |||
1404 | if(ref($ref)) { | ||
1405 | croak "circular data structures not supported" | ||
1406 | if(grep($_ == $ref, @{$self->{_ancestors}})); | ||
1407 | push @{$self->{_ancestors}}, $ref; | ||
1408 | } | ||
1409 | else { | ||
1410 | if($named) { | ||
1411 | return(join('', | ||
1412 | $indent, '<', $name, '>', | ||
1413 | ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), | ||
1414 | '</', $name, ">", $nl | ||
1415 | )); | ||
1416 | } | ||
1417 | else { | ||
1418 | return("$ref$nl"); | ||
1419 | } | ||
1420 | } | ||
1421 | |||
1422 | |||
1423 | # Unfold hash to array if possible | ||
1424 | |||
1425 | if(UNIVERSAL::isa($ref, 'HASH') # It is a hash | ||
1426 | and keys %$ref # and it's not empty | ||
1427 | and $self->{opt}->{keyattr} # and folding is enabled | ||
1428 | and !$is_root # and its not the root element | ||
1429 | ) { | ||
1430 | $ref = $self->hash_to_array($name, $ref); | ||
1431 | } | ||
1432 | |||
1433 | |||
1434 | my @result = (); | ||
1435 | my($key, $value); | ||
1436 | |||
1437 | |||
1438 | # Handle hashrefs | ||
1439 | |||
1440 | if(UNIVERSAL::isa($ref, 'HASH')) { | ||
1441 | |||
1442 | # Reintermediate grouped values if applicable | ||
1443 | |||
1444 | if($self->{opt}->{grouptags}) { | ||
1445 | $ref = $self->copy_hash($ref); | ||
1446 | while(my($key, $val) = each %$ref) { | ||
1447 | if($self->{opt}->{grouptags}->{$key}) { | ||
1448 | $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; | ||
1449 | } | ||
1450 | } | ||
1451 | } | ||
1452 | |||
1453 | |||
1454 | # Scan for namespace declaration attributes | ||
1455 | |||
1456 | my $nsdecls = ''; | ||
1457 | my $default_ns_uri; | ||
1458 | if($self->{nsup}) { | ||
1459 | $ref = $self->copy_hash($ref); | ||
1460 | $self->{nsup}->push_context(); | ||
1461 | |||
1462 | # Look for default namespace declaration first | ||
1463 | |||
1464 | if(exists($ref->{xmlns})) { | ||
1465 | $self->{nsup}->declare_prefix('', $ref->{xmlns}); | ||
1466 | $nsdecls .= qq( xmlns="$ref->{xmlns}"); | ||
1467 | delete($ref->{xmlns}); | ||
1468 | } | ||
1469 | $default_ns_uri = $self->{nsup}->get_uri(''); | ||
1470 | |||
1471 | |||
1472 | # Then check all the other keys | ||
1473 | |||
1474 | foreach my $qname (keys(%$ref)) { | ||
1475 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | ||
1476 | if($uri) { | ||
1477 | if($uri eq $xmlns_ns) { | ||
1478 | $self->{nsup}->declare_prefix($lname, $ref->{$qname}); | ||
1479 | $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); | ||
1480 | delete($ref->{$qname}); | ||
1481 | } | ||
1482 | } | ||
1483 | } | ||
1484 | |||
1485 | # Translate any remaining Clarkian names | ||
1486 | |||
1487 | foreach my $qname (keys(%$ref)) { | ||
1488 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | ||
1489 | if($uri) { | ||
1490 | if($default_ns_uri and $uri eq $default_ns_uri) { | ||
1491 | $ref->{$lname} = $ref->{$qname}; | ||
1492 | delete($ref->{$qname}); | ||
1493 | } | ||
1494 | else { | ||
1495 | my $prefix = $self->{nsup}->get_prefix($uri); | ||
1496 | unless($prefix) { | ||
1497 | # $self->{nsup}->declare_prefix(undef, $uri); | ||
1498 | # $prefix = $self->{nsup}->get_prefix($uri); | ||
1499 | $prefix = $self->{ns_prefix}++; | ||
1500 | $self->{nsup}->declare_prefix($prefix, $uri); | ||
1501 | $nsdecls .= qq( xmlns:$prefix="$uri"); | ||
1502 | } | ||
1503 | $ref->{"$prefix:$lname"} = $ref->{$qname}; | ||
1504 | delete($ref->{$qname}); | ||
1505 | } | ||
1506 | } | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1510 | |||
1511 | my @nested = (); | ||
1512 | my $text_content = undef; | ||
1513 | if($named) { | ||
1514 | push @result, $indent, '<', $name, $nsdecls; | ||
1515 | } | ||
1516 | |||
1517 | if(keys %$ref) { | ||
1518 | my $first_arg = 1; | ||
1519 | foreach my $key ($self->sorted_keys($name, $ref)) { | ||
1520 | my $value = $ref->{$key}; | ||
1521 | next if(substr($key, 0, 1) eq '-'); | ||
1522 | if(!defined($value)) { | ||
1523 | next if $self->{opt}->{suppressempty}; | ||
1524 | unless(exists($self->{opt}->{suppressempty}) | ||
1525 | and !defined($self->{opt}->{suppressempty}) | ||
1526 | ) { | ||
1527 | carp 'Use of uninitialized value' if($^W); | ||
1528 | } | ||
1529 | if($key eq $self->{opt}->{contentkey}) { | ||
1530 | $text_content = ''; | ||
1531 | } | ||
1532 | else { | ||
1533 | $value = exists($self->{opt}->{suppressempty}) ? {} : ''; | ||
1534 | } | ||
1535 | } | ||
1536 | |||
1537 | if(!ref($value) | ||
1538 | and $self->{opt}->{valueattr} | ||
1539 | and $self->{opt}->{valueattr}->{$key} | ||
1540 | ) { | ||
1541 | $value = { $self->{opt}->{valueattr}->{$key} => $value }; | ||
1542 | } | ||
1543 | |||
1544 | if(ref($value) or $self->{opt}->{noattr}) { | ||
1545 | push @nested, | ||
1546 | $self->value_to_xml($value, $key, "$indent "); | ||
1547 | } | ||
1548 | else { | ||
1549 | $value = $self->escape_value($value) unless($self->{opt}->{noescape}); | ||
1550 | if($key eq $self->{opt}->{contentkey}) { | ||
1551 | $text_content = $value; | ||
1552 | } | ||
1553 | else { | ||
1554 | push @result, "\n$indent " . ' ' x length($name) | ||
1555 | if($self->{opt}->{attrindent} and !$first_arg); | ||
1556 | push @result, ' ', $key, '="', $value , '"'; | ||
1557 | $first_arg = 0; | ||
1558 | } | ||
1559 | } | ||
1560 | } | ||
1561 | } | ||
1562 | else { | ||
1563 | $text_content = ''; | ||
1564 | } | ||
1565 | |||
1566 | if(@nested or defined($text_content)) { | ||
1567 | if($named) { | ||
1568 | push @result, ">"; | ||
1569 | if(defined($text_content)) { | ||
1570 | push @result, $text_content; | ||
1571 | $nested[0] =~ s/^\s+// if(@nested); | ||
1572 | } | ||
1573 | else { | ||
1574 | push @result, $nl; | ||
1575 | } | ||
1576 | if(@nested) { | ||
1577 | push @result, @nested, $indent; | ||
1578 | } | ||
1579 | push @result, '</', $name, ">", $nl; | ||
1580 | } | ||
1581 | else { | ||
1582 | push @result, @nested; # Special case if no root elements | ||
1583 | } | ||
1584 | } | ||
1585 | else { | ||
1586 | push @result, " />", $nl; | ||
1587 | } | ||
1588 | $self->{nsup}->pop_context() if($self->{nsup}); | ||
1589 | } | ||
1590 | |||
1591 | |||
1592 | # Handle arrayrefs | ||
1593 | |||
1594 | elsif(UNIVERSAL::isa($ref, 'ARRAY')) { | ||
1595 | foreach $value (@$ref) { | ||
1596 | next if !defined($value) and $self->{opt}->{suppressempty}; | ||
1597 | if(!ref($value)) { | ||
1598 | push @result, | ||
1599 | $indent, '<', $name, '>', | ||
1600 | ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), | ||
1601 | '</', $name, ">$nl"; | ||
1602 | } | ||
1603 | elsif(UNIVERSAL::isa($value, 'HASH')) { | ||
1604 | push @result, $self->value_to_xml($value, $name, $indent); | ||
1605 | } | ||
1606 | else { | ||
1607 | push @result, | ||
1608 | $indent, '<', $name, ">$nl", | ||
1609 | $self->value_to_xml($value, 'anon', "$indent "), | ||
1610 | $indent, '</', $name, ">$nl"; | ||
1611 | } | ||
1612 | } | ||
1613 | } | ||
1614 | |||
1615 | else { | ||
1616 | croak "Can't encode a value of type: " . ref($ref); | ||
1617 | } | ||
1618 | |||
1619 | |||
1620 | pop @{$self->{_ancestors}} if(ref($ref)); | ||
1621 | |||
1622 | return(join('', @result)); | ||
1623 | } | ||
1624 | |||
1625 | |||
1626 | ############################################################################## | ||
1627 | # Method: sorted_keys() | ||
1628 | # | ||
1629 | # Returns the keys of the referenced hash sorted into alphabetical order, but | ||
1630 | # with the 'key' key (as in KeyAttr) first, if there is one. | ||
1631 | # | ||
1632 | |||
1633 | sub sorted_keys { | ||
1634 | my($self, $name, $ref) = @_; | ||
1635 | |||
1636 | return keys %$ref if $self->{opt}->{nosort}; | ||
1637 | |||
1638 | my %hash = %$ref; | ||
1639 | my $keyattr = $self->{opt}->{keyattr}; | ||
1640 | |||
1641 | my @key; | ||
1642 | |||
1643 | if(ref $keyattr eq 'HASH') { | ||
1644 | if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { | ||
1645 | push @key, $keyattr->{$name}->[0]; | ||
1646 | delete $hash{$keyattr->{$name}->[0]}; | ||
1647 | } | ||
1648 | } | ||
1649 | elsif(ref $keyattr eq 'ARRAY') { | ||
1650 | foreach (@{$keyattr}) { | ||
1651 | if(exists $hash{$_}) { | ||
1652 | push @key, $_; | ||
1653 | delete $hash{$_}; | ||
1654 | last; | ||
1655 | } | ||
1656 | } | ||
1657 | } | ||
1658 | |||
1659 | return(@key, sort keys %hash); | ||
1660 | } | ||
1661 | |||
1662 | ############################################################################## | ||
1663 | # Method: escape_value() | ||
1664 | # | ||
1665 | # Helper routine for automatically escaping values for XMLout(). | ||
1666 | # Expects a scalar data value. Returns escaped version. | ||
1667 | # | ||
1668 | |||
1669 | sub escape_value { | ||
1670 | my($self, $data) = @_; | ||
1671 | |||
1672 | return '' unless(defined($data)); | ||
1673 | |||
1674 | $data =~ s/&/&/sg; | ||
1675 | $data =~ s/</</sg; | ||
1676 | $data =~ s/>/>/sg; | ||
1677 | $data =~ s/"/"/sg; | ||
1678 | |||
1679 | my $level = $self->{opt}->{numericescape} or return $data; | ||
1680 | |||
1681 | return $self->numeric_escape($data, $level); | ||
1682 | } | ||
1683 | |||
1684 | sub numeric_escape { | ||
1685 | my($self, $data, $level) = @_; | ||
1686 | |||
1687 | use utf8; # required for 5.6 | ||
1688 | |||
1689 | if($self->{opt}->{numericescape} eq '2') { | ||
1690 | $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; | ||
1691 | } | ||
1692 | else { | ||
1693 | $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; | ||
1694 | } | ||
1695 | |||
1696 | return $data; | ||
1697 | } | ||
1698 | |||
1699 | |||
1700 | ############################################################################## | ||
1701 | # Method: hash_to_array() | ||
1702 | # | ||
1703 | # Helper routine for value_to_xml(). | ||
1704 | # Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a | ||
1705 | # reference to the array on success or the original hash if unfolding is | ||
1706 | # not possible. | ||
1707 | # | ||
1708 | |||
1709 | sub hash_to_array { | ||
1710 | my $self = shift; | ||
1711 | my $parent = shift; | ||
1712 | my $hashref = shift; | ||
1713 | |||
1714 | my $arrayref = []; | ||
1715 | |||
1716 | my($key, $value); | ||
1717 | |||
1718 | my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; | ||
1719 | foreach $key (@keys) { | ||
1720 | $value = $hashref->{$key}; | ||
1721 | return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); | ||
1722 | |||
1723 | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | ||
1724 | return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); | ||
1725 | push @$arrayref, $self->copy_hash( | ||
1726 | $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key | ||
1727 | ); | ||
1728 | } | ||
1729 | else { | ||
1730 | push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); | ||
1731 | } | ||
1732 | } | ||
1733 | |||
1734 | return($arrayref); | ||
1735 | } | ||
1736 | |||
1737 | |||
1738 | ############################################################################## | ||
1739 | # Method: copy_hash() | ||
1740 | # | ||
1741 | # Helper routine for hash_to_array(). When unfolding a hash of hashes into | ||
1742 | # an array of hashes, we need to copy the key from the outer hash into the | ||
1743 | # inner hash. This routine makes a copy of the original hash so we don't | ||
1744 | # destroy the original data structure. You might wish to override this | ||
1745 | # method if you're using tied hashes and don't want them to get untied. | ||
1746 | # | ||
1747 | |||
1748 | sub copy_hash { | ||
1749 | my($self, $orig, @extra) = @_; | ||
1750 | |||
1751 | return { @extra, %$orig }; | ||
1752 | } | ||
1753 | |||
1754 | ############################################################################## | ||
1755 | # Methods required for building trees from SAX events | ||
1756 | ############################################################################## | ||
1757 | |||
1758 | sub start_document { | ||
1759 | my $self = shift; | ||
1760 | |||
1761 | $self->handle_options('in') unless($self->{opt}); | ||
1762 | |||
1763 | $self->{lists} = []; | ||
1764 | $self->{curlist} = $self->{tree} = []; | ||
1765 | } | ||
1766 | |||
1767 | |||
1768 | sub start_element { | ||
1769 | my $self = shift; | ||
1770 | my $element = shift; | ||
1771 | |||
1772 | my $name = $element->{Name}; | ||
1773 | if($self->{opt}->{nsexpand}) { | ||
1774 | $name = $element->{LocalName} || ''; | ||
1775 | if($element->{NamespaceURI}) { | ||
1776 | $name = '{' . $element->{NamespaceURI} . '}' . $name; | ||
1777 | } | ||
1778 | } | ||
1779 | my $attributes = {}; | ||
1780 | if($element->{Attributes}) { # Might be undef | ||
1781 | foreach my $attr (values %{$element->{Attributes}}) { | ||
1782 | if($self->{opt}->{nsexpand}) { | ||
1783 | my $name = $attr->{LocalName} || ''; | ||
1784 | if($attr->{NamespaceURI}) { | ||
1785 | $name = '{' . $attr->{NamespaceURI} . '}' . $name | ||
1786 | } | ||
1787 | $name = 'xmlns' if($name eq $bad_def_ns_jcn); | ||
1788 | $attributes->{$name} = $attr->{Value}; | ||
1789 | } | ||
1790 | else { | ||
1791 | $attributes->{$attr->{Name}} = $attr->{Value}; | ||
1792 | } | ||
1793 | } | ||
1794 | } | ||
1795 | my $newlist = [ $attributes ]; | ||
1796 | push @{ $self->{lists} }, $self->{curlist}; | ||
1797 | push @{ $self->{curlist} }, $name => $newlist; | ||
1798 | $self->{curlist} = $newlist; | ||
1799 | } | ||
1800 | |||
1801 | |||
1802 | sub characters { | ||
1803 | my $self = shift; | ||
1804 | my $chars = shift; | ||
1805 | |||
1806 | my $text = $chars->{Data}; | ||
1807 | my $clist = $self->{curlist}; | ||
1808 | my $pos = $#$clist; | ||
1809 | |||
1810 | if ($pos > 0 and $clist->[$pos - 1] eq '0') { | ||
1811 | $clist->[$pos] .= $text; | ||
1812 | } | ||
1813 | else { | ||
1814 | push @$clist, 0 => $text; | ||
1815 | } | ||
1816 | } | ||
1817 | |||
1818 | |||
1819 | sub end_element { | ||
1820 | my $self = shift; | ||
1821 | |||
1822 | $self->{curlist} = pop @{ $self->{lists} }; | ||
1823 | } | ||
1824 | |||
1825 | |||
1826 | sub end_document { | ||
1827 | my $self = shift; | ||
1828 | |||
1829 | delete($self->{curlist}); | ||
1830 | delete($self->{lists}); | ||
1831 | |||
1832 | my $tree = $self->{tree}; | ||
1833 | delete($self->{tree}); | ||
1834 | |||
1835 | |||
1836 | # Return tree as-is to XMLin() | ||
1837 | |||
1838 | return($tree) if($self->{nocollapse}); | ||
1839 | |||
1840 | |||
1841 | # Or collapse it before returning it to SAX parser class | ||
1842 | |||
1843 | if($self->{opt}->{keeproot}) { | ||
1844 | $tree = $self->collapse({}, @$tree); | ||
1845 | } | ||
1846 | else { | ||
1847 | $tree = $self->collapse(@{$tree->[1]}); | ||
1848 | } | ||
1849 | |||
1850 | if($self->{opt}->{datahandler}) { | ||
1851 | return($self->{opt}->{datahandler}->($self, $tree)); | ||
1852 | } | ||
1853 | |||
1854 | return($tree); | ||
1855 | } | ||
1856 | |||
1857 | *xml_in = \&XMLin; | ||
1858 | *xml_out = \&XMLout; | ||
1859 | |||
1860 | 1; | ||
1861 | |||
1862 | __END__ | ||
1863 | |||
1864 | =head1 QUICK START | ||
1865 | |||
1866 | Say you have a script called B<foo> and a file of configuration options | ||
1867 | called B<foo.xml> containing this: | ||
1868 | |||
1869 | <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> | ||
1870 | <server name="sahara" osname="solaris" osversion="2.6"> | ||
1871 | <address>10.0.0.101</address> | ||
1872 | <address>10.0.1.101</address> | ||
1873 | </server> | ||
1874 | <server name="gobi" osname="irix" osversion="6.5"> | ||
1875 | <address>10.0.0.102</address> | ||
1876 | </server> | ||
1877 | <server name="kalahari" osname="linux" osversion="2.0.34"> | ||
1878 | <address>10.0.0.103</address> | ||
1879 | <address>10.0.1.103</address> | ||
1880 | </server> | ||
1881 | </config> | ||
1882 | |||
1883 | The following lines of code in B<foo>: | ||
1884 | |||
1885 | use XML::Simple; | ||
1886 | |||
1887 | my $config = XMLin(); | ||
1888 | |||
1889 | will 'slurp' the configuration options into the hashref $config (because no | ||
1890 | arguments are passed to C<XMLin()> the name and location of the XML file will | ||
1891 | be inferred from name and location of the script). You can dump out the | ||
1892 | contents of the hashref using Data::Dumper: | ||
1893 | |||
1894 | use Data::Dumper; | ||
1895 | |||
1896 | print Dumper($config); | ||
1897 | |||
1898 | which will produce something like this (formatting has been adjusted for | ||
1899 | brevity): | ||
1900 | |||
1901 | { | ||
1902 | 'logdir' => '/var/log/foo/', | ||
1903 | 'debugfile' => '/tmp/foo.debug', | ||
1904 | 'server' => { | ||
1905 | 'sahara' => { | ||
1906 | 'osversion' => '2.6', | ||
1907 | 'osname' => 'solaris', | ||
1908 | 'address' => [ '10.0.0.101', '10.0.1.101' ] | ||
1909 | }, | ||
1910 | 'gobi' => { | ||
1911 | 'osversion' => '6.5', | ||
1912 | 'osname' => 'irix', | ||
1913 | 'address' => '10.0.0.102' | ||
1914 | }, | ||
1915 | 'kalahari' => { | ||
1916 | 'osversion' => '2.0.34', | ||
1917 | 'osname' => 'linux', | ||
1918 | 'address' => [ '10.0.0.103', '10.0.1.103' ] | ||
1919 | } | ||
1920 | } | ||
1921 | } | ||
1922 | |||
1923 | Your script could then access the name of the log directory like this: | ||
1924 | |||
1925 | print $config->{logdir}; | ||
1926 | |||
1927 | similarly, the second address on the server 'kalahari' could be referenced as: | ||
1928 | |||
1929 | print $config->{server}->{kalahari}->{address}->[1]; | ||
1930 | |||
1931 | What could be simpler? (Rhetorical). | ||
1932 | |||
1933 | For simple requirements, that's really all there is to it. If you want to | ||
1934 | store your XML in a different directory or file, or pass it in as a string or | ||
1935 | even pass it in via some derivative of an IO::Handle, you'll need to check out | ||
1936 | L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that | ||
1937 | neat little transformation that produced $config->{server}) you'll find options | ||
1938 | for that as well. | ||
1939 | |||
1940 | If you want to generate XML (for example to write a modified version of | ||
1941 | $config back out as XML), check out C<XMLout()>. | ||
1942 | |||
1943 | If your needs are not so simple, this may not be the module for you. In that | ||
1944 | case, you might want to read L<"WHERE TO FROM HERE?">. | ||
1945 | |||
1946 | =head1 DESCRIPTION | ||
1947 | |||
1948 | The XML::Simple module provides a simple API layer on top of an underlying XML | ||
1949 | parsing module (either XML::Parser or one of the SAX2 parser modules). Two | ||
1950 | functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity | ||
1951 | request the lower case versions of the function names: C<xml_in()> and | ||
1952 | C<xml_out()>. | ||
1953 | |||
1954 | The simplest approach is to call these two functions directly, but an | ||
1955 | optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) | ||
1956 | allows them to be called as methods of an B<XML::Simple> object. The object | ||
1957 | interface can also be used at either end of a SAX pipeline. | ||
1958 | |||
1959 | =head2 XMLin() | ||
1960 | |||
1961 | Parses XML formatted data and returns a reference to a data structure which | ||
1962 | contains the same information in a more readily accessible form. (Skip | ||
1963 | down to L<"EXAMPLES"> below, for more sample code). | ||
1964 | |||
1965 | C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => | ||
1966 | value' option pairs. The XML specifier can be one of the following: | ||
1967 | |||
1968 | =over 4 | ||
1969 | |||
1970 | =item A filename | ||
1971 | |||
1972 | If the filename contains no directory components C<XMLin()> will look for the | ||
1973 | file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the | ||
1974 | current directory if the SearchPath option is not defined. eg: | ||
1975 | |||
1976 | $ref = XMLin('/etc/params.xml'); | ||
1977 | |||
1978 | Note, the filename '-' can be used to parse from STDIN. | ||
1979 | |||
1980 | =item undef | ||
1981 | |||
1982 | If there is no XML specifier, C<XMLin()> will check the script directory and | ||
1983 | each of the SearchPath directories for a file with the same name as the script | ||
1984 | but with the extension '.xml'. Note: if you wish to specify options, you | ||
1985 | must specify the value 'undef'. eg: | ||
1986 | |||
1987 | $ref = XMLin(undef, ForceArray => 1); | ||
1988 | |||
1989 | =item A string of XML | ||
1990 | |||
1991 | A string containing XML (recognised by the presence of '<' and '>' characters) | ||
1992 | will be parsed directly. eg: | ||
1993 | |||
1994 | $ref = XMLin('<opt username="bob" password="flurp" />'); | ||
1995 | |||
1996 | =item An IO::Handle object | ||
1997 | |||
1998 | An IO::Handle object will be read to EOF and its contents parsed. eg: | ||
1999 | |||
2000 | $fh = IO::File->new('/etc/params.xml'); | ||
2001 | $ref = XMLin($fh); | ||
2002 | |||
2003 | =back | ||
2004 | |||
2005 | =head2 XMLout() | ||
2006 | |||
2007 | Takes a data structure (generally a hashref) and returns an XML encoding of | ||
2008 | that structure. If the resulting XML is parsed using C<XMLin()>, it should | ||
2009 | return a data structure equivalent to the original (see caveats below). | ||
2010 | |||
2011 | The C<XMLout()> function can also be used to output the XML as SAX events | ||
2012 | see the C<Handler> option and L<"SAX SUPPORT"> for more details). | ||
2013 | |||
2014 | When translating hashes to XML, hash keys which have a leading '-' will be | ||
2015 | silently skipped. This is the approved method for marking elements of a | ||
2016 | data structure which should be ignored by C<XMLout>. (Note: If these items | ||
2017 | were not skipped the key names would be emitted as element or attribute names | ||
2018 | with a leading '-' which would not be valid XML). | ||
2019 | |||
2020 | =head2 Caveats | ||
2021 | |||
2022 | Some care is required in creating data structures which will be passed to | ||
2023 | C<XMLout()>. Hash keys from the data structure will be encoded as either XML | ||
2024 | element names or attribute names. Therefore, you should use hash key names | ||
2025 | which conform to the relatively strict XML naming rules: | ||
2026 | |||
2027 | Names in XML must begin with a letter. The remaining characters may be | ||
2028 | letters, digits, hyphens (-), underscores (_) or full stops (.). It is also | ||
2029 | allowable to include one colon (:) in an element name but this should only be | ||
2030 | used when working with namespaces (B<XML::Simple> can only usefully work with | ||
2031 | namespaces when teamed with a SAX Parser). | ||
2032 | |||
2033 | You can use other punctuation characters in hash values (just not in hash | ||
2034 | keys) however B<XML::Simple> does not support dumping binary data. | ||
2035 | |||
2036 | If you break these rules, the current implementation of C<XMLout()> will | ||
2037 | simply emit non-compliant XML which will be rejected if you try to read it | ||
2038 | back in. (A later version of B<XML::Simple> might take a more proactive | ||
2039 | approach). | ||
2040 | |||
2041 | Note also that although you can nest hashes and arrays to arbitrary levels, | ||
2042 | circular data structures are not supported and will cause C<XMLout()> to die. | ||
2043 | |||
2044 | If you wish to 'round-trip' arbitrary data structures from Perl to XML and back | ||
2045 | to Perl, then you should probably disable array folding (using the KeyAttr | ||
2046 | option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the | ||
2047 | expected results, you may prefer to use L<XML::Dumper> which is designed for | ||
2048 | exactly that purpose. | ||
2049 | |||
2050 | Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. | ||
2051 | |||
2052 | |||
2053 | =head1 OPTIONS | ||
2054 | |||
2055 | B<XML::Simple> supports a number of options (in fact as each release of | ||
2056 | B<XML::Simple> adds more options, the module's claim to the name 'Simple' | ||
2057 | becomes increasingly tenuous). If you find yourself repeatedly having to | ||
2058 | specify the same options, you might like to investigate L<"OPTIONAL OO | ||
2059 | INTERFACE"> below. | ||
2060 | |||
2061 | If you can't be bothered reading the documentation, refer to | ||
2062 | L<"STRICT MODE"> to automatically catch common mistakes. | ||
2063 | |||
2064 | Because there are so many options, it's hard for new users to know which ones | ||
2065 | are important, so here are the two you really need to know about: | ||
2066 | |||
2067 | =over 4 | ||
2068 | |||
2069 | =item * | ||
2070 | |||
2071 | check out C<ForceArray> because you'll almost certainly want to turn it on | ||
2072 | |||
2073 | =item * | ||
2074 | |||
2075 | make sure you know what the C<KeyAttr> option does and what its default value is | ||
2076 | because it may surprise you otherwise (note in particular that 'KeyAttr' | ||
2077 | affects both C<XMLin> and C<XMLout>) | ||
2078 | |||
2079 | =back | ||
2080 | |||
2081 | The option name headings below have a trailing 'comment' - a hash followed by | ||
2082 | two pieces of metadata: | ||
2083 | |||
2084 | =over 4 | ||
2085 | |||
2086 | =item * | ||
2087 | |||
2088 | Options are marked with 'I<in>' if they are recognised by C<XMLin()> and | ||
2089 | 'I<out>' if they are recognised by C<XMLout()>. | ||
2090 | |||
2091 | =item * | ||
2092 | |||
2093 | Each option is also flagged to indicate whether it is: | ||
2094 | |||
2095 | 'important' - don't use the module until you understand this one | ||
2096 | 'handy' - you can skip this on the first time through | ||
2097 | 'advanced' - you can skip this on the second time through | ||
2098 | 'SAX only' - don't worry about this unless you're using SAX (or | ||
2099 | alternatively if you need this, you also need SAX) | ||
2100 | 'seldom used' - you'll probably never use this unless you were the | ||
2101 | person that requested the feature | ||
2102 | |||
2103 | =back | ||
2104 | |||
2105 | The options are listed alphabetically: | ||
2106 | |||
2107 | Note: option names are no longer case sensitive so you can use the mixed case | ||
2108 | versions shown here; all lower case as required by versions 2.03 and earlier; | ||
2109 | or you can add underscores between the words (eg: key_attr). | ||
2110 | |||
2111 | |||
2112 | =head2 AttrIndent => 1 I<# out - handy> | ||
2113 | |||
2114 | When you are using C<XMLout()>, enable this option to have attributes printed | ||
2115 | one-per-line with sensible indentation rather than all on one line. | ||
2116 | |||
2117 | =head2 Cache => [ cache schemes ] I<# in - advanced> | ||
2118 | |||
2119 | Because loading the B<XML::Parser> module and parsing an XML file can consume a | ||
2120 | significant number of CPU cycles, it is often desirable to cache the output of | ||
2121 | C<XMLin()> for later reuse. | ||
2122 | |||
2123 | When parsing from a named file, B<XML::Simple> supports a number of caching | ||
2124 | schemes. The 'Cache' option may be used to specify one or more schemes (using | ||
2125 | an anonymous array). Each scheme will be tried in turn in the hope of finding | ||
2126 | a cached pre-parsed representation of the XML file. If no cached copy is | ||
2127 | found, the file will be parsed and the first cache scheme in the list will be | ||
2128 | used to save a copy of the results. The following cache schemes have been | ||
2129 | implemented: | ||
2130 | |||
2131 | =over 4 | ||
2132 | |||
2133 | =item storable | ||
2134 | |||
2135 | Utilises B<Storable.pm> to read/write a cache file with the same name as the | ||
2136 | XML file but with the extension .stor | ||
2137 | |||
2138 | =item memshare | ||
2139 | |||
2140 | When a file is first parsed, a copy of the resulting data structure is retained | ||
2141 | in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse | ||
2142 | the same file will return a reference to this structure. This cached version | ||
2143 | will persist only for the life of the Perl interpreter (which in the case of | ||
2144 | mod_perl for example, may be some significant time). | ||
2145 | |||
2146 | Because each caller receives a reference to the same data structure, a change | ||
2147 | made by one caller will be visible to all. For this reason, the reference | ||
2148 | returned should be treated as read-only. | ||
2149 | |||
2150 | =item memcopy | ||
2151 | |||
2152 | This scheme works identically to 'memshare' (above) except that each caller | ||
2153 | receives a reference to a new data structure which is a copy of the cached | ||
2154 | version. Copying the data structure will add a little processing overhead, | ||
2155 | therefore this scheme should only be used where the caller intends to modify | ||
2156 | the data structure (or wishes to protect itself from others who might). This | ||
2157 | scheme uses B<Storable.pm> to perform the copy. | ||
2158 | |||
2159 | =back | ||
2160 | |||
2161 | Warning! The memory-based caching schemes compare the timestamp on the file to | ||
2162 | the time when it was last parsed. If the file is stored on an NFS filesystem | ||
2163 | (or other network share) and the clock on the file server is not exactly | ||
2164 | synchronised with the clock where your script is run, updates to the source XML | ||
2165 | file may appear to be ignored. | ||
2166 | |||
2167 | =head2 ContentKey => 'keyname' I<# in+out - seldom used> | ||
2168 | |||
2169 | When text content is parsed to a hash value, this option let's you specify a | ||
2170 | name for the hash key to override the default 'content'. So for example: | ||
2171 | |||
2172 | XMLin('<opt one="1">Text</opt>', ContentKey => 'text') | ||
2173 | |||
2174 | will parse to: | ||
2175 | |||
2176 | { 'one' => 1, 'text' => 'Text' } | ||
2177 | |||
2178 | instead of: | ||
2179 | |||
2180 | { 'one' => 1, 'content' => 'Text' } | ||
2181 | |||
2182 | C<XMLout()> will also honour the value of this option when converting a hashref | ||
2183 | to XML. | ||
2184 | |||
2185 | You can also prefix your selected key name with a '-' character to have | ||
2186 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | ||
2187 | array folding. For example: | ||
2188 | |||
2189 | XMLin( | ||
2190 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | ||
2191 | KeyAttr => {item => 'name'}, | ||
2192 | ForceArray => [ 'item' ], | ||
2193 | ContentKey => '-content' | ||
2194 | ) | ||
2195 | |||
2196 | will parse to: | ||
2197 | |||
2198 | { | ||
2199 | 'item' => { | ||
2200 | 'one' => 'First' | ||
2201 | 'two' => 'Second' | ||
2202 | } | ||
2203 | } | ||
2204 | |||
2205 | rather than this (without the '-'): | ||
2206 | |||
2207 | { | ||
2208 | 'item' => { | ||
2209 | 'one' => { 'content' => 'First' } | ||
2210 | 'two' => { 'content' => 'Second' } | ||
2211 | } | ||
2212 | } | ||
2213 | |||
2214 | =head2 DataHandler => code_ref I<# in - SAX only> | ||
2215 | |||
2216 | When you use an B<XML::Simple> object as a SAX handler, it will return a | ||
2217 | 'simple tree' data structure in the same format as C<XMLin()> would return. If | ||
2218 | this option is set (to a subroutine reference), then when the tree is built the | ||
2219 | subroutine will be called and passed two arguments: a reference to the | ||
2220 | B<XML::Simple> object and a reference to the data tree. The return value from | ||
2221 | the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for | ||
2222 | more details). | ||
2223 | |||
2224 | =head2 ForceArray => 1 I<# in - important> | ||
2225 | |||
2226 | This option should be set to '1' to force nested elements to be represented | ||
2227 | as arrays even when there is only one. Eg, with ForceArray enabled, this | ||
2228 | XML: | ||
2229 | |||
2230 | <opt> | ||
2231 | <name>value</name> | ||
2232 | </opt> | ||
2233 | |||
2234 | would parse to this: | ||
2235 | |||
2236 | { | ||
2237 | 'name' => [ | ||
2238 | 'value' | ||
2239 | ] | ||
2240 | } | ||
2241 | |||
2242 | instead of this (the default): | ||
2243 | |||
2244 | { | ||
2245 | 'name' => 'value' | ||
2246 | } | ||
2247 | |||
2248 | This option is especially useful if the data structure is likely to be written | ||
2249 | back out as XML and the default behaviour of rolling single nested elements up | ||
2250 | into attributes is not desirable. | ||
2251 | |||
2252 | If you are using the array folding feature, you should almost certainly enable | ||
2253 | this option. If you do not, single nested elements will not be parsed to | ||
2254 | arrays and therefore will not be candidates for folding to a hash. (Given that | ||
2255 | the default value of 'KeyAttr' enables array folding, the default value of this | ||
2256 | option should probably also have been enabled too - sorry). | ||
2257 | |||
2258 | =head2 ForceArray => [ names ] I<# in - important> | ||
2259 | |||
2260 | This alternative (and preferred) form of the 'ForceArray' option allows you to | ||
2261 | specify a list of element names which should always be forced into an array | ||
2262 | representation, rather than the 'all or nothing' approach above. | ||
2263 | |||
2264 | It is also possible (since version 2.05) to include compiled regular | ||
2265 | expressions in the list - any element names which match the pattern will be | ||
2266 | forced to arrays. If the list contains only a single regex, then it is not | ||
2267 | necessary to enclose it in an arrayref. Eg: | ||
2268 | |||
2269 | ForceArray => qr/_list$/ | ||
2270 | |||
2271 | =head2 ForceContent => 1 I<# in - seldom used> | ||
2272 | |||
2273 | When C<XMLin()> parses elements which have text content as well as attributes, | ||
2274 | the text content must be represented as a hash value rather than a simple | ||
2275 | scalar. This option allows you to force text content to always parse to | ||
2276 | a hash value even when there are no attributes. So for example: | ||
2277 | |||
2278 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | ||
2279 | |||
2280 | will parse to: | ||
2281 | |||
2282 | { | ||
2283 | 'x' => { 'content' => 'text1' }, | ||
2284 | 'y' => { 'a' => 2, 'content' => 'text2' } | ||
2285 | } | ||
2286 | |||
2287 | instead of: | ||
2288 | |||
2289 | { | ||
2290 | 'x' => 'text1', | ||
2291 | 'y' => { 'a' => 2, 'content' => 'text2' } | ||
2292 | } | ||
2293 | |||
2294 | =head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> | ||
2295 | |||
2296 | You can use this option to eliminate extra levels of indirection in your Perl | ||
2297 | data structure. For example this XML: | ||
2298 | |||
2299 | <opt> | ||
2300 | <searchpath> | ||
2301 | <dir>/usr/bin</dir> | ||
2302 | <dir>/usr/local/bin</dir> | ||
2303 | <dir>/usr/X11/bin</dir> | ||
2304 | </searchpath> | ||
2305 | </opt> | ||
2306 | |||
2307 | Would normally be read into a structure like this: | ||
2308 | |||
2309 | { | ||
2310 | searchpath => { | ||
2311 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | ||
2312 | } | ||
2313 | } | ||
2314 | |||
2315 | But when read in with the appropriate value for 'GroupTags': | ||
2316 | |||
2317 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | ||
2318 | |||
2319 | It will return this simpler structure: | ||
2320 | |||
2321 | { | ||
2322 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | ||
2323 | } | ||
2324 | |||
2325 | The grouping element (C<< <searchpath> >> in the example) must not contain any | ||
2326 | attributes or elements other than the grouped element. | ||
2327 | |||
2328 | You can specify multiple 'grouping element' to 'grouped element' mappings in | ||
2329 | the same hashref. If this option is combined with C<KeyAttr>, the array | ||
2330 | folding will occur first and then the grouped element names will be eliminated. | ||
2331 | |||
2332 | C<XMLout> will also use the grouptag mappings to re-introduce the tags around | ||
2333 | the grouped elements. Beware though that this will occur in all places that | ||
2334 | the 'grouping tag' name occurs - you probably don't want to use the same name | ||
2335 | for elements as well as attributes. | ||
2336 | |||
2337 | =head2 Handler => object_ref I<# out - SAX only> | ||
2338 | |||
2339 | Use the 'Handler' option to have C<XMLout()> generate SAX events rather than | ||
2340 | returning a string of XML. For more details see L<"SAX SUPPORT"> below. | ||
2341 | |||
2342 | Note: the current implementation of this option generates a string of XML | ||
2343 | and uses a SAX parser to translate it into SAX events. The normal encoding | ||
2344 | rules apply here - your data must be UTF8 encoded unless you specify an | ||
2345 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches | ||
2346 | the handler object, it will be in UTF8 form regardless of the encoding you | ||
2347 | supply. A future implementation of this option may generate the events | ||
2348 | directly. | ||
2349 | |||
2350 | =head2 KeepRoot => 1 I<# in+out - handy> | ||
2351 | |||
2352 | In its attempt to return a data structure free of superfluous detail and | ||
2353 | unnecessary levels of indirection, C<XMLin()> normally discards the root | ||
2354 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | ||
2355 | name to be retained. So after executing this code: | ||
2356 | |||
2357 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | ||
2358 | |||
2359 | You'll be able to reference the tempdir as | ||
2360 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | ||
2361 | C<$config-E<gt>{tempdir}>. | ||
2362 | |||
2363 | Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the | ||
2364 | data structure already contains a root element name and it is not necessary to | ||
2365 | add another. | ||
2366 | |||
2367 | =head2 KeyAttr => [ list ] I<# in+out - important> | ||
2368 | |||
2369 | This option controls the 'array folding' feature which translates nested | ||
2370 | elements from an array to a hash. It also controls the 'unfolding' of hashes | ||
2371 | to arrays. | ||
2372 | |||
2373 | For example, this XML: | ||
2374 | |||
2375 | <opt> | ||
2376 | <user login="grep" fullname="Gary R Epstein" /> | ||
2377 | <user login="stty" fullname="Simon T Tyson" /> | ||
2378 | </opt> | ||
2379 | |||
2380 | would, by default, parse to this: | ||
2381 | |||
2382 | { | ||
2383 | 'user' => [ | ||
2384 | { | ||
2385 | 'login' => 'grep', | ||
2386 | 'fullname' => 'Gary R Epstein' | ||
2387 | }, | ||
2388 | { | ||
2389 | 'login' => 'stty', | ||
2390 | 'fullname' => 'Simon T Tyson' | ||
2391 | } | ||
2392 | ] | ||
2393 | } | ||
2394 | |||
2395 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | ||
2396 | attribute is a key, the same XML would parse to: | ||
2397 | |||
2398 | { | ||
2399 | 'user' => { | ||
2400 | 'stty' => { | ||
2401 | 'fullname' => 'Simon T Tyson' | ||
2402 | }, | ||
2403 | 'grep' => { | ||
2404 | 'fullname' => 'Gary R Epstein' | ||
2405 | } | ||
2406 | } | ||
2407 | } | ||
2408 | |||
2409 | The key attribute names should be supplied in an arrayref if there is more | ||
2410 | than one. C<XMLin()> will attempt to match attribute names in the order | ||
2411 | supplied. C<XMLout()> will use the first attribute name supplied when | ||
2412 | 'unfolding' a hash into an array. | ||
2413 | |||
2414 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do | ||
2415 | not want folding on input or unfolding on output you must setting this option | ||
2416 | to an empty list to disable the feature. | ||
2417 | |||
2418 | Note 2: If you wish to use this option, you should also enable the | ||
2419 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | ||
2420 | rolled up into a scalar rather than an array and therefore will not be folded | ||
2421 | (since only arrays get folded). | ||
2422 | |||
2423 | =head2 KeyAttr => { list } I<# in+out - important> | ||
2424 | |||
2425 | This alternative (and preferred) method of specifiying the key attributes | ||
2426 | allows more fine grained control over which elements are folded and on which | ||
2427 | attributes. For example the option 'KeyAttr => { package => 'id' } will cause | ||
2428 | any package elements to be folded on the 'id' attribute. No other elements | ||
2429 | which have an 'id' attribute will be folded at all. | ||
2430 | |||
2431 | Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) | ||
2432 | if this syntax is used and an element which does not have the specified key | ||
2433 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to | ||
2434 | use the example above). Warnings will only be generated if B<-w> is in force. | ||
2435 | |||
2436 | Two further variations are made possible by prefixing a '+' or a '-' character | ||
2437 | to the attribute name: | ||
2438 | |||
2439 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | ||
2440 | |||
2441 | <opt> | ||
2442 | <user login="grep" fullname="Gary R Epstein" /> | ||
2443 | <user login="stty" fullname="Simon T Tyson" /> | ||
2444 | </opt> | ||
2445 | |||
2446 | to parse to this data structure: | ||
2447 | |||
2448 | { | ||
2449 | 'user' => { | ||
2450 | 'stty' => { | ||
2451 | 'fullname' => 'Simon T Tyson', | ||
2452 | 'login' => 'stty' | ||
2453 | }, | ||
2454 | 'grep' => { | ||
2455 | 'fullname' => 'Gary R Epstein', | ||
2456 | 'login' => 'grep' | ||
2457 | } | ||
2458 | } | ||
2459 | } | ||
2460 | |||
2461 | The '+' indicates that the value of the key attribute should be copied rather | ||
2462 | than moved to the folded hash key. | ||
2463 | |||
2464 | A '-' prefix would produce this result: | ||
2465 | |||
2466 | { | ||
2467 | 'user' => { | ||
2468 | 'stty' => { | ||
2469 | 'fullname' => 'Simon T Tyson', | ||
2470 | '-login' => 'stty' | ||
2471 | }, | ||
2472 | 'grep' => { | ||
2473 | 'fullname' => 'Gary R Epstein', | ||
2474 | '-login' => 'grep' | ||
2475 | } | ||
2476 | } | ||
2477 | } | ||
2478 | |||
2479 | As described earlier, C<XMLout> will ignore hash keys starting with a '-'. | ||
2480 | |||
2481 | =head2 NoAttr => 1 I<# in+out - handy> | ||
2482 | |||
2483 | When used with C<XMLout()>, the generated XML will contain no attributes. | ||
2484 | All hash key/values will be represented as nested elements instead. | ||
2485 | |||
2486 | When used with C<XMLin()>, any attributes in the XML will be ignored. | ||
2487 | |||
2488 | =head2 NoEscape => 1 I<# out - seldom used> | ||
2489 | |||
2490 | By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and | ||
2491 | '"' to '<', '>', '&' and '"' respectively. Use this option to | ||
2492 | suppress escaping (presumably because you've already escaped the data in some | ||
2493 | more sophisticated manner). | ||
2494 | |||
2495 | =head2 NoIndent => 1 I<# out - seldom used> | ||
2496 | |||
2497 | Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. | ||
2498 | With this option enabled, the XML output will all be on one line (unless there | ||
2499 | are newlines in the data) - this may be easier for downstream processing. | ||
2500 | |||
2501 | =head2 NoSort => 1 I<# out - seldom used> | ||
2502 | |||
2503 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), | ||
2504 | by default. Enable this option to suppress the sorting - possibly for | ||
2505 | backwards compatibility. | ||
2506 | |||
2507 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in | ||
2508 | 'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements | ||
2509 | are sorted alphabetically by the value of the key field. | ||
2510 | |||
2511 | =head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> | ||
2512 | |||
2513 | This option controls how whitespace in text content is handled. Recognised | ||
2514 | values for the option are: | ||
2515 | |||
2516 | =over 4 | ||
2517 | |||
2518 | =item * | ||
2519 | |||
2520 | 0 = (default) whitespace is passed through unaltered (except of course for the | ||
2521 | normalisation of whitespace in attribute values which is mandated by the XML | ||
2522 | recommendation) | ||
2523 | |||
2524 | =item * | ||
2525 | |||
2526 | 1 = whitespace is normalised in any value used as a hash key (normalising means | ||
2527 | removing leading and trailing whitespace and collapsing sequences of whitespace | ||
2528 | characters to a single space) | ||
2529 | |||
2530 | =item * | ||
2531 | |||
2532 | 2 = whitespace is normalised in all text content | ||
2533 | |||
2534 | =back | ||
2535 | |||
2536 | Note: you can spell this option with a 'z' if that is more natural for you. | ||
2537 | |||
2538 | =head2 NSExpand => 1 I<# in+out handy - SAX only> | ||
2539 | |||
2540 | This option controls namespace expansion - the translation of element and | ||
2541 | attribute names of the form 'prefix:name' to '{uri}name'. For example the | ||
2542 | element name 'xsl:template' might be expanded to: | ||
2543 | '{http://www.w3.org/1999/XSL/Transform}template'. | ||
2544 | |||
2545 | By default, C<XMLin()> will return element names and attribute names exactly as | ||
2546 | they appear in the XML. Setting this option to 1 will cause all element and | ||
2547 | attribute names to be expanded to include their namespace prefix. | ||
2548 | |||
2549 | I<Note: You must be using a SAX parser for this option to work (ie: it does not | ||
2550 | work with XML::Parser)>. | ||
2551 | |||
2552 | This option also controls whether C<XMLout()> performs the reverse translation | ||
2553 | from '{uri}name' back to 'prefix:name'. The default is no translation. If | ||
2554 | your data contains expanded names, you should set this option to 1 otherwise | ||
2555 | C<XMLout> will emit XML which is not well formed. | ||
2556 | |||
2557 | I<Note: You must have the XML::NamespaceSupport module installed if you want | ||
2558 | C<XMLout()> to translate URIs back to prefixes>. | ||
2559 | |||
2560 | =head2 NumericEscape => 0 | 1 | 2 I<# out - handy> | ||
2561 | |||
2562 | Use this option to have 'high' (non-ASCII) characters in your Perl data | ||
2563 | structure converted to numeric entities (eg: €) in the XML output. Three | ||
2564 | levels are possible: | ||
2565 | |||
2566 | 0 - default: no numeric escaping (OK if you're writing out UTF8) | ||
2567 | |||
2568 | 1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output | ||
2569 | |||
2570 | 2 - all characters above 0x7F are escaped (good for plain ASCII output) | ||
2571 | |||
2572 | =head2 OutputFile => <file specifier> I<# out - handy> | ||
2573 | |||
2574 | The default behaviour of C<XMLout()> is to return the XML as a string. If you | ||
2575 | wish to write the XML to a file, simply supply the filename using the | ||
2576 | 'OutputFile' option. | ||
2577 | |||
2578 | This option also accepts an IO handle object - especially useful in Perl 5.8.0 | ||
2579 | and later for output using an encoding other than UTF-8, eg: | ||
2580 | |||
2581 | open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; | ||
2582 | XMLout($ref, OutputFile => $fh); | ||
2583 | |||
2584 | Note, XML::Simple does not require that the object you pass in to the | ||
2585 | OutputFile option inherits from L<IO::Handle> - it simply assumes the object | ||
2586 | supports a C<print> method. | ||
2587 | |||
2588 | =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> | ||
2589 | |||
2590 | I<Note: This option is now officially deprecated. If you find it useful, email | ||
2591 | the author with an example of what you use it for. Do not use this option to | ||
2592 | set the ProtocolEncoding, that's just plain wrong - fix the XML>. | ||
2593 | |||
2594 | This option allows you to pass parameters to the constructor of the underlying | ||
2595 | XML::Parser object (which of course assumes you're not using SAX). | ||
2596 | |||
2597 | =head2 RootName => 'string' I<# out - handy> | ||
2598 | |||
2599 | By default, when C<XMLout()> generates XML, the root element will be named | ||
2600 | 'opt'. This option allows you to specify an alternative name. | ||
2601 | |||
2602 | Specifying either undef or the empty string for the RootName option will | ||
2603 | produce XML with no root elements. In most cases the resulting XML fragment | ||
2604 | will not be 'well formed' and therefore could not be read back in by C<XMLin()>. | ||
2605 | Nevertheless, the option has been found to be useful in certain circumstances. | ||
2606 | |||
2607 | =head2 SearchPath => [ list ] I<# in - handy> | ||
2608 | |||
2609 | If you pass C<XMLin()> a filename, but the filename include no directory | ||
2610 | component, you can use this option to specify which directories should be | ||
2611 | searched to locate the file. You might use this option to search first in the | ||
2612 | user's home directory, then in a global directory such as /etc. | ||
2613 | |||
2614 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | ||
2615 | file is assumed to be in the current directory. | ||
2616 | |||
2617 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | ||
2618 | will contain only the directory in which the script itself is located. | ||
2619 | Otherwise the default SearchPath will be empty. | ||
2620 | |||
2621 | =head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> | ||
2622 | |||
2623 | This option controls what C<XMLin()> should do with empty elements (no | ||
2624 | attributes and no content). The default behaviour is to represent them as | ||
2625 | empty hashes. Setting this option to a true value (eg: 1) will cause empty | ||
2626 | elements to be skipped altogether. Setting the option to 'undef' or the empty | ||
2627 | string will cause empty elements to be represented as the undefined value or | ||
2628 | the empty string respectively. The latter two alternatives are a little | ||
2629 | easier to test for in your code than a hash with no keys. | ||
2630 | |||
2631 | The option also controls what C<XMLout()> does with undefined values. Setting | ||
2632 | the option to undef causes undefined values to be output as empty elements | ||
2633 | (rather than empty attributes), it also suppresses the generation of warnings | ||
2634 | about undefined values. Setting the option to a true value (eg: 1) causes | ||
2635 | undefined values to be skipped altogether on output. | ||
2636 | |||
2637 | =head2 ValueAttr => [ names ] I<# in - handy> | ||
2638 | |||
2639 | Use this option to deal elements which always have a single attribute and no | ||
2640 | content. Eg: | ||
2641 | |||
2642 | <opt> | ||
2643 | <colour value="red" /> | ||
2644 | <size value="XXL" /> | ||
2645 | </opt> | ||
2646 | |||
2647 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | ||
2648 | |||
2649 | { | ||
2650 | colour => 'red', | ||
2651 | size => 'XXL' | ||
2652 | } | ||
2653 | |||
2654 | instead of this (the default): | ||
2655 | |||
2656 | { | ||
2657 | colour => { value => 'red' }, | ||
2658 | size => { value => 'XXL' } | ||
2659 | } | ||
2660 | |||
2661 | Note: This form of the ValueAttr option is not compatible with C<XMLout()> - | ||
2662 | since the attribute name is discarded at parse time, the original XML cannot be | ||
2663 | reconstructed. | ||
2664 | |||
2665 | =head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> | ||
2666 | |||
2667 | This (preferred) form of the ValueAttr option requires you to specify both | ||
2668 | the element and the attribute names. This is not only safer, it also allows | ||
2669 | the original XML to be reconstructed by C<XMLout()>. | ||
2670 | |||
2671 | Note: You probably don't want to use this option and the NoAttr option at the | ||
2672 | same time. | ||
2673 | |||
2674 | =head2 Variables => { name => value } I<# in - handy> | ||
2675 | |||
2676 | This option allows variables in the XML to be expanded when the file is read. | ||
2677 | (there is no facility for putting the variable names back if you regenerate | ||
2678 | XML using C<XMLout>). | ||
2679 | |||
2680 | A 'variable' is any text of the form C<${name}> which occurs in an attribute | ||
2681 | value or in the text content of an element. If 'name' matches a key in the | ||
2682 | supplied hashref, C<${name}> will be replaced with the corresponding value from | ||
2683 | the hashref. If no matching key is found, the variable will not be replaced. | ||
2684 | Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are | ||
2685 | allowed). | ||
2686 | |||
2687 | =head2 VarAttr => 'attr_name' I<# in - handy> | ||
2688 | |||
2689 | In addition to the variables defined using C<Variables>, this option allows | ||
2690 | variables to be defined in the XML. A variable definition consists of an | ||
2691 | element with an attribute called 'attr_name' (the value of the C<VarAttr> | ||
2692 | option). The value of the attribute will be used as the variable name and the | ||
2693 | text content of the element will be used as the value. A variable defined in | ||
2694 | this way will override a variable defined using the C<Variables> option. For | ||
2695 | example: | ||
2696 | |||
2697 | XMLin( '<opt> | ||
2698 | <dir name="prefix">/usr/local/apache</dir> | ||
2699 | <dir name="exec_prefix">${prefix}</dir> | ||
2700 | <dir name="bindir">${exec_prefix}/bin</dir> | ||
2701 | </opt>', | ||
2702 | VarAttr => 'name', ContentKey => '-content' | ||
2703 | ); | ||
2704 | |||
2705 | produces the following data structure: | ||
2706 | |||
2707 | { | ||
2708 | dir => { | ||
2709 | prefix => '/usr/local/apache', | ||
2710 | exec_prefix => '/usr/local/apache', | ||
2711 | bindir => '/usr/local/apache/bin', | ||
2712 | } | ||
2713 | } | ||
2714 | |||
2715 | =head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> | ||
2716 | |||
2717 | If you want the output from C<XMLout()> to start with the optional XML | ||
2718 | declaration, simply set the option to '1'. The default XML declaration is: | ||
2719 | |||
2720 | <?xml version='1.0' standalone='yes'?> | ||
2721 | |||
2722 | If you want some other string (for example to declare an encoding value), set | ||
2723 | the value of this option to the complete string you require. | ||
2724 | |||
2725 | |||
2726 | =head1 OPTIONAL OO INTERFACE | ||
2727 | |||
2728 | The procedural interface is both simple and convenient however there are a | ||
2729 | couple of reasons why you might prefer to use the object oriented (OO) | ||
2730 | interface: | ||
2731 | |||
2732 | =over 4 | ||
2733 | |||
2734 | =item * | ||
2735 | |||
2736 | to define a set of default values which should be used on all subsequent calls | ||
2737 | to C<XMLin()> or C<XMLout()> | ||
2738 | |||
2739 | =item * | ||
2740 | |||
2741 | to override methods in B<XML::Simple> to provide customised behaviour | ||
2742 | |||
2743 | =back | ||
2744 | |||
2745 | The default values for the options described above are unlikely to suit | ||
2746 | everyone. The OO interface allows you to effectively override B<XML::Simple>'s | ||
2747 | defaults with your preferred values. It works like this: | ||
2748 | |||
2749 | First create an XML::Simple parser object with your preferred defaults: | ||
2750 | |||
2751 | my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); | ||
2752 | |||
2753 | then call C<XMLin()> or C<XMLout()> as a method of that object: | ||
2754 | |||
2755 | my $ref = $xs->XMLin($xml); | ||
2756 | my $xml = $xs->XMLout($ref); | ||
2757 | |||
2758 | You can also specify options when you make the method calls and these values | ||
2759 | will be merged with the values specified when the object was created. Values | ||
2760 | specified in a method call take precedence. | ||
2761 | |||
2762 | Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be | ||
2763 | called as C<xml_in()> or C<xml_out()>. The method names are aliased so the | ||
2764 | only difference is the aesthetics. | ||
2765 | |||
2766 | =head2 Parsing Methods | ||
2767 | |||
2768 | You can explicitly call one of the following methods rather than rely on the | ||
2769 | C<xml_in()> method automatically determining whether the target to be parsed is | ||
2770 | a string, a file or a filehandle: | ||
2771 | |||
2772 | =over 4 | ||
2773 | |||
2774 | =item parse_string(text) | ||
2775 | |||
2776 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||
2777 | a string of XML (or a reference to a scalar containing a string of XML). | ||
2778 | |||
2779 | =item parse_file(filename) | ||
2780 | |||
2781 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||
2782 | the name of a file containing XML. | ||
2783 | |||
2784 | =item parse_fh(file_handle) | ||
2785 | |||
2786 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||
2787 | a filehandle which can be read to get XML. | ||
2788 | |||
2789 | =back | ||
2790 | |||
2791 | =head2 Hook Methods | ||
2792 | |||
2793 | You can make your own class which inherits from XML::Simple and overrides | ||
2794 | certain behaviours. The following methods may provide useful 'hooks' upon | ||
2795 | which to hang your modified behaviour. You may find other undocumented methods | ||
2796 | by examining the source, but those may be subject to change in future releases. | ||
2797 | |||
2798 | =over 4 | ||
2799 | |||
2800 | =item handle_options(direction, name => value ...) | ||
2801 | |||
2802 | This method will be called when one of the parsing methods or the C<XMLout()> | ||
2803 | method is called. The initial argument will be a string (either 'in' or 'out') | ||
2804 | and the remaining arguments will be name value pairs. | ||
2805 | |||
2806 | =item default_config_file() | ||
2807 | |||
2808 | Calculates and returns the name of the file which should be parsed if no | ||
2809 | filename is passed to C<XMLin()> (default: C<$0.xml>). | ||
2810 | |||
2811 | =item build_simple_tree(filename, string) | ||
2812 | |||
2813 | Called from C<XMLin()> or any of the parsing methods. Takes either a file name | ||
2814 | as the first argument or C<undef> followed by a 'string' as the second | ||
2815 | argument. Returns a simple tree data structure. You could override this | ||
2816 | method to apply your own transformations before the data structure is returned | ||
2817 | to the caller. | ||
2818 | |||
2819 | =item new_hashref() | ||
2820 | |||
2821 | When the 'simple tree' data structure is being built, this method will be | ||
2822 | called to create any required anonymous hashrefs. | ||
2823 | |||
2824 | =item sorted_keys(name, hashref) | ||
2825 | |||
2826 | Called when C<XMLout()> is translating a hashref to XML. This routine returns | ||
2827 | a list of hash keys in the order that the corresponding attributes/elements | ||
2828 | should appear in the output. | ||
2829 | |||
2830 | =item escape_value(string) | ||
2831 | |||
2832 | Called from C<XMLout()>, takes a string and returns a copy of the string with | ||
2833 | XML character escaping rules applied. | ||
2834 | |||
2835 | =item numeric_escape(string) | ||
2836 | |||
2837 | Called from C<escape_value()>, to handle non-ASCII characters (depending on the | ||
2838 | value of the NumericEscape option). | ||
2839 | |||
2840 | =item copy_hash(hashref, extra_key => value, ...) | ||
2841 | |||
2842 | Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of | ||
2843 | hashes. You might wish to override this method if you're using tied hashes and | ||
2844 | don't want them to get untied. | ||
2845 | |||
2846 | =back | ||
2847 | |||
2848 | =head2 Cache Methods | ||
2849 | |||
2850 | XML::Simple implements three caching schemes ('storable', 'memshare' and | ||
2851 | 'memcopy'). You can implement a custom caching scheme by implementing | ||
2852 | two methods - one for reading from the cache and one for writing to it. | ||
2853 | |||
2854 | For example, you might implement a new 'dbm' scheme that stores cached data | ||
2855 | structures using the L<MLDBM> module. First, you would add a | ||
2856 | C<cache_read_dbm()> method which accepted a filename for use as a lookup key | ||
2857 | and returned a data structure on success, or undef on failure. Then, you would | ||
2858 | implement a C<cache_read_dbm()> method which accepted a data structure and a | ||
2859 | filename. | ||
2860 | |||
2861 | You would use this caching scheme by specifying the option: | ||
2862 | |||
2863 | Cache => [ 'dbm' ] | ||
2864 | |||
2865 | =head1 STRICT MODE | ||
2866 | |||
2867 | If you import the B<XML::Simple> routines like this: | ||
2868 | |||
2869 | use XML::Simple qw(:strict); | ||
2870 | |||
2871 | the following common mistakes will be detected and treated as fatal errors | ||
2872 | |||
2873 | =over 4 | ||
2874 | |||
2875 | =item * | ||
2876 | |||
2877 | Failing to explicitly set the C<KeyAttr> option - if you can't be bothered | ||
2878 | reading about this option, turn it off with: KeyAttr => [ ] | ||
2879 | |||
2880 | =item * | ||
2881 | |||
2882 | Failing to explicitly set the C<ForceArray> option - if you can't be bothered | ||
2883 | reading about this option, set it to the safest mode with: ForceArray => 1 | ||
2884 | |||
2885 | =item * | ||
2886 | |||
2887 | Setting ForceArray to an array, but failing to list all the elements from the | ||
2888 | KeyAttr hash. | ||
2889 | |||
2890 | =item * | ||
2891 | |||
2892 | Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains | ||
2893 | one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested | ||
2894 | element). Note: if strict mode is not set but -w is, this condition triggers a | ||
2895 | warning. | ||
2896 | |||
2897 | =item * | ||
2898 | |||
2899 | Data error - as above, but non-unique values are present in the key attribute | ||
2900 | (eg: more than one E<lt>partE<gt> element with the same partnum). This will | ||
2901 | also trigger a warning if strict mode is not enabled. | ||
2902 | |||
2903 | =item * | ||
2904 | |||
2905 | Data error - as above, but value of key attribute (eg: partnum) is not a | ||
2906 | scalar string (due to nested elements etc). This will also trigger a warning | ||
2907 | if strict mode is not enabled. | ||
2908 | |||
2909 | =back | ||
2910 | |||
2911 | =head1 SAX SUPPORT | ||
2912 | |||
2913 | From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API | ||
2914 | for XML) - specifically SAX2. | ||
2915 | |||
2916 | In a typical SAX application, an XML parser (or SAX 'driver') module generates | ||
2917 | SAX events (start of element, character data, end of element, etc) as it parses | ||
2918 | an XML document and a 'handler' module processes the events to extract the | ||
2919 | required data. This simple model allows for some interesting and powerful | ||
2920 | possibilities: | ||
2921 | |||
2922 | =over 4 | ||
2923 | |||
2924 | =item * | ||
2925 | |||
2926 | Applications written to the SAX API can extract data from huge XML documents | ||
2927 | without the memory overheads of a DOM or tree API. | ||
2928 | |||
2929 | =item * | ||
2930 | |||
2931 | The SAX API allows for plug and play interchange of parser modules without | ||
2932 | having to change your code to fit a new module's API. A number of SAX parsers | ||
2933 | are available with capabilities ranging from extreme portability to blazing | ||
2934 | performance. | ||
2935 | |||
2936 | =item * | ||
2937 | |||
2938 | A SAX 'filter' module can implement both a handler interface for receiving | ||
2939 | data and a generator interface for passing modified data on to a downstream | ||
2940 | handler. Filters can be chained together in 'pipelines'. | ||
2941 | |||
2942 | =item * | ||
2943 | |||
2944 | One filter module might split a data stream to direct data to two or more | ||
2945 | downstream handlers. | ||
2946 | |||
2947 | =item * | ||
2948 | |||
2949 | Generating SAX events is not the exclusive preserve of XML parsing modules. | ||
2950 | For example, a module might extract data from a relational database using DBI | ||
2951 | and pass it on to a SAX pipeline for filtering and formatting. | ||
2952 | |||
2953 | =back | ||
2954 | |||
2955 | B<XML::Simple> can operate at either end of a SAX pipeline. For example, | ||
2956 | you can take a data structure in the form of a hashref and pass it into a | ||
2957 | SAX pipeline using the 'Handler' option on C<XMLout()>: | ||
2958 | |||
2959 | use XML::Simple; | ||
2960 | use Some::SAX::Filter; | ||
2961 | use XML::SAX::Writer; | ||
2962 | |||
2963 | my $ref = { | ||
2964 | .... # your data here | ||
2965 | }; | ||
2966 | |||
2967 | my $writer = XML::SAX::Writer->new(); | ||
2968 | my $filter = Some::SAX::Filter->new(Handler => $writer); | ||
2969 | my $simple = XML::Simple->new(Handler => $filter); | ||
2970 | $simple->XMLout($ref); | ||
2971 | |||
2972 | You can also put B<XML::Simple> at the opposite end of the pipeline to take | ||
2973 | advantage of the simple 'tree' data structure once the relevant data has been | ||
2974 | isolated through filtering: | ||
2975 | |||
2976 | use XML::SAX; | ||
2977 | use Some::SAX::Filter; | ||
2978 | use XML::Simple; | ||
2979 | |||
2980 | my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); | ||
2981 | my $filter = Some::SAX::Filter->new(Handler => $simple); | ||
2982 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | ||
2983 | |||
2984 | my $ref = $parser->parse_uri('some_huge_file.xml'); | ||
2985 | |||
2986 | print $ref->{part}->{'555-1234'}; | ||
2987 | |||
2988 | You can build a filter by using an XML::Simple object as a handler and setting | ||
2989 | its DataHandler option to point to a routine which takes the resulting tree, | ||
2990 | modifies it and sends it off as SAX events to a downstream handler: | ||
2991 | |||
2992 | my $writer = XML::SAX::Writer->new(); | ||
2993 | my $filter = XML::Simple->new( | ||
2994 | DataHandler => sub { | ||
2995 | my $simple = shift; | ||
2996 | my $data = shift; | ||
2997 | |||
2998 | # Modify $data here | ||
2999 | |||
3000 | $simple->XMLout($data, Handler => $writer); | ||
3001 | } | ||
3002 | ); | ||
3003 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | ||
3004 | |||
3005 | $parser->parse_uri($filename); | ||
3006 | |||
3007 | I<Note: In this last example, the 'Handler' option was specified in the call to | ||
3008 | C<XMLout()> but it could also have been specified in the constructor>. | ||
3009 | |||
3010 | =head1 ENVIRONMENT | ||
3011 | |||
3012 | If you don't care which parser module B<XML::Simple> uses then skip this | ||
3013 | section entirely (it looks more complicated than it really is). | ||
3014 | |||
3015 | B<XML::Simple> will default to using a B<SAX> parser if one is available or | ||
3016 | B<XML::Parser> if SAX is not available. | ||
3017 | |||
3018 | You can dictate which parser module is used by setting either the environment | ||
3019 | variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable | ||
3020 | $XML::Simple::PREFERRED_PARSER to contain the module name. The following rules | ||
3021 | are used: | ||
3022 | |||
3023 | =over 4 | ||
3024 | |||
3025 | =item * | ||
3026 | |||
3027 | The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use | ||
3028 | its default rules, you can set the package variable to an empty string. | ||
3029 | |||
3030 | =item * | ||
3031 | |||
3032 | If the 'preferred parser' is set to the string 'XML::Parser', then | ||
3033 | L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not | ||
3034 | installed). | ||
3035 | |||
3036 | =item * | ||
3037 | |||
3038 | If the 'preferred parser' is set to some other value, then it is assumed to be | ||
3039 | the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.> | ||
3040 | If L<XML::SAX> is not installed, or the requested parser module is not | ||
3041 | installed, then C<XMLin()> will die. | ||
3042 | |||
3043 | =item * | ||
3044 | |||
3045 | If the 'preferred parser' is not defined at all (the normal default | ||
3046 | state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is | ||
3047 | installed, then a parser module will be selected according to | ||
3048 | L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX | ||
3049 | parser installed). | ||
3050 | |||
3051 | =item * | ||
3052 | |||
3053 | if the 'preferred parser' is not defined and B<XML::SAX> is not | ||
3054 | installed, then B<XML::Parser> will be used. C<XMLin()> will die if | ||
3055 | L<XML::Parser> is not installed. | ||
3056 | |||
3057 | =back | ||
3058 | |||
3059 | Note: The B<XML::SAX> distribution includes an XML parser written entirely in | ||
3060 | Perl. It is very portable but it is not very fast. You should consider | ||
3061 | installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your | ||
3062 | platform. | ||
3063 | |||
3064 | =head1 ERROR HANDLING | ||
3065 | |||
3066 | The XML standard is very clear on the issue of non-compliant documents. An | ||
3067 | error in parsing any single element (for example a missing end tag) must cause | ||
3068 | the whole document to be rejected. B<XML::Simple> will die with an appropriate | ||
3069 | message if it encounters a parsing error. | ||
3070 | |||
3071 | If dying is not appropriate for your application, you should arrange to call | ||
3072 | C<XMLin()> in an eval block and look for errors in $@. eg: | ||
3073 | |||
3074 | my $config = eval { XMLin() }; | ||
3075 | PopUpMessage($@) if($@); | ||
3076 | |||
3077 | Note, there is a common misconception that use of B<eval> will significantly | ||
3078 | slow down a script. While that may be true when the code being eval'd is in a | ||
3079 | string, it is not true of code like the sample above. | ||
3080 | |||
3081 | =head1 EXAMPLES | ||
3082 | |||
3083 | When C<XMLin()> reads the following very simple piece of XML: | ||
3084 | |||
3085 | <opt username="testuser" password="frodo"></opt> | ||
3086 | |||
3087 | it returns the following data structure: | ||
3088 | |||
3089 | { | ||
3090 | 'username' => 'testuser', | ||
3091 | 'password' => 'frodo' | ||
3092 | } | ||
3093 | |||
3094 | The identical result could have been produced with this alternative XML: | ||
3095 | |||
3096 | <opt username="testuser" password="frodo" /> | ||
3097 | |||
3098 | Or this (although see 'ForceArray' option for variations): | ||
3099 | |||
3100 | <opt> | ||
3101 | <username>testuser</username> | ||
3102 | <password>frodo</password> | ||
3103 | </opt> | ||
3104 | |||
3105 | Repeated nested elements are represented as anonymous arrays: | ||
3106 | |||
3107 | <opt> | ||
3108 | <person firstname="Joe" lastname="Smith"> | ||
3109 | <email>joe@smith.com</email> | ||
3110 | <email>jsmith@yahoo.com</email> | ||
3111 | </person> | ||
3112 | <person firstname="Bob" lastname="Smith"> | ||
3113 | <email>bob@smith.com</email> | ||
3114 | </person> | ||
3115 | </opt> | ||
3116 | |||
3117 | { | ||
3118 | 'person' => [ | ||
3119 | { | ||
3120 | 'email' => [ | ||
3121 | 'joe@smith.com', | ||
3122 | 'jsmith@yahoo.com' | ||
3123 | ], | ||
3124 | 'firstname' => 'Joe', | ||
3125 | 'lastname' => 'Smith' | ||
3126 | }, | ||
3127 | { | ||
3128 | 'email' => 'bob@smith.com', | ||
3129 | 'firstname' => 'Bob', | ||
3130 | 'lastname' => 'Smith' | ||
3131 | } | ||
3132 | ] | ||
3133 | } | ||
3134 | |||
3135 | Nested elements with a recognised key attribute are transformed (folded) from | ||
3136 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | ||
3137 | option): | ||
3138 | |||
3139 | <opt> | ||
3140 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | ||
3141 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | ||
3142 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | ||
3143 | </opt> | ||
3144 | |||
3145 | { | ||
3146 | 'person' => { | ||
3147 | 'jbloggs' => { | ||
3148 | 'firstname' => 'Joe', | ||
3149 | 'lastname' => 'Bloggs' | ||
3150 | }, | ||
3151 | 'tsmith' => { | ||
3152 | 'firstname' => 'Tom', | ||
3153 | 'lastname' => 'Smith' | ||
3154 | }, | ||
3155 | 'jsmith' => { | ||
3156 | 'firstname' => 'Joe', | ||
3157 | 'lastname' => 'Smith' | ||
3158 | } | ||
3159 | } | ||
3160 | } | ||
3161 | |||
3162 | |||
3163 | The <anon> tag can be used to form anonymous arrays: | ||
3164 | |||
3165 | <opt> | ||
3166 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | ||
3167 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | ||
3168 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | ||
3169 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | ||
3170 | </opt> | ||
3171 | |||
3172 | { | ||
3173 | 'head' => [ | ||
3174 | [ 'Col 1', 'Col 2', 'Col 3' ] | ||
3175 | ], | ||
3176 | 'data' => [ | ||
3177 | [ 'R1C1', 'R1C2', 'R1C3' ], | ||
3178 | [ 'R2C1', 'R2C2', 'R2C3' ], | ||
3179 | [ 'R3C1', 'R3C2', 'R3C3' ] | ||
3180 | ] | ||
3181 | } | ||
3182 | |||
3183 | Anonymous arrays can be nested to arbirtrary levels and as a special case, if | ||
3184 | the surrounding tags for an XML document contain only an anonymous array the | ||
3185 | arrayref will be returned directly rather than the usual hashref: | ||
3186 | |||
3187 | <opt> | ||
3188 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | ||
3189 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | ||
3190 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | ||
3191 | </opt> | ||
3192 | |||
3193 | [ | ||
3194 | [ 'Col 1', 'Col 2' ], | ||
3195 | [ 'R1C1', 'R1C2' ], | ||
3196 | [ 'R2C1', 'R2C2' ] | ||
3197 | ] | ||
3198 | |||
3199 | Elements which only contain text content will simply be represented as a | ||
3200 | scalar. Where an element has both attributes and text content, the element | ||
3201 | will be represented as a hashref with the text content in the 'content' key | ||
3202 | (see the C<ContentKey> option): | ||
3203 | |||
3204 | <opt> | ||
3205 | <one>first</one> | ||
3206 | <two attr="value">second</two> | ||
3207 | </opt> | ||
3208 | |||
3209 | { | ||
3210 | 'one' => 'first', | ||
3211 | 'two' => { 'attr' => 'value', 'content' => 'second' } | ||
3212 | } | ||
3213 | |||
3214 | Mixed content (elements which contain both text content and nested elements) | ||
3215 | will be not be represented in a useful way - element order and significant | ||
3216 | whitespace will be lost. If you need to work with mixed content, then | ||
3217 | XML::Simple is not the right tool for your job - check out the next section. | ||
3218 | |||
3219 | =head1 WHERE TO FROM HERE? | ||
3220 | |||
3221 | B<XML::Simple> is able to present a simple API because it makes some | ||
3222 | assumptions on your behalf. These include: | ||
3223 | |||
3224 | =over 4 | ||
3225 | |||
3226 | =item * | ||
3227 | |||
3228 | You're not interested in text content consisting only of whitespace | ||
3229 | |||
3230 | =item * | ||
3231 | |||
3232 | You don't mind that when things get slurped into a hash the order is lost | ||
3233 | |||
3234 | =item * | ||
3235 | |||
3236 | You don't want fine-grained control of the formatting of generated XML | ||
3237 | |||
3238 | =item * | ||
3239 | |||
3240 | You would never use a hash key that was not a legal XML element name | ||
3241 | |||
3242 | =item * | ||
3243 | |||
3244 | You don't need help converting between different encodings | ||
3245 | |||
3246 | =back | ||
3247 | |||
3248 | In a serious XML project, you'll probably outgrow these assumptions fairly | ||
3249 | quickly. This section of the document used to offer some advice on chosing a | ||
3250 | more powerful option. That advice has now grown into the 'Perl-XML FAQ' | ||
3251 | document which you can find at: L<http://perl-xml.sourceforge.net/faq/> | ||
3252 | |||
3253 | The advice in the FAQ boils down to a quick explanation of tree versus | ||
3254 | event based parsers and then recommends: | ||
3255 | |||
3256 | For event based parsing, use SAX (do not set out to write any new code for | ||
3257 | XML::Parser's handler API - it is obselete). | ||
3258 | |||
3259 | For tree-based parsing, you could choose between the 'Perlish' approach of | ||
3260 | L<XML::Twig> and more standards based DOM implementations - preferably one with | ||
3261 | XPath support. | ||
3262 | |||
3263 | |||
3264 | =head1 SEE ALSO | ||
3265 | |||
3266 | B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. | ||
3267 | |||
3268 | To generate documents with namespaces, L<XML::NamespaceSupport> is required. | ||
3269 | |||
3270 | The optional caching functions require L<Storable>. | ||
3271 | |||
3272 | Answers to Frequently Asked Questions about XML::Simple are bundled with this | ||
3273 | distribution as: L<XML::Simple::FAQ> | ||
3274 | |||
3275 | =head1 COPYRIGHT | ||
3276 | |||
3277 | Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> | ||
3278 | |||
3279 | This library is free software; you can redistribute it and/or modify it | ||
3280 | under the same terms as Perl itself. | ||
3281 | |||
3282 | =cut | ||
3283 | |||
3284 | |||
diff --git a/share/perl/lib/XML/TreePP.pm b/share/perl/lib/XML/TreePP.pm new file mode 100644 index 0000000..bd03db7 --- /dev/null +++ b/share/perl/lib/XML/TreePP.pm | |||
@@ -0,0 +1,1228 @@ | |||
1 | =head1 NAME | ||
2 | |||
3 | XML::TreePP -- Pure Perl implementation for parsing/writing xml files | ||
4 | |||
5 | =head1 SYNOPSIS | ||
6 | |||
7 | parse xml file into hash tree | ||
8 | |||
9 | use XML::TreePP; | ||
10 | my $tpp = XML::TreePP->new(); | ||
11 | my $tree = $tpp->parsefile( "index.rdf" ); | ||
12 | print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n"; | ||
13 | print "URL: ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n"; | ||
14 | |||
15 | write xml as string from hash tree | ||
16 | |||
17 | use XML::TreePP; | ||
18 | my $tpp = XML::TreePP->new(); | ||
19 | my $tree = { rss => { channel => { item => [ { | ||
20 | title => "The Perl Directory", | ||
21 | link => "http://www.perl.org/", | ||
22 | }, { | ||
23 | title => "The Comprehensive Perl Archive Network", | ||
24 | link => "http://cpan.perl.org/", | ||
25 | } ] } } }; | ||
26 | my $xml = $tpp->write( $tree ); | ||
27 | print $xml; | ||
28 | |||
29 | get remote xml file with HTTP-GET and parse it into hash tree | ||
30 | |||
31 | use XML::TreePP; | ||
32 | my $tpp = XML::TreePP->new(); | ||
33 | my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" ); | ||
34 | print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n"; | ||
35 | print "URL: ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n"; | ||
36 | |||
37 | get remote xml file with HTTP-POST and parse it into hash tree | ||
38 | |||
39 | use XML::TreePP; | ||
40 | my $tpp = XML::TreePP->new( force_array => [qw( item )] ); | ||
41 | my $cgiurl = "http://search.hatena.ne.jp/keyword"; | ||
42 | my $keyword = "ajax"; | ||
43 | my $cgiquery = "mode=rss2&word=".$keyword; | ||
44 | my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery ); | ||
45 | print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n"; | ||
46 | print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n"; | ||
47 | |||
48 | =head1 DESCRIPTION | ||
49 | |||
50 | XML::TreePP module parses XML file and expands it for a hash tree. | ||
51 | And also generate XML file from a hash tree. | ||
52 | This is a pure Perl implementation. | ||
53 | You can also download XML from remote web server | ||
54 | like XMLHttpRequest object at JavaScript language. | ||
55 | |||
56 | =head1 EXAMPLES | ||
57 | |||
58 | =head2 Parse XML file | ||
59 | |||
60 | Sample XML source: | ||
61 | |||
62 | <?xml version="1.0" encoding="UTF-8"?> | ||
63 | <family name="Kawasaki"> | ||
64 | <father>Yasuhisa</father> | ||
65 | <mother>Chizuko</mother> | ||
66 | <children> | ||
67 | <girl>Shiori</girl> | ||
68 | <boy>Yusuke</boy> | ||
69 | <boy>Kairi</boy> | ||
70 | </children> | ||
71 | </family> | ||
72 | |||
73 | Sample program to read a xml file and dump it: | ||
74 | |||
75 | use XML::TreePP; | ||
76 | use Data::Dumper; | ||
77 | my $tpp = XML::TreePP->new(); | ||
78 | my $tree = $tpp->parsefile( "family.xml" ); | ||
79 | my $text = Dumper( $tree ); | ||
80 | print $text; | ||
81 | |||
82 | Result dumped: | ||
83 | |||
84 | $VAR1 = { | ||
85 | 'family' => { | ||
86 | '-name' => 'Kawasaki', | ||
87 | 'father' => 'Yasuhisa', | ||
88 | 'mother' => 'Chizuko', | ||
89 | 'children' => { | ||
90 | 'girl' => 'Shiori' | ||
91 | 'boy' => [ | ||
92 | 'Yusuke', | ||
93 | 'Kairi' | ||
94 | ], | ||
95 | } | ||
96 | } | ||
97 | }; | ||
98 | |||
99 | Details: | ||
100 | |||
101 | print $tree->{family}->{father}; # the father's given name. | ||
102 | |||
103 | The prefix '-' is added on every attribute's name. | ||
104 | |||
105 | print $tree->{family}->{"-name"}; # the family name of the family | ||
106 | |||
107 | The array is used because the family has two boys. | ||
108 | |||
109 | print $tree->{family}->{children}->{boy}->[1]; # The second boy's name | ||
110 | print $tree->{family}->{children}->{girl}; # The girl's name | ||
111 | |||
112 | =head2 Text node and attributes: | ||
113 | |||
114 | If a element has both of a text node and attributes | ||
115 | or both of a text node and other child nodes, | ||
116 | value of a text node is moved to C<#text> like child nodes. | ||
117 | |||
118 | use XML::TreePP; | ||
119 | use Data::Dumper; | ||
120 | my $tpp = XML::TreePP->new(); | ||
121 | my $source = '<span class="author">Kawasaki Yusuke</span>'; | ||
122 | my $tree = $tpp->parse( $source ); | ||
123 | my $text = Dumper( $tree ); | ||
124 | print $text; | ||
125 | |||
126 | The result dumped is following: | ||
127 | |||
128 | $VAR1 = { | ||
129 | 'span' => { | ||
130 | '-class' => 'author', | ||
131 | '#text' => 'Kawasaki Yusuke' | ||
132 | } | ||
133 | }; | ||
134 | |||
135 | The special node name of C<#text> is used because this elements | ||
136 | has attribute(s) in addition to the text node. | ||
137 | See also L</text_node_key> option. | ||
138 | |||
139 | =head1 METHODS | ||
140 | |||
141 | =head2 new | ||
142 | |||
143 | This constructor method returns a new XML::TreePP object with C<%options>. | ||
144 | |||
145 | $tpp = XML::TreePP->new( %options ); | ||
146 | |||
147 | =head2 set | ||
148 | |||
149 | This method sets a option value for C<option_name>. | ||
150 | If C<$option_value> is not defined, its option is deleted. | ||
151 | |||
152 | $tpp->set( option_name => $option_value ); | ||
153 | |||
154 | See OPTIONS section below for details. | ||
155 | |||
156 | =head2 get | ||
157 | |||
158 | This method returns a current option value for C<option_name>. | ||
159 | |||
160 | $tpp->get( 'option_name' ); | ||
161 | |||
162 | =head2 parse | ||
163 | |||
164 | This method reads XML source and returns a hash tree converted. | ||
165 | The first argument is a scalar or a reference to a scalar. | ||
166 | |||
167 | $tree = $tpp->parse( $source ); | ||
168 | |||
169 | =head2 parsefile | ||
170 | |||
171 | This method reads a XML file and returns a hash tree converted. | ||
172 | The first argument is a filename. | ||
173 | |||
174 | $tree = $tpp->parsefile( $file ); | ||
175 | |||
176 | =head2 parsehttp | ||
177 | |||
178 | This method receives a XML file from a remote server via HTTP and | ||
179 | returns a hash tree converted. | ||
180 | |||
181 | $tree = $tpp->parsehttp( $method, $url, $body, $head ); | ||
182 | |||
183 | C<$method> is a method of HTTP connection: GET/POST/PUT/DELETE | ||
184 | C<$url> is an URI of a XML file. | ||
185 | C<$body> is a request body when you use POST method. | ||
186 | C<$head> is a request headers as a hash ref. | ||
187 | L<LWP::UserAgent> module or L<HTTP::Lite> module is required to fetch a file. | ||
188 | |||
189 | ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head ); | ||
190 | |||
191 | In array context, This method returns also raw XML source received | ||
192 | and HTTP response's status code. | ||
193 | |||
194 | =head2 write | ||
195 | |||
196 | This method parses a hash tree and returns a XML source generated. | ||
197 | |||
198 | $source = $tpp->write( $tree, $encode ); | ||
199 | |||
200 | C<$tree> is a reference to a hash tree. | ||
201 | |||
202 | =head2 writefile | ||
203 | |||
204 | This method parses a hash tree and writes a XML source into a file. | ||
205 | |||
206 | $tpp->writefile( $file, $tree, $encode ); | ||
207 | |||
208 | C<$file> is a filename to create. | ||
209 | C<$tree> is a reference to a hash tree. | ||
210 | |||
211 | =head1 OPTIONS FOR PARSING XML | ||
212 | |||
213 | This module accepts option parameters following: | ||
214 | |||
215 | =head2 force_array | ||
216 | |||
217 | This option allows you to specify a list of element names which | ||
218 | should always be forced into an array representation. | ||
219 | |||
220 | $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] ); | ||
221 | |||
222 | The default value is null, it means that context of the elements | ||
223 | will determine to make array or to keep it scalar or hash. | ||
224 | Note that the special wildcard name C<'*'> means all elements. | ||
225 | |||
226 | =head2 force_hash | ||
227 | |||
228 | This option allows you to specify a list of element names which | ||
229 | should always be forced into an hash representation. | ||
230 | |||
231 | $tpp->set( force_hash => [ 'item', 'image' ] ); | ||
232 | |||
233 | The default value is null, it means that context of the elements | ||
234 | will determine to make hash or to keep it scalar as a text node. | ||
235 | See also L</text_node_key> option below. | ||
236 | Note that the special wildcard name C<'*'> means all elements. | ||
237 | |||
238 | =head2 cdata_scalar_ref | ||
239 | |||
240 | This option allows you to convert a cdata section into a reference | ||
241 | for scalar on parsing XML source. | ||
242 | |||
243 | $tpp->set( cdata_scalar_ref => 1 ); | ||
244 | |||
245 | The default value is false, it means that each cdata section is converted into a scalar. | ||
246 | |||
247 | =head2 user_agent | ||
248 | |||
249 | This option allows you to specify a HTTP_USER_AGENT string which | ||
250 | is used by parsehttp() method. | ||
251 | |||
252 | $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' ); | ||
253 | |||
254 | The default string is C<'XML-TreePP/#.##'>, where C<'#.##'> is | ||
255 | substituted with the version number of this library. | ||
256 | |||
257 | =head2 http_lite | ||
258 | |||
259 | This option forces pasrsehttp() method to use a L<HTTP::Lite> instance. | ||
260 | |||
261 | my $http = HTTP::Lite->new(); | ||
262 | $tpp->set( http_lite => $http ); | ||
263 | |||
264 | =head2 lwp_useragent | ||
265 | |||
266 | This option forces pasrsehttp() method to use a L<LWP::UserAgent> instance. | ||
267 | |||
268 | my $ua = LWP::UserAgent->new(); | ||
269 | $ua->timeout( 60 ); | ||
270 | $ua->env_proxy; | ||
271 | $tpp->set( lwp_useragent => $ua ); | ||
272 | |||
273 | You may use this with L<LWP::UserAgent::WithCache>. | ||
274 | |||
275 | =head2 base_class | ||
276 | |||
277 | This blesses class name for each element's hashref. | ||
278 | Each class is named straight as a child class of it parent class. | ||
279 | |||
280 | $tpp->set( base_class => 'MyElement' ); | ||
281 | my $xml = '<root><parent><child key="val">text</child></parent></root>'; | ||
282 | my $tree = $tpp->parse( $xml ); | ||
283 | print ref $tree->{root}->{parent}->{child}, "\n"; | ||
284 | |||
285 | A hash for <child> element above is blessed to C<MyElement::root::parent::child> | ||
286 | class. You may use this with L<Class::Accessor>. | ||
287 | |||
288 | =head2 elem_class | ||
289 | |||
290 | This blesses class name for each element's hashref. | ||
291 | Each class is named horizontally under the direct child of C<MyElement>. | ||
292 | |||
293 | $tpp->set( base_class => 'MyElement' ); | ||
294 | my $xml = '<root><parent><child key="val">text</child></parent></root>'; | ||
295 | my $tree = $tpp->parse( $xml ); | ||
296 | print ref $tree->{root}->{parent}->{child}, "\n"; | ||
297 | |||
298 | A hash for <child> element above is blessed to C<MyElement::child> class. | ||
299 | |||
300 | =head1 OPTIONS FOR WRITING XML | ||
301 | |||
302 | =head2 first_out | ||
303 | |||
304 | This option allows you to specify a list of element/attribute | ||
305 | names which should always appears at first on output XML code. | ||
306 | |||
307 | $tpp->set( first_out => [ 'link', 'title', '-type' ] ); | ||
308 | |||
309 | The default value is null, it means alphabetical order is used. | ||
310 | |||
311 | =head2 last_out | ||
312 | |||
313 | This option allows you to specify a list of element/attribute | ||
314 | names which should always appears at last on output XML code. | ||
315 | |||
316 | $tpp->set( last_out => [ 'items', 'item', 'entry' ] ); | ||
317 | |||
318 | =head2 indent | ||
319 | |||
320 | This makes the output more human readable by indenting appropriately. | ||
321 | |||
322 | $tpp->set( indent => 2 ); | ||
323 | |||
324 | This doesn't strictly follow the XML Document Spec but does looks nice. | ||
325 | |||
326 | =head2 xml_decl | ||
327 | |||
328 | This module generates an XML declaration on writing an XML code per default. | ||
329 | This option forces to change or leave it. | ||
330 | |||
331 | $tpp->set( xml_decl => '' ); | ||
332 | |||
333 | =head2 output_encoding | ||
334 | |||
335 | This option allows you to specify a encoding of xml file generated | ||
336 | by write/writefile methods. | ||
337 | |||
338 | $tpp->set( output_encoding => 'UTF-8' ); | ||
339 | |||
340 | On Perl 5.8.0 and later, you can select it from every | ||
341 | encodings supported by Encode.pm. On Perl 5.6.x and before with | ||
342 | Jcode.pm, you can use C<Shift_JIS>, C<EUC-JP>, C<ISO-2022-JP> and | ||
343 | C<UTF-8>. The default value is C<UTF-8> which is recommended encoding. | ||
344 | |||
345 | =head1 OPTIONS FOR BOTH | ||
346 | |||
347 | =head2 utf8_flag | ||
348 | |||
349 | This makes utf8 flag on for every element's value parsed | ||
350 | and makes it on for an XML code generated as well. | ||
351 | |||
352 | $tpp->set( utf8_flag => 1 ); | ||
353 | |||
354 | Perl 5.8.1 or later is required to use this. | ||
355 | |||
356 | =head2 attr_prefix | ||
357 | |||
358 | This option allows you to specify a prefix character(s) which | ||
359 | is inserted before each attribute names. | ||
360 | |||
361 | $tpp->set( attr_prefix => '@' ); | ||
362 | |||
363 | The default character is C<'-'>. | ||
364 | Or set C<'@'> to access attribute values like E4X, ECMAScript for XML. | ||
365 | Zero-length prefix C<''> is available as well, it means no prefix is added. | ||
366 | |||
367 | =head2 text_node_key | ||
368 | |||
369 | This option allows you to specify a hash key for text nodes. | ||
370 | |||
371 | $tpp->set( text_node_key => '#text' ); | ||
372 | |||
373 | The default key is C<#text>. | ||
374 | |||
375 | =head2 ignore_error | ||
376 | |||
377 | This module calls Carp::croak function on an error per default. | ||
378 | This option makes all errors ignored and just return. | ||
379 | |||
380 | $tpp->set( ignore_error => 1 ); | ||
381 | |||
382 | =head2 use_ixhash | ||
383 | |||
384 | This option keeps the order for each element appeared in XML. | ||
385 | L<Tie::IxHash> module is required. | ||
386 | |||
387 | $tpp->set( use_ixhash => 1 ); | ||
388 | |||
389 | This makes parsing performance slow. | ||
390 | (about 100% slower than default) | ||
391 | |||
392 | =head1 AUTHOR | ||
393 | |||
394 | Yusuke Kawasaki, http://www.kawa.net/ | ||
395 | |||
396 | =head1 COPYRIGHT AND LICENSE | ||
397 | |||
398 | Copyright (c) 2006-2007 Yusuke Kawasaki. All rights reserved. | ||
399 | This program is free software; you can redistribute it and/or | ||
400 | modify it under the same terms as Perl itself. | ||
401 | |||
402 | =cut | ||
403 | |||
404 | package XML::TreePP; | ||
405 | use strict; | ||
406 | use Carp; | ||
407 | use Symbol; | ||
408 | |||
409 | use vars qw( $VERSION ); | ||
410 | $VERSION = '0.32'; | ||
411 | |||
412 | my $XML_ENCODING = 'UTF-8'; | ||
413 | my $INTERNAL_ENCODING = 'UTF-8'; | ||
414 | my $USER_AGENT = 'XML-TreePP/'.$VERSION.' '; | ||
415 | my $ATTR_PREFIX = '-'; | ||
416 | my $TEXT_NODE_KEY = '#text'; | ||
417 | |||
418 | sub new { | ||
419 | my $package = shift; | ||
420 | my $self = {@_}; | ||
421 | bless $self, $package; | ||
422 | $self; | ||
423 | } | ||
424 | |||
425 | sub die { | ||
426 | my $self = shift; | ||
427 | my $mess = shift; | ||
428 | return if $self->{ignore_error}; | ||
429 | Carp::croak $mess; | ||
430 | } | ||
431 | |||
432 | sub warn { | ||
433 | my $self = shift; | ||
434 | my $mess = shift; | ||
435 | return if $self->{ignore_error}; | ||
436 | Carp::carp $mess; | ||
437 | } | ||
438 | |||
439 | sub set { | ||
440 | my $self = shift; | ||
441 | my $key = shift; | ||
442 | my $val = shift; | ||
443 | if ( defined $val ) { | ||
444 | $self->{$key} = $val; | ||
445 | } | ||
446 | else { | ||
447 | delete $self->{$key}; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | sub get { | ||
452 | my $self = shift; | ||
453 | my $key = shift; | ||
454 | $self->{$key} if exists $self->{$key}; | ||
455 | } | ||
456 | |||
457 | sub writefile { | ||
458 | my $self = shift; | ||
459 | my $file = shift; | ||
460 | my $tree = shift or return $self->die( 'Invalid tree' ); | ||
461 | my $encode = shift; | ||
462 | return $self->die( 'Invalid filename' ) unless defined $file; | ||
463 | my $text = $self->write( $tree, $encode ); | ||
464 | if ( $] >= 5.008001 && utf8::is_utf8( $text ) ) { | ||
465 | utf8::encode( $text ); | ||
466 | } | ||
467 | $self->write_raw_xml( $file, $text ); | ||
468 | } | ||
469 | |||
470 | sub write { | ||
471 | my $self = shift; | ||
472 | my $tree = shift or return $self->die( 'Invalid tree' ); | ||
473 | my $from = $self->{internal_encoding} || $INTERNAL_ENCODING; | ||
474 | my $to = shift || $self->{output_encoding} || $XML_ENCODING; | ||
475 | my $decl = $self->{xml_decl}; | ||
476 | $decl = '<?xml version="1.0" encoding="' . $to . '" ?>' unless defined $decl; | ||
477 | |||
478 | local $self->{__first_out}; | ||
479 | if ( exists $self->{first_out} ) { | ||
480 | my $keys = $self->{first_out}; | ||
481 | $keys = [$keys] unless ref $keys; | ||
482 | $self->{__first_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; | ||
483 | } | ||
484 | |||
485 | local $self->{__last_out}; | ||
486 | if ( exists $self->{last_out} ) { | ||
487 | my $keys = $self->{last_out}; | ||
488 | $keys = [$keys] unless ref $keys; | ||
489 | $self->{__last_out} = { map { $keys->[$_] => $_ } 0 .. $#$keys }; | ||
490 | } | ||
491 | |||
492 | my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; | ||
493 | $tnk = $TEXT_NODE_KEY unless defined $tnk; | ||
494 | local $self->{text_node_key} = $tnk; | ||
495 | |||
496 | my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; | ||
497 | $apre = $ATTR_PREFIX unless defined $apre; | ||
498 | local $self->{__attr_prefix_len} = length($apre); | ||
499 | local $self->{__attr_prefix_rex} = defined $apre ? qr/^\Q$apre\E/s : undef; | ||
500 | |||
501 | local $self->{__indent}; | ||
502 | if ( exists $self->{indent} && $self->{indent} ) { | ||
503 | $self->{__indent} = ' ' x $self->{indent}; | ||
504 | } | ||
505 | |||
506 | my $text = $self->hash_to_xml( undef, $tree ); | ||
507 | if ( $from && $to ) { | ||
508 | my $stat = $self->encode_from_to( \$text, $from, $to ); | ||
509 | return $self->die( "Unsupported encoding: $to" ) unless $stat; | ||
510 | } | ||
511 | |||
512 | return $text if ( $decl eq '' ); | ||
513 | join( "\n", $decl, $text ); | ||
514 | } | ||
515 | |||
516 | sub parsehttp { | ||
517 | my $self = shift; | ||
518 | |||
519 | local $self->{__user_agent}; | ||
520 | if ( exists $self->{user_agent} ) { | ||
521 | my $agent = $self->{user_agent}; | ||
522 | $agent .= $USER_AGENT if ( $agent =~ /\s$/s ); | ||
523 | $self->{__user_agent} = $agent if ( $agent ne '' ); | ||
524 | } else { | ||
525 | $self->{__user_agent} = $USER_AGENT; | ||
526 | } | ||
527 | |||
528 | my $http = $self->{__http_module}; | ||
529 | unless ( $http ) { | ||
530 | $http = $self->find_http_module(@_); | ||
531 | $self->{__http_module} = $http; | ||
532 | } | ||
533 | if ( $http eq 'LWP::UserAgent' ) { | ||
534 | return $self->parsehttp_lwp(@_); | ||
535 | } | ||
536 | elsif ( $http eq 'HTTP::Lite' ) { | ||
537 | return $self->parsehttp_lite(@_); | ||
538 | } | ||
539 | else { | ||
540 | return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); | ||
541 | } | ||
542 | } | ||
543 | |||
544 | sub find_http_module { | ||
545 | my $self = shift || {}; | ||
546 | |||
547 | if ( exists $self->{lwp_useragent} && ref $self->{lwp_useragent} ) { | ||
548 | return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; | ||
549 | return 'LWP::UserAgent' if &load_lwp_useragent(); | ||
550 | return $self->die( "LWP::UserAgent is required: $_[1]" ); | ||
551 | } | ||
552 | |||
553 | if ( exists $self->{http_lite} && ref $self->{http_lite} ) { | ||
554 | return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; | ||
555 | return 'HTTP::Lite' if &load_http_lite(); | ||
556 | return $self->die( "HTTP::Lite is required: $_[1]" ); | ||
557 | } | ||
558 | |||
559 | return 'LWP::UserAgent' if defined $LWP::UserAgent::VERSION; | ||
560 | return 'HTTP::Lite' if defined $HTTP::Lite::VERSION; | ||
561 | return 'LWP::UserAgent' if &load_lwp_useragent(); | ||
562 | return 'HTTP::Lite' if &load_http_lite(); | ||
563 | return $self->die( "LWP::UserAgent or HTTP::Lite is required: $_[1]" ); | ||
564 | } | ||
565 | |||
566 | sub load_lwp_useragent { | ||
567 | return $LWP::UserAgent::VERSION if defined $LWP::UserAgent::VERSION; | ||
568 | local $@; | ||
569 | eval { require LWP::UserAgent; }; | ||
570 | $LWP::UserAgent::VERSION; | ||
571 | } | ||
572 | |||
573 | sub load_http_lite { | ||
574 | return $HTTP::Lite::VERSION if defined $HTTP::Lite::VERSION; | ||
575 | local $@; | ||
576 | eval { require HTTP::Lite; }; | ||
577 | $HTTP::Lite::VERSION; | ||
578 | } | ||
579 | |||
580 | sub load_tie_ixhash { | ||
581 | return $Tie::IxHash::VERSION if defined $Tie::IxHash::VERSION; | ||
582 | local $@; | ||
583 | eval { require Tie::IxHash; }; | ||
584 | $Tie::IxHash::VERSION; | ||
585 | } | ||
586 | |||
587 | sub parsehttp_lwp { | ||
588 | my $self = shift; | ||
589 | my $method = shift or return $self->die( 'Invalid HTTP method' ); | ||
590 | my $url = shift or return $self->die( 'Invalid URL' ); | ||
591 | my $body = shift; | ||
592 | my $header = shift; | ||
593 | |||
594 | my $ua = $self->{lwp_useragent} if exists $self->{lwp_useragent}; | ||
595 | if ( ! ref $ua ) { | ||
596 | $ua = LWP::UserAgent->new(); | ||
597 | $ua->timeout(10); | ||
598 | $ua->env_proxy(); | ||
599 | $ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent}; | ||
600 | } else { | ||
601 | $ua->agent( $self->{__user_agent} ) if exists $self->{user_agent}; | ||
602 | } | ||
603 | |||
604 | my $req = HTTP::Request->new( $method, $url ); | ||
605 | my $ct = 0; | ||
606 | if ( ref $header ) { | ||
607 | foreach my $field ( sort keys %$header ) { | ||
608 | my $value = $header->{$field}; | ||
609 | $req->header( $field => $value ); | ||
610 | $ct ++ if ( $field =~ /^Content-Type$/i ); | ||
611 | } | ||
612 | } | ||
613 | if ( defined $body && ! $ct ) { | ||
614 | $req->header( 'Content-Type' => 'application/x-www-form-urlencoded' ); | ||
615 | } | ||
616 | $req->content($body) if defined $body; | ||
617 | my $res = $ua->request($req); | ||
618 | my $code = $res->code(); | ||
619 | my $text = $res->content(); | ||
620 | my $tree = $self->parse( \$text ) if $res->is_success(); | ||
621 | wantarray ? ( $tree, $text, $code ) : $tree; | ||
622 | } | ||
623 | |||
624 | sub parsehttp_lite { | ||
625 | my $self = shift; | ||
626 | my $method = shift or return $self->die( 'Invalid HTTP method' ); | ||
627 | my $url = shift or return $self->die( 'Invalid URL' ); | ||
628 | my $body = shift; | ||
629 | my $header = shift; | ||
630 | |||
631 | my $http = HTTP::Lite->new(); | ||
632 | $http->method($method); | ||
633 | my $ua = 0; | ||
634 | if ( ref $header ) { | ||
635 | foreach my $field ( sort keys %$header ) { | ||
636 | my $value = $header->{$field}; | ||
637 | $http->add_req_header( $field, $value ); | ||
638 | $ua ++ if ( $field =~ /^User-Agent$/i ); | ||
639 | } | ||
640 | } | ||
641 | if ( defined $self->{__user_agent} && ! $ua ) { | ||
642 | $http->add_req_header( 'User-Agent', $self->{__user_agent} ); | ||
643 | } | ||
644 | $http->{content} = $body if defined $body; | ||
645 | my $code = $http->request($url) or return; | ||
646 | my $text = $http->body(); | ||
647 | my $tree = $self->parse( \$text ); | ||
648 | wantarray ? ( $tree, $text, $code ) : $tree; | ||
649 | } | ||
650 | |||
651 | sub parsefile { | ||
652 | my $self = shift; | ||
653 | my $file = shift; | ||
654 | return $self->die( 'Invalid filename' ) unless defined $file; | ||
655 | my $text = $self->read_raw_xml($file); | ||
656 | $self->parse( \$text ); | ||
657 | } | ||
658 | |||
659 | sub parse { | ||
660 | my $self = shift; | ||
661 | my $text = ref $_[0] ? ${$_[0]} : $_[0]; | ||
662 | return $self->die( 'Null XML source' ) unless defined $text; | ||
663 | |||
664 | my $from = &xml_decl_encoding(\$text) || $XML_ENCODING; | ||
665 | my $to = $self->{internal_encoding} || $INTERNAL_ENCODING; | ||
666 | if ( $from && $to ) { | ||
667 | my $stat = $self->encode_from_to( \$text, $from, $to ); | ||
668 | return $self->die( "Unsupported encoding: $from" ) unless $stat; | ||
669 | } | ||
670 | |||
671 | local $self->{__force_array}; | ||
672 | local $self->{__force_array_all}; | ||
673 | if ( exists $self->{force_array} ) { | ||
674 | my $force = $self->{force_array}; | ||
675 | $force = [$force] unless ref $force; | ||
676 | $self->{__force_array} = { map { $_ => 1 } @$force }; | ||
677 | $self->{__force_array_all} = $self->{__force_array}->{'*'}; | ||
678 | } | ||
679 | |||
680 | local $self->{__force_hash}; | ||
681 | local $self->{__force_hash_all}; | ||
682 | if ( exists $self->{force_hash} ) { | ||
683 | my $force = $self->{force_hash}; | ||
684 | $force = [$force] unless ref $force; | ||
685 | $self->{__force_hash} = { map { $_ => 1 } @$force }; | ||
686 | $self->{__force_hash_all} = $self->{__force_hash}->{'*'}; | ||
687 | } | ||
688 | |||
689 | my $tnk = $self->{text_node_key} if exists $self->{text_node_key}; | ||
690 | $tnk = $TEXT_NODE_KEY unless defined $tnk; | ||
691 | local $self->{text_node_key} = $tnk; | ||
692 | |||
693 | my $apre = $self->{attr_prefix} if exists $self->{attr_prefix}; | ||
694 | $apre = $ATTR_PREFIX unless defined $apre; | ||
695 | local $self->{attr_prefix} = $apre; | ||
696 | |||
697 | if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
698 | return $self->die( "Tie::IxHash is required." ) unless &load_tie_ixhash(); | ||
699 | } | ||
700 | |||
701 | my $flat = $self->xml_to_flat(\$text); | ||
702 | my $class = $self->{base_class} if exists $self->{base_class}; | ||
703 | my $tree = $self->flat_to_tree( $flat, '', $class ); | ||
704 | if ( ref $tree ) { | ||
705 | if ( defined $class ) { | ||
706 | bless( $tree, $class ); | ||
707 | } | ||
708 | elsif ( exists $self->{elem_class} && $self->{elem_class} ) { | ||
709 | bless( $tree, $self->{elem_class} ); | ||
710 | } | ||
711 | } | ||
712 | wantarray ? ( $tree, $text ) : $tree; | ||
713 | } | ||
714 | |||
715 | sub xml_to_flat { | ||
716 | my $self = shift; | ||
717 | my $textref = shift; # reference | ||
718 | my $flat = []; | ||
719 | my $prefix = $self->{attr_prefix}; | ||
720 | my $ixhash = ( exists $self->{use_ixhash} && $self->{use_ixhash} ); | ||
721 | |||
722 | while ( $$textref =~ m{ | ||
723 | ([^<]*) < | ||
724 | (( | ||
725 | \? ([^<>]*) \? | ||
726 | )|( | ||
727 | \!\[CDATA\[(.*?)\]\] | ||
728 | )|( | ||
729 | \!DOCTYPE\s+([^\[\]<>]*(?:\[.*?\]\s*)?) | ||
730 | )|( | ||
731 | \!--(.*?)-- | ||
732 | )|( | ||
733 | ([^\!\?\s<>](?:"[^"]*"|'[^']*'|[^"'<>])*) | ||
734 | )) | ||
735 | > ([^<]*) | ||
736 | }sxg ) { | ||
737 | my ( | ||
738 | $ahead, $match, $typePI, $contPI, $typeCDATA, | ||
739 | $contCDATA, $typeDocT, $contDocT, $typeCmnt, $contCmnt, | ||
740 | $typeElem, $contElem, $follow | ||
741 | ) | ||
742 | = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ); | ||
743 | if ( defined $ahead && $ahead =~ /\S/ ) { | ||
744 | $self->warn( "Invalid string: [$ahead] before <$match>" ); | ||
745 | } | ||
746 | |||
747 | if ($typeElem) { # Element | ||
748 | my $node = {}; | ||
749 | if ( $contElem =~ s#^/## ) { | ||
750 | $node->{endTag}++; | ||
751 | } | ||
752 | elsif ( $contElem =~ s#/$## ) { | ||
753 | $node->{emptyTag}++; | ||
754 | } | ||
755 | else { | ||
756 | $node->{startTag}++; | ||
757 | } | ||
758 | $node->{tagName} = $1 if ( $contElem =~ s#^(\S+)\s*## ); | ||
759 | unless ( $node->{endTag} ) { | ||
760 | my $attr; | ||
761 | while ( $contElem =~ m{ | ||
762 | ([^\s\=\"\']+)=(?:(")(.*?)"|'(.*?)') | ||
763 | }sxg ) { | ||
764 | my $key = $1; | ||
765 | my $val = &xml_unescape( $2 ? $3 : $4 ); | ||
766 | if ( ! ref $attr ) { | ||
767 | $attr = {}; | ||
768 | tie( %$attr, 'Tie::IxHash' ) if $ixhash; | ||
769 | } | ||
770 | $attr->{$prefix.$key} = $val; | ||
771 | } | ||
772 | $node->{attributes} = $attr if ref $attr; | ||
773 | } | ||
774 | push( @$flat, $node ); | ||
775 | } | ||
776 | elsif ($typeCDATA) { ## CDATASection | ||
777 | if ( exists $self->{cdata_scalar_ref} && $self->{cdata_scalar_ref} ) { | ||
778 | push( @$flat, \$contCDATA ); # as reference for scalar | ||
779 | } | ||
780 | else { | ||
781 | push( @$flat, $contCDATA ); # as scalar like text node | ||
782 | } | ||
783 | } | ||
784 | elsif ($typeCmnt) { # Comment (ignore) | ||
785 | } | ||
786 | elsif ($typeDocT) { # DocumentType (ignore) | ||
787 | } | ||
788 | elsif ($typePI) { # ProcessingInstruction (ignore) | ||
789 | } | ||
790 | else { | ||
791 | $self->warn( "Invalid Tag: <$match>" ); | ||
792 | } | ||
793 | if ( $follow =~ /\S/ ) { # text node | ||
794 | my $val = &xml_unescape($follow); | ||
795 | push( @$flat, $val ); | ||
796 | } | ||
797 | } | ||
798 | $flat; | ||
799 | } | ||
800 | |||
801 | sub flat_to_tree { | ||
802 | my $self = shift; | ||
803 | my $source = shift; | ||
804 | my $parent = shift; | ||
805 | my $class = shift; | ||
806 | my $tree = {}; | ||
807 | my $text = []; | ||
808 | |||
809 | if ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
810 | tie( %$tree, 'Tie::IxHash' ); | ||
811 | } | ||
812 | |||
813 | while ( scalar @$source ) { | ||
814 | my $node = shift @$source; | ||
815 | if ( !ref $node || UNIVERSAL::isa( $node, "SCALAR" ) ) { | ||
816 | push( @$text, $node ); # cdata or text node | ||
817 | next; | ||
818 | } | ||
819 | my $name = $node->{tagName}; | ||
820 | if ( $node->{endTag} ) { | ||
821 | last if ( $parent eq $name ); | ||
822 | return $self->die( "Invalid tag sequence: <$parent></$name>" ); | ||
823 | } | ||
824 | my $elem = $node->{attributes}; | ||
825 | my $forcehash = $self->{__force_hash_all} || $self->{__force_hash}->{$name}; | ||
826 | my $subclass; | ||
827 | if ( defined $class ) { | ||
828 | my $escname = $name; | ||
829 | $escname =~ s/\W/_/sg; | ||
830 | $subclass = $class.'::'.$escname; | ||
831 | } | ||
832 | if ( $node->{startTag} ) { # recursive call | ||
833 | my $child = $self->flat_to_tree( $source, $name, $subclass ); | ||
834 | next unless defined $child; | ||
835 | my $hasattr = scalar keys %$elem if ref $elem; | ||
836 | if ( UNIVERSAL::isa( $child, "HASH" ) ) { | ||
837 | if ( $hasattr ) { | ||
838 | # some attributes and some child nodes | ||
839 | %$elem = ( %$elem, %$child ); | ||
840 | } | ||
841 | else { | ||
842 | # some child nodes without attributes | ||
843 | $elem = $child; | ||
844 | } | ||
845 | } | ||
846 | else { | ||
847 | if ( $hasattr ) { | ||
848 | # some attributes and text node | ||
849 | $elem->{$self->{text_node_key}} = $child; | ||
850 | } | ||
851 | elsif ( $forcehash ) { | ||
852 | # only text node without attributes | ||
853 | $elem = { $self->{text_node_key} => $child }; | ||
854 | } | ||
855 | else { | ||
856 | # text node without attributes | ||
857 | $elem = $child; | ||
858 | } | ||
859 | } | ||
860 | } | ||
861 | elsif ( $forcehash && ! ref $elem ) { | ||
862 | $elem = {}; | ||
863 | } | ||
864 | # bless to a class by base_class or elem_class | ||
865 | if ( ref $elem && UNIVERSAL::isa( $elem, "HASH" ) ) { | ||
866 | if ( defined $subclass ) { | ||
867 | bless( $elem, $subclass ); | ||
868 | } elsif ( exists $self->{elem_class} && $self->{elem_class} ) { | ||
869 | my $escname = $name; | ||
870 | $escname =~ s/\W/_/sg; | ||
871 | my $elmclass = $self->{elem_class}.'::'.$escname; | ||
872 | bless( $elem, $elmclass ); | ||
873 | } | ||
874 | } | ||
875 | # next unless defined $elem; | ||
876 | $tree->{$name} ||= []; | ||
877 | push( @{ $tree->{$name} }, $elem ); | ||
878 | } | ||
879 | if ( ! $self->{__force_array_all} ) { | ||
880 | foreach my $key ( keys %$tree ) { | ||
881 | next if $self->{__force_array}->{$key}; | ||
882 | next if ( 1 < scalar @{ $tree->{$key} } ); | ||
883 | $tree->{$key} = shift @{ $tree->{$key} }; | ||
884 | } | ||
885 | } | ||
886 | my $haschild = scalar keys %$tree; | ||
887 | if ( scalar @$text ) { | ||
888 | if ( scalar @$text == 1 ) { | ||
889 | # one text node (normal) | ||
890 | $text = shift @$text; | ||
891 | } | ||
892 | elsif ( ! scalar grep {ref $_} @$text ) { | ||
893 | # some text node splitted | ||
894 | $text = join( '', @$text ); | ||
895 | } | ||
896 | else { | ||
897 | # some cdata node | ||
898 | my $join = join( '', map {ref $_ ? $$_ : $_} @$text ); | ||
899 | $text = \$join; | ||
900 | } | ||
901 | if ( $haschild ) { | ||
902 | # some child nodes and also text node | ||
903 | $tree->{$self->{text_node_key}} = $text; | ||
904 | } | ||
905 | else { | ||
906 | # only text node without child nodes | ||
907 | $tree = $text; | ||
908 | } | ||
909 | } | ||
910 | elsif ( ! $haschild ) { | ||
911 | # no child and no text | ||
912 | $tree = ""; | ||
913 | } | ||
914 | $tree; | ||
915 | } | ||
916 | |||
917 | sub hash_to_xml { | ||
918 | my $self = shift; | ||
919 | my $name = shift; | ||
920 | my $hash = shift; | ||
921 | my $out = []; | ||
922 | my $attr = []; | ||
923 | my $allkeys = [ keys %$hash ]; | ||
924 | my $fo = $self->{__first_out} if ref $self->{__first_out}; | ||
925 | my $lo = $self->{__last_out} if ref $self->{__last_out}; | ||
926 | my $firstkeys = [ sort { $fo->{$a} <=> $fo->{$b} } grep { exists $fo->{$_} } @$allkeys ] if ref $fo; | ||
927 | my $lastkeys = [ sort { $lo->{$a} <=> $lo->{$b} } grep { exists $lo->{$_} } @$allkeys ] if ref $lo; | ||
928 | $allkeys = [ grep { ! exists $fo->{$_} } @$allkeys ] if ref $fo; | ||
929 | $allkeys = [ grep { ! exists $lo->{$_} } @$allkeys ] if ref $lo; | ||
930 | unless ( exists $self->{use_ixhash} && $self->{use_ixhash} ) { | ||
931 | $allkeys = [ sort @$allkeys ]; | ||
932 | } | ||
933 | my $prelen = $self->{__attr_prefix_len}; | ||
934 | my $pregex = $self->{__attr_prefix_rex}; | ||
935 | |||
936 | foreach my $keys ( $firstkeys, $allkeys, $lastkeys ) { | ||
937 | next unless ref $keys; | ||
938 | my $elemkey = $prelen ? [ grep { $_ !~ $pregex } @$keys ] : $keys; | ||
939 | my $attrkey = $prelen ? [ grep { $_ =~ $pregex } @$keys ] : []; | ||
940 | |||
941 | foreach my $key ( @$elemkey ) { | ||
942 | my $val = $hash->{$key}; | ||
943 | if ( !defined $val ) { | ||
944 | push( @$out, "<$key />" ); | ||
945 | } | ||
946 | elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { | ||
947 | my $child = $self->array_to_xml( $key, $val ); | ||
948 | push( @$out, $child ); | ||
949 | } | ||
950 | elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { | ||
951 | my $child = $self->scalaref_to_cdata( $key, $val ); | ||
952 | push( @$out, $child ); | ||
953 | } | ||
954 | elsif ( ref $val ) { | ||
955 | my $child = $self->hash_to_xml( $key, $val ); | ||
956 | push( @$out, $child ); | ||
957 | } | ||
958 | else { | ||
959 | my $child = $self->scalar_to_xml( $key, $val ); | ||
960 | push( @$out, $child ); | ||
961 | } | ||
962 | } | ||
963 | |||
964 | foreach my $key ( @$attrkey ) { | ||
965 | my $name = substr( $key, $prelen ); | ||
966 | my $val = &xml_escape( $hash->{$key} ); | ||
967 | push( @$attr, ' ' . $name . '="' . $val . '"' ); | ||
968 | } | ||
969 | } | ||
970 | my $jattr = join( '', @$attr ); | ||
971 | |||
972 | if ( defined $name && scalar @$out && ! grep { ! /^</s } @$out ) { | ||
973 | # Use human-friendly white spacing | ||
974 | if ( defined $self->{__indent} ) { | ||
975 | s/^(\s*<)/$self->{__indent}$1/mg foreach @$out; | ||
976 | } | ||
977 | unshift( @$out, "\n" ); | ||
978 | } | ||
979 | |||
980 | my $text = join( '', @$out ); | ||
981 | if ( defined $name ) { | ||
982 | if ( scalar @$out ) { | ||
983 | $text = "<$name$jattr>$text</$name>\n"; | ||
984 | } | ||
985 | else { | ||
986 | $text = "<$name$jattr />\n"; | ||
987 | } | ||
988 | } | ||
989 | $text; | ||
990 | } | ||
991 | |||
992 | sub array_to_xml { | ||
993 | my $self = shift; | ||
994 | my $name = shift; | ||
995 | my $array = shift; | ||
996 | my $out = []; | ||
997 | foreach my $val (@$array) { | ||
998 | if ( !defined $val ) { | ||
999 | push( @$out, "<$name />\n" ); | ||
1000 | } | ||
1001 | elsif ( UNIVERSAL::isa( $val, 'ARRAY' ) ) { | ||
1002 | my $child = $self->array_to_xml( $name, $val ); | ||
1003 | push( @$out, $child ); | ||
1004 | } | ||
1005 | elsif ( UNIVERSAL::isa( $val, 'SCALAR' ) ) { | ||
1006 | my $child = $self->scalaref_to_cdata( $name, $val ); | ||
1007 | push( @$out, $child ); | ||
1008 | } | ||
1009 | elsif ( ref $val ) { | ||
1010 | my $child = $self->hash_to_xml( $name, $val ); | ||
1011 | push( @$out, $child ); | ||
1012 | } | ||
1013 | else { | ||
1014 | my $child = $self->scalar_to_xml( $name, $val ); | ||
1015 | push( @$out, $child ); | ||
1016 | } | ||
1017 | } | ||
1018 | |||
1019 | my $text = join( '', @$out ); | ||
1020 | $text; | ||
1021 | } | ||
1022 | |||
1023 | sub scalaref_to_cdata { | ||
1024 | my $self = shift; | ||
1025 | my $name = shift; | ||
1026 | my $ref = shift; | ||
1027 | my $data = defined $$ref ? $$ref : ''; | ||
1028 | $data =~ s#(]])(>)#$1]]><![CDATA[$2#g; | ||
1029 | #my $text = '<![CDATA[' . $data . ']]>'; | ||
1030 | my $text = $data; | ||
1031 | $text = "<$name>$text</$name>\n" if ( $name ne $self->{text_node_key} ); | ||
1032 | $text; | ||
1033 | } | ||
1034 | |||
1035 | sub scalar_to_xml { | ||
1036 | my $self = shift; | ||
1037 | my $name = shift; | ||
1038 | my $scalar = shift; | ||
1039 | my $copy = $scalar; | ||
1040 | my $text = &xml_escape($copy); | ||
1041 | $text = "<$name>$text</$name>\n" if ( $name ne $self->{text_node_key} ); | ||
1042 | $text; | ||
1043 | } | ||
1044 | |||
1045 | sub write_raw_xml { | ||
1046 | my $self = shift; | ||
1047 | my $file = shift; | ||
1048 | my $fh = Symbol::gensym(); | ||
1049 | open( $fh, ">$file" ) or return $self->die( "$! - $file" ); | ||
1050 | print $fh @_; | ||
1051 | close($fh); | ||
1052 | } | ||
1053 | |||
1054 | sub read_raw_xml { | ||
1055 | my $self = shift; | ||
1056 | my $file = shift; | ||
1057 | my $fh = Symbol::gensym(); | ||
1058 | open( $fh, $file ) or return $self->die( "$! - $file" ); | ||
1059 | local $/ = undef; | ||
1060 | my $text = <$fh>; | ||
1061 | close($fh); | ||
1062 | $text; | ||
1063 | } | ||
1064 | |||
1065 | sub xml_decl_encoding { | ||
1066 | my $textref = shift; | ||
1067 | return unless defined $$textref; | ||
1068 | my $args = ( $$textref =~ /^\s*<\?xml(\s+\S.*)\?>/s )[0] or return; | ||
1069 | my $getcode = ( $args =~ /\s+encoding=(".*?"|'.*?')/ )[0] or return; | ||
1070 | $getcode =~ s/^['"]//; | ||
1071 | $getcode =~ s/['"]$//; | ||
1072 | $getcode; | ||
1073 | } | ||
1074 | |||
1075 | sub encode_from_to { | ||
1076 | my $self = shift; | ||
1077 | my $txtref = shift or return; | ||
1078 | my $from = shift or return; | ||
1079 | my $to = shift or return; | ||
1080 | |||
1081 | unless ( defined $Encode::EUCJPMS::VERSION ) { | ||
1082 | $from = 'EUC-JP' if ( $from =~ /\beuc-?jp-?(win|ms)$/i ); | ||
1083 | $to = 'EUC-JP' if ( $to =~ /\beuc-?jp-?(win|ms)$/i ); | ||
1084 | } | ||
1085 | |||
1086 | my $setflag = $self->{utf8_flag} if exists $self->{utf8_flag}; | ||
1087 | if ( $] < 5.008001 && $setflag ) { | ||
1088 | return $self->die( "Perl 5.8.1 is required for utf8_flag: $]" ); | ||
1089 | } | ||
1090 | |||
1091 | if ( $] >= 5.008 ) { | ||
1092 | &load_encode(); | ||
1093 | my $check = ( $Encode::VERSION < 2.13 ) ? 0x400 : Encode::FB_XMLCREF(); | ||
1094 | if ( $] >= 5.008001 && utf8::is_utf8( $$txtref ) ) { | ||
1095 | if ( $to =~ /^utf-?8$/i ) { | ||
1096 | # skip | ||
1097 | } else { | ||
1098 | $$txtref = Encode::encode( $to, $$txtref, $check ); | ||
1099 | } | ||
1100 | } else { | ||
1101 | $$txtref = Encode::decode( $from, $$txtref ); | ||
1102 | if ( $to =~ /^utf-?8$/i && $setflag ) { | ||
1103 | # skip | ||
1104 | } else { | ||
1105 | $$txtref = Encode::encode( $to, $$txtref, $check ); | ||
1106 | } | ||
1107 | } | ||
1108 | } | ||
1109 | elsif ( ( uc($from) eq 'ISO-8859-1' | ||
1110 | || uc($from) eq 'US-ASCII' | ||
1111 | || uc($from) eq 'LATIN-1' ) && uc($to) eq 'UTF-8' ) { | ||
1112 | &latin1_to_utf8($txtref); | ||
1113 | } | ||
1114 | else { | ||
1115 | my $jfrom = &get_jcode_name($from); | ||
1116 | my $jto = &get_jcode_name($to); | ||
1117 | return $to if ( uc($jfrom) eq uc($jto) ); | ||
1118 | if ( $jfrom && $jto ) { | ||
1119 | &load_jcode(); | ||
1120 | if ( defined $Jcode::VERSION ) { | ||
1121 | Jcode::convert( $txtref, $jto, $jfrom ); | ||
1122 | } | ||
1123 | else { | ||
1124 | return $self->die( "Jcode.pm is required: $from to $to" ); | ||
1125 | } | ||
1126 | } | ||
1127 | else { | ||
1128 | return $self->die( "Encode.pm is required: $from to $to" ); | ||
1129 | } | ||
1130 | } | ||
1131 | $to; | ||
1132 | } | ||
1133 | |||
1134 | sub load_jcode { | ||
1135 | return if defined $Jcode::VERSION; | ||
1136 | local $@; | ||
1137 | eval { require Jcode; }; | ||
1138 | } | ||
1139 | |||
1140 | sub load_encode { | ||
1141 | return if defined $Encode::VERSION; | ||
1142 | local $@; | ||
1143 | eval { require Encode; }; | ||
1144 | } | ||
1145 | |||
1146 | sub latin1_to_utf8 { | ||
1147 | my $strref = shift; | ||
1148 | $$strref =~ s{ | ||
1149 | ([\x80-\xFF]) | ||
1150 | }{ | ||
1151 | pack( 'C2' => 0xC0|(ord($1)>>6),0x80|(ord($1)&0x3F) ) | ||
1152 | }exg; | ||
1153 | } | ||
1154 | |||
1155 | sub get_jcode_name { | ||
1156 | my $src = shift; | ||
1157 | my $dst; | ||
1158 | if ( $src =~ /^utf-?8$/i ) { | ||
1159 | $dst = 'utf8'; | ||
1160 | } | ||
1161 | elsif ( $src =~ /^euc.*jp(-?(win|ms))?$/i ) { | ||
1162 | $dst = 'euc'; | ||
1163 | } | ||
1164 | elsif ( $src =~ /^(shift.*jis|cp932|windows-31j)$/i ) { | ||
1165 | $dst = 'sjis'; | ||
1166 | } | ||
1167 | elsif ( $src =~ /^iso-2022-jp/ ) { | ||
1168 | $dst = 'jis'; | ||
1169 | } | ||
1170 | $dst; | ||
1171 | } | ||
1172 | |||
1173 | sub xml_escape { | ||
1174 | my $str = shift; | ||
1175 | return '' unless defined $str; | ||
1176 | # except for TAB(\x09),CR(\x0D),LF(\x0A) | ||
1177 | $str =~ s{ | ||
1178 | ([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]) | ||
1179 | }{ | ||
1180 | sprintf( '&#%d;', ord($1) ); | ||
1181 | }gex; | ||
1182 | $str =~ s/&(?!#(\d+;|x[\dA-Fa-f]+;))/&/g; | ||
1183 | $str =~ s/</</g; | ||
1184 | $str =~ s/>/>/g; | ||
1185 | $str =~ s/'/'/g; | ||
1186 | $str =~ s/"/"/g; | ||
1187 | $str; | ||
1188 | } | ||
1189 | |||
1190 | sub xml_unescape { | ||
1191 | my $str = shift; | ||
1192 | my $map = {qw( quot " lt < gt > apos ' amp & )}; | ||
1193 | $str =~ s{ | ||
1194 | (&(?:\#(\d+)|\#x([0-9a-fA-F]+)|(quot|lt|gt|apos|amp));) | ||
1195 | }{ | ||
1196 | $4 ? $map->{$4} : &char_deref($1,$2,$3); | ||
1197 | }gex; | ||
1198 | $str; | ||
1199 | } | ||
1200 | |||
1201 | sub char_deref { | ||
1202 | my( $str, $dec, $hex ) = @_; | ||
1203 | if ( defined $dec ) { | ||
1204 | return &code_to_utf8( $dec ) if ( $dec < 256 ); | ||
1205 | } | ||
1206 | elsif ( defined $hex ) { | ||
1207 | my $num = hex($hex); | ||
1208 | return &code_to_utf8( $num ) if ( $num < 256 ); | ||
1209 | } | ||
1210 | return $str; | ||
1211 | } | ||
1212 | |||
1213 | sub code_to_utf8 { | ||
1214 | my $code = shift; | ||
1215 | if ( $code < 128 ) { | ||
1216 | return pack( C => $code ); | ||
1217 | } | ||
1218 | elsif ( $code < 256 ) { | ||
1219 | return pack( C2 => 0xC0|($code>>6), 0x80|($code&0x3F)); | ||
1220 | } | ||
1221 | elsif ( $code < 65536 ) { | ||
1222 | return pack( C3 => 0xC0|($code>>12), 0x80|(($code>>6)&0x3F), 0x80|($code&0x3F)); | ||
1223 | } | ||
1224 | return shift if scalar @_; # default value | ||
1225 | sprintf( '&#x%04X;', $code ); | ||
1226 | } | ||
1227 | |||
1228 | 1; | ||