aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/share/perl/lib/XML/Simple.pm
diff options
context:
space:
mode:
Diffstat (limited to 'share/perl/lib/XML/Simple.pm')
-rw-r--r--share/perl/lib/XML/Simple.pm3284
1 files changed, 3284 insertions, 0 deletions
diff --git a/share/perl/lib/XML/Simple.pm b/share/perl/lib/XML/Simple.pm
new file mode 100644
index 0000000..993669b
--- /dev/null
+++ b/share/perl/lib/XML/Simple.pm
@@ -0,0 +1,3284 @@
1# $Id: Simple.pm,v 1.1 2008/01/18 09:10:19 ryu Exp $
2
3package XML::Simple;
4
5=head1 NAME
6
7XML::Simple - Easy API to maintain XML (esp config files)
8
9=head1 SYNOPSIS
10
11 use XML::Simple;
12
13 my $ref = XMLin([<xml file or string>] [, <options>]);
14
15 my $xml = XMLout($hashref [, <options>]);
16
17Or the object oriented way:
18
19 require XML::Simple;
20
21 my $xs = XML::Simple->new(options);
22
23 my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
24
25 my $xml = $xs->XMLout($hashref [, <options>]);
26
27(or see L<"SAX SUPPORT"> for 'the SAX way').
28
29To catch common errors:
30
31 use XML::Simple qw(:strict);
32
33(see L<"STRICT MODE"> for more details).
34
35=cut
36
37# See after __END__ for more POD documentation
38
39
40# Load essentials here, other modules loaded on demand later
41
42use strict;
43use Carp;
44require Exporter;
45
46
47##############################################################################
48# Define some constants
49#
50
51use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
52
53@ISA = qw(Exporter);
54@EXPORT = qw(XMLin XMLout);
55@EXPORT_OK = qw(xml_in xml_out);
56$VERSION = '2.18';
57$PREFERRED_PARSER = undef;
58
59my $StrictMode = 0;
60
61my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr
62 searchpath forcearray cache suppressempty parseropts
63 grouptags nsexpand datahandler varattr variables
64 normalisespace normalizespace valueattr);
65
66my @KnownOptOut = qw(keyattr keeproot contentkey noattr
67 rootname xmldecl outputfile noescape suppressempty
68 grouptags nsexpand handler noindent attrindent nosort
69 valueattr numericescape);
70
71my @DefKeyAttr = qw(name key id);
72my $DefRootName = qq(opt);
73my $DefContentKey = qq(content);
74my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>);
75
76my $xmlns_ns = 'http://www.w3.org/2000/xmlns/';
77my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround
78
79
80##############################################################################
81# Globals for use by caching routines
82#
83
84my %MemShareCache = ();
85my %MemCopyCache = ();
86
87
88##############################################################################
89# Wrapper for Exporter - handles ':strict'
90#
91
92sub import {
93 # Handle the :strict tag
94
95 $StrictMode = 1 if grep(/^:strict$/, @_);
96
97 # Pass everything else to Exporter.pm
98
99 @_ = grep(!/^:strict$/, @_);
100 goto &Exporter::import;
101}
102
103
104##############################################################################
105# Constructor for optional object interface.
106#
107
108sub new {
109 my $class = shift;
110
111 if(@_ % 2) {
112 croak "Default options must be name=>value pairs (odd number supplied)";
113 }
114
115 my %known_opt;
116 @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100;
117
118 my %raw_opt = @_;
119 my %def_opt;
120 while(my($key, $val) = each %raw_opt) {
121 my $lkey = lc($key);
122 $lkey =~ s/_//g;
123 croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
124 $def_opt{$lkey} = $val;
125 }
126 my $self = { def_opt => \%def_opt };
127
128 return(bless($self, $class));
129}
130
131
132##############################################################################
133# Sub: _get_object()
134#
135# Helper routine called from XMLin() and XMLout() to create an object if none
136# was provided. Note, this routine does mess with the caller's @_ array.
137#
138
139sub _get_object {
140 my $self;
141 if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) {
142 $self = shift;
143 }
144 else {
145 $self = XML::Simple->new();
146 }
147
148 return $self;
149}
150
151
152##############################################################################
153# Sub/Method: XMLin()
154#
155# Exported routine for slurping XML into a hashref - see pod for info.
156#
157# May be called as object method or as a plain function.
158#
159# Expects one arg for the source XML, optionally followed by a number of
160# name => value option pairs.
161#
162
163sub XMLin {
164 my $self = &_get_object; # note, @_ is passed implicitly
165
166 my $target = shift;
167
168
169 # Work out whether to parse a string, a file or a filehandle
170
171 if(not defined $target) {
172 return $self->parse_file(undef, @_);
173 }
174
175 elsif($target eq '-') {
176 local($/) = undef;
177 $target = <STDIN>;
178 return $self->parse_string(\$target, @_);
179 }
180
181 elsif(my $type = ref($target)) {
182 if($type eq 'SCALAR') {
183 return $self->parse_string($target, @_);
184 }
185 else {
186 return $self->parse_fh($target, @_);
187 }
188 }
189
190 elsif($target =~ m{<.*?>}s) {
191 return $self->parse_string(\$target, @_);
192 }
193
194 else {
195 return $self->parse_file($target, @_);
196 }
197}
198
199
200##############################################################################
201# Sub/Method: parse_file()
202#
203# Same as XMLin, but only parses from a named file.
204#
205
206sub parse_file {
207 my $self = &_get_object; # note, @_ is passed implicitly
208
209 my $filename = shift;
210
211 $self->handle_options('in', @_);
212
213 $filename = $self->default_config_file if not defined $filename;
214
215 $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
216
217 # Check cache for previous parse
218
219 if($self->{opt}->{cache}) {
220 foreach my $scheme (@{$self->{opt}->{cache}}) {
221 my $method = 'cache_read_' . $scheme;
222 my $opt = $self->$method($filename);
223 return($opt) if($opt);
224 }
225 }
226
227 my $ref = $self->build_simple_tree($filename, undef);
228
229 if($self->{opt}->{cache}) {
230 my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
231 $self->$method($ref, $filename);
232 }
233
234 return $ref;
235}
236
237
238##############################################################################
239# Sub/Method: parse_fh()
240#
241# Same as XMLin, but only parses from a filehandle.
242#
243
244sub parse_fh {
245 my $self = &_get_object; # note, @_ is passed implicitly
246
247 my $fh = shift;
248 croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
249 " as a filehandle" unless ref $fh;
250
251 $self->handle_options('in', @_);
252
253 return $self->build_simple_tree(undef, $fh);
254}
255
256
257##############################################################################
258# Sub/Method: parse_string()
259#
260# Same as XMLin, but only parses from a string or a reference to a string.
261#
262
263sub parse_string {
264 my $self = &_get_object; # note, @_ is passed implicitly
265
266 my $string = shift;
267
268 $self->handle_options('in', @_);
269
270 return $self->build_simple_tree(undef, ref $string ? $string : \$string);
271}
272
273
274##############################################################################
275# Method: default_config_file()
276#
277# Returns the name of the XML file to parse if no filename (or XML string)
278# was provided.
279#
280
281sub default_config_file {
282 my $self = shift;
283
284 require File::Basename;
285
286 my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
287
288 # Add script directory to searchpath
289
290 if($script_dir) {
291 unshift(@{$self->{opt}->{searchpath}}, $script_dir);
292 }
293
294 return $basename . '.xml';
295}
296
297
298##############################################################################
299# Method: build_simple_tree()
300#
301# Builds a 'tree' data structure as provided by XML::Parser and then
302# 'simplifies' it as specified by the various options in effect.
303#
304
305sub build_simple_tree {
306 my $self = shift;
307
308 my $tree = $self->build_tree(@_);
309
310 return $self->{opt}->{keeproot}
311 ? $self->collapse({}, @$tree)
312 : $self->collapse(@{$tree->[1]});
313}
314
315
316##############################################################################
317# Method: build_tree()
318#
319# This routine will be called if there is no suitable pre-parsed tree in a
320# cache. It parses the XML and returns an XML::Parser 'Tree' style data
321# structure (summarised in the comments for the collapse() routine below).
322#
323# XML::Simple requires the services of another module that knows how to parse
324# XML. If XML::SAX is installed, the default SAX parser will be used,
325# otherwise XML::Parser will be used.
326#
327# This routine expects to be passed a filename as argument 1 or a 'string' as
328# argument 2. The 'string' might be a string of XML (passed by reference to
329# save memory) or it might be a reference to an IO::Handle. (This
330# non-intuitive mess results in part from the way XML::Parser works but that's
331# really no excuse).
332#
333
334sub build_tree {
335 my $self = shift;
336 my $filename = shift;
337 my $string = shift;
338
339
340 my $preferred_parser = $PREFERRED_PARSER;
341 unless(defined($preferred_parser)) {
342 $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
343 }
344 if($preferred_parser eq 'XML::Parser') {
345 return($self->build_tree_xml_parser($filename, $string));
346 }
347
348 eval { require XML::SAX; }; # We didn't need it until now
349 if($@) { # No XML::SAX - fall back to XML::Parser
350 if($preferred_parser) { # unless a SAX parser was expressly requested
351 croak "XMLin() could not load XML::SAX";
352 }
353 return($self->build_tree_xml_parser($filename, $string));
354 }
355
356 $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
357
358 my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
359
360 $self->{nocollapse} = 1;
361 my($tree);
362 if($filename) {
363 $tree = $sp->parse_uri($filename);
364 }
365 else {
366 if(ref($string) && ref($string) ne 'SCALAR') {
367 $tree = $sp->parse_file($string);
368 }
369 else {
370 $tree = $sp->parse_string($$string);
371 }
372 }
373
374 return($tree);
375}
376
377
378##############################################################################
379# Method: build_tree_xml_parser()
380#
381# This routine will be called if XML::SAX is not installed, or if XML::Parser
382# was specifically requested. It takes the same arguments as build_tree() and
383# returns the same data structure (XML::Parser 'Tree' style).
384#
385
386sub build_tree_xml_parser {
387 my $self = shift;
388 my $filename = shift;
389 my $string = shift;
390
391
392 eval {
393 local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load()
394 require XML::Parser; # We didn't need it until now
395 };
396 if($@) {
397 croak "XMLin() requires either XML::SAX or XML::Parser";
398 }
399
400 if($self->{opt}->{nsexpand}) {
401 carp "'nsexpand' option requires XML::SAX";
402 }
403
404 my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
405 my($tree);
406 if($filename) {
407 # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl
408 local(*XML_FILE);
409 open(XML_FILE, '<', $filename) || croak qq($filename - $!);
410 $tree = $xp->parse(*XML_FILE);
411 close(XML_FILE);
412 }
413 else {
414 $tree = $xp->parse($$string);
415 }
416
417 return($tree);
418}
419
420
421##############################################################################
422# Method: cache_write_storable()
423#
424# Wrapper routine for invoking Storable::nstore() to cache a parsed data
425# structure.
426#
427
428sub cache_write_storable {
429 my($self, $data, $filename) = @_;
430
431 my $cachefile = $self->storable_filename($filename);
432
433 require Storable; # We didn't need it until now
434
435 if ('VMS' eq $^O) {
436 Storable::nstore($data, $cachefile);
437 }
438 else {
439 # If the following line fails for you, your Storable.pm is old - upgrade
440 Storable::lock_nstore($data, $cachefile);
441 }
442
443}
444
445
446##############################################################################
447# Method: cache_read_storable()
448#
449# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
450# data structure. Only returns cached data if the cache file exists and is
451# newer than the source XML file.
452#
453
454sub cache_read_storable {
455 my($self, $filename) = @_;
456
457 my $cachefile = $self->storable_filename($filename);
458
459 return unless(-r $cachefile);
460 return unless((stat($cachefile))[9] > (stat($filename))[9]);
461
462 require Storable; # We didn't need it until now
463
464 if ('VMS' eq $^O) {
465 return(Storable::retrieve($cachefile));
466 }
467 else {
468 return(Storable::lock_retrieve($cachefile));
469 }
470
471}
472
473
474##############################################################################
475# Method: storable_filename()
476#
477# Translates the supplied source XML filename into a filename for the storable
478# cached data. A '.stor' suffix is added after stripping an optional '.xml'
479# suffix.
480#
481
482sub storable_filename {
483 my($self, $cachefile) = @_;
484
485 $cachefile =~ s{(\.xml)?$}{.stor};
486 return $cachefile;
487}
488
489
490##############################################################################
491# Method: cache_write_memshare()
492#
493# Takes the supplied data structure reference and stores it away in a global
494# hash structure.
495#
496
497sub cache_write_memshare {
498 my($self, $data, $filename) = @_;
499
500 $MemShareCache{$filename} = [time(), $data];
501}
502
503
504##############################################################################
505# Method: cache_read_memshare()
506#
507# Takes a filename and looks in a global hash for a cached parsed version.
508#
509
510sub cache_read_memshare {
511 my($self, $filename) = @_;
512
513 return unless($MemShareCache{$filename});
514 return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
515
516 return($MemShareCache{$filename}->[1]);
517
518}
519
520
521##############################################################################
522# Method: cache_write_memcopy()
523#
524# Takes the supplied data structure and stores a copy of it in a global hash
525# structure.
526#
527
528sub cache_write_memcopy {
529 my($self, $data, $filename) = @_;
530
531 require Storable; # We didn't need it until now
532
533 $MemCopyCache{$filename} = [time(), Storable::dclone($data)];
534}
535
536
537##############################################################################
538# Method: cache_read_memcopy()
539#
540# Takes a filename and looks in a global hash for a cached parsed version.
541# Returns a reference to a copy of that data structure.
542#
543
544sub cache_read_memcopy {
545 my($self, $filename) = @_;
546
547 return unless($MemCopyCache{$filename});
548 return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
549
550 return(Storable::dclone($MemCopyCache{$filename}->[1]));
551
552}
553
554
555##############################################################################
556# Sub/Method: XMLout()
557#
558# Exported routine for 'unslurping' a data structure out to XML.
559#
560# Expects a reference to a data structure and an optional list of option
561# name => value pairs.
562#
563
564sub XMLout {
565 my $self = &_get_object; # note, @_ is passed implicitly
566
567 croak "XMLout() requires at least one argument" unless(@_);
568 my $ref = shift;
569
570 $self->handle_options('out', @_);
571
572
573 # If namespace expansion is set, XML::NamespaceSupport is required
574
575 if($self->{opt}->{nsexpand}) {
576 require XML::NamespaceSupport;
577 $self->{nsup} = XML::NamespaceSupport->new();
578 $self->{ns_prefix} = 'aaa';
579 }
580
581
582 # Wrap top level arrayref in a hash
583
584 if(UNIVERSAL::isa($ref, 'ARRAY')) {
585 $ref = { anon => $ref };
586 }
587
588
589 # Extract rootname from top level hash if keeproot enabled
590
591 if($self->{opt}->{keeproot}) {
592 my(@keys) = keys(%$ref);
593 if(@keys == 1) {
594 $ref = $ref->{$keys[0]};
595 $self->{opt}->{rootname} = $keys[0];
596 }
597 }
598
599 # Ensure there are no top level attributes if we're not adding root elements
600
601 elsif($self->{opt}->{rootname} eq '') {
602 if(UNIVERSAL::isa($ref, 'HASH')) {
603 my $refsave = $ref;
604 $ref = {};
605 foreach (keys(%$refsave)) {
606 if(ref($refsave->{$_})) {
607 $ref->{$_} = $refsave->{$_};
608 }
609 else {
610 $ref->{$_} = [ $refsave->{$_} ];
611 }
612 }
613 }
614 }
615
616
617 # Encode the hashref and write to file if necessary
618
619 $self->{_ancestors} = [];
620 my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
621 delete $self->{_ancestors};
622
623 if($self->{opt}->{xmldecl}) {
624 $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
625 }
626
627 if($self->{opt}->{outputfile}) {
628 if(ref($self->{opt}->{outputfile})) {
629 my $fh = $self->{opt}->{outputfile};
630 if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
631 eval { require IO::Handle; };
632 croak $@ if $@;
633 }
634 return($fh->print($xml));
635 }
636 else {
637 local(*OUT);
638 open(OUT, '>', "$self->{opt}->{outputfile}") ||
639 croak "open($self->{opt}->{outputfile}): $!";
640 binmode(OUT, ':utf8') if($] >= 5.008);
641 print OUT $xml || croak "print: $!";
642 close(OUT);
643 }
644 }
645 elsif($self->{opt}->{handler}) {
646 require XML::SAX;
647 my $sp = XML::SAX::ParserFactory->parser(
648 Handler => $self->{opt}->{handler}
649 );
650 return($sp->parse_string($xml));
651 }
652 else {
653 return($xml);
654 }
655}
656
657
658##############################################################################
659# Method: handle_options()
660#
661# Helper routine for both XMLin() and XMLout(). Both routines handle their
662# first argument and assume all other args are options handled by this routine.
663# Saves a hash of options in $self->{opt}.
664#
665# If default options were passed to the constructor, they will be retrieved
666# here and merged with options supplied to the method call.
667#
668# First argument should be the string 'in' or the string 'out'.
669#
670# Remaining arguments should be name=>value pairs. Sets up default values
671# for options not supplied. Unrecognised options are a fatal error.
672#
673
674sub handle_options {
675 my $self = shift;
676 my $dirn = shift;
677
678
679 # Determine valid options based on context
680
681 my %known_opt;
682 if($dirn eq 'in') {
683 @known_opt{@KnownOptIn} = @KnownOptIn;
684 }
685 else {
686 @known_opt{@KnownOptOut} = @KnownOptOut;
687 }
688
689
690 # Store supplied options in hashref and weed out invalid ones
691
692 if(@_ % 2) {
693 croak "Options must be name=>value pairs (odd number supplied)";
694 }
695 my %raw_opt = @_;
696 my $opt = {};
697 $self->{opt} = $opt;
698
699 while(my($key, $val) = each %raw_opt) {
700 my $lkey = lc($key);
701 $lkey =~ s/_//g;
702 croak "Unrecognised option: $key" unless($known_opt{$lkey});
703 $opt->{$lkey} = $val;
704 }
705
706
707 # Merge in options passed to constructor
708
709 foreach (keys(%known_opt)) {
710 unless(exists($opt->{$_})) {
711 if(exists($self->{def_opt}->{$_})) {
712 $opt->{$_} = $self->{def_opt}->{$_};
713 }
714 }
715 }
716
717
718 # Set sensible defaults if not supplied
719
720 if(exists($opt->{rootname})) {
721 unless(defined($opt->{rootname})) {
722 $opt->{rootname} = '';
723 }
724 }
725 else {
726 $opt->{rootname} = $DefRootName;
727 }
728
729 if($opt->{xmldecl} and $opt->{xmldecl} eq '1') {
730 $opt->{xmldecl} = $DefXmlDecl;
731 }
732
733 if(exists($opt->{contentkey})) {
734 if($opt->{contentkey} =~ m{^-(.*)$}) {
735 $opt->{contentkey} = $1;
736 $opt->{collapseagain} = 1;
737 }
738 }
739 else {
740 $opt->{contentkey} = $DefContentKey;
741 }
742
743 unless(exists($opt->{normalisespace})) {
744 $opt->{normalisespace} = $opt->{normalizespace};
745 }
746 $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
747
748 # Cleanups for values assumed to be arrays later
749
750 if($opt->{searchpath}) {
751 unless(ref($opt->{searchpath})) {
752 $opt->{searchpath} = [ $opt->{searchpath} ];
753 }
754 }
755 else {
756 $opt->{searchpath} = [ ];
757 }
758
759 if($opt->{cache} and !ref($opt->{cache})) {
760 $opt->{cache} = [ $opt->{cache} ];
761 }
762 if($opt->{cache}) {
763 $_ = lc($_) foreach (@{$opt->{cache}});
764 foreach my $scheme (@{$opt->{cache}}) {
765 my $method = 'cache_read_' . $scheme;
766 croak "Unsupported caching scheme: $scheme"
767 unless($self->can($method));
768 }
769 }
770
771 if(exists($opt->{parseropts})) {
772 if($^W) {
773 carp "Warning: " .
774 "'ParserOpts' is deprecated, contact the author if you need it";
775 }
776 }
777 else {
778 $opt->{parseropts} = [ ];
779 }
780
781
782 # Special cleanup for {forcearray} which could be regex, arrayref or boolean
783 # or left to default to 0
784
785 if(exists($opt->{forcearray})) {
786 if(ref($opt->{forcearray}) eq 'Regexp') {
787 $opt->{forcearray} = [ $opt->{forcearray} ];
788 }
789
790 if(ref($opt->{forcearray}) eq 'ARRAY') {
791 my @force_list = @{$opt->{forcearray}};
792 if(@force_list) {
793 $opt->{forcearray} = {};
794 foreach my $tag (@force_list) {
795 if(ref($tag) eq 'Regexp') {
796 push @{$opt->{forcearray}->{_regex}}, $tag;
797 }
798 else {
799 $opt->{forcearray}->{$tag} = 1;
800 }
801 }
802 }
803 else {
804 $opt->{forcearray} = 0;
805 }
806 }
807 else {
808 $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
809 }
810 }
811 else {
812 if($StrictMode and $dirn eq 'in') {
813 croak "No value specified for 'ForceArray' option in call to XML$dirn()";
814 }
815 $opt->{forcearray} = 0;
816 }
817
818
819 # Special cleanup for {keyattr} which could be arrayref or hashref or left
820 # to default to arrayref
821
822 if(exists($opt->{keyattr})) {
823 if(ref($opt->{keyattr})) {
824 if(ref($opt->{keyattr}) eq 'HASH') {
825
826 # Make a copy so we can mess with it
827
828 $opt->{keyattr} = { %{$opt->{keyattr}} };
829
830
831 # Convert keyattr => { elem => '+attr' }
832 # to keyattr => { elem => [ 'attr', '+' ] }
833
834 foreach my $el (keys(%{$opt->{keyattr}})) {
835 if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
836 $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
837 if($StrictMode and $dirn eq 'in') {
838 next if($opt->{forcearray} == 1);
839 next if(ref($opt->{forcearray}) eq 'HASH'
840 and $opt->{forcearray}->{$el});
841 croak "<$el> set in KeyAttr but not in ForceArray";
842 }
843 }
844 else {
845 delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
846 }
847 }
848 }
849 else {
850 if(@{$opt->{keyattr}} == 0) {
851 delete($opt->{keyattr});
852 }
853 }
854 }
855 else {
856 $opt->{keyattr} = [ $opt->{keyattr} ];
857 }
858 }
859 else {
860 if($StrictMode) {
861 croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
862 }
863 $opt->{keyattr} = [ @DefKeyAttr ];
864 }
865
866
867 # Special cleanup for {valueattr} which could be arrayref or hashref
868
869 if(exists($opt->{valueattr})) {
870 if(ref($opt->{valueattr}) eq 'ARRAY') {
871 $opt->{valueattrlist} = {};
872 $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
873 }
874 }
875
876 # make sure there's nothing weird in {grouptags}
877
878 if($opt->{grouptags}) {
879 croak "Illegal value for 'GroupTags' option - expected a hashref"
880 unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
881
882 while(my($key, $val) = each %{$opt->{grouptags}}) {
883 next if $key ne $val;
884 croak "Bad value in GroupTags: '$key' => '$val'";
885 }
886 }
887
888
889 # Check the {variables} option is valid and initialise variables hash
890
891 if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
892 croak "Illegal value for 'Variables' option - expected a hashref";
893 }
894
895 if($opt->{variables}) {
896 $self->{_var_values} = { %{$opt->{variables}} };
897 }
898 elsif($opt->{varattr}) {
899 $self->{_var_values} = {};
900 }
901
902}
903
904
905##############################################################################
906# Method: find_xml_file()
907#
908# Helper routine for XMLin().
909# Takes a filename, and a list of directories, attempts to locate the file in
910# the directories listed.
911# Returns a full pathname on success; croaks on failure.
912#
913
914sub find_xml_file {
915 my $self = shift;
916 my $file = shift;
917 my @search_path = @_;
918
919
920 require File::Basename;
921 require File::Spec;
922
923 my($filename, $filedir) = File::Basename::fileparse($file);
924
925 if($filename ne $file) { # Ignore searchpath if dir component
926 return($file) if(-e $file);
927 }
928 else {
929 my($path);
930 foreach $path (@search_path) {
931 my $fullpath = File::Spec->catfile($path, $file);
932 return($fullpath) if(-e $fullpath);
933 }
934 }
935
936 # If user did not supply a search path, default to current directory
937
938 if(!@search_path) {
939 return($file) if(-e $file);
940 croak "File does not exist: $file";
941 }
942
943 croak "Could not find $file in ", join(':', @search_path);
944}
945
946
947##############################################################################
948# Method: collapse()
949#
950# Helper routine for XMLin(). This routine really comprises the 'smarts' (or
951# value add) of this module.
952#
953# Takes the parse tree that XML::Parser produced from the supplied XML and
954# recurses through it 'collapsing' unnecessary levels of indirection (nested
955# arrays etc) to produce a data structure that is easier to work with.
956#
957# Elements in the original parser tree are represented as an element name
958# followed by an arrayref. The first element of the array is a hashref
959# containing the attributes. The rest of the array contains a list of any
960# nested elements as name+arrayref pairs:
961#
962# <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
963#
964# The special element name '0' (zero) flags text content.
965#
966# This routine cuts down the noise by discarding any text content consisting of
967# only whitespace and then moves the nested elements into the attribute hash
968# using the name of the nested element as the hash key and the collapsed
969# version of the nested element as the value. Multiple nested elements with
970# the same name will initially be represented as an arrayref, but this may be
971# 'folded' into a hashref depending on the value of the keyattr option.
972#
973
974sub collapse {
975 my $self = shift;
976
977
978 # Start with the hash of attributes
979
980 my $attr = shift;
981 if($self->{opt}->{noattr}) { # Discard if 'noattr' set
982 $attr = {};
983 }
984 elsif($self->{opt}->{normalisespace} == 2) {
985 while(my($key, $value) = each %$attr) {
986 $attr->{$key} = $self->normalise_space($value)
987 }
988 }
989
990
991 # Do variable substitutions
992
993 if(my $var = $self->{_var_values}) {
994 while(my($key, $val) = each(%$attr)) {
995 $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
996 $attr->{$key} = $val;
997 }
998 }
999
1000
1001 # Roll up 'value' attributes (but only if no nested elements)
1002
1003 if(!@_ and keys %$attr == 1) {
1004 my($k) = keys %$attr;
1005 if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) {
1006 return $attr->{$k};
1007 }
1008 }
1009
1010
1011 # Add any nested elements
1012
1013 my($key, $val);
1014 while(@_) {
1015 $key = shift;
1016 $val = shift;
1017
1018 if(ref($val)) {
1019 $val = $self->collapse(@$val);
1020 next if(!defined($val) and $self->{opt}->{suppressempty});
1021 }
1022 elsif($key eq '0') {
1023 next if($val =~ m{^\s*$}s); # Skip all whitespace content
1024
1025 $val = $self->normalise_space($val)
1026 if($self->{opt}->{normalisespace} == 2);
1027
1028 # do variable substitutions
1029
1030 if(my $var = $self->{_var_values}) {
1031 $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
1032 }
1033
1034
1035 # look for variable definitions
1036
1037 if(my $var = $self->{opt}->{varattr}) {
1038 if(exists $attr->{$var}) {
1039 $self->set_var($attr->{$var}, $val);
1040 }
1041 }
1042
1043
1044 # Collapse text content in element with no attributes to a string
1045
1046 if(!%$attr and !@_) {
1047 return($self->{opt}->{forcecontent} ?
1048 { $self->{opt}->{contentkey} => $val } : $val
1049 );
1050 }
1051 $key = $self->{opt}->{contentkey};
1052 }
1053
1054
1055 # Combine duplicate attributes into arrayref if required
1056
1057 if(exists($attr->{$key})) {
1058 if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
1059 push(@{$attr->{$key}}, $val);
1060 }
1061 else {
1062 $attr->{$key} = [ $attr->{$key}, $val ];
1063 }
1064 }
1065 elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
1066 $attr->{$key} = [ $val ];
1067 }
1068 else {
1069 if( $key ne $self->{opt}->{contentkey}
1070 and (
1071 ($self->{opt}->{forcearray} == 1)
1072 or (
1073 (ref($self->{opt}->{forcearray}) eq 'HASH')
1074 and (
1075 $self->{opt}->{forcearray}->{$key}
1076 or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
1077 )
1078 )
1079 )
1080 ) {
1081 $attr->{$key} = [ $val ];
1082 }
1083 else {
1084 $attr->{$key} = $val;
1085 }
1086 }
1087
1088 }
1089
1090
1091 # Turn arrayrefs into hashrefs if key fields present
1092
1093 if($self->{opt}->{keyattr}) {
1094 while(($key,$val) = each %$attr) {
1095 if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
1096 $attr->{$key} = $self->array_to_hash($key, $val);
1097 }
1098 }
1099 }
1100
1101
1102 # disintermediate grouped tags
1103
1104 if($self->{opt}->{grouptags}) {
1105 while(my($key, $val) = each(%$attr)) {
1106 next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1107 next unless(exists($self->{opt}->{grouptags}->{$key}));
1108
1109 my($child_key, $child_val) = %$val;
1110
1111 if($self->{opt}->{grouptags}->{$key} eq $child_key) {
1112 $attr->{$key}= $child_val;
1113 }
1114 }
1115 }
1116
1117
1118 # Fold hashes containing a single anonymous array up into just the array
1119
1120 my $count = scalar keys %$attr;
1121 if($count == 1
1122 and exists $attr->{anon}
1123 and UNIVERSAL::isa($attr->{anon}, 'ARRAY')
1124 ) {
1125 return($attr->{anon});
1126 }
1127
1128
1129 # Do the right thing if hash is empty, otherwise just return it
1130
1131 if(!%$attr and exists($self->{opt}->{suppressempty})) {
1132 if(defined($self->{opt}->{suppressempty}) and
1133 $self->{opt}->{suppressempty} eq '') {
1134 return('');
1135 }
1136 return(undef);
1137 }
1138
1139
1140 # Roll up named elements with named nested 'value' attributes
1141
1142 if($self->{opt}->{valueattr}) {
1143 while(my($key, $val) = each(%$attr)) {
1144 next unless($self->{opt}->{valueattr}->{$key});
1145 next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1146 my($k) = keys %$val;
1147 next unless($k eq $self->{opt}->{valueattr}->{$key});
1148 $attr->{$key} = $val->{$k};
1149 }
1150 }
1151
1152 return($attr)
1153
1154}
1155
1156
1157##############################################################################
1158# Method: set_var()
1159#
1160# Called when a variable definition is encountered in the XML. (A variable
1161# definition looks like <element attrname="name">value</element> where attrname
1162# matches the varattr setting).
1163#
1164
1165sub set_var {
1166 my($self, $name, $value) = @_;
1167
1168 $self->{_var_values}->{$name} = $value;
1169}
1170
1171
1172##############################################################################
1173# Method: get_var()
1174#
1175# Called during variable substitution to get the value for the named variable.
1176#
1177
1178sub get_var {
1179 my($self, $name) = @_;
1180
1181 my $value = $self->{_var_values}->{$name};
1182 return $value if(defined($value));
1183
1184 return '${' . $name . '}';
1185}
1186
1187
1188##############################################################################
1189# Method: normalise_space()
1190#
1191# Strips leading and trailing whitespace and collapses sequences of whitespace
1192# characters to a single space.
1193#
1194
1195sub normalise_space {
1196 my($self, $text) = @_;
1197
1198 $text =~ s/^\s+//s;
1199 $text =~ s/\s+$//s;
1200 $text =~ s/\s\s+/ /sg;
1201
1202 return $text;
1203}
1204
1205
1206##############################################################################
1207# Method: array_to_hash()
1208#
1209# Helper routine for collapse().
1210# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a
1211# reference to the hash on success or the original array if folding is
1212# not possible. Behaviour is controlled by 'keyattr' option.
1213#
1214
1215sub array_to_hash {
1216 my $self = shift;
1217 my $name = shift;
1218 my $arrayref = shift;
1219
1220 my $hashref = $self->new_hashref;
1221
1222 my($i, $key, $val, $flag);
1223
1224
1225 # Handle keyattr => { .... }
1226
1227 if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1228 return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
1229 ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
1230 for($i = 0; $i < @$arrayref; $i++) {
1231 if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
1232 exists($arrayref->[$i]->{$key})
1233 ) {
1234 $val = $arrayref->[$i]->{$key};
1235 if(ref($val)) {
1236 $self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
1237 return($arrayref);
1238 }
1239 $val = $self->normalise_space($val)
1240 if($self->{opt}->{normalisespace} == 1);
1241 $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1242 if(exists($hashref->{$val}));
1243 $hashref->{$val} = { %{$arrayref->[$i]} };
1244 $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
1245 delete $hashref->{$val}->{$key} unless($flag eq '+');
1246 }
1247 else {
1248 $self->die_or_warn("<$name> element has no '$key' key attribute");
1249 return($arrayref);
1250 }
1251 }
1252 }
1253
1254
1255 # Or assume keyattr => [ .... ]
1256
1257 else {
1258 my $default_keys =
1259 join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
1260
1261 ELEMENT: for($i = 0; $i < @$arrayref; $i++) {
1262 return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
1263
1264 foreach $key (@{$self->{opt}->{keyattr}}) {
1265 if(defined($arrayref->[$i]->{$key})) {
1266 $val = $arrayref->[$i]->{$key};
1267 if(ref($val)) {
1268 $self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
1269 if not $default_keys;
1270 return($arrayref);
1271 }
1272 $val = $self->normalise_space($val)
1273 if($self->{opt}->{normalisespace} == 1);
1274 $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1275 if(exists($hashref->{$val}));
1276 $hashref->{$val} = { %{$arrayref->[$i]} };
1277 delete $hashref->{$val}->{$key};
1278 next ELEMENT;
1279 }
1280 }
1281
1282 return($arrayref); # No keyfield matched
1283 }
1284 }
1285
1286 # collapse any hashes which now only have a 'content' key
1287
1288 if($self->{opt}->{collapseagain}) {
1289 $hashref = $self->collapse_content($hashref);
1290 }
1291
1292 return($hashref);
1293}
1294
1295
1296##############################################################################
1297# Method: die_or_warn()
1298#
1299# Takes a diagnostic message and does one of three things:
1300# 1. dies if strict mode is enabled
1301# 2. warns if warnings are enabled but strict mode is not
1302# 3. ignores message and resturns silently if neither strict mode nor warnings
1303# are enabled
1304#
1305
1306sub die_or_warn {
1307 my $self = shift;
1308 my $msg = shift;
1309
1310 croak $msg if($StrictMode);
1311 carp "Warning: $msg" if($^W);
1312}
1313
1314
1315##############################################################################
1316# Method: new_hashref()
1317#
1318# This is a hook routine for overriding in a sub-class. Some people believe
1319# that using Tie::IxHash here will solve order-loss problems.
1320#
1321
1322sub new_hashref {
1323 my $self = shift;
1324
1325 return { @_ };
1326}
1327
1328
1329##############################################################################
1330# Method: collapse_content()
1331#
1332# Helper routine for array_to_hash
1333#
1334# Arguments expected are:
1335# - an XML::Simple object
1336# - a hasref
1337# the hashref is a former array, turned into a hash by array_to_hash because
1338# of the presence of key attributes
1339# at this point collapse_content avoids over-complicated structures like
1340# dir => { libexecdir => { content => '$exec_prefix/libexec' },
1341# localstatedir => { content => '$prefix' },
1342# }
1343# into
1344# dir => { libexecdir => '$exec_prefix/libexec',
1345# localstatedir => '$prefix',
1346# }
1347
1348sub collapse_content {
1349 my $self = shift;
1350 my $hashref = shift;
1351
1352 my $contentkey = $self->{opt}->{contentkey};
1353
1354 # first go through the values,checking that they are fit to collapse
1355 foreach my $val (values %$hashref) {
1356 return $hashref unless ( (ref($val) eq 'HASH')
1357 and (keys %$val == 1)
1358 and (exists $val->{$contentkey})
1359 );
1360 }
1361
1362 # now collapse them
1363 foreach my $key (keys %$hashref) {
1364 $hashref->{$key}= $hashref->{$key}->{$contentkey};
1365 }
1366
1367 return $hashref;
1368}
1369
1370
1371##############################################################################
1372# Method: value_to_xml()
1373#
1374# Helper routine for XMLout() - recurses through a data structure building up
1375# and returning an XML representation of that structure as a string.
1376#
1377# Arguments expected are:
1378# - the data structure to be encoded (usually a reference)
1379# - the XML tag name to use for this item
1380# - a string of spaces for use as the current indent level
1381#
1382
1383sub value_to_xml {
1384 my $self = shift;;
1385
1386
1387 # Grab the other arguments
1388
1389 my($ref, $name, $indent) = @_;
1390
1391 my $named = (defined($name) and $name ne '' ? 1 : 0);
1392
1393 my $nl = "\n";
1394
1395 my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
1396 if($self->{opt}->{noindent}) {
1397 $indent = '';
1398 $nl = '';
1399 }
1400
1401
1402 # Convert to XML
1403
1404 if(ref($ref)) {
1405 croak "circular data structures not supported"
1406 if(grep($_ == $ref, @{$self->{_ancestors}}));
1407 push @{$self->{_ancestors}}, $ref;
1408 }
1409 else {
1410 if($named) {
1411 return(join('',
1412 $indent, '<', $name, '>',
1413 ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
1414 '</', $name, ">", $nl
1415 ));
1416 }
1417 else {
1418 return("$ref$nl");
1419 }
1420 }
1421
1422
1423 # Unfold hash to array if possible
1424
1425 if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
1426 and keys %$ref # and it's not empty
1427 and $self->{opt}->{keyattr} # and folding is enabled
1428 and !$is_root # and its not the root element
1429 ) {
1430 $ref = $self->hash_to_array($name, $ref);
1431 }
1432
1433
1434 my @result = ();
1435 my($key, $value);
1436
1437
1438 # Handle hashrefs
1439
1440 if(UNIVERSAL::isa($ref, 'HASH')) {
1441
1442 # Reintermediate grouped values if applicable
1443
1444 if($self->{opt}->{grouptags}) {
1445 $ref = $self->copy_hash($ref);
1446 while(my($key, $val) = each %$ref) {
1447 if($self->{opt}->{grouptags}->{$key}) {
1448 $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
1449 }
1450 }
1451 }
1452
1453
1454 # Scan for namespace declaration attributes
1455
1456 my $nsdecls = '';
1457 my $default_ns_uri;
1458 if($self->{nsup}) {
1459 $ref = $self->copy_hash($ref);
1460 $self->{nsup}->push_context();
1461
1462 # Look for default namespace declaration first
1463
1464 if(exists($ref->{xmlns})) {
1465 $self->{nsup}->declare_prefix('', $ref->{xmlns});
1466 $nsdecls .= qq( xmlns="$ref->{xmlns}");
1467 delete($ref->{xmlns});
1468 }
1469 $default_ns_uri = $self->{nsup}->get_uri('');
1470
1471
1472 # Then check all the other keys
1473
1474 foreach my $qname (keys(%$ref)) {
1475 my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1476 if($uri) {
1477 if($uri eq $xmlns_ns) {
1478 $self->{nsup}->declare_prefix($lname, $ref->{$qname});
1479 $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
1480 delete($ref->{$qname});
1481 }
1482 }
1483 }
1484
1485 # Translate any remaining Clarkian names
1486
1487 foreach my $qname (keys(%$ref)) {
1488 my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1489 if($uri) {
1490 if($default_ns_uri and $uri eq $default_ns_uri) {
1491 $ref->{$lname} = $ref->{$qname};
1492 delete($ref->{$qname});
1493 }
1494 else {
1495 my $prefix = $self->{nsup}->get_prefix($uri);
1496 unless($prefix) {
1497 # $self->{nsup}->declare_prefix(undef, $uri);
1498 # $prefix = $self->{nsup}->get_prefix($uri);
1499 $prefix = $self->{ns_prefix}++;
1500 $self->{nsup}->declare_prefix($prefix, $uri);
1501 $nsdecls .= qq( xmlns:$prefix="$uri");
1502 }
1503 $ref->{"$prefix:$lname"} = $ref->{$qname};
1504 delete($ref->{$qname});
1505 }
1506 }
1507 }
1508 }
1509
1510
1511 my @nested = ();
1512 my $text_content = undef;
1513 if($named) {
1514 push @result, $indent, '<', $name, $nsdecls;
1515 }
1516
1517 if(keys %$ref) {
1518 my $first_arg = 1;
1519 foreach my $key ($self->sorted_keys($name, $ref)) {
1520 my $value = $ref->{$key};
1521 next if(substr($key, 0, 1) eq '-');
1522 if(!defined($value)) {
1523 next if $self->{opt}->{suppressempty};
1524 unless(exists($self->{opt}->{suppressempty})
1525 and !defined($self->{opt}->{suppressempty})
1526 ) {
1527 carp 'Use of uninitialized value' if($^W);
1528 }
1529 if($key eq $self->{opt}->{contentkey}) {
1530 $text_content = '';
1531 }
1532 else {
1533 $value = exists($self->{opt}->{suppressempty}) ? {} : '';
1534 }
1535 }
1536
1537 if(!ref($value)
1538 and $self->{opt}->{valueattr}
1539 and $self->{opt}->{valueattr}->{$key}
1540 ) {
1541 $value = { $self->{opt}->{valueattr}->{$key} => $value };
1542 }
1543
1544 if(ref($value) or $self->{opt}->{noattr}) {
1545 push @nested,
1546 $self->value_to_xml($value, $key, "$indent ");
1547 }
1548 else {
1549 $value = $self->escape_value($value) unless($self->{opt}->{noescape});
1550 if($key eq $self->{opt}->{contentkey}) {
1551 $text_content = $value;
1552 }
1553 else {
1554 push @result, "\n$indent " . ' ' x length($name)
1555 if($self->{opt}->{attrindent} and !$first_arg);
1556 push @result, ' ', $key, '="', $value , '"';
1557 $first_arg = 0;
1558 }
1559 }
1560 }
1561 }
1562 else {
1563 $text_content = '';
1564 }
1565
1566 if(@nested or defined($text_content)) {
1567 if($named) {
1568 push @result, ">";
1569 if(defined($text_content)) {
1570 push @result, $text_content;
1571 $nested[0] =~ s/^\s+// if(@nested);
1572 }
1573 else {
1574 push @result, $nl;
1575 }
1576 if(@nested) {
1577 push @result, @nested, $indent;
1578 }
1579 push @result, '</', $name, ">", $nl;
1580 }
1581 else {
1582 push @result, @nested; # Special case if no root elements
1583 }
1584 }
1585 else {
1586 push @result, " />", $nl;
1587 }
1588 $self->{nsup}->pop_context() if($self->{nsup});
1589 }
1590
1591
1592 # Handle arrayrefs
1593
1594 elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
1595 foreach $value (@$ref) {
1596 next if !defined($value) and $self->{opt}->{suppressempty};
1597 if(!ref($value)) {
1598 push @result,
1599 $indent, '<', $name, '>',
1600 ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
1601 '</', $name, ">$nl";
1602 }
1603 elsif(UNIVERSAL::isa($value, 'HASH')) {
1604 push @result, $self->value_to_xml($value, $name, $indent);
1605 }
1606 else {
1607 push @result,
1608 $indent, '<', $name, ">$nl",
1609 $self->value_to_xml($value, 'anon', "$indent "),
1610 $indent, '</', $name, ">$nl";
1611 }
1612 }
1613 }
1614
1615 else {
1616 croak "Can't encode a value of type: " . ref($ref);
1617 }
1618
1619
1620 pop @{$self->{_ancestors}} if(ref($ref));
1621
1622 return(join('', @result));
1623}
1624
1625
1626##############################################################################
1627# Method: sorted_keys()
1628#
1629# Returns the keys of the referenced hash sorted into alphabetical order, but
1630# with the 'key' key (as in KeyAttr) first, if there is one.
1631#
1632
1633sub sorted_keys {
1634 my($self, $name, $ref) = @_;
1635
1636 return keys %$ref if $self->{opt}->{nosort};
1637
1638 my %hash = %$ref;
1639 my $keyattr = $self->{opt}->{keyattr};
1640
1641 my @key;
1642
1643 if(ref $keyattr eq 'HASH') {
1644 if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
1645 push @key, $keyattr->{$name}->[0];
1646 delete $hash{$keyattr->{$name}->[0]};
1647 }
1648 }
1649 elsif(ref $keyattr eq 'ARRAY') {
1650 foreach (@{$keyattr}) {
1651 if(exists $hash{$_}) {
1652 push @key, $_;
1653 delete $hash{$_};
1654 last;
1655 }
1656 }
1657 }
1658
1659 return(@key, sort keys %hash);
1660}
1661
1662##############################################################################
1663# Method: escape_value()
1664#
1665# Helper routine for automatically escaping values for XMLout().
1666# Expects a scalar data value. Returns escaped version.
1667#
1668
1669sub escape_value {
1670 my($self, $data) = @_;
1671
1672 return '' unless(defined($data));
1673
1674 $data =~ s/&/&amp;/sg;
1675 $data =~ s/</&lt;/sg;
1676 $data =~ s/>/&gt;/sg;
1677 $data =~ s/"/&quot;/sg;
1678
1679 my $level = $self->{opt}->{numericescape} or return $data;
1680
1681 return $self->numeric_escape($data, $level);
1682}
1683
1684sub numeric_escape {
1685 my($self, $data, $level) = @_;
1686
1687 use utf8; # required for 5.6
1688
1689 if($self->{opt}->{numericescape} eq '2') {
1690 $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
1691 }
1692 else {
1693 $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
1694 }
1695
1696 return $data;
1697}
1698
1699
1700##############################################################################
1701# Method: hash_to_array()
1702#
1703# Helper routine for value_to_xml().
1704# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a
1705# reference to the array on success or the original hash if unfolding is
1706# not possible.
1707#
1708
1709sub hash_to_array {
1710 my $self = shift;
1711 my $parent = shift;
1712 my $hashref = shift;
1713
1714 my $arrayref = [];
1715
1716 my($key, $value);
1717
1718 my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
1719 foreach $key (@keys) {
1720 $value = $hashref->{$key};
1721 return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
1722
1723 if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1724 return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
1725 push @$arrayref, $self->copy_hash(
1726 $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
1727 );
1728 }
1729 else {
1730 push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
1731 }
1732 }
1733
1734 return($arrayref);
1735}
1736
1737
1738##############################################################################
1739# Method: copy_hash()
1740#
1741# Helper routine for hash_to_array(). When unfolding a hash of hashes into
1742# an array of hashes, we need to copy the key from the outer hash into the
1743# inner hash. This routine makes a copy of the original hash so we don't
1744# destroy the original data structure. You might wish to override this
1745# method if you're using tied hashes and don't want them to get untied.
1746#
1747
1748sub copy_hash {
1749 my($self, $orig, @extra) = @_;
1750
1751 return { @extra, %$orig };
1752}
1753
1754##############################################################################
1755# Methods required for building trees from SAX events
1756##############################################################################
1757
1758sub start_document {
1759 my $self = shift;
1760
1761 $self->handle_options('in') unless($self->{opt});
1762
1763 $self->{lists} = [];
1764 $self->{curlist} = $self->{tree} = [];
1765}
1766
1767
1768sub start_element {
1769 my $self = shift;
1770 my $element = shift;
1771
1772 my $name = $element->{Name};
1773 if($self->{opt}->{nsexpand}) {
1774 $name = $element->{LocalName} || '';
1775 if($element->{NamespaceURI}) {
1776 $name = '{' . $element->{NamespaceURI} . '}' . $name;
1777 }
1778 }
1779 my $attributes = {};
1780 if($element->{Attributes}) { # Might be undef
1781 foreach my $attr (values %{$element->{Attributes}}) {
1782 if($self->{opt}->{nsexpand}) {
1783 my $name = $attr->{LocalName} || '';
1784 if($attr->{NamespaceURI}) {
1785 $name = '{' . $attr->{NamespaceURI} . '}' . $name
1786 }
1787 $name = 'xmlns' if($name eq $bad_def_ns_jcn);
1788 $attributes->{$name} = $attr->{Value};
1789 }
1790 else {
1791 $attributes->{$attr->{Name}} = $attr->{Value};
1792 }
1793 }
1794 }
1795 my $newlist = [ $attributes ];
1796 push @{ $self->{lists} }, $self->{curlist};
1797 push @{ $self->{curlist} }, $name => $newlist;
1798 $self->{curlist} = $newlist;
1799}
1800
1801
1802sub characters {
1803 my $self = shift;
1804 my $chars = shift;
1805
1806 my $text = $chars->{Data};
1807 my $clist = $self->{curlist};
1808 my $pos = $#$clist;
1809
1810 if ($pos > 0 and $clist->[$pos - 1] eq '0') {
1811 $clist->[$pos] .= $text;
1812 }
1813 else {
1814 push @$clist, 0 => $text;
1815 }
1816}
1817
1818
1819sub end_element {
1820 my $self = shift;
1821
1822 $self->{curlist} = pop @{ $self->{lists} };
1823}
1824
1825
1826sub end_document {
1827 my $self = shift;
1828
1829 delete($self->{curlist});
1830 delete($self->{lists});
1831
1832 my $tree = $self->{tree};
1833 delete($self->{tree});
1834
1835
1836 # Return tree as-is to XMLin()
1837
1838 return($tree) if($self->{nocollapse});
1839
1840
1841 # Or collapse it before returning it to SAX parser class
1842
1843 if($self->{opt}->{keeproot}) {
1844 $tree = $self->collapse({}, @$tree);
1845 }
1846 else {
1847 $tree = $self->collapse(@{$tree->[1]});
1848 }
1849
1850 if($self->{opt}->{datahandler}) {
1851 return($self->{opt}->{datahandler}->($self, $tree));
1852 }
1853
1854 return($tree);
1855}
1856
1857*xml_in = \&XMLin;
1858*xml_out = \&XMLout;
1859
18601;
1861
1862__END__
1863
1864=head1 QUICK START
1865
1866Say you have a script called B<foo> and a file of configuration options
1867called B<foo.xml> containing this:
1868
1869 <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
1870 <server name="sahara" osname="solaris" osversion="2.6">
1871 <address>10.0.0.101</address>
1872 <address>10.0.1.101</address>
1873 </server>
1874 <server name="gobi" osname="irix" osversion="6.5">
1875 <address>10.0.0.102</address>
1876 </server>
1877 <server name="kalahari" osname="linux" osversion="2.0.34">
1878 <address>10.0.0.103</address>
1879 <address>10.0.1.103</address>
1880 </server>
1881 </config>
1882
1883The following lines of code in B<foo>:
1884
1885 use XML::Simple;
1886
1887 my $config = XMLin();
1888
1889will 'slurp' the configuration options into the hashref $config (because no
1890arguments are passed to C<XMLin()> the name and location of the XML file will
1891be inferred from name and location of the script). You can dump out the
1892contents of the hashref using Data::Dumper:
1893
1894 use Data::Dumper;
1895
1896 print Dumper($config);
1897
1898which will produce something like this (formatting has been adjusted for
1899brevity):
1900
1901 {
1902 'logdir' => '/var/log/foo/',
1903 'debugfile' => '/tmp/foo.debug',
1904 'server' => {
1905 'sahara' => {
1906 'osversion' => '2.6',
1907 'osname' => 'solaris',
1908 'address' => [ '10.0.0.101', '10.0.1.101' ]
1909 },
1910 'gobi' => {
1911 'osversion' => '6.5',
1912 'osname' => 'irix',
1913 'address' => '10.0.0.102'
1914 },
1915 'kalahari' => {
1916 'osversion' => '2.0.34',
1917 'osname' => 'linux',
1918 'address' => [ '10.0.0.103', '10.0.1.103' ]
1919 }
1920 }
1921 }
1922
1923Your script could then access the name of the log directory like this:
1924
1925 print $config->{logdir};
1926
1927similarly, the second address on the server 'kalahari' could be referenced as:
1928
1929 print $config->{server}->{kalahari}->{address}->[1];
1930
1931What could be simpler? (Rhetorical).
1932
1933For simple requirements, that's really all there is to it. If you want to
1934store your XML in a different directory or file, or pass it in as a string or
1935even pass it in via some derivative of an IO::Handle, you'll need to check out
1936L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that
1937neat little transformation that produced $config->{server}) you'll find options
1938for that as well.
1939
1940If you want to generate XML (for example to write a modified version of
1941$config back out as XML), check out C<XMLout()>.
1942
1943If your needs are not so simple, this may not be the module for you. In that
1944case, you might want to read L<"WHERE TO FROM HERE?">.
1945
1946=head1 DESCRIPTION
1947
1948The XML::Simple module provides a simple API layer on top of an underlying XML
1949parsing module (either XML::Parser or one of the SAX2 parser modules). Two
1950functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity
1951request the lower case versions of the function names: C<xml_in()> and
1952C<xml_out()>.
1953
1954The simplest approach is to call these two functions directly, but an
1955optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
1956allows them to be called as methods of an B<XML::Simple> object. The object
1957interface can also be used at either end of a SAX pipeline.
1958
1959=head2 XMLin()
1960
1961Parses XML formatted data and returns a reference to a data structure which
1962contains the same information in a more readily accessible form. (Skip
1963down to L<"EXAMPLES"> below, for more sample code).
1964
1965C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
1966value' option pairs. The XML specifier can be one of the following:
1967
1968=over 4
1969
1970=item A filename
1971
1972If the filename contains no directory components C<XMLin()> will look for the
1973file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
1974current directory if the SearchPath option is not defined. eg:
1975
1976 $ref = XMLin('/etc/params.xml');
1977
1978Note, the filename '-' can be used to parse from STDIN.
1979
1980=item undef
1981
1982If there is no XML specifier, C<XMLin()> will check the script directory and
1983each of the SearchPath directories for a file with the same name as the script
1984but with the extension '.xml'. Note: if you wish to specify options, you
1985must specify the value 'undef'. eg:
1986
1987 $ref = XMLin(undef, ForceArray => 1);
1988
1989=item A string of XML
1990
1991A string containing XML (recognised by the presence of '<' and '>' characters)
1992will be parsed directly. eg:
1993
1994 $ref = XMLin('<opt username="bob" password="flurp" />');
1995
1996=item An IO::Handle object
1997
1998An IO::Handle object will be read to EOF and its contents parsed. eg:
1999
2000 $fh = IO::File->new('/etc/params.xml');
2001 $ref = XMLin($fh);
2002
2003=back
2004
2005=head2 XMLout()
2006
2007Takes a data structure (generally a hashref) and returns an XML encoding of
2008that structure. If the resulting XML is parsed using C<XMLin()>, it should
2009return a data structure equivalent to the original (see caveats below).
2010
2011The C<XMLout()> function can also be used to output the XML as SAX events
2012see the C<Handler> option and L<"SAX SUPPORT"> for more details).
2013
2014When translating hashes to XML, hash keys which have a leading '-' will be
2015silently skipped. This is the approved method for marking elements of a
2016data structure which should be ignored by C<XMLout>. (Note: If these items
2017were not skipped the key names would be emitted as element or attribute names
2018with a leading '-' which would not be valid XML).
2019
2020=head2 Caveats
2021
2022Some care is required in creating data structures which will be passed to
2023C<XMLout()>. Hash keys from the data structure will be encoded as either XML
2024element names or attribute names. Therefore, you should use hash key names
2025which conform to the relatively strict XML naming rules:
2026
2027Names in XML must begin with a letter. The remaining characters may be
2028letters, digits, hyphens (-), underscores (_) or full stops (.). It is also
2029allowable to include one colon (:) in an element name but this should only be
2030used when working with namespaces (B<XML::Simple> can only usefully work with
2031namespaces when teamed with a SAX Parser).
2032
2033You can use other punctuation characters in hash values (just not in hash
2034keys) however B<XML::Simple> does not support dumping binary data.
2035
2036If you break these rules, the current implementation of C<XMLout()> will
2037simply emit non-compliant XML which will be rejected if you try to read it
2038back in. (A later version of B<XML::Simple> might take a more proactive
2039approach).
2040
2041Note also that although you can nest hashes and arrays to arbitrary levels,
2042circular data structures are not supported and will cause C<XMLout()> to die.
2043
2044If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
2045to Perl, then you should probably disable array folding (using the KeyAttr
2046option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the
2047expected results, you may prefer to use L<XML::Dumper> which is designed for
2048exactly that purpose.
2049
2050Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
2051
2052
2053=head1 OPTIONS
2054
2055B<XML::Simple> supports a number of options (in fact as each release of
2056B<XML::Simple> adds more options, the module's claim to the name 'Simple'
2057becomes increasingly tenuous). If you find yourself repeatedly having to
2058specify the same options, you might like to investigate L<"OPTIONAL OO
2059INTERFACE"> below.
2060
2061If you can't be bothered reading the documentation, refer to
2062L<"STRICT MODE"> to automatically catch common mistakes.
2063
2064Because there are so many options, it's hard for new users to know which ones
2065are important, so here are the two you really need to know about:
2066
2067=over 4
2068
2069=item *
2070
2071check out C<ForceArray> because you'll almost certainly want to turn it on
2072
2073=item *
2074
2075make sure you know what the C<KeyAttr> option does and what its default value is
2076because it may surprise you otherwise (note in particular that 'KeyAttr'
2077affects both C<XMLin> and C<XMLout>)
2078
2079=back
2080
2081The option name headings below have a trailing 'comment' - a hash followed by
2082two pieces of metadata:
2083
2084=over 4
2085
2086=item *
2087
2088Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
2089'I<out>' if they are recognised by C<XMLout()>.
2090
2091=item *
2092
2093Each option is also flagged to indicate whether it is:
2094
2095 'important' - don't use the module until you understand this one
2096 'handy' - you can skip this on the first time through
2097 'advanced' - you can skip this on the second time through
2098 'SAX only' - don't worry about this unless you're using SAX (or
2099 alternatively if you need this, you also need SAX)
2100 'seldom used' - you'll probably never use this unless you were the
2101 person that requested the feature
2102
2103=back
2104
2105The options are listed alphabetically:
2106
2107Note: option names are no longer case sensitive so you can use the mixed case
2108versions shown here; all lower case as required by versions 2.03 and earlier;
2109or you can add underscores between the words (eg: key_attr).
2110
2111
2112=head2 AttrIndent => 1 I<# out - handy>
2113
2114When you are using C<XMLout()>, enable this option to have attributes printed
2115one-per-line with sensible indentation rather than all on one line.
2116
2117=head2 Cache => [ cache schemes ] I<# in - advanced>
2118
2119Because loading the B<XML::Parser> module and parsing an XML file can consume a
2120significant number of CPU cycles, it is often desirable to cache the output of
2121C<XMLin()> for later reuse.
2122
2123When parsing from a named file, B<XML::Simple> supports a number of caching
2124schemes. The 'Cache' option may be used to specify one or more schemes (using
2125an anonymous array). Each scheme will be tried in turn in the hope of finding
2126a cached pre-parsed representation of the XML file. If no cached copy is
2127found, the file will be parsed and the first cache scheme in the list will be
2128used to save a copy of the results. The following cache schemes have been
2129implemented:
2130
2131=over 4
2132
2133=item storable
2134
2135Utilises B<Storable.pm> to read/write a cache file with the same name as the
2136XML file but with the extension .stor
2137
2138=item memshare
2139
2140When a file is first parsed, a copy of the resulting data structure is retained
2141in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse
2142the same file will return a reference to this structure. This cached version
2143will persist only for the life of the Perl interpreter (which in the case of
2144mod_perl for example, may be some significant time).
2145
2146Because each caller receives a reference to the same data structure, a change
2147made by one caller will be visible to all. For this reason, the reference
2148returned should be treated as read-only.
2149
2150=item memcopy
2151
2152This scheme works identically to 'memshare' (above) except that each caller
2153receives a reference to a new data structure which is a copy of the cached
2154version. Copying the data structure will add a little processing overhead,
2155therefore this scheme should only be used where the caller intends to modify
2156the data structure (or wishes to protect itself from others who might). This
2157scheme uses B<Storable.pm> to perform the copy.
2158
2159=back
2160
2161Warning! The memory-based caching schemes compare the timestamp on the file to
2162the time when it was last parsed. If the file is stored on an NFS filesystem
2163(or other network share) and the clock on the file server is not exactly
2164synchronised with the clock where your script is run, updates to the source XML
2165file may appear to be ignored.
2166
2167=head2 ContentKey => 'keyname' I<# in+out - seldom used>
2168
2169When text content is parsed to a hash value, this option let's you specify a
2170name for the hash key to override the default 'content'. So for example:
2171
2172 XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
2173
2174will parse to:
2175
2176 { 'one' => 1, 'text' => 'Text' }
2177
2178instead of:
2179
2180 { 'one' => 1, 'content' => 'Text' }
2181
2182C<XMLout()> will also honour the value of this option when converting a hashref
2183to XML.
2184
2185You can also prefix your selected key name with a '-' character to have
2186C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
2187array folding. For example:
2188
2189 XMLin(
2190 '<opt><item name="one">First</item><item name="two">Second</item></opt>',
2191 KeyAttr => {item => 'name'},
2192 ForceArray => [ 'item' ],
2193 ContentKey => '-content'
2194 )
2195
2196will parse to:
2197
2198 {
2199 'item' => {
2200 'one' => 'First'
2201 'two' => 'Second'
2202 }
2203 }
2204
2205rather than this (without the '-'):
2206
2207 {
2208 'item' => {
2209 'one' => { 'content' => 'First' }
2210 'two' => { 'content' => 'Second' }
2211 }
2212 }
2213
2214=head2 DataHandler => code_ref I<# in - SAX only>
2215
2216When you use an B<XML::Simple> object as a SAX handler, it will return a
2217'simple tree' data structure in the same format as C<XMLin()> would return. If
2218this option is set (to a subroutine reference), then when the tree is built the
2219subroutine will be called and passed two arguments: a reference to the
2220B<XML::Simple> object and a reference to the data tree. The return value from
2221the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for
2222more details).
2223
2224=head2 ForceArray => 1 I<# in - important>
2225
2226This option should be set to '1' to force nested elements to be represented
2227as arrays even when there is only one. Eg, with ForceArray enabled, this
2228XML:
2229
2230 <opt>
2231 <name>value</name>
2232 </opt>
2233
2234would parse to this:
2235
2236 {
2237 'name' => [
2238 'value'
2239 ]
2240 }
2241
2242instead of this (the default):
2243
2244 {
2245 'name' => 'value'
2246 }
2247
2248This option is especially useful if the data structure is likely to be written
2249back out as XML and the default behaviour of rolling single nested elements up
2250into attributes is not desirable.
2251
2252If you are using the array folding feature, you should almost certainly enable
2253this option. If you do not, single nested elements will not be parsed to
2254arrays and therefore will not be candidates for folding to a hash. (Given that
2255the default value of 'KeyAttr' enables array folding, the default value of this
2256option should probably also have been enabled too - sorry).
2257
2258=head2 ForceArray => [ names ] I<# in - important>
2259
2260This alternative (and preferred) form of the 'ForceArray' option allows you to
2261specify a list of element names which should always be forced into an array
2262representation, rather than the 'all or nothing' approach above.
2263
2264It is also possible (since version 2.05) to include compiled regular
2265expressions in the list - any element names which match the pattern will be
2266forced to arrays. If the list contains only a single regex, then it is not
2267necessary to enclose it in an arrayref. Eg:
2268
2269 ForceArray => qr/_list$/
2270
2271=head2 ForceContent => 1 I<# in - seldom used>
2272
2273When C<XMLin()> parses elements which have text content as well as attributes,
2274the text content must be represented as a hash value rather than a simple
2275scalar. This option allows you to force text content to always parse to
2276a hash value even when there are no attributes. So for example:
2277
2278 XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
2279
2280will parse to:
2281
2282 {
2283 'x' => { 'content' => 'text1' },
2284 'y' => { 'a' => 2, 'content' => 'text2' }
2285 }
2286
2287instead of:
2288
2289 {
2290 'x' => 'text1',
2291 'y' => { 'a' => 2, 'content' => 'text2' }
2292 }
2293
2294=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
2295
2296You can use this option to eliminate extra levels of indirection in your Perl
2297data structure. For example this XML:
2298
2299 <opt>
2300 <searchpath>
2301 <dir>/usr/bin</dir>
2302 <dir>/usr/local/bin</dir>
2303 <dir>/usr/X11/bin</dir>
2304 </searchpath>
2305 </opt>
2306
2307Would normally be read into a structure like this:
2308
2309 {
2310 searchpath => {
2311 dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2312 }
2313 }
2314
2315But when read in with the appropriate value for 'GroupTags':
2316
2317 my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
2318
2319It will return this simpler structure:
2320
2321 {
2322 searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2323 }
2324
2325The grouping element (C<< <searchpath> >> in the example) must not contain any
2326attributes or elements other than the grouped element.
2327
2328You can specify multiple 'grouping element' to 'grouped element' mappings in
2329the same hashref. If this option is combined with C<KeyAttr>, the array
2330folding will occur first and then the grouped element names will be eliminated.
2331
2332C<XMLout> will also use the grouptag mappings to re-introduce the tags around
2333the grouped elements. Beware though that this will occur in all places that
2334the 'grouping tag' name occurs - you probably don't want to use the same name
2335for elements as well as attributes.
2336
2337=head2 Handler => object_ref I<# out - SAX only>
2338
2339Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
2340returning a string of XML. For more details see L<"SAX SUPPORT"> below.
2341
2342Note: the current implementation of this option generates a string of XML
2343and uses a SAX parser to translate it into SAX events. The normal encoding
2344rules apply here - your data must be UTF8 encoded unless you specify an
2345alternative encoding via the 'XMLDecl' option; and by the time the data reaches
2346the handler object, it will be in UTF8 form regardless of the encoding you
2347supply. A future implementation of this option may generate the events
2348directly.
2349
2350=head2 KeepRoot => 1 I<# in+out - handy>
2351
2352In its attempt to return a data structure free of superfluous detail and
2353unnecessary levels of indirection, C<XMLin()> normally discards the root
2354element name. Setting the 'KeepRoot' option to '1' will cause the root element
2355name to be retained. So after executing this code:
2356
2357 $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
2358
2359You'll be able to reference the tempdir as
2360C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
2361C<$config-E<gt>{tempdir}>.
2362
2363Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
2364data structure already contains a root element name and it is not necessary to
2365add another.
2366
2367=head2 KeyAttr => [ list ] I<# in+out - important>
2368
2369This option controls the 'array folding' feature which translates nested
2370elements from an array to a hash. It also controls the 'unfolding' of hashes
2371to arrays.
2372
2373For example, this XML:
2374
2375 <opt>
2376 <user login="grep" fullname="Gary R Epstein" />
2377 <user login="stty" fullname="Simon T Tyson" />
2378 </opt>
2379
2380would, by default, parse to this:
2381
2382 {
2383 'user' => [
2384 {
2385 'login' => 'grep',
2386 'fullname' => 'Gary R Epstein'
2387 },
2388 {
2389 'login' => 'stty',
2390 'fullname' => 'Simon T Tyson'
2391 }
2392 ]
2393 }
2394
2395If the option 'KeyAttr => "login"' were used to specify that the 'login'
2396attribute is a key, the same XML would parse to:
2397
2398 {
2399 'user' => {
2400 'stty' => {
2401 'fullname' => 'Simon T Tyson'
2402 },
2403 'grep' => {
2404 'fullname' => 'Gary R Epstein'
2405 }
2406 }
2407 }
2408
2409The key attribute names should be supplied in an arrayref if there is more
2410than one. C<XMLin()> will attempt to match attribute names in the order
2411supplied. C<XMLout()> will use the first attribute name supplied when
2412'unfolding' a hash into an array.
2413
2414Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
2415not want folding on input or unfolding on output you must setting this option
2416to an empty list to disable the feature.
2417
2418Note 2: If you wish to use this option, you should also enable the
2419C<ForceArray> option. Without 'ForceArray', a single nested element will be
2420rolled up into a scalar rather than an array and therefore will not be folded
2421(since only arrays get folded).
2422
2423=head2 KeyAttr => { list } I<# in+out - important>
2424
2425This alternative (and preferred) method of specifiying the key attributes
2426allows more fine grained control over which elements are folded and on which
2427attributes. For example the option 'KeyAttr => { package => 'id' } will cause
2428any package elements to be folded on the 'id' attribute. No other elements
2429which have an 'id' attribute will be folded at all.
2430
2431Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
2432if this syntax is used and an element which does not have the specified key
2433attribute is encountered (eg: a 'package' element without an 'id' attribute, to
2434use the example above). Warnings will only be generated if B<-w> is in force.
2435
2436Two further variations are made possible by prefixing a '+' or a '-' character
2437to the attribute name:
2438
2439The option 'KeyAttr => { user => "+login" }' will cause this XML:
2440
2441 <opt>
2442 <user login="grep" fullname="Gary R Epstein" />
2443 <user login="stty" fullname="Simon T Tyson" />
2444 </opt>
2445
2446to parse to this data structure:
2447
2448 {
2449 'user' => {
2450 'stty' => {
2451 'fullname' => 'Simon T Tyson',
2452 'login' => 'stty'
2453 },
2454 'grep' => {
2455 'fullname' => 'Gary R Epstein',
2456 'login' => 'grep'
2457 }
2458 }
2459 }
2460
2461The '+' indicates that the value of the key attribute should be copied rather
2462than moved to the folded hash key.
2463
2464A '-' prefix would produce this result:
2465
2466 {
2467 'user' => {
2468 'stty' => {
2469 'fullname' => 'Simon T Tyson',
2470 '-login' => 'stty'
2471 },
2472 'grep' => {
2473 'fullname' => 'Gary R Epstein',
2474 '-login' => 'grep'
2475 }
2476 }
2477 }
2478
2479As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
2480
2481=head2 NoAttr => 1 I<# in+out - handy>
2482
2483When used with C<XMLout()>, the generated XML will contain no attributes.
2484All hash key/values will be represented as nested elements instead.
2485
2486When used with C<XMLin()>, any attributes in the XML will be ignored.
2487
2488=head2 NoEscape => 1 I<# out - seldom used>
2489
2490By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
2491'"' to '&lt;', '&gt;', '&amp;' and '&quot' respectively. Use this option to
2492suppress escaping (presumably because you've already escaped the data in some
2493more sophisticated manner).
2494
2495=head2 NoIndent => 1 I<# out - seldom used>
2496
2497Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
2498With this option enabled, the XML output will all be on one line (unless there
2499are newlines in the data) - this may be easier for downstream processing.
2500
2501=head2 NoSort => 1 I<# out - seldom used>
2502
2503Newer versions of XML::Simple sort elements and attributes alphabetically (*),
2504by default. Enable this option to suppress the sorting - possibly for
2505backwards compatibility.
2506
2507* Actually, sorting is alphabetical but 'key' attribute or element names (as in
2508'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
2509are sorted alphabetically by the value of the key field.
2510
2511=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
2512
2513This option controls how whitespace in text content is handled. Recognised
2514values for the option are:
2515
2516=over 4
2517
2518=item *
2519
25200 = (default) whitespace is passed through unaltered (except of course for the
2521normalisation of whitespace in attribute values which is mandated by the XML
2522recommendation)
2523
2524=item *
2525
25261 = whitespace is normalised in any value used as a hash key (normalising means
2527removing leading and trailing whitespace and collapsing sequences of whitespace
2528characters to a single space)
2529
2530=item *
2531
25322 = whitespace is normalised in all text content
2533
2534=back
2535
2536Note: you can spell this option with a 'z' if that is more natural for you.
2537
2538=head2 NSExpand => 1 I<# in+out handy - SAX only>
2539
2540This option controls namespace expansion - the translation of element and
2541attribute names of the form 'prefix:name' to '{uri}name'. For example the
2542element name 'xsl:template' might be expanded to:
2543'{http://www.w3.org/1999/XSL/Transform}template'.
2544
2545By default, C<XMLin()> will return element names and attribute names exactly as
2546they appear in the XML. Setting this option to 1 will cause all element and
2547attribute names to be expanded to include their namespace prefix.
2548
2549I<Note: You must be using a SAX parser for this option to work (ie: it does not
2550work with XML::Parser)>.
2551
2552This option also controls whether C<XMLout()> performs the reverse translation
2553from '{uri}name' back to 'prefix:name'. The default is no translation. If
2554your data contains expanded names, you should set this option to 1 otherwise
2555C<XMLout> will emit XML which is not well formed.
2556
2557I<Note: You must have the XML::NamespaceSupport module installed if you want
2558C<XMLout()> to translate URIs back to prefixes>.
2559
2560=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
2561
2562Use this option to have 'high' (non-ASCII) characters in your Perl data
2563structure converted to numeric entities (eg: &#8364;) in the XML output. Three
2564levels are possible:
2565
25660 - default: no numeric escaping (OK if you're writing out UTF8)
2567
25681 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
2569
25702 - all characters above 0x7F are escaped (good for plain ASCII output)
2571
2572=head2 OutputFile => <file specifier> I<# out - handy>
2573
2574The default behaviour of C<XMLout()> is to return the XML as a string. If you
2575wish to write the XML to a file, simply supply the filename using the
2576'OutputFile' option.
2577
2578This option also accepts an IO handle object - especially useful in Perl 5.8.0
2579and later for output using an encoding other than UTF-8, eg:
2580
2581 open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
2582 XMLout($ref, OutputFile => $fh);
2583
2584Note, XML::Simple does not require that the object you pass in to the
2585OutputFile option inherits from L<IO::Handle> - it simply assumes the object
2586supports a C<print> method.
2587
2588=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
2589
2590I<Note: This option is now officially deprecated. If you find it useful, email
2591the author with an example of what you use it for. Do not use this option to
2592set the ProtocolEncoding, that's just plain wrong - fix the XML>.
2593
2594This option allows you to pass parameters to the constructor of the underlying
2595XML::Parser object (which of course assumes you're not using SAX).
2596
2597=head2 RootName => 'string' I<# out - handy>
2598
2599By default, when C<XMLout()> generates XML, the root element will be named
2600'opt'. This option allows you to specify an alternative name.
2601
2602Specifying either undef or the empty string for the RootName option will
2603produce XML with no root elements. In most cases the resulting XML fragment
2604will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
2605Nevertheless, the option has been found to be useful in certain circumstances.
2606
2607=head2 SearchPath => [ list ] I<# in - handy>
2608
2609If you pass C<XMLin()> a filename, but the filename include no directory
2610component, you can use this option to specify which directories should be
2611searched to locate the file. You might use this option to search first in the
2612user's home directory, then in a global directory such as /etc.
2613
2614If a filename is provided to C<XMLin()> but SearchPath is not defined, the
2615file is assumed to be in the current directory.
2616
2617If the first parameter to C<XMLin()> is undefined, the default SearchPath
2618will contain only the directory in which the script itself is located.
2619Otherwise the default SearchPath will be empty.
2620
2621=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
2622
2623This option controls what C<XMLin()> should do with empty elements (no
2624attributes and no content). The default behaviour is to represent them as
2625empty hashes. Setting this option to a true value (eg: 1) will cause empty
2626elements to be skipped altogether. Setting the option to 'undef' or the empty
2627string will cause empty elements to be represented as the undefined value or
2628the empty string respectively. The latter two alternatives are a little
2629easier to test for in your code than a hash with no keys.
2630
2631The option also controls what C<XMLout()> does with undefined values. Setting
2632the option to undef causes undefined values to be output as empty elements
2633(rather than empty attributes), it also suppresses the generation of warnings
2634about undefined values. Setting the option to a true value (eg: 1) causes
2635undefined values to be skipped altogether on output.
2636
2637=head2 ValueAttr => [ names ] I<# in - handy>
2638
2639Use this option to deal elements which always have a single attribute and no
2640content. Eg:
2641
2642 <opt>
2643 <colour value="red" />
2644 <size value="XXL" />
2645 </opt>
2646
2647Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
2648
2649 {
2650 colour => 'red',
2651 size => 'XXL'
2652 }
2653
2654instead of this (the default):
2655
2656 {
2657 colour => { value => 'red' },
2658 size => { value => 'XXL' }
2659 }
2660
2661Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
2662since the attribute name is discarded at parse time, the original XML cannot be
2663reconstructed.
2664
2665=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
2666
2667This (preferred) form of the ValueAttr option requires you to specify both
2668the element and the attribute names. This is not only safer, it also allows
2669the original XML to be reconstructed by C<XMLout()>.
2670
2671Note: You probably don't want to use this option and the NoAttr option at the
2672same time.
2673
2674=head2 Variables => { name => value } I<# in - handy>
2675
2676This option allows variables in the XML to be expanded when the file is read.
2677(there is no facility for putting the variable names back if you regenerate
2678XML using C<XMLout>).
2679
2680A 'variable' is any text of the form C<${name}> which occurs in an attribute
2681value or in the text content of an element. If 'name' matches a key in the
2682supplied hashref, C<${name}> will be replaced with the corresponding value from
2683the hashref. If no matching key is found, the variable will not be replaced.
2684Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
2685allowed).
2686
2687=head2 VarAttr => 'attr_name' I<# in - handy>
2688
2689In addition to the variables defined using C<Variables>, this option allows
2690variables to be defined in the XML. A variable definition consists of an
2691element with an attribute called 'attr_name' (the value of the C<VarAttr>
2692option). The value of the attribute will be used as the variable name and the
2693text content of the element will be used as the value. A variable defined in
2694this way will override a variable defined using the C<Variables> option. For
2695example:
2696
2697 XMLin( '<opt>
2698 <dir name="prefix">/usr/local/apache</dir>
2699 <dir name="exec_prefix">${prefix}</dir>
2700 <dir name="bindir">${exec_prefix}/bin</dir>
2701 </opt>',
2702 VarAttr => 'name', ContentKey => '-content'
2703 );
2704
2705produces the following data structure:
2706
2707 {
2708 dir => {
2709 prefix => '/usr/local/apache',
2710 exec_prefix => '/usr/local/apache',
2711 bindir => '/usr/local/apache/bin',
2712 }
2713 }
2714
2715=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy>
2716
2717If you want the output from C<XMLout()> to start with the optional XML
2718declaration, simply set the option to '1'. The default XML declaration is:
2719
2720 <?xml version='1.0' standalone='yes'?>
2721
2722If you want some other string (for example to declare an encoding value), set
2723the value of this option to the complete string you require.
2724
2725
2726=head1 OPTIONAL OO INTERFACE
2727
2728The procedural interface is both simple and convenient however there are a
2729couple of reasons why you might prefer to use the object oriented (OO)
2730interface:
2731
2732=over 4
2733
2734=item *
2735
2736to define a set of default values which should be used on all subsequent calls
2737to C<XMLin()> or C<XMLout()>
2738
2739=item *
2740
2741to override methods in B<XML::Simple> to provide customised behaviour
2742
2743=back
2744
2745The default values for the options described above are unlikely to suit
2746everyone. The OO interface allows you to effectively override B<XML::Simple>'s
2747defaults with your preferred values. It works like this:
2748
2749First create an XML::Simple parser object with your preferred defaults:
2750
2751 my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
2752
2753then call C<XMLin()> or C<XMLout()> as a method of that object:
2754
2755 my $ref = $xs->XMLin($xml);
2756 my $xml = $xs->XMLout($ref);
2757
2758You can also specify options when you make the method calls and these values
2759will be merged with the values specified when the object was created. Values
2760specified in a method call take precedence.
2761
2762Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
2763called as C<xml_in()> or C<xml_out()>. The method names are aliased so the
2764only difference is the aesthetics.
2765
2766=head2 Parsing Methods
2767
2768You can explicitly call one of the following methods rather than rely on the
2769C<xml_in()> method automatically determining whether the target to be parsed is
2770a string, a file or a filehandle:
2771
2772=over 4
2773
2774=item parse_string(text)
2775
2776Works exactly like the C<xml_in()> method but assumes the first argument is
2777a string of XML (or a reference to a scalar containing a string of XML).
2778
2779=item parse_file(filename)
2780
2781Works exactly like the C<xml_in()> method but assumes the first argument is
2782the name of a file containing XML.
2783
2784=item parse_fh(file_handle)
2785
2786Works exactly like the C<xml_in()> method but assumes the first argument is
2787a filehandle which can be read to get XML.
2788
2789=back
2790
2791=head2 Hook Methods
2792
2793You can make your own class which inherits from XML::Simple and overrides
2794certain behaviours. The following methods may provide useful 'hooks' upon
2795which to hang your modified behaviour. You may find other undocumented methods
2796by examining the source, but those may be subject to change in future releases.
2797
2798=over 4
2799
2800=item handle_options(direction, name => value ...)
2801
2802This method will be called when one of the parsing methods or the C<XMLout()>
2803method is called. The initial argument will be a string (either 'in' or 'out')
2804and the remaining arguments will be name value pairs.
2805
2806=item default_config_file()
2807
2808Calculates and returns the name of the file which should be parsed if no
2809filename is passed to C<XMLin()> (default: C<$0.xml>).
2810
2811=item build_simple_tree(filename, string)
2812
2813Called from C<XMLin()> or any of the parsing methods. Takes either a file name
2814as the first argument or C<undef> followed by a 'string' as the second
2815argument. Returns a simple tree data structure. You could override this
2816method to apply your own transformations before the data structure is returned
2817to the caller.
2818
2819=item new_hashref()
2820
2821When the 'simple tree' data structure is being built, this method will be
2822called to create any required anonymous hashrefs.
2823
2824=item sorted_keys(name, hashref)
2825
2826Called when C<XMLout()> is translating a hashref to XML. This routine returns
2827a list of hash keys in the order that the corresponding attributes/elements
2828should appear in the output.
2829
2830=item escape_value(string)
2831
2832Called from C<XMLout()>, takes a string and returns a copy of the string with
2833XML character escaping rules applied.
2834
2835=item numeric_escape(string)
2836
2837Called from C<escape_value()>, to handle non-ASCII characters (depending on the
2838value of the NumericEscape option).
2839
2840=item copy_hash(hashref, extra_key => value, ...)
2841
2842Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
2843hashes. You might wish to override this method if you're using tied hashes and
2844don't want them to get untied.
2845
2846=back
2847
2848=head2 Cache Methods
2849
2850XML::Simple implements three caching schemes ('storable', 'memshare' and
2851'memcopy'). You can implement a custom caching scheme by implementing
2852two methods - one for reading from the cache and one for writing to it.
2853
2854For example, you might implement a new 'dbm' scheme that stores cached data
2855structures using the L<MLDBM> module. First, you would add a
2856C<cache_read_dbm()> method which accepted a filename for use as a lookup key
2857and returned a data structure on success, or undef on failure. Then, you would
2858implement a C<cache_read_dbm()> method which accepted a data structure and a
2859filename.
2860
2861You would use this caching scheme by specifying the option:
2862
2863 Cache => [ 'dbm' ]
2864
2865=head1 STRICT MODE
2866
2867If you import the B<XML::Simple> routines like this:
2868
2869 use XML::Simple qw(:strict);
2870
2871the following common mistakes will be detected and treated as fatal errors
2872
2873=over 4
2874
2875=item *
2876
2877Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
2878reading about this option, turn it off with: KeyAttr => [ ]
2879
2880=item *
2881
2882Failing to explicitly set the C<ForceArray> option - if you can't be bothered
2883reading about this option, set it to the safest mode with: ForceArray => 1
2884
2885=item *
2886
2887Setting ForceArray to an array, but failing to list all the elements from the
2888KeyAttr hash.
2889
2890=item *
2891
2892Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
2893one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
2894element). Note: if strict mode is not set but -w is, this condition triggers a
2895warning.
2896
2897=item *
2898
2899Data error - as above, but non-unique values are present in the key attribute
2900(eg: more than one E<lt>partE<gt> element with the same partnum). This will
2901also trigger a warning if strict mode is not enabled.
2902
2903=item *
2904
2905Data error - as above, but value of key attribute (eg: partnum) is not a
2906scalar string (due to nested elements etc). This will also trigger a warning
2907if strict mode is not enabled.
2908
2909=back
2910
2911=head1 SAX SUPPORT
2912
2913From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
2914for XML) - specifically SAX2.
2915
2916In a typical SAX application, an XML parser (or SAX 'driver') module generates
2917SAX events (start of element, character data, end of element, etc) as it parses
2918an XML document and a 'handler' module processes the events to extract the
2919required data. This simple model allows for some interesting and powerful
2920possibilities:
2921
2922=over 4
2923
2924=item *
2925
2926Applications written to the SAX API can extract data from huge XML documents
2927without the memory overheads of a DOM or tree API.
2928
2929=item *
2930
2931The SAX API allows for plug and play interchange of parser modules without
2932having to change your code to fit a new module's API. A number of SAX parsers
2933are available with capabilities ranging from extreme portability to blazing
2934performance.
2935
2936=item *
2937
2938A SAX 'filter' module can implement both a handler interface for receiving
2939data and a generator interface for passing modified data on to a downstream
2940handler. Filters can be chained together in 'pipelines'.
2941
2942=item *
2943
2944One filter module might split a data stream to direct data to two or more
2945downstream handlers.
2946
2947=item *
2948
2949Generating SAX events is not the exclusive preserve of XML parsing modules.
2950For example, a module might extract data from a relational database using DBI
2951and pass it on to a SAX pipeline for filtering and formatting.
2952
2953=back
2954
2955B<XML::Simple> can operate at either end of a SAX pipeline. For example,
2956you can take a data structure in the form of a hashref and pass it into a
2957SAX pipeline using the 'Handler' option on C<XMLout()>:
2958
2959 use XML::Simple;
2960 use Some::SAX::Filter;
2961 use XML::SAX::Writer;
2962
2963 my $ref = {
2964 .... # your data here
2965 };
2966
2967 my $writer = XML::SAX::Writer->new();
2968 my $filter = Some::SAX::Filter->new(Handler => $writer);
2969 my $simple = XML::Simple->new(Handler => $filter);
2970 $simple->XMLout($ref);
2971
2972You can also put B<XML::Simple> at the opposite end of the pipeline to take
2973advantage of the simple 'tree' data structure once the relevant data has been
2974isolated through filtering:
2975
2976 use XML::SAX;
2977 use Some::SAX::Filter;
2978 use XML::Simple;
2979
2980 my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
2981 my $filter = Some::SAX::Filter->new(Handler => $simple);
2982 my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
2983
2984 my $ref = $parser->parse_uri('some_huge_file.xml');
2985
2986 print $ref->{part}->{'555-1234'};
2987
2988You can build a filter by using an XML::Simple object as a handler and setting
2989its DataHandler option to point to a routine which takes the resulting tree,
2990modifies it and sends it off as SAX events to a downstream handler:
2991
2992 my $writer = XML::SAX::Writer->new();
2993 my $filter = XML::Simple->new(
2994 DataHandler => sub {
2995 my $simple = shift;
2996 my $data = shift;
2997
2998 # Modify $data here
2999
3000 $simple->XMLout($data, Handler => $writer);
3001 }
3002 );
3003 my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3004
3005 $parser->parse_uri($filename);
3006
3007I<Note: In this last example, the 'Handler' option was specified in the call to
3008C<XMLout()> but it could also have been specified in the constructor>.
3009
3010=head1 ENVIRONMENT
3011
3012If you don't care which parser module B<XML::Simple> uses then skip this
3013section entirely (it looks more complicated than it really is).
3014
3015B<XML::Simple> will default to using a B<SAX> parser if one is available or
3016B<XML::Parser> if SAX is not available.
3017
3018You can dictate which parser module is used by setting either the environment
3019variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
3020$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules
3021are used:
3022
3023=over 4
3024
3025=item *
3026
3027The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use
3028its default rules, you can set the package variable to an empty string.
3029
3030=item *
3031
3032If the 'preferred parser' is set to the string 'XML::Parser', then
3033L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
3034installed).
3035
3036=item *
3037
3038If the 'preferred parser' is set to some other value, then it is assumed to be
3039the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
3040If L<XML::SAX> is not installed, or the requested parser module is not
3041installed, then C<XMLin()> will die.
3042
3043=item *
3044
3045If the 'preferred parser' is not defined at all (the normal default
3046state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is
3047installed, then a parser module will be selected according to
3048L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
3049parser installed).
3050
3051=item *
3052
3053if the 'preferred parser' is not defined and B<XML::SAX> is not
3054installed, then B<XML::Parser> will be used. C<XMLin()> will die if
3055L<XML::Parser> is not installed.
3056
3057=back
3058
3059Note: The B<XML::SAX> distribution includes an XML parser written entirely in
3060Perl. It is very portable but it is not very fast. You should consider
3061installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
3062platform.
3063
3064=head1 ERROR HANDLING
3065
3066The XML standard is very clear on the issue of non-compliant documents. An
3067error in parsing any single element (for example a missing end tag) must cause
3068the whole document to be rejected. B<XML::Simple> will die with an appropriate
3069message if it encounters a parsing error.
3070
3071If dying is not appropriate for your application, you should arrange to call
3072C<XMLin()> in an eval block and look for errors in $@. eg:
3073
3074 my $config = eval { XMLin() };
3075 PopUpMessage($@) if($@);
3076
3077Note, there is a common misconception that use of B<eval> will significantly
3078slow down a script. While that may be true when the code being eval'd is in a
3079string, it is not true of code like the sample above.
3080
3081=head1 EXAMPLES
3082
3083When C<XMLin()> reads the following very simple piece of XML:
3084
3085 <opt username="testuser" password="frodo"></opt>
3086
3087it returns the following data structure:
3088
3089 {
3090 'username' => 'testuser',
3091 'password' => 'frodo'
3092 }
3093
3094The identical result could have been produced with this alternative XML:
3095
3096 <opt username="testuser" password="frodo" />
3097
3098Or this (although see 'ForceArray' option for variations):
3099
3100 <opt>
3101 <username>testuser</username>
3102 <password>frodo</password>
3103 </opt>
3104
3105Repeated nested elements are represented as anonymous arrays:
3106
3107 <opt>
3108 <person firstname="Joe" lastname="Smith">
3109 <email>joe@smith.com</email>
3110 <email>jsmith@yahoo.com</email>
3111 </person>
3112 <person firstname="Bob" lastname="Smith">
3113 <email>bob@smith.com</email>
3114 </person>
3115 </opt>
3116
3117 {
3118 'person' => [
3119 {
3120 'email' => [
3121 'joe@smith.com',
3122 'jsmith@yahoo.com'
3123 ],
3124 'firstname' => 'Joe',
3125 'lastname' => 'Smith'
3126 },
3127 {
3128 'email' => 'bob@smith.com',
3129 'firstname' => 'Bob',
3130 'lastname' => 'Smith'
3131 }
3132 ]
3133 }
3134
3135Nested elements with a recognised key attribute are transformed (folded) from
3136an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
3137option):
3138
3139 <opt>
3140 <person key="jsmith" firstname="Joe" lastname="Smith" />
3141 <person key="tsmith" firstname="Tom" lastname="Smith" />
3142 <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
3143 </opt>
3144
3145 {
3146 'person' => {
3147 'jbloggs' => {
3148 'firstname' => 'Joe',
3149 'lastname' => 'Bloggs'
3150 },
3151 'tsmith' => {
3152 'firstname' => 'Tom',
3153 'lastname' => 'Smith'
3154 },
3155 'jsmith' => {
3156 'firstname' => 'Joe',
3157 'lastname' => 'Smith'
3158 }
3159 }
3160 }
3161
3162
3163The <anon> tag can be used to form anonymous arrays:
3164
3165 <opt>
3166 <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
3167 <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
3168 <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
3169 <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
3170 </opt>
3171
3172 {
3173 'head' => [
3174 [ 'Col 1', 'Col 2', 'Col 3' ]
3175 ],
3176 'data' => [
3177 [ 'R1C1', 'R1C2', 'R1C3' ],
3178 [ 'R2C1', 'R2C2', 'R2C3' ],
3179 [ 'R3C1', 'R3C2', 'R3C3' ]
3180 ]
3181 }
3182
3183Anonymous arrays can be nested to arbirtrary levels and as a special case, if
3184the surrounding tags for an XML document contain only an anonymous array the
3185arrayref will be returned directly rather than the usual hashref:
3186
3187 <opt>
3188 <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
3189 <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
3190 <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
3191 </opt>
3192
3193 [
3194 [ 'Col 1', 'Col 2' ],
3195 [ 'R1C1', 'R1C2' ],
3196 [ 'R2C1', 'R2C2' ]
3197 ]
3198
3199Elements which only contain text content will simply be represented as a
3200scalar. Where an element has both attributes and text content, the element
3201will be represented as a hashref with the text content in the 'content' key
3202(see the C<ContentKey> option):
3203
3204 <opt>
3205 <one>first</one>
3206 <two attr="value">second</two>
3207 </opt>
3208
3209 {
3210 'one' => 'first',
3211 'two' => { 'attr' => 'value', 'content' => 'second' }
3212 }
3213
3214Mixed content (elements which contain both text content and nested elements)
3215will be not be represented in a useful way - element order and significant
3216whitespace will be lost. If you need to work with mixed content, then
3217XML::Simple is not the right tool for your job - check out the next section.
3218
3219=head1 WHERE TO FROM HERE?
3220
3221B<XML::Simple> is able to present a simple API because it makes some
3222assumptions on your behalf. These include:
3223
3224=over 4
3225
3226=item *
3227
3228You're not interested in text content consisting only of whitespace
3229
3230=item *
3231
3232You don't mind that when things get slurped into a hash the order is lost
3233
3234=item *
3235
3236You don't want fine-grained control of the formatting of generated XML
3237
3238=item *
3239
3240You would never use a hash key that was not a legal XML element name
3241
3242=item *
3243
3244You don't need help converting between different encodings
3245
3246=back
3247
3248In a serious XML project, you'll probably outgrow these assumptions fairly
3249quickly. This section of the document used to offer some advice on chosing a
3250more powerful option. That advice has now grown into the 'Perl-XML FAQ'
3251document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
3252
3253The advice in the FAQ boils down to a quick explanation of tree versus
3254event based parsers and then recommends:
3255
3256For event based parsing, use SAX (do not set out to write any new code for
3257XML::Parser's handler API - it is obselete).
3258
3259For tree-based parsing, you could choose between the 'Perlish' approach of
3260L<XML::Twig> and more standards based DOM implementations - preferably one with
3261XPath support.
3262
3263
3264=head1 SEE ALSO
3265
3266B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
3267
3268To generate documents with namespaces, L<XML::NamespaceSupport> is required.
3269
3270The optional caching functions require L<Storable>.
3271
3272Answers to Frequently Asked Questions about XML::Simple are bundled with this
3273distribution as: L<XML::Simple::FAQ>
3274
3275=head1 COPYRIGHT
3276
3277Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
3278
3279This library is free software; you can redistribute it and/or modify it
3280under the same terms as Perl itself.
3281
3282=cut
3283
3284