From 0c601b73125d1bc120e248149bf4d83ecc27f1c2 Mon Sep 17 00:00:00 2001 From: Justin Clarke Casey Date: Tue, 8 Apr 2008 11:19:34 +0000 Subject: * Removing lulurun's perl UGAI from core svn, pending a link to an external repository, as per mailing list discussion. --- share/perl/lib/XML/Simple.pm | 3284 ------------------------------------------ 1 file changed, 3284 deletions(-) delete mode 100644 share/perl/lib/XML/Simple.pm (limited to 'share/perl/lib/XML/Simple.pm') diff --git a/share/perl/lib/XML/Simple.pm b/share/perl/lib/XML/Simple.pm deleted file mode 100644 index 993669b..0000000 --- a/share/perl/lib/XML/Simple.pm +++ /dev/null @@ -1,3284 +0,0 @@ -# $Id: Simple.pm,v 1.1 2008/01/18 09:10:19 ryu Exp $ - -package XML::Simple; - -=head1 NAME - -XML::Simple - Easy API to maintain XML (esp config files) - -=head1 SYNOPSIS - - use XML::Simple; - - my $ref = XMLin([] [, ]); - - my $xml = XMLout($hashref [, ]); - -Or the object oriented way: - - require XML::Simple; - - my $xs = XML::Simple->new(options); - - my $ref = $xs->XMLin([] [, ]); - - my $xml = $xs->XMLout($hashref [, ]); - -(or see L<"SAX SUPPORT"> for 'the SAX way'). - -To catch common errors: - - use XML::Simple qw(:strict); - -(see L<"STRICT MODE"> for more details). - -=cut - -# See after __END__ for more POD documentation - - -# Load essentials here, other modules loaded on demand later - -use strict; -use Carp; -require Exporter; - - -############################################################################## -# Define some constants -# - -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); - -@ISA = qw(Exporter); -@EXPORT = qw(XMLin XMLout); -@EXPORT_OK = qw(xml_in xml_out); -$VERSION = '2.18'; -$PREFERRED_PARSER = undef; - -my $StrictMode = 0; - -my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr - searchpath forcearray cache suppressempty parseropts - grouptags nsexpand datahandler varattr variables - normalisespace normalizespace valueattr); - -my @KnownOptOut = qw(keyattr keeproot contentkey noattr - rootname xmldecl outputfile noescape suppressempty - grouptags nsexpand handler noindent attrindent nosort - valueattr numericescape); - -my @DefKeyAttr = qw(name key id); -my $DefRootName = qq(opt); -my $DefContentKey = qq(content); -my $DefXmlDecl = qq(); - -my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; -my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround - - -############################################################################## -# Globals for use by caching routines -# - -my %MemShareCache = (); -my %MemCopyCache = (); - - -############################################################################## -# Wrapper for Exporter - handles ':strict' -# - -sub import { - # Handle the :strict tag - - $StrictMode = 1 if grep(/^:strict$/, @_); - - # Pass everything else to Exporter.pm - - @_ = grep(!/^:strict$/, @_); - goto &Exporter::import; -} - - -############################################################################## -# Constructor for optional object interface. -# - -sub new { - my $class = shift; - - if(@_ % 2) { - croak "Default options must be name=>value pairs (odd number supplied)"; - } - - my %known_opt; - @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; - - my %raw_opt = @_; - my %def_opt; - while(my($key, $val) = each %raw_opt) { - my $lkey = lc($key); - $lkey =~ s/_//g; - croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); - $def_opt{$lkey} = $val; - } - my $self = { def_opt => \%def_opt }; - - return(bless($self, $class)); -} - - -############################################################################## -# Sub: _get_object() -# -# Helper routine called from XMLin() and XMLout() to create an object if none -# was provided. Note, this routine does mess with the caller's @_ array. -# - -sub _get_object { - my $self; - if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { - $self = shift; - } - else { - $self = XML::Simple->new(); - } - - return $self; -} - - -############################################################################## -# Sub/Method: XMLin() -# -# Exported routine for slurping XML into a hashref - see pod for info. -# -# May be called as object method or as a plain function. -# -# Expects one arg for the source XML, optionally followed by a number of -# name => value option pairs. -# - -sub XMLin { - my $self = &_get_object; # note, @_ is passed implicitly - - my $target = shift; - - - # Work out whether to parse a string, a file or a filehandle - - if(not defined $target) { - return $self->parse_file(undef, @_); - } - - elsif($target eq '-') { - local($/) = undef; - $target = ; - return $self->parse_string(\$target, @_); - } - - elsif(my $type = ref($target)) { - if($type eq 'SCALAR') { - return $self->parse_string($target, @_); - } - else { - return $self->parse_fh($target, @_); - } - } - - elsif($target =~ m{<.*?>}s) { - return $self->parse_string(\$target, @_); - } - - else { - return $self->parse_file($target, @_); - } -} - - -############################################################################## -# Sub/Method: parse_file() -# -# Same as XMLin, but only parses from a named file. -# - -sub parse_file { - my $self = &_get_object; # note, @_ is passed implicitly - - my $filename = shift; - - $self->handle_options('in', @_); - - $filename = $self->default_config_file if not defined $filename; - - $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); - - # Check cache for previous parse - - if($self->{opt}->{cache}) { - foreach my $scheme (@{$self->{opt}->{cache}}) { - my $method = 'cache_read_' . $scheme; - my $opt = $self->$method($filename); - return($opt) if($opt); - } - } - - my $ref = $self->build_simple_tree($filename, undef); - - if($self->{opt}->{cache}) { - my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; - $self->$method($ref, $filename); - } - - return $ref; -} - - -############################################################################## -# Sub/Method: parse_fh() -# -# Same as XMLin, but only parses from a filehandle. -# - -sub parse_fh { - my $self = &_get_object; # note, @_ is passed implicitly - - my $fh = shift; - croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . - " as a filehandle" unless ref $fh; - - $self->handle_options('in', @_); - - return $self->build_simple_tree(undef, $fh); -} - - -############################################################################## -# Sub/Method: parse_string() -# -# Same as XMLin, but only parses from a string or a reference to a string. -# - -sub parse_string { - my $self = &_get_object; # note, @_ is passed implicitly - - my $string = shift; - - $self->handle_options('in', @_); - - return $self->build_simple_tree(undef, ref $string ? $string : \$string); -} - - -############################################################################## -# Method: default_config_file() -# -# Returns the name of the XML file to parse if no filename (or XML string) -# was provided. -# - -sub default_config_file { - my $self = shift; - - require File::Basename; - - my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); - - # Add script directory to searchpath - - if($script_dir) { - unshift(@{$self->{opt}->{searchpath}}, $script_dir); - } - - return $basename . '.xml'; -} - - -############################################################################## -# Method: build_simple_tree() -# -# Builds a 'tree' data structure as provided by XML::Parser and then -# 'simplifies' it as specified by the various options in effect. -# - -sub build_simple_tree { - my $self = shift; - - my $tree = $self->build_tree(@_); - - return $self->{opt}->{keeproot} - ? $self->collapse({}, @$tree) - : $self->collapse(@{$tree->[1]}); -} - - -############################################################################## -# Method: build_tree() -# -# This routine will be called if there is no suitable pre-parsed tree in a -# cache. It parses the XML and returns an XML::Parser 'Tree' style data -# structure (summarised in the comments for the collapse() routine below). -# -# XML::Simple requires the services of another module that knows how to parse -# XML. If XML::SAX is installed, the default SAX parser will be used, -# otherwise XML::Parser will be used. -# -# This routine expects to be passed a filename as argument 1 or a 'string' as -# argument 2. The 'string' might be a string of XML (passed by reference to -# save memory) or it might be a reference to an IO::Handle. (This -# non-intuitive mess results in part from the way XML::Parser works but that's -# really no excuse). -# - -sub build_tree { - my $self = shift; - my $filename = shift; - my $string = shift; - - - my $preferred_parser = $PREFERRED_PARSER; - unless(defined($preferred_parser)) { - $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; - } - if($preferred_parser eq 'XML::Parser') { - return($self->build_tree_xml_parser($filename, $string)); - } - - eval { require XML::SAX; }; # We didn't need it until now - if($@) { # No XML::SAX - fall back to XML::Parser - if($preferred_parser) { # unless a SAX parser was expressly requested - croak "XMLin() could not load XML::SAX"; - } - return($self->build_tree_xml_parser($filename, $string)); - } - - $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); - - my $sp = XML::SAX::ParserFactory->parser(Handler => $self); - - $self->{nocollapse} = 1; - my($tree); - if($filename) { - $tree = $sp->parse_uri($filename); - } - else { - if(ref($string) && ref($string) ne 'SCALAR') { - $tree = $sp->parse_file($string); - } - else { - $tree = $sp->parse_string($$string); - } - } - - return($tree); -} - - -############################################################################## -# Method: build_tree_xml_parser() -# -# This routine will be called if XML::SAX is not installed, or if XML::Parser -# was specifically requested. It takes the same arguments as build_tree() and -# returns the same data structure (XML::Parser 'Tree' style). -# - -sub build_tree_xml_parser { - my $self = shift; - my $filename = shift; - my $string = shift; - - - eval { - local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() - require XML::Parser; # We didn't need it until now - }; - if($@) { - croak "XMLin() requires either XML::SAX or XML::Parser"; - } - - if($self->{opt}->{nsexpand}) { - carp "'nsexpand' option requires XML::SAX"; - } - - my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); - my($tree); - if($filename) { - # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl - local(*XML_FILE); - open(XML_FILE, '<', $filename) || croak qq($filename - $!); - $tree = $xp->parse(*XML_FILE); - close(XML_FILE); - } - else { - $tree = $xp->parse($$string); - } - - return($tree); -} - - -############################################################################## -# Method: cache_write_storable() -# -# Wrapper routine for invoking Storable::nstore() to cache a parsed data -# structure. -# - -sub cache_write_storable { - my($self, $data, $filename) = @_; - - my $cachefile = $self->storable_filename($filename); - - require Storable; # We didn't need it until now - - if ('VMS' eq $^O) { - Storable::nstore($data, $cachefile); - } - else { - # If the following line fails for you, your Storable.pm is old - upgrade - Storable::lock_nstore($data, $cachefile); - } - -} - - -############################################################################## -# Method: cache_read_storable() -# -# Wrapper routine for invoking Storable::retrieve() to read a cached parsed -# data structure. Only returns cached data if the cache file exists and is -# newer than the source XML file. -# - -sub cache_read_storable { - my($self, $filename) = @_; - - my $cachefile = $self->storable_filename($filename); - - return unless(-r $cachefile); - return unless((stat($cachefile))[9] > (stat($filename))[9]); - - require Storable; # We didn't need it until now - - if ('VMS' eq $^O) { - return(Storable::retrieve($cachefile)); - } - else { - return(Storable::lock_retrieve($cachefile)); - } - -} - - -############################################################################## -# Method: storable_filename() -# -# Translates the supplied source XML filename into a filename for the storable -# cached data. A '.stor' suffix is added after stripping an optional '.xml' -# suffix. -# - -sub storable_filename { - my($self, $cachefile) = @_; - - $cachefile =~ s{(\.xml)?$}{.stor}; - return $cachefile; -} - - -############################################################################## -# Method: cache_write_memshare() -# -# Takes the supplied data structure reference and stores it away in a global -# hash structure. -# - -sub cache_write_memshare { - my($self, $data, $filename) = @_; - - $MemShareCache{$filename} = [time(), $data]; -} - - -############################################################################## -# Method: cache_read_memshare() -# -# Takes a filename and looks in a global hash for a cached parsed version. -# - -sub cache_read_memshare { - my($self, $filename) = @_; - - return unless($MemShareCache{$filename}); - return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); - - return($MemShareCache{$filename}->[1]); - -} - - -############################################################################## -# Method: cache_write_memcopy() -# -# Takes the supplied data structure and stores a copy of it in a global hash -# structure. -# - -sub cache_write_memcopy { - my($self, $data, $filename) = @_; - - require Storable; # We didn't need it until now - - $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; -} - - -############################################################################## -# Method: cache_read_memcopy() -# -# Takes a filename and looks in a global hash for a cached parsed version. -# Returns a reference to a copy of that data structure. -# - -sub cache_read_memcopy { - my($self, $filename) = @_; - - return unless($MemCopyCache{$filename}); - return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); - - return(Storable::dclone($MemCopyCache{$filename}->[1])); - -} - - -############################################################################## -# Sub/Method: XMLout() -# -# Exported routine for 'unslurping' a data structure out to XML. -# -# Expects a reference to a data structure and an optional list of option -# name => value pairs. -# - -sub XMLout { - my $self = &_get_object; # note, @_ is passed implicitly - - croak "XMLout() requires at least one argument" unless(@_); - my $ref = shift; - - $self->handle_options('out', @_); - - - # If namespace expansion is set, XML::NamespaceSupport is required - - if($self->{opt}->{nsexpand}) { - require XML::NamespaceSupport; - $self->{nsup} = XML::NamespaceSupport->new(); - $self->{ns_prefix} = 'aaa'; - } - - - # Wrap top level arrayref in a hash - - if(UNIVERSAL::isa($ref, 'ARRAY')) { - $ref = { anon => $ref }; - } - - - # Extract rootname from top level hash if keeproot enabled - - if($self->{opt}->{keeproot}) { - my(@keys) = keys(%$ref); - if(@keys == 1) { - $ref = $ref->{$keys[0]}; - $self->{opt}->{rootname} = $keys[0]; - } - } - - # Ensure there are no top level attributes if we're not adding root elements - - elsif($self->{opt}->{rootname} eq '') { - if(UNIVERSAL::isa($ref, 'HASH')) { - my $refsave = $ref; - $ref = {}; - foreach (keys(%$refsave)) { - if(ref($refsave->{$_})) { - $ref->{$_} = $refsave->{$_}; - } - else { - $ref->{$_} = [ $refsave->{$_} ]; - } - } - } - } - - - # Encode the hashref and write to file if necessary - - $self->{_ancestors} = []; - my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); - delete $self->{_ancestors}; - - if($self->{opt}->{xmldecl}) { - $xml = $self->{opt}->{xmldecl} . "\n" . $xml; - } - - if($self->{opt}->{outputfile}) { - if(ref($self->{opt}->{outputfile})) { - my $fh = $self->{opt}->{outputfile}; - if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { - eval { require IO::Handle; }; - croak $@ if $@; - } - return($fh->print($xml)); - } - else { - local(*OUT); - open(OUT, '>', "$self->{opt}->{outputfile}") || - croak "open($self->{opt}->{outputfile}): $!"; - binmode(OUT, ':utf8') if($] >= 5.008); - print OUT $xml || croak "print: $!"; - close(OUT); - } - } - elsif($self->{opt}->{handler}) { - require XML::SAX; - my $sp = XML::SAX::ParserFactory->parser( - Handler => $self->{opt}->{handler} - ); - return($sp->parse_string($xml)); - } - else { - return($xml); - } -} - - -############################################################################## -# Method: handle_options() -# -# Helper routine for both XMLin() and XMLout(). Both routines handle their -# first argument and assume all other args are options handled by this routine. -# Saves a hash of options in $self->{opt}. -# -# If default options were passed to the constructor, they will be retrieved -# here and merged with options supplied to the method call. -# -# First argument should be the string 'in' or the string 'out'. -# -# Remaining arguments should be name=>value pairs. Sets up default values -# for options not supplied. Unrecognised options are a fatal error. -# - -sub handle_options { - my $self = shift; - my $dirn = shift; - - - # Determine valid options based on context - - my %known_opt; - if($dirn eq 'in') { - @known_opt{@KnownOptIn} = @KnownOptIn; - } - else { - @known_opt{@KnownOptOut} = @KnownOptOut; - } - - - # Store supplied options in hashref and weed out invalid ones - - if(@_ % 2) { - croak "Options must be name=>value pairs (odd number supplied)"; - } - my %raw_opt = @_; - my $opt = {}; - $self->{opt} = $opt; - - while(my($key, $val) = each %raw_opt) { - my $lkey = lc($key); - $lkey =~ s/_//g; - croak "Unrecognised option: $key" unless($known_opt{$lkey}); - $opt->{$lkey} = $val; - } - - - # Merge in options passed to constructor - - foreach (keys(%known_opt)) { - unless(exists($opt->{$_})) { - if(exists($self->{def_opt}->{$_})) { - $opt->{$_} = $self->{def_opt}->{$_}; - } - } - } - - - # Set sensible defaults if not supplied - - if(exists($opt->{rootname})) { - unless(defined($opt->{rootname})) { - $opt->{rootname} = ''; - } - } - else { - $opt->{rootname} = $DefRootName; - } - - if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { - $opt->{xmldecl} = $DefXmlDecl; - } - - if(exists($opt->{contentkey})) { - if($opt->{contentkey} =~ m{^-(.*)$}) { - $opt->{contentkey} = $1; - $opt->{collapseagain} = 1; - } - } - else { - $opt->{contentkey} = $DefContentKey; - } - - unless(exists($opt->{normalisespace})) { - $opt->{normalisespace} = $opt->{normalizespace}; - } - $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); - - # Cleanups for values assumed to be arrays later - - if($opt->{searchpath}) { - unless(ref($opt->{searchpath})) { - $opt->{searchpath} = [ $opt->{searchpath} ]; - } - } - else { - $opt->{searchpath} = [ ]; - } - - if($opt->{cache} and !ref($opt->{cache})) { - $opt->{cache} = [ $opt->{cache} ]; - } - if($opt->{cache}) { - $_ = lc($_) foreach (@{$opt->{cache}}); - foreach my $scheme (@{$opt->{cache}}) { - my $method = 'cache_read_' . $scheme; - croak "Unsupported caching scheme: $scheme" - unless($self->can($method)); - } - } - - if(exists($opt->{parseropts})) { - if($^W) { - carp "Warning: " . - "'ParserOpts' is deprecated, contact the author if you need it"; - } - } - else { - $opt->{parseropts} = [ ]; - } - - - # Special cleanup for {forcearray} which could be regex, arrayref or boolean - # or left to default to 0 - - if(exists($opt->{forcearray})) { - if(ref($opt->{forcearray}) eq 'Regexp') { - $opt->{forcearray} = [ $opt->{forcearray} ]; - } - - if(ref($opt->{forcearray}) eq 'ARRAY') { - my @force_list = @{$opt->{forcearray}}; - if(@force_list) { - $opt->{forcearray} = {}; - foreach my $tag (@force_list) { - if(ref($tag) eq 'Regexp') { - push @{$opt->{forcearray}->{_regex}}, $tag; - } - else { - $opt->{forcearray}->{$tag} = 1; - } - } - } - else { - $opt->{forcearray} = 0; - } - } - else { - $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); - } - } - else { - if($StrictMode and $dirn eq 'in') { - croak "No value specified for 'ForceArray' option in call to XML$dirn()"; - } - $opt->{forcearray} = 0; - } - - - # Special cleanup for {keyattr} which could be arrayref or hashref or left - # to default to arrayref - - if(exists($opt->{keyattr})) { - if(ref($opt->{keyattr})) { - if(ref($opt->{keyattr}) eq 'HASH') { - - # Make a copy so we can mess with it - - $opt->{keyattr} = { %{$opt->{keyattr}} }; - - - # Convert keyattr => { elem => '+attr' } - # to keyattr => { elem => [ 'attr', '+' ] } - - foreach my $el (keys(%{$opt->{keyattr}})) { - if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { - $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; - if($StrictMode and $dirn eq 'in') { - next if($opt->{forcearray} == 1); - next if(ref($opt->{forcearray}) eq 'HASH' - and $opt->{forcearray}->{$el}); - croak "<$el> set in KeyAttr but not in ForceArray"; - } - } - else { - delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) - } - } - } - else { - if(@{$opt->{keyattr}} == 0) { - delete($opt->{keyattr}); - } - } - } - else { - $opt->{keyattr} = [ $opt->{keyattr} ]; - } - } - else { - if($StrictMode) { - croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; - } - $opt->{keyattr} = [ @DefKeyAttr ]; - } - - - # Special cleanup for {valueattr} which could be arrayref or hashref - - if(exists($opt->{valueattr})) { - if(ref($opt->{valueattr}) eq 'ARRAY') { - $opt->{valueattrlist} = {}; - $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); - } - } - - # make sure there's nothing weird in {grouptags} - - if($opt->{grouptags}) { - croak "Illegal value for 'GroupTags' option - expected a hashref" - unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); - - while(my($key, $val) = each %{$opt->{grouptags}}) { - next if $key ne $val; - croak "Bad value in GroupTags: '$key' => '$val'"; - } - } - - - # Check the {variables} option is valid and initialise variables hash - - if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { - croak "Illegal value for 'Variables' option - expected a hashref"; - } - - if($opt->{variables}) { - $self->{_var_values} = { %{$opt->{variables}} }; - } - elsif($opt->{varattr}) { - $self->{_var_values} = {}; - } - -} - - -############################################################################## -# Method: find_xml_file() -# -# Helper routine for XMLin(). -# Takes a filename, and a list of directories, attempts to locate the file in -# the directories listed. -# Returns a full pathname on success; croaks on failure. -# - -sub find_xml_file { - my $self = shift; - my $file = shift; - my @search_path = @_; - - - require File::Basename; - require File::Spec; - - my($filename, $filedir) = File::Basename::fileparse($file); - - if($filename ne $file) { # Ignore searchpath if dir component - return($file) if(-e $file); - } - else { - my($path); - foreach $path (@search_path) { - my $fullpath = File::Spec->catfile($path, $file); - return($fullpath) if(-e $fullpath); - } - } - - # If user did not supply a search path, default to current directory - - if(!@search_path) { - return($file) if(-e $file); - croak "File does not exist: $file"; - } - - croak "Could not find $file in ", join(':', @search_path); -} - - -############################################################################## -# Method: collapse() -# -# Helper routine for XMLin(). This routine really comprises the 'smarts' (or -# value add) of this module. -# -# Takes the parse tree that XML::Parser produced from the supplied XML and -# recurses through it 'collapsing' unnecessary levels of indirection (nested -# arrays etc) to produce a data structure that is easier to work with. -# -# Elements in the original parser tree are represented as an element name -# followed by an arrayref. The first element of the array is a hashref -# containing the attributes. The rest of the array contains a list of any -# nested elements as name+arrayref pairs: -# -# , [ { }, , [ ... ], ... ] -# -# The special element name '0' (zero) flags text content. -# -# This routine cuts down the noise by discarding any text content consisting of -# only whitespace and then moves the nested elements into the attribute hash -# using the name of the nested element as the hash key and the collapsed -# version of the nested element as the value. Multiple nested elements with -# the same name will initially be represented as an arrayref, but this may be -# 'folded' into a hashref depending on the value of the keyattr option. -# - -sub collapse { - my $self = shift; - - - # Start with the hash of attributes - - my $attr = shift; - if($self->{opt}->{noattr}) { # Discard if 'noattr' set - $attr = {}; - } - elsif($self->{opt}->{normalisespace} == 2) { - while(my($key, $value) = each %$attr) { - $attr->{$key} = $self->normalise_space($value) - } - } - - - # Do variable substitutions - - if(my $var = $self->{_var_values}) { - while(my($key, $val) = each(%$attr)) { - $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; - $attr->{$key} = $val; - } - } - - - # Roll up 'value' attributes (but only if no nested elements) - - if(!@_ and keys %$attr == 1) { - my($k) = keys %$attr; - if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { - return $attr->{$k}; - } - } - - - # Add any nested elements - - my($key, $val); - while(@_) { - $key = shift; - $val = shift; - - if(ref($val)) { - $val = $self->collapse(@$val); - next if(!defined($val) and $self->{opt}->{suppressempty}); - } - elsif($key eq '0') { - next if($val =~ m{^\s*$}s); # Skip all whitespace content - - $val = $self->normalise_space($val) - if($self->{opt}->{normalisespace} == 2); - - # do variable substitutions - - if(my $var = $self->{_var_values}) { - $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; - } - - - # look for variable definitions - - if(my $var = $self->{opt}->{varattr}) { - if(exists $attr->{$var}) { - $self->set_var($attr->{$var}, $val); - } - } - - - # Collapse text content in element with no attributes to a string - - if(!%$attr and !@_) { - return($self->{opt}->{forcecontent} ? - { $self->{opt}->{contentkey} => $val } : $val - ); - } - $key = $self->{opt}->{contentkey}; - } - - - # Combine duplicate attributes into arrayref if required - - if(exists($attr->{$key})) { - if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { - push(@{$attr->{$key}}, $val); - } - else { - $attr->{$key} = [ $attr->{$key}, $val ]; - } - } - elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { - $attr->{$key} = [ $val ]; - } - else { - if( $key ne $self->{opt}->{contentkey} - and ( - ($self->{opt}->{forcearray} == 1) - or ( - (ref($self->{opt}->{forcearray}) eq 'HASH') - and ( - $self->{opt}->{forcearray}->{$key} - or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) - ) - ) - ) - ) { - $attr->{$key} = [ $val ]; - } - else { - $attr->{$key} = $val; - } - } - - } - - - # Turn arrayrefs into hashrefs if key fields present - - if($self->{opt}->{keyattr}) { - while(($key,$val) = each %$attr) { - if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { - $attr->{$key} = $self->array_to_hash($key, $val); - } - } - } - - - # disintermediate grouped tags - - if($self->{opt}->{grouptags}) { - while(my($key, $val) = each(%$attr)) { - next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); - next unless(exists($self->{opt}->{grouptags}->{$key})); - - my($child_key, $child_val) = %$val; - - if($self->{opt}->{grouptags}->{$key} eq $child_key) { - $attr->{$key}= $child_val; - } - } - } - - - # Fold hashes containing a single anonymous array up into just the array - - my $count = scalar keys %$attr; - if($count == 1 - and exists $attr->{anon} - and UNIVERSAL::isa($attr->{anon}, 'ARRAY') - ) { - return($attr->{anon}); - } - - - # Do the right thing if hash is empty, otherwise just return it - - if(!%$attr and exists($self->{opt}->{suppressempty})) { - if(defined($self->{opt}->{suppressempty}) and - $self->{opt}->{suppressempty} eq '') { - return(''); - } - return(undef); - } - - - # Roll up named elements with named nested 'value' attributes - - if($self->{opt}->{valueattr}) { - while(my($key, $val) = each(%$attr)) { - next unless($self->{opt}->{valueattr}->{$key}); - next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); - my($k) = keys %$val; - next unless($k eq $self->{opt}->{valueattr}->{$key}); - $attr->{$key} = $val->{$k}; - } - } - - return($attr) - -} - - -############################################################################## -# Method: set_var() -# -# Called when a variable definition is encountered in the XML. (A variable -# definition looks like value where attrname -# matches the varattr setting). -# - -sub set_var { - my($self, $name, $value) = @_; - - $self->{_var_values}->{$name} = $value; -} - - -############################################################################## -# Method: get_var() -# -# Called during variable substitution to get the value for the named variable. -# - -sub get_var { - my($self, $name) = @_; - - my $value = $self->{_var_values}->{$name}; - return $value if(defined($value)); - - return '${' . $name . '}'; -} - - -############################################################################## -# Method: normalise_space() -# -# Strips leading and trailing whitespace and collapses sequences of whitespace -# characters to a single space. -# - -sub normalise_space { - my($self, $text) = @_; - - $text =~ s/^\s+//s; - $text =~ s/\s+$//s; - $text =~ s/\s\s+/ /sg; - - return $text; -} - - -############################################################################## -# Method: array_to_hash() -# -# Helper routine for collapse(). -# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a -# reference to the hash on success or the original array if folding is -# not possible. Behaviour is controlled by 'keyattr' option. -# - -sub array_to_hash { - my $self = shift; - my $name = shift; - my $arrayref = shift; - - my $hashref = $self->new_hashref; - - my($i, $key, $val, $flag); - - - # Handle keyattr => { .... } - - if(ref($self->{opt}->{keyattr}) eq 'HASH') { - return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); - ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; - for($i = 0; $i < @$arrayref; $i++) { - if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and - exists($arrayref->[$i]->{$key}) - ) { - $val = $arrayref->[$i]->{$key}; - if(ref($val)) { - $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); - return($arrayref); - } - $val = $self->normalise_space($val) - if($self->{opt}->{normalisespace} == 1); - $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") - if(exists($hashref->{$val})); - $hashref->{$val} = { %{$arrayref->[$i]} }; - $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); - delete $hashref->{$val}->{$key} unless($flag eq '+'); - } - else { - $self->die_or_warn("<$name> element has no '$key' key attribute"); - return($arrayref); - } - } - } - - - # Or assume keyattr => [ .... ] - - else { - my $default_keys = - join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); - - ELEMENT: for($i = 0; $i < @$arrayref; $i++) { - return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); - - foreach $key (@{$self->{opt}->{keyattr}}) { - if(defined($arrayref->[$i]->{$key})) { - $val = $arrayref->[$i]->{$key}; - if(ref($val)) { - $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") - if not $default_keys; - return($arrayref); - } - $val = $self->normalise_space($val) - if($self->{opt}->{normalisespace} == 1); - $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") - if(exists($hashref->{$val})); - $hashref->{$val} = { %{$arrayref->[$i]} }; - delete $hashref->{$val}->{$key}; - next ELEMENT; - } - } - - return($arrayref); # No keyfield matched - } - } - - # collapse any hashes which now only have a 'content' key - - if($self->{opt}->{collapseagain}) { - $hashref = $self->collapse_content($hashref); - } - - return($hashref); -} - - -############################################################################## -# Method: die_or_warn() -# -# Takes a diagnostic message and does one of three things: -# 1. dies if strict mode is enabled -# 2. warns if warnings are enabled but strict mode is not -# 3. ignores message and resturns silently if neither strict mode nor warnings -# are enabled -# - -sub die_or_warn { - my $self = shift; - my $msg = shift; - - croak $msg if($StrictMode); - carp "Warning: $msg" if($^W); -} - - -############################################################################## -# Method: new_hashref() -# -# This is a hook routine for overriding in a sub-class. Some people believe -# that using Tie::IxHash here will solve order-loss problems. -# - -sub new_hashref { - my $self = shift; - - return { @_ }; -} - - -############################################################################## -# Method: collapse_content() -# -# Helper routine for array_to_hash -# -# Arguments expected are: -# - an XML::Simple object -# - a hasref -# the hashref is a former array, turned into a hash by array_to_hash because -# of the presence of key attributes -# at this point collapse_content avoids over-complicated structures like -# dir => { libexecdir => { content => '$exec_prefix/libexec' }, -# localstatedir => { content => '$prefix' }, -# } -# into -# dir => { libexecdir => '$exec_prefix/libexec', -# localstatedir => '$prefix', -# } - -sub collapse_content { - my $self = shift; - my $hashref = shift; - - my $contentkey = $self->{opt}->{contentkey}; - - # first go through the values,checking that they are fit to collapse - foreach my $val (values %$hashref) { - return $hashref unless ( (ref($val) eq 'HASH') - and (keys %$val == 1) - and (exists $val->{$contentkey}) - ); - } - - # now collapse them - foreach my $key (keys %$hashref) { - $hashref->{$key}= $hashref->{$key}->{$contentkey}; - } - - return $hashref; -} - - -############################################################################## -# Method: value_to_xml() -# -# Helper routine for XMLout() - recurses through a data structure building up -# and returning an XML representation of that structure as a string. -# -# Arguments expected are: -# - the data structure to be encoded (usually a reference) -# - the XML tag name to use for this item -# - a string of spaces for use as the current indent level -# - -sub value_to_xml { - my $self = shift;; - - - # Grab the other arguments - - my($ref, $name, $indent) = @_; - - my $named = (defined($name) and $name ne '' ? 1 : 0); - - my $nl = "\n"; - - my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! - if($self->{opt}->{noindent}) { - $indent = ''; - $nl = ''; - } - - - # Convert to XML - - if(ref($ref)) { - croak "circular data structures not supported" - if(grep($_ == $ref, @{$self->{_ancestors}})); - push @{$self->{_ancestors}}, $ref; - } - else { - if($named) { - return(join('', - $indent, '<', $name, '>', - ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), - '", $nl - )); - } - else { - return("$ref$nl"); - } - } - - - # Unfold hash to array if possible - - if(UNIVERSAL::isa($ref, 'HASH') # It is a hash - and keys %$ref # and it's not empty - and $self->{opt}->{keyattr} # and folding is enabled - and !$is_root # and its not the root element - ) { - $ref = $self->hash_to_array($name, $ref); - } - - - my @result = (); - my($key, $value); - - - # Handle hashrefs - - if(UNIVERSAL::isa($ref, 'HASH')) { - - # Reintermediate grouped values if applicable - - if($self->{opt}->{grouptags}) { - $ref = $self->copy_hash($ref); - while(my($key, $val) = each %$ref) { - if($self->{opt}->{grouptags}->{$key}) { - $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; - } - } - } - - - # Scan for namespace declaration attributes - - my $nsdecls = ''; - my $default_ns_uri; - if($self->{nsup}) { - $ref = $self->copy_hash($ref); - $self->{nsup}->push_context(); - - # Look for default namespace declaration first - - if(exists($ref->{xmlns})) { - $self->{nsup}->declare_prefix('', $ref->{xmlns}); - $nsdecls .= qq( xmlns="$ref->{xmlns}"); - delete($ref->{xmlns}); - } - $default_ns_uri = $self->{nsup}->get_uri(''); - - - # Then check all the other keys - - foreach my $qname (keys(%$ref)) { - my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); - if($uri) { - if($uri eq $xmlns_ns) { - $self->{nsup}->declare_prefix($lname, $ref->{$qname}); - $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); - delete($ref->{$qname}); - } - } - } - - # Translate any remaining Clarkian names - - foreach my $qname (keys(%$ref)) { - my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); - if($uri) { - if($default_ns_uri and $uri eq $default_ns_uri) { - $ref->{$lname} = $ref->{$qname}; - delete($ref->{$qname}); - } - else { - my $prefix = $self->{nsup}->get_prefix($uri); - unless($prefix) { - # $self->{nsup}->declare_prefix(undef, $uri); - # $prefix = $self->{nsup}->get_prefix($uri); - $prefix = $self->{ns_prefix}++; - $self->{nsup}->declare_prefix($prefix, $uri); - $nsdecls .= qq( xmlns:$prefix="$uri"); - } - $ref->{"$prefix:$lname"} = $ref->{$qname}; - delete($ref->{$qname}); - } - } - } - } - - - my @nested = (); - my $text_content = undef; - if($named) { - push @result, $indent, '<', $name, $nsdecls; - } - - if(keys %$ref) { - my $first_arg = 1; - foreach my $key ($self->sorted_keys($name, $ref)) { - my $value = $ref->{$key}; - next if(substr($key, 0, 1) eq '-'); - if(!defined($value)) { - next if $self->{opt}->{suppressempty}; - unless(exists($self->{opt}->{suppressempty}) - and !defined($self->{opt}->{suppressempty}) - ) { - carp 'Use of uninitialized value' if($^W); - } - if($key eq $self->{opt}->{contentkey}) { - $text_content = ''; - } - else { - $value = exists($self->{opt}->{suppressempty}) ? {} : ''; - } - } - - if(!ref($value) - and $self->{opt}->{valueattr} - and $self->{opt}->{valueattr}->{$key} - ) { - $value = { $self->{opt}->{valueattr}->{$key} => $value }; - } - - if(ref($value) or $self->{opt}->{noattr}) { - push @nested, - $self->value_to_xml($value, $key, "$indent "); - } - else { - $value = $self->escape_value($value) unless($self->{opt}->{noescape}); - if($key eq $self->{opt}->{contentkey}) { - $text_content = $value; - } - else { - push @result, "\n$indent " . ' ' x length($name) - if($self->{opt}->{attrindent} and !$first_arg); - push @result, ' ', $key, '="', $value , '"'; - $first_arg = 0; - } - } - } - } - else { - $text_content = ''; - } - - if(@nested or defined($text_content)) { - if($named) { - push @result, ">"; - if(defined($text_content)) { - push @result, $text_content; - $nested[0] =~ s/^\s+// if(@nested); - } - else { - push @result, $nl; - } - if(@nested) { - push @result, @nested, $indent; - } - push @result, '", $nl; - } - else { - push @result, @nested; # Special case if no root elements - } - } - else { - push @result, " />", $nl; - } - $self->{nsup}->pop_context() if($self->{nsup}); - } - - - # Handle arrayrefs - - elsif(UNIVERSAL::isa($ref, 'ARRAY')) { - foreach $value (@$ref) { - next if !defined($value) and $self->{opt}->{suppressempty}; - if(!ref($value)) { - push @result, - $indent, '<', $name, '>', - ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), - '$nl"; - } - elsif(UNIVERSAL::isa($value, 'HASH')) { - push @result, $self->value_to_xml($value, $name, $indent); - } - else { - push @result, - $indent, '<', $name, ">$nl", - $self->value_to_xml($value, 'anon', "$indent "), - $indent, '$nl"; - } - } - } - - else { - croak "Can't encode a value of type: " . ref($ref); - } - - - pop @{$self->{_ancestors}} if(ref($ref)); - - return(join('', @result)); -} - - -############################################################################## -# Method: sorted_keys() -# -# Returns the keys of the referenced hash sorted into alphabetical order, but -# with the 'key' key (as in KeyAttr) first, if there is one. -# - -sub sorted_keys { - my($self, $name, $ref) = @_; - - return keys %$ref if $self->{opt}->{nosort}; - - my %hash = %$ref; - my $keyattr = $self->{opt}->{keyattr}; - - my @key; - - if(ref $keyattr eq 'HASH') { - if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { - push @key, $keyattr->{$name}->[0]; - delete $hash{$keyattr->{$name}->[0]}; - } - } - elsif(ref $keyattr eq 'ARRAY') { - foreach (@{$keyattr}) { - if(exists $hash{$_}) { - push @key, $_; - delete $hash{$_}; - last; - } - } - } - - return(@key, sort keys %hash); -} - -############################################################################## -# Method: escape_value() -# -# Helper routine for automatically escaping values for XMLout(). -# Expects a scalar data value. Returns escaped version. -# - -sub escape_value { - my($self, $data) = @_; - - return '' unless(defined($data)); - - $data =~ s/&/&/sg; - $data =~ s//>/sg; - $data =~ s/"/"/sg; - - my $level = $self->{opt}->{numericescape} or return $data; - - return $self->numeric_escape($data, $level); -} - -sub numeric_escape { - my($self, $data, $level) = @_; - - use utf8; # required for 5.6 - - if($self->{opt}->{numericescape} eq '2') { - $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; - } - else { - $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; - } - - return $data; -} - - -############################################################################## -# Method: hash_to_array() -# -# Helper routine for value_to_xml(). -# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a -# reference to the array on success or the original hash if unfolding is -# not possible. -# - -sub hash_to_array { - my $self = shift; - my $parent = shift; - my $hashref = shift; - - my $arrayref = []; - - my($key, $value); - - my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; - foreach $key (@keys) { - $value = $hashref->{$key}; - return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); - - if(ref($self->{opt}->{keyattr}) eq 'HASH') { - return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); - push @$arrayref, $self->copy_hash( - $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key - ); - } - else { - push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); - } - } - - return($arrayref); -} - - -############################################################################## -# Method: copy_hash() -# -# Helper routine for hash_to_array(). When unfolding a hash of hashes into -# an array of hashes, we need to copy the key from the outer hash into the -# inner hash. This routine makes a copy of the original hash so we don't -# destroy the original data structure. You might wish to override this -# method if you're using tied hashes and don't want them to get untied. -# - -sub copy_hash { - my($self, $orig, @extra) = @_; - - return { @extra, %$orig }; -} - -############################################################################## -# Methods required for building trees from SAX events -############################################################################## - -sub start_document { - my $self = shift; - - $self->handle_options('in') unless($self->{opt}); - - $self->{lists} = []; - $self->{curlist} = $self->{tree} = []; -} - - -sub start_element { - my $self = shift; - my $element = shift; - - my $name = $element->{Name}; - if($self->{opt}->{nsexpand}) { - $name = $element->{LocalName} || ''; - if($element->{NamespaceURI}) { - $name = '{' . $element->{NamespaceURI} . '}' . $name; - } - } - my $attributes = {}; - if($element->{Attributes}) { # Might be undef - foreach my $attr (values %{$element->{Attributes}}) { - if($self->{opt}->{nsexpand}) { - my $name = $attr->{LocalName} || ''; - if($attr->{NamespaceURI}) { - $name = '{' . $attr->{NamespaceURI} . '}' . $name - } - $name = 'xmlns' if($name eq $bad_def_ns_jcn); - $attributes->{$name} = $attr->{Value}; - } - else { - $attributes->{$attr->{Name}} = $attr->{Value}; - } - } - } - my $newlist = [ $attributes ]; - push @{ $self->{lists} }, $self->{curlist}; - push @{ $self->{curlist} }, $name => $newlist; - $self->{curlist} = $newlist; -} - - -sub characters { - my $self = shift; - my $chars = shift; - - my $text = $chars->{Data}; - my $clist = $self->{curlist}; - my $pos = $#$clist; - - if ($pos > 0 and $clist->[$pos - 1] eq '0') { - $clist->[$pos] .= $text; - } - else { - push @$clist, 0 => $text; - } -} - - -sub end_element { - my $self = shift; - - $self->{curlist} = pop @{ $self->{lists} }; -} - - -sub end_document { - my $self = shift; - - delete($self->{curlist}); - delete($self->{lists}); - - my $tree = $self->{tree}; - delete($self->{tree}); - - - # Return tree as-is to XMLin() - - return($tree) if($self->{nocollapse}); - - - # Or collapse it before returning it to SAX parser class - - if($self->{opt}->{keeproot}) { - $tree = $self->collapse({}, @$tree); - } - else { - $tree = $self->collapse(@{$tree->[1]}); - } - - if($self->{opt}->{datahandler}) { - return($self->{opt}->{datahandler}->($self, $tree)); - } - - return($tree); -} - -*xml_in = \&XMLin; -*xml_out = \&XMLout; - -1; - -__END__ - -=head1 QUICK START - -Say you have a script called B and a file of configuration options -called B containing this: - - - -
10.0.0.101
-
10.0.1.101
-
- -
10.0.0.102
-
- -
10.0.0.103
-
10.0.1.103
-
-
- -The following lines of code in B: - - use XML::Simple; - - my $config = XMLin(); - -will 'slurp' the configuration options into the hashref $config (because no -arguments are passed to C the name and location of the XML file will -be inferred from name and location of the script). You can dump out the -contents of the hashref using Data::Dumper: - - use Data::Dumper; - - print Dumper($config); - -which will produce something like this (formatting has been adjusted for -brevity): - - { - 'logdir' => '/var/log/foo/', - 'debugfile' => '/tmp/foo.debug', - 'server' => { - 'sahara' => { - 'osversion' => '2.6', - 'osname' => 'solaris', - 'address' => [ '10.0.0.101', '10.0.1.101' ] - }, - 'gobi' => { - 'osversion' => '6.5', - 'osname' => 'irix', - 'address' => '10.0.0.102' - }, - 'kalahari' => { - 'osversion' => '2.0.34', - 'osname' => 'linux', - 'address' => [ '10.0.0.103', '10.0.1.103' ] - } - } - } - -Your script could then access the name of the log directory like this: - - print $config->{logdir}; - -similarly, the second address on the server 'kalahari' could be referenced as: - - print $config->{server}->{kalahari}->{address}->[1]; - -What could be simpler? (Rhetorical). - -For simple requirements, that's really all there is to it. If you want to -store your XML in a different directory or file, or pass it in as a string or -even pass it in via some derivative of an IO::Handle, you'll need to check out -L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that -neat little transformation that produced $config->{server}) you'll find options -for that as well. - -If you want to generate XML (for example to write a modified version of -$config back out as XML), check out C. - -If your needs are not so simple, this may not be the module for you. In that -case, you might want to read L<"WHERE TO FROM HERE?">. - -=head1 DESCRIPTION - -The XML::Simple module provides a simple API layer on top of an underlying XML -parsing module (either XML::Parser or one of the SAX2 parser modules). Two -functions are exported: C and C. Note: you can explicity -request the lower case versions of the function names: C and -C. - -The simplest approach is to call these two functions directly, but an -optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) -allows them to be called as methods of an B object. The object -interface can also be used at either end of a SAX pipeline. - -=head2 XMLin() - -Parses XML formatted data and returns a reference to a data structure which -contains the same information in a more readily accessible form. (Skip -down to L<"EXAMPLES"> below, for more sample code). - -C accepts an optional XML specifier followed by zero or more 'name => -value' option pairs. The XML specifier can be one of the following: - -=over 4 - -=item A filename - -If the filename contains no directory components C will look for the -file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the -current directory if the SearchPath option is not defined. eg: - - $ref = XMLin('/etc/params.xml'); - -Note, the filename '-' can be used to parse from STDIN. - -=item undef - -If there is no XML specifier, C will check the script directory and -each of the SearchPath directories for a file with the same name as the script -but with the extension '.xml'. Note: if you wish to specify options, you -must specify the value 'undef'. eg: - - $ref = XMLin(undef, ForceArray => 1); - -=item A string of XML - -A string containing XML (recognised by the presence of '<' and '>' characters) -will be parsed directly. eg: - - $ref = XMLin(''); - -=item An IO::Handle object - -An IO::Handle object will be read to EOF and its contents parsed. eg: - - $fh = IO::File->new('/etc/params.xml'); - $ref = XMLin($fh); - -=back - -=head2 XMLout() - -Takes a data structure (generally a hashref) and returns an XML encoding of -that structure. If the resulting XML is parsed using C, it should -return a data structure equivalent to the original (see caveats below). - -The C function can also be used to output the XML as SAX events -see the C option and L<"SAX SUPPORT"> for more details). - -When translating hashes to XML, hash keys which have a leading '-' will be -silently skipped. This is the approved method for marking elements of a -data structure which should be ignored by C. (Note: If these items -were not skipped the key names would be emitted as element or attribute names -with a leading '-' which would not be valid XML). - -=head2 Caveats - -Some care is required in creating data structures which will be passed to -C. Hash keys from the data structure will be encoded as either XML -element names or attribute names. Therefore, you should use hash key names -which conform to the relatively strict XML naming rules: - -Names in XML must begin with a letter. The remaining characters may be -letters, digits, hyphens (-), underscores (_) or full stops (.). It is also -allowable to include one colon (:) in an element name but this should only be -used when working with namespaces (B can only usefully work with -namespaces when teamed with a SAX Parser). - -You can use other punctuation characters in hash values (just not in hash -keys) however B does not support dumping binary data. - -If you break these rules, the current implementation of C will -simply emit non-compliant XML which will be rejected if you try to read it -back in. (A later version of B might take a more proactive -approach). - -Note also that although you can nest hashes and arrays to arbitrary levels, -circular data structures are not supported and will cause C to die. - -If you wish to 'round-trip' arbitrary data structures from Perl to XML and back -to Perl, then you should probably disable array folding (using the KeyAttr -option) both with C and with C. If you still don't get the -expected results, you may prefer to use L which is designed for -exactly that purpose. - -Refer to L<"WHERE TO FROM HERE?"> if C is too simple for your needs. - - -=head1 OPTIONS - -B supports a number of options (in fact as each release of -B adds more options, the module's claim to the name 'Simple' -becomes increasingly tenuous). If you find yourself repeatedly having to -specify the same options, you might like to investigate L<"OPTIONAL OO -INTERFACE"> below. - -If you can't be bothered reading the documentation, refer to -L<"STRICT MODE"> to automatically catch common mistakes. - -Because there are so many options, it's hard for new users to know which ones -are important, so here are the two you really need to know about: - -=over 4 - -=item * - -check out C because you'll almost certainly want to turn it on - -=item * - -make sure you know what the C option does and what its default value is -because it may surprise you otherwise (note in particular that 'KeyAttr' -affects both C and C) - -=back - -The option name headings below have a trailing 'comment' - a hash followed by -two pieces of metadata: - -=over 4 - -=item * - -Options are marked with 'I' if they are recognised by C and -'I' if they are recognised by C. - -=item * - -Each option is also flagged to indicate whether it is: - - 'important' - don't use the module until you understand this one - 'handy' - you can skip this on the first time through - 'advanced' - you can skip this on the second time through - 'SAX only' - don't worry about this unless you're using SAX (or - alternatively if you need this, you also need SAX) - 'seldom used' - you'll probably never use this unless you were the - person that requested the feature - -=back - -The options are listed alphabetically: - -Note: option names are no longer case sensitive so you can use the mixed case -versions shown here; all lower case as required by versions 2.03 and earlier; -or you can add underscores between the words (eg: key_attr). - - -=head2 AttrIndent => 1 I<# out - handy> - -When you are using C, enable this option to have attributes printed -one-per-line with sensible indentation rather than all on one line. - -=head2 Cache => [ cache schemes ] I<# in - advanced> - -Because loading the B module and parsing an XML file can consume a -significant number of CPU cycles, it is often desirable to cache the output of -C for later reuse. - -When parsing from a named file, B supports a number of caching -schemes. The 'Cache' option may be used to specify one or more schemes (using -an anonymous array). Each scheme will be tried in turn in the hope of finding -a cached pre-parsed representation of the XML file. If no cached copy is -found, the file will be parsed and the first cache scheme in the list will be -used to save a copy of the results. The following cache schemes have been -implemented: - -=over 4 - -=item storable - -Utilises B to read/write a cache file with the same name as the -XML file but with the extension .stor - -=item memshare - -When a file is first parsed, a copy of the resulting data structure is retained -in memory in the B module's namespace. Subsequent calls to parse -the same file will return a reference to this structure. This cached version -will persist only for the life of the Perl interpreter (which in the case of -mod_perl for example, may be some significant time). - -Because each caller receives a reference to the same data structure, a change -made by one caller will be visible to all. For this reason, the reference -returned should be treated as read-only. - -=item memcopy - -This scheme works identically to 'memshare' (above) except that each caller -receives a reference to a new data structure which is a copy of the cached -version. Copying the data structure will add a little processing overhead, -therefore this scheme should only be used where the caller intends to modify -the data structure (or wishes to protect itself from others who might). This -scheme uses B to perform the copy. - -=back - -Warning! The memory-based caching schemes compare the timestamp on the file to -the time when it was last parsed. If the file is stored on an NFS filesystem -(or other network share) and the clock on the file server is not exactly -synchronised with the clock where your script is run, updates to the source XML -file may appear to be ignored. - -=head2 ContentKey => 'keyname' I<# in+out - seldom used> - -When text content is parsed to a hash value, this option let's you specify a -name for the hash key to override the default 'content'. So for example: - - XMLin('Text', ContentKey => 'text') - -will parse to: - - { 'one' => 1, 'text' => 'Text' } - -instead of: - - { 'one' => 1, 'content' => 'Text' } - -C will also honour the value of this option when converting a hashref -to XML. - -You can also prefix your selected key name with a '-' character to have -C try a little harder to eliminate unnecessary 'content' keys after -array folding. For example: - - XMLin( - 'FirstSecond', - KeyAttr => {item => 'name'}, - ForceArray => [ 'item' ], - ContentKey => '-content' - ) - -will parse to: - - { - 'item' => { - 'one' => 'First' - 'two' => 'Second' - } - } - -rather than this (without the '-'): - - { - 'item' => { - 'one' => { 'content' => 'First' } - 'two' => { 'content' => 'Second' } - } - } - -=head2 DataHandler => code_ref I<# in - SAX only> - -When you use an B object as a SAX handler, it will return a -'simple tree' data structure in the same format as C would return. If -this option is set (to a subroutine reference), then when the tree is built the -subroutine will be called and passed two arguments: a reference to the -B object and a reference to the data tree. The return value from -the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for -more details). - -=head2 ForceArray => 1 I<# in - important> - -This option should be set to '1' to force nested elements to be represented -as arrays even when there is only one. Eg, with ForceArray enabled, this -XML: - - - value - - -would parse to this: - - { - 'name' => [ - 'value' - ] - } - -instead of this (the default): - - { - 'name' => 'value' - } - -This option is especially useful if the data structure is likely to be written -back out as XML and the default behaviour of rolling single nested elements up -into attributes is not desirable. - -If you are using the array folding feature, you should almost certainly enable -this option. If you do not, single nested elements will not be parsed to -arrays and therefore will not be candidates for folding to a hash. (Given that -the default value of 'KeyAttr' enables array folding, the default value of this -option should probably also have been enabled too - sorry). - -=head2 ForceArray => [ names ] I<# in - important> - -This alternative (and preferred) form of the 'ForceArray' option allows you to -specify a list of element names which should always be forced into an array -representation, rather than the 'all or nothing' approach above. - -It is also possible (since version 2.05) to include compiled regular -expressions in the list - any element names which match the pattern will be -forced to arrays. If the list contains only a single regex, then it is not -necessary to enclose it in an arrayref. Eg: - - ForceArray => qr/_list$/ - -=head2 ForceContent => 1 I<# in - seldom used> - -When C parses elements which have text content as well as attributes, -the text content must be represented as a hash value rather than a simple -scalar. This option allows you to force text content to always parse to -a hash value even when there are no attributes. So for example: - - XMLin('text1text2', ForceContent => 1) - -will parse to: - - { - 'x' => { 'content' => 'text1' }, - 'y' => { 'a' => 2, 'content' => 'text2' } - } - -instead of: - - { - 'x' => 'text1', - 'y' => { 'a' => 2, 'content' => 'text2' } - } - -=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> - -You can use this option to eliminate extra levels of indirection in your Perl -data structure. For example this XML: - - - - /usr/bin - /usr/local/bin - /usr/X11/bin - - - -Would normally be read into a structure like this: - - { - searchpath => { - dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] - } - } - -But when read in with the appropriate value for 'GroupTags': - - my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); - -It will return this simpler structure: - - { - searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] - } - -The grouping element (C<< >> in the example) must not contain any -attributes or elements other than the grouped element. - -You can specify multiple 'grouping element' to 'grouped element' mappings in -the same hashref. If this option is combined with C, the array -folding will occur first and then the grouped element names will be eliminated. - -C will also use the grouptag mappings to re-introduce the tags around -the grouped elements. Beware though that this will occur in all places that -the 'grouping tag' name occurs - you probably don't want to use the same name -for elements as well as attributes. - -=head2 Handler => object_ref I<# out - SAX only> - -Use the 'Handler' option to have C generate SAX events rather than -returning a string of XML. For more details see L<"SAX SUPPORT"> below. - -Note: the current implementation of this option generates a string of XML -and uses a SAX parser to translate it into SAX events. The normal encoding -rules apply here - your data must be UTF8 encoded unless you specify an -alternative encoding via the 'XMLDecl' option; and by the time the data reaches -the handler object, it will be in UTF8 form regardless of the encoding you -supply. A future implementation of this option may generate the events -directly. - -=head2 KeepRoot => 1 I<# in+out - handy> - -In its attempt to return a data structure free of superfluous detail and -unnecessary levels of indirection, C normally discards the root -element name. Setting the 'KeepRoot' option to '1' will cause the root element -name to be retained. So after executing this code: - - $config = XMLin('', KeepRoot => 1) - -You'll be able to reference the tempdir as -C<$config-E{config}-E{tempdir}> instead of the default -C<$config-E{tempdir}>. - -Similarly, setting the 'KeepRoot' option to '1' will tell C that the -data structure already contains a root element name and it is not necessary to -add another. - -=head2 KeyAttr => [ list ] I<# in+out - important> - -This option controls the 'array folding' feature which translates nested -elements from an array to a hash. It also controls the 'unfolding' of hashes -to arrays. - -For example, this XML: - - - - - - -would, by default, parse to this: - - { - 'user' => [ - { - 'login' => 'grep', - 'fullname' => 'Gary R Epstein' - }, - { - 'login' => 'stty', - 'fullname' => 'Simon T Tyson' - } - ] - } - -If the option 'KeyAttr => "login"' were used to specify that the 'login' -attribute is a key, the same XML would parse to: - - { - 'user' => { - 'stty' => { - 'fullname' => 'Simon T Tyson' - }, - 'grep' => { - 'fullname' => 'Gary R Epstein' - } - } - } - -The key attribute names should be supplied in an arrayref if there is more -than one. C will attempt to match attribute names in the order -supplied. C will use the first attribute name supplied when -'unfolding' a hash into an array. - -Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do -not want folding on input or unfolding on output you must setting this option -to an empty list to disable the feature. - -Note 2: If you wish to use this option, you should also enable the -C option. Without 'ForceArray', a single nested element will be -rolled up into a scalar rather than an array and therefore will not be folded -(since only arrays get folded). - -=head2 KeyAttr => { list } I<# in+out - important> - -This alternative (and preferred) method of specifiying the key attributes -allows more fine grained control over which elements are folded and on which -attributes. For example the option 'KeyAttr => { package => 'id' } will cause -any package elements to be folded on the 'id' attribute. No other elements -which have an 'id' attribute will be folded at all. - -Note: C will generate a warning (or a fatal error in L<"STRICT MODE">) -if this syntax is used and an element which does not have the specified key -attribute is encountered (eg: a 'package' element without an 'id' attribute, to -use the example above). Warnings will only be generated if B<-w> is in force. - -Two further variations are made possible by prefixing a '+' or a '-' character -to the attribute name: - -The option 'KeyAttr => { user => "+login" }' will cause this XML: - - - - - - -to parse to this data structure: - - { - 'user' => { - 'stty' => { - 'fullname' => 'Simon T Tyson', - 'login' => 'stty' - }, - 'grep' => { - 'fullname' => 'Gary R Epstein', - 'login' => 'grep' - } - } - } - -The '+' indicates that the value of the key attribute should be copied rather -than moved to the folded hash key. - -A '-' prefix would produce this result: - - { - 'user' => { - 'stty' => { - 'fullname' => 'Simon T Tyson', - '-login' => 'stty' - }, - 'grep' => { - 'fullname' => 'Gary R Epstein', - '-login' => 'grep' - } - } - } - -As described earlier, C will ignore hash keys starting with a '-'. - -=head2 NoAttr => 1 I<# in+out - handy> - -When used with C, the generated XML will contain no attributes. -All hash key/values will be represented as nested elements instead. - -When used with C, any attributes in the XML will be ignored. - -=head2 NoEscape => 1 I<# out - seldom used> - -By default, C will translate the characters 'E', 'E', '&' and -'"' to '<', '>', '&' and '"' respectively. Use this option to -suppress escaping (presumably because you've already escaped the data in some -more sophisticated manner). - -=head2 NoIndent => 1 I<# out - seldom used> - -Set this option to 1 to disable C's default 'pretty printing' mode. -With this option enabled, the XML output will all be on one line (unless there -are newlines in the data) - this may be easier for downstream processing. - -=head2 NoSort => 1 I<# out - seldom used> - -Newer versions of XML::Simple sort elements and attributes alphabetically (*), -by default. Enable this option to suppress the sorting - possibly for -backwards compatibility. - -* Actually, sorting is alphabetical but 'key' attribute or element names (as in -'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements -are sorted alphabetically by the value of the key field. - -=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> - -This option controls how whitespace in text content is handled. Recognised -values for the option are: - -=over 4 - -=item * - -0 = (default) whitespace is passed through unaltered (except of course for the -normalisation of whitespace in attribute values which is mandated by the XML -recommendation) - -=item * - -1 = whitespace is normalised in any value used as a hash key (normalising means -removing leading and trailing whitespace and collapsing sequences of whitespace -characters to a single space) - -=item * - -2 = whitespace is normalised in all text content - -=back - -Note: you can spell this option with a 'z' if that is more natural for you. - -=head2 NSExpand => 1 I<# in+out handy - SAX only> - -This option controls namespace expansion - the translation of element and -attribute names of the form 'prefix:name' to '{uri}name'. For example the -element name 'xsl:template' might be expanded to: -'{http://www.w3.org/1999/XSL/Transform}template'. - -By default, C will return element names and attribute names exactly as -they appear in the XML. Setting this option to 1 will cause all element and -attribute names to be expanded to include their namespace prefix. - -I. - -This option also controls whether C performs the reverse translation -from '{uri}name' back to 'prefix:name'. The default is no translation. If -your data contains expanded names, you should set this option to 1 otherwise -C will emit XML which is not well formed. - -I to translate URIs back to prefixes>. - -=head2 NumericEscape => 0 | 1 | 2 I<# out - handy> - -Use this option to have 'high' (non-ASCII) characters in your Perl data -structure converted to numeric entities (eg: €) in the XML output. Three -levels are possible: - -0 - default: no numeric escaping (OK if you're writing out UTF8) - -1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output - -2 - all characters above 0x7F are escaped (good for plain ASCII output) - -=head2 OutputFile => I<# out - handy> - -The default behaviour of C is to return the XML as a string. If you -wish to write the XML to a file, simply supply the filename using the -'OutputFile' option. - -This option also accepts an IO handle object - especially useful in Perl 5.8.0 -and later for output using an encoding other than UTF-8, eg: - - open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; - XMLout($ref, OutputFile => $fh); - -Note, XML::Simple does not require that the object you pass in to the -OutputFile option inherits from L - it simply assumes the object -supports a C method. - -=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> - -I. - -This option allows you to pass parameters to the constructor of the underlying -XML::Parser object (which of course assumes you're not using SAX). - -=head2 RootName => 'string' I<# out - handy> - -By default, when C generates XML, the root element will be named -'opt'. This option allows you to specify an alternative name. - -Specifying either undef or the empty string for the RootName option will -produce XML with no root elements. In most cases the resulting XML fragment -will not be 'well formed' and therefore could not be read back in by C. -Nevertheless, the option has been found to be useful in certain circumstances. - -=head2 SearchPath => [ list ] I<# in - handy> - -If you pass C a filename, but the filename include no directory -component, you can use this option to specify which directories should be -searched to locate the file. You might use this option to search first in the -user's home directory, then in a global directory such as /etc. - -If a filename is provided to C but SearchPath is not defined, the -file is assumed to be in the current directory. - -If the first parameter to C is undefined, the default SearchPath -will contain only the directory in which the script itself is located. -Otherwise the default SearchPath will be empty. - -=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> - -This option controls what C should do with empty elements (no -attributes and no content). The default behaviour is to represent them as -empty hashes. Setting this option to a true value (eg: 1) will cause empty -elements to be skipped altogether. Setting the option to 'undef' or the empty -string will cause empty elements to be represented as the undefined value or -the empty string respectively. The latter two alternatives are a little -easier to test for in your code than a hash with no keys. - -The option also controls what C does with undefined values. Setting -the option to undef causes undefined values to be output as empty elements -(rather than empty attributes), it also suppresses the generation of warnings -about undefined values. Setting the option to a true value (eg: 1) causes -undefined values to be skipped altogether on output. - -=head2 ValueAttr => [ names ] I<# in - handy> - -Use this option to deal elements which always have a single attribute and no -content. Eg: - - - - - - -Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: - - { - colour => 'red', - size => 'XXL' - } - -instead of this (the default): - - { - colour => { value => 'red' }, - size => { value => 'XXL' } - } - -Note: This form of the ValueAttr option is not compatible with C - -since the attribute name is discarded at parse time, the original XML cannot be -reconstructed. - -=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> - -This (preferred) form of the ValueAttr option requires you to specify both -the element and the attribute names. This is not only safer, it also allows -the original XML to be reconstructed by C. - -Note: You probably don't want to use this option and the NoAttr option at the -same time. - -=head2 Variables => { name => value } I<# in - handy> - -This option allows variables in the XML to be expanded when the file is read. -(there is no facility for putting the variable names back if you regenerate -XML using C). - -A 'variable' is any text of the form C<${name}> which occurs in an attribute -value or in the text content of an element. If 'name' matches a key in the -supplied hashref, C<${name}> will be replaced with the corresponding value from -the hashref. If no matching key is found, the variable will not be replaced. -Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are -allowed). - -=head2 VarAttr => 'attr_name' I<# in - handy> - -In addition to the variables defined using C, this option allows -variables to be defined in the XML. A variable definition consists of an -element with an attribute called 'attr_name' (the value of the C -option). The value of the attribute will be used as the variable name and the -text content of the element will be used as the value. A variable defined in -this way will override a variable defined using the C option. For -example: - - XMLin( ' - /usr/local/apache - ${prefix} - ${exec_prefix}/bin - ', - VarAttr => 'name', ContentKey => '-content' - ); - -produces the following data structure: - - { - dir => { - prefix => '/usr/local/apache', - exec_prefix => '/usr/local/apache', - bindir => '/usr/local/apache/bin', - } - } - -=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> - -If you want the output from C to start with the optional XML -declaration, simply set the option to '1'. The default XML declaration is: - - - -If you want some other string (for example to declare an encoding value), set -the value of this option to the complete string you require. - - -=head1 OPTIONAL OO INTERFACE - -The procedural interface is both simple and convenient however there are a -couple of reasons why you might prefer to use the object oriented (OO) -interface: - -=over 4 - -=item * - -to define a set of default values which should be used on all subsequent calls -to C or C - -=item * - -to override methods in B to provide customised behaviour - -=back - -The default values for the options described above are unlikely to suit -everyone. The OO interface allows you to effectively override B's -defaults with your preferred values. It works like this: - -First create an XML::Simple parser object with your preferred defaults: - - my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); - -then call C or C as a method of that object: - - my $ref = $xs->XMLin($xml); - my $xml = $xs->XMLout($ref); - -You can also specify options when you make the method calls and these values -will be merged with the values specified when the object was created. Values -specified in a method call take precedence. - -Note: when called as methods, the C and C routines may be -called as C or C. The method names are aliased so the -only difference is the aesthetics. - -=head2 Parsing Methods - -You can explicitly call one of the following methods rather than rely on the -C method automatically determining whether the target to be parsed is -a string, a file or a filehandle: - -=over 4 - -=item parse_string(text) - -Works exactly like the C method but assumes the first argument is -a string of XML (or a reference to a scalar containing a string of XML). - -=item parse_file(filename) - -Works exactly like the C method but assumes the first argument is -the name of a file containing XML. - -=item parse_fh(file_handle) - -Works exactly like the C method but assumes the first argument is -a filehandle which can be read to get XML. - -=back - -=head2 Hook Methods - -You can make your own class which inherits from XML::Simple and overrides -certain behaviours. The following methods may provide useful 'hooks' upon -which to hang your modified behaviour. You may find other undocumented methods -by examining the source, but those may be subject to change in future releases. - -=over 4 - -=item handle_options(direction, name => value ...) - -This method will be called when one of the parsing methods or the C -method is called. The initial argument will be a string (either 'in' or 'out') -and the remaining arguments will be name value pairs. - -=item default_config_file() - -Calculates and returns the name of the file which should be parsed if no -filename is passed to C (default: C<$0.xml>). - -=item build_simple_tree(filename, string) - -Called from C or any of the parsing methods. Takes either a file name -as the first argument or C followed by a 'string' as the second -argument. Returns a simple tree data structure. You could override this -method to apply your own transformations before the data structure is returned -to the caller. - -=item new_hashref() - -When the 'simple tree' data structure is being built, this method will be -called to create any required anonymous hashrefs. - -=item sorted_keys(name, hashref) - -Called when C is translating a hashref to XML. This routine returns -a list of hash keys in the order that the corresponding attributes/elements -should appear in the output. - -=item escape_value(string) - -Called from C, takes a string and returns a copy of the string with -XML character escaping rules applied. - -=item numeric_escape(string) - -Called from C, to handle non-ASCII characters (depending on the -value of the NumericEscape option). - -=item copy_hash(hashref, extra_key => value, ...) - -Called from C, when 'unfolding' a hash of hashes into an array of -hashes. You might wish to override this method if you're using tied hashes and -don't want them to get untied. - -=back - -=head2 Cache Methods - -XML::Simple implements three caching schemes ('storable', 'memshare' and -'memcopy'). You can implement a custom caching scheme by implementing -two methods - one for reading from the cache and one for writing to it. - -For example, you might implement a new 'dbm' scheme that stores cached data -structures using the L module. First, you would add a -C method which accepted a filename for use as a lookup key -and returned a data structure on success, or undef on failure. Then, you would -implement a C method which accepted a data structure and a -filename. - -You would use this caching scheme by specifying the option: - - Cache => [ 'dbm' ] - -=head1 STRICT MODE - -If you import the B routines like this: - - use XML::Simple qw(:strict); - -the following common mistakes will be detected and treated as fatal errors - -=over 4 - -=item * - -Failing to explicitly set the C option - if you can't be bothered -reading about this option, turn it off with: KeyAttr => [ ] - -=item * - -Failing to explicitly set the C option - if you can't be bothered -reading about this option, set it to the safest mode with: ForceArray => 1 - -=item * - -Setting ForceArray to an array, but failing to list all the elements from the -KeyAttr hash. - -=item * - -Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains -one or more EpartE elements without a 'partnum' attribute (or nested -element). Note: if strict mode is not set but -w is, this condition triggers a -warning. - -=item * - -Data error - as above, but non-unique values are present in the key attribute -(eg: more than one EpartE element with the same partnum). This will -also trigger a warning if strict mode is not enabled. - -=item * - -Data error - as above, but value of key attribute (eg: partnum) is not a -scalar string (due to nested elements etc). This will also trigger a warning -if strict mode is not enabled. - -=back - -=head1 SAX SUPPORT - -From version 1.08_01, B includes support for SAX (the Simple API -for XML) - specifically SAX2. - -In a typical SAX application, an XML parser (or SAX 'driver') module generates -SAX events (start of element, character data, end of element, etc) as it parses -an XML document and a 'handler' module processes the events to extract the -required data. This simple model allows for some interesting and powerful -possibilities: - -=over 4 - -=item * - -Applications written to the SAX API can extract data from huge XML documents -without the memory overheads of a DOM or tree API. - -=item * - -The SAX API allows for plug and play interchange of parser modules without -having to change your code to fit a new module's API. A number of SAX parsers -are available with capabilities ranging from extreme portability to blazing -performance. - -=item * - -A SAX 'filter' module can implement both a handler interface for receiving -data and a generator interface for passing modified data on to a downstream -handler. Filters can be chained together in 'pipelines'. - -=item * - -One filter module might split a data stream to direct data to two or more -downstream handlers. - -=item * - -Generating SAX events is not the exclusive preserve of XML parsing modules. -For example, a module might extract data from a relational database using DBI -and pass it on to a SAX pipeline for filtering and formatting. - -=back - -B can operate at either end of a SAX pipeline. For example, -you can take a data structure in the form of a hashref and pass it into a -SAX pipeline using the 'Handler' option on C: - - use XML::Simple; - use Some::SAX::Filter; - use XML::SAX::Writer; - - my $ref = { - .... # your data here - }; - - my $writer = XML::SAX::Writer->new(); - my $filter = Some::SAX::Filter->new(Handler => $writer); - my $simple = XML::Simple->new(Handler => $filter); - $simple->XMLout($ref); - -You can also put B at the opposite end of the pipeline to take -advantage of the simple 'tree' data structure once the relevant data has been -isolated through filtering: - - use XML::SAX; - use Some::SAX::Filter; - use XML::Simple; - - my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); - my $filter = Some::SAX::Filter->new(Handler => $simple); - my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); - - my $ref = $parser->parse_uri('some_huge_file.xml'); - - print $ref->{part}->{'555-1234'}; - -You can build a filter by using an XML::Simple object as a handler and setting -its DataHandler option to point to a routine which takes the resulting tree, -modifies it and sends it off as SAX events to a downstream handler: - - my $writer = XML::SAX::Writer->new(); - my $filter = XML::Simple->new( - DataHandler => sub { - my $simple = shift; - my $data = shift; - - # Modify $data here - - $simple->XMLout($data, Handler => $writer); - } - ); - my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); - - $parser->parse_uri($filename); - -I but it could also have been specified in the constructor>. - -=head1 ENVIRONMENT - -If you don't care which parser module B uses then skip this -section entirely (it looks more complicated than it really is). - -B will default to using a B parser if one is available or -B if SAX is not available. - -You can dictate which parser module is used by setting either the environment -variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable -$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules -are used: - -=over 4 - -=item * - -The package variable takes precedence over the environment variable if both are defined. To force B to ignore the environment settings and use -its default rules, you can set the package variable to an empty string. - -=item * - -If the 'preferred parser' is set to the string 'XML::Parser', then -L will be used (or C will die if L is not -installed). - -=item * - -If the 'preferred parser' is set to some other value, then it is assumed to be -the name of a SAX parser module and is passed to L -If L is not installed, or the requested parser module is not -installed, then C will die. - -=item * - -If the 'preferred parser' is not defined at all (the normal default -state), an attempt will be made to load L. If L is -installed, then a parser module will be selected according to -L's normal rules (which typically means the last SAX -parser installed). - -=item * - -if the 'preferred parser' is not defined and B is not -installed, then B will be used. C will die if -L is not installed. - -=back - -Note: The B distribution includes an XML parser written entirely in -Perl. It is very portable but it is not very fast. You should consider -installing L or L if they are available for your -platform. - -=head1 ERROR HANDLING - -The XML standard is very clear on the issue of non-compliant documents. An -error in parsing any single element (for example a missing end tag) must cause -the whole document to be rejected. B will die with an appropriate -message if it encounters a parsing error. - -If dying is not appropriate for your application, you should arrange to call -C in an eval block and look for errors in $@. eg: - - my $config = eval { XMLin() }; - PopUpMessage($@) if($@); - -Note, there is a common misconception that use of B will significantly -slow down a script. While that may be true when the code being eval'd is in a -string, it is not true of code like the sample above. - -=head1 EXAMPLES - -When C reads the following very simple piece of XML: - - - -it returns the following data structure: - - { - 'username' => 'testuser', - 'password' => 'frodo' - } - -The identical result could have been produced with this alternative XML: - - - -Or this (although see 'ForceArray' option for variations): - - - testuser - frodo - - -Repeated nested elements are represented as anonymous arrays: - - - - joe@smith.com - jsmith@yahoo.com - - - bob@smith.com - - - - { - 'person' => [ - { - 'email' => [ - 'joe@smith.com', - 'jsmith@yahoo.com' - ], - 'firstname' => 'Joe', - 'lastname' => 'Smith' - }, - { - 'email' => 'bob@smith.com', - 'firstname' => 'Bob', - 'lastname' => 'Smith' - } - ] - } - -Nested elements with a recognised key attribute are transformed (folded) from -an array into a hash keyed on the value of that attribute (see the C -option): - - - - - - - - { - 'person' => { - 'jbloggs' => { - 'firstname' => 'Joe', - 'lastname' => 'Bloggs' - }, - 'tsmith' => { - 'firstname' => 'Tom', - 'lastname' => 'Smith' - }, - 'jsmith' => { - 'firstname' => 'Joe', - 'lastname' => 'Smith' - } - } - } - - -The tag can be used to form anonymous arrays: - - - Col 1Col 2Col 3 - R1C1R1C2R1C3 - R2C1R2C2R2C3 - R3C1R3C2R3C3 - - - { - 'head' => [ - [ 'Col 1', 'Col 2', 'Col 3' ] - ], - 'data' => [ - [ 'R1C1', 'R1C2', 'R1C3' ], - [ 'R2C1', 'R2C2', 'R2C3' ], - [ 'R3C1', 'R3C2', 'R3C3' ] - ] - } - -Anonymous arrays can be nested to arbirtrary levels and as a special case, if -the surrounding tags for an XML document contain only an anonymous array the -arrayref will be returned directly rather than the usual hashref: - - - Col 1Col 2 - R1C1R1C2 - R2C1R2C2 - - - [ - [ 'Col 1', 'Col 2' ], - [ 'R1C1', 'R1C2' ], - [ 'R2C1', 'R2C2' ] - ] - -Elements which only contain text content will simply be represented as a -scalar. Where an element has both attributes and text content, the element -will be represented as a hashref with the text content in the 'content' key -(see the C option): - - - first - second - - - { - 'one' => 'first', - 'two' => { 'attr' => 'value', 'content' => 'second' } - } - -Mixed content (elements which contain both text content and nested elements) -will be not be represented in a useful way - element order and significant -whitespace will be lost. If you need to work with mixed content, then -XML::Simple is not the right tool for your job - check out the next section. - -=head1 WHERE TO FROM HERE? - -B is able to present a simple API because it makes some -assumptions on your behalf. These include: - -=over 4 - -=item * - -You're not interested in text content consisting only of whitespace - -=item * - -You don't mind that when things get slurped into a hash the order is lost - -=item * - -You don't want fine-grained control of the formatting of generated XML - -=item * - -You would never use a hash key that was not a legal XML element name - -=item * - -You don't need help converting between different encodings - -=back - -In a serious XML project, you'll probably outgrow these assumptions fairly -quickly. This section of the document used to offer some advice on chosing a -more powerful option. That advice has now grown into the 'Perl-XML FAQ' -document which you can find at: L - -The advice in the FAQ boils down to a quick explanation of tree versus -event based parsers and then recommends: - -For event based parsing, use SAX (do not set out to write any new code for -XML::Parser's handler API - it is obselete). - -For tree-based parsing, you could choose between the 'Perlish' approach of -L and more standards based DOM implementations - preferably one with -XPath support. - - -=head1 SEE ALSO - -B requires either L or L. - -To generate documents with namespaces, L is required. - -The optional caching functions require L. - -Answers to Frequently Asked Questions about XML::Simple are bundled with this -distribution as: L - -=head1 COPYRIGHT - -Copyright 1999-2004 Grant McLean Egrantm@cpan.orgE - -This library is free software; you can redistribute it and/or modify it -under the same terms as Perl itself. - -=cut - - -- cgit v1.1