aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h')
-rw-r--r--libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h820
1 files changed, 820 insertions, 0 deletions
diff --git a/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h b/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h
new file mode 100644
index 0000000..f87be65
--- /dev/null
+++ b/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h
@@ -0,0 +1,820 @@
1// Copyright (C) 2002-2012 Nikolaus Gebhardt
2// This file is part of the "Irrlicht Engine" and the "irrXML" project.
3// For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
4
5#ifndef __ICXML_READER_IMPL_H_INCLUDED__
6#define __ICXML_READER_IMPL_H_INCLUDED__
7
8#include "irrXML.h"
9#include "irrString.h"
10#include "irrArray.h"
11#include "fast_atof.h"
12
// Debug-only console output; expands to nothing when _DEBUG is not defined.
// The message is passed as the argument of a fixed "%s" format so that any
// '%' characters it contains are printed literally instead of being
// interpreted as conversion specifiers.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf("%s", (x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
18
19
20namespace irr
21{
22namespace io
23{
24
25
26//! implementation of the IrrXMLReader
27template<class char_type, class superclass>
28class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
29{
30public:
31
32 //! Constructor
33 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
34 : IgnoreWhitespaceText(true), TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
35 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII), IsEmptyElement(false)
36 {
37 if (!callback)
38 return;
39
40 storeTargetFormat();
41
42 // read whole xml file
43
44 readFile(callback);
45
46 // clean up
47
48 if (deleteCallBack)
49 delete callback;
50
51 // create list with special characters
52
53 createSpecialCharacterList();
54
55 // set pointer to text begin
56 P = TextBegin;
57 }
58
59
60 //! Destructor
61 virtual ~CXMLReaderImpl()
62 {
63 delete [] TextData;
64 }
65
66
67 //! Reads forward to the next xml node.
68 //! \return Returns false, if there was no further node.
69 virtual bool read()
70 {
71 // if not end reached, parse the node
72 if (P && ((unsigned int)(P - TextBegin) < TextSize - 1) && (*P != 0))
73 {
74 return parseCurrentNode();
75 }
76
77 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
78 return false;
79 }
80
81
82 //! Returns the type of the current XML node.
83 virtual EXML_NODE getNodeType() const
84 {
85 return CurrentNodeType;
86 }
87
88
89 //! Returns attribute count of the current XML node.
90 virtual unsigned int getAttributeCount() const
91 {
92 return Attributes.size();
93 }
94
95
96 //! Returns name of an attribute.
97 virtual const char_type* getAttributeName(int idx) const
98 {
99 if ((u32)idx >= Attributes.size())
100 return 0;
101
102 return Attributes[idx].Name.c_str();
103 }
104
105
106 //! Returns the value of an attribute.
107 virtual const char_type* getAttributeValue(int idx) const
108 {
109 if ((unsigned int)idx >= Attributes.size())
110 return 0;
111
112 return Attributes[idx].Value.c_str();
113 }
114
115
116 //! Returns the value of an attribute.
117 virtual const char_type* getAttributeValue(const char_type* name) const
118 {
119 const SAttribute* attr = getAttributeByName(name);
120 if (!attr)
121 return 0;
122
123 return attr->Value.c_str();
124 }
125
126
127 //! Returns the value of an attribute
128 virtual const char_type* getAttributeValueSafe(const char_type* name) const
129 {
130 const SAttribute* attr = getAttributeByName(name);
131 if (!attr)
132 return EmptyString.c_str();
133
134 return attr->Value.c_str();
135 }
136
137
138
139 //! Returns the value of an attribute as integer.
140 int getAttributeValueAsInt(const char_type* name) const
141 {
142 const SAttribute* attr = getAttributeByName(name);
143 if (!attr)
144 return 0;
145
146 core::stringc c(attr->Value.c_str());
147 return core::strtol10(c.c_str());
148 }
149
150
151 //! Returns the value of an attribute as integer.
152 int getAttributeValueAsInt(int idx) const
153 {
154 const char_type* attrvalue = getAttributeValue(idx);
155 if (!attrvalue)
156 return 0;
157
158 core::stringc c(attrvalue);
159 return core::strtol10(c.c_str());
160 }
161
162
163 //! Returns the value of an attribute as float.
164 float getAttributeValueAsFloat(const char_type* name) const
165 {
166 const SAttribute* attr = getAttributeByName(name);
167 if (!attr)
168 return 0;
169
170 core::stringc c = attr->Value.c_str();
171 return core::fast_atof(c.c_str());
172 }
173
174
175 //! Returns the value of an attribute as float.
176 float getAttributeValueAsFloat(int idx) const
177 {
178 const char_type* attrvalue = getAttributeValue(idx);
179 if (!attrvalue)
180 return 0;
181
182 core::stringc c = attrvalue;
183 return core::fast_atof(c.c_str());
184 }
185
186
187 //! Returns the name of the current node.
188 virtual const char_type* getNodeName() const
189 {
190 return NodeName.c_str();
191 }
192
193
194 //! Returns data of the current node.
195 virtual const char_type* getNodeData() const
196 {
197 return NodeName.c_str();
198 }
199
200
201 //! Returns if an element is an empty element, like <foo />
202 virtual bool isEmptyElement() const
203 {
204 return IsEmptyElement;
205 }
206
207 //! Returns format of the source xml file.
208 virtual ETEXT_FORMAT getSourceFormat() const
209 {
210 return SourceFormat;
211 }
212
213 //! Returns format of the strings returned by the parser.
214 virtual ETEXT_FORMAT getParserFormat() const
215 {
216 return TargetFormat;
217 }
218
219private:
220
221 // Reads the current xml node
222 // return false if no further node is found
223 bool parseCurrentNode()
224 {
225 char_type* start = P;
226
227 // more forward until '<' found
228 while(*P != L'<' && *P)
229 ++P;
230
231 // not a node, so return false
232 if (!*P)
233 return false;
234
235 if (P - start > 0)
236 {
237 // we found some text, store it
238 if (setText(start, P))
239 return true;
240 }
241
242 ++P;
243
244 // based on current token, parse and report next element
245 switch(*P)
246 {
247 case L'/':
248 parseClosingXMLElement();
249 break;
250 case L'?':
251 ignoreDefinition();
252 break;
253 case L'!':
254 if (!parseCDATA())
255 parseComment();
256 break;
257 default:
258 parseOpeningXMLElement();
259 break;
260 }
261 return true;
262 }
263
264
265 //! sets the state that text was found. Returns true if set should be set
266 bool setText(char_type* start, char_type* end)
267 {
268 // By default xml preserves all whitespace. But Irrlicht dropped some whitespace by default
269 // in the past which did lead to OS dependent behavior. We just ignore all whitespace for now
270 // as it's the closest to fixing behavior without breaking downward compatibility too much.
271 if ( IgnoreWhitespaceText )
272 {
273 char_type* p = start;
274 for(; p != end; ++p)
275 if (!isWhiteSpace(*p))
276 break;
277
278 if (p == end)
279 return false;
280 }
281
282 // set current text to the parsed text, and replace xml special characters
283 core::string<char_type> s(start, (int)(end - start));
284 NodeName = replaceSpecialCharacters(s);
285
286 // current XML node type is text
287 CurrentNodeType = EXN_TEXT;
288
289 return true;
290 }
291
292
293
294 //! ignores an xml definition like <?xml something />
295 void ignoreDefinition()
296 {
297 CurrentNodeType = EXN_UNKNOWN;
298
299 // move until end marked with '>' reached
300 while(*P != L'>')
301 ++P;
302
303 ++P;
304 }
305
306
307 //! parses a comment
308 void parseComment()
309 {
310 CurrentNodeType = EXN_COMMENT;
311 P += 1;
312
313 char_type *pCommentBegin = P;
314
315 int count = 1;
316
317 // move until end of comment reached
318 while(count)
319 {
320 if (*P == L'>')
321 --count;
322 else
323 if (*P == L'<')
324 ++count;
325
326 ++P;
327 }
328
329 P -= 3;
330 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
331 P += 3;
332 }
333
334
335 //! parses an opening xml element and reads attributes
336 void parseOpeningXMLElement()
337 {
338 CurrentNodeType = EXN_ELEMENT;
339 IsEmptyElement = false;
340 Attributes.clear();
341
342 // find name
343 const char_type* startName = P;
344
345 // find end of element
346 while(*P != L'>' && !isWhiteSpace(*P))
347 ++P;
348
349 const char_type* endName = P;
350
351 // find Attributes
352 while(*P != L'>')
353 {
354 if (isWhiteSpace(*P))
355 ++P;
356 else
357 {
358 if (*P != L'/')
359 {
360 // we've got an attribute
361
362 // read the attribute names
363 const char_type* attributeNameBegin = P;
364
365 while(!isWhiteSpace(*P) && *P != L'=')
366 ++P;
367
368 const char_type* attributeNameEnd = P;
369 ++P;
370
371 // read the attribute value
372 // check for quotes and single quotes, thx to murphy
373 while( (*P != L'\"') && (*P != L'\'') && *P)
374 ++P;
375
376 if (!*P) // malformatted xml file
377 return;
378
379 const char_type attributeQuoteChar = *P;
380
381 ++P;
382 const char_type* attributeValueBegin = P;
383
384 while(*P != attributeQuoteChar && *P)
385 ++P;
386
387 if (!*P) // malformatted xml file
388 return;
389
390 const char_type* attributeValueEnd = P;
391 ++P;
392
393 SAttribute attr;
394 attr.Name = core::string<char_type>(attributeNameBegin,
395 (int)(attributeNameEnd - attributeNameBegin));
396
397 core::string<char_type> s(attributeValueBegin,
398 (int)(attributeValueEnd - attributeValueBegin));
399
400 attr.Value = replaceSpecialCharacters(s);
401 Attributes.push_back(attr);
402 }
403 else
404 {
405 // tag is closed directly
406 ++P;
407 IsEmptyElement = true;
408 break;
409 }
410 }
411 }
412
413 // check if this tag is closing directly
414 if (endName > startName && *(endName-1) == L'/')
415 {
416 // directly closing tag
417 IsEmptyElement = true;
418 endName--;
419 }
420
421 NodeName = core::string<char_type>(startName, (int)(endName - startName));
422
423 ++P;
424 }
425
426
427 //! parses an closing xml tag
428 void parseClosingXMLElement()
429 {
430 CurrentNodeType = EXN_ELEMENT_END;
431 IsEmptyElement = false;
432 Attributes.clear();
433
434 ++P;
435 const char_type* pBeginClose = P;
436
437 while(*P != L'>')
438 ++P;
439
440 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
441 ++P;
442 }
443
444 //! parses a possible CDATA section, returns false if begin was not a CDATA section
445 bool parseCDATA()
446 {
447 if (*(P+1) != L'[')
448 return false;
449
450 CurrentNodeType = EXN_CDATA;
451
452 // skip '<![CDATA['
453 int count=0;
454 while( *P && count<8 )
455 {
456 ++P;
457 ++count;
458 }
459
460 if (!*P)
461 return true;
462
463 char_type *cDataBegin = P;
464 char_type *cDataEnd = 0;
465
466 // find end of CDATA
467 while(*P && !cDataEnd)
468 {
469 if (*P == L'>' &&
470 (*(P-1) == L']') &&
471 (*(P-2) == L']'))
472 {
473 cDataEnd = P - 2;
474 }
475
476 ++P;
477 }
478
479 if ( cDataEnd )
480 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
481 else
482 NodeName = "";
483
484 return true;
485 }
486
487
488 // structure for storing attribute-name pairs
489 struct SAttribute
490 {
491 core::string<char_type> Name;
492 core::string<char_type> Value;
493 };
494
495 // finds a current attribute by name, returns 0 if not found
496 const SAttribute* getAttributeByName(const char_type* name) const
497 {
498 if (!name)
499 return 0;
500
501 core::string<char_type> n = name;
502
503 for (int i=0; i<(int)Attributes.size(); ++i)
504 if (Attributes[i].Name == n)
505 return &Attributes[i];
506
507 return 0;
508 }
509
510 // replaces xml special characters in a string and creates a new one
511 core::string<char_type> replaceSpecialCharacters(
512 core::string<char_type>& origstr)
513 {
514 int pos = origstr.findFirst(L'&');
515 int oldPos = 0;
516
517 if (pos == -1)
518 return origstr;
519
520 core::string<char_type> newstr;
521
522 while(pos != -1 && pos < (int)origstr.size()-2)
523 {
524 // check if it is one of the special characters
525
526 int specialChar = -1;
527 for (int i=0; i<(int)SpecialCharacters.size(); ++i)
528 {
529 const char_type* p = &origstr.c_str()[pos]+1;
530
531 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
532 {
533 specialChar = i;
534 break;
535 }
536 }
537
538 if (specialChar != -1)
539 {
540 newstr.append(origstr.subString(oldPos, pos - oldPos));
541 newstr.append(SpecialCharacters[specialChar][0]);
542 pos += SpecialCharacters[specialChar].size();
543 }
544 else
545 {
546 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
547 pos += 1;
548 }
549
550 // find next &
551 oldPos = pos;
552 pos = origstr.findNext(L'&', pos);
553 }
554
555 if (oldPos < (int)origstr.size()-1)
556 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
557
558 return newstr;
559 }
560
561
562
563 //! reads the xml file and converts it into the wanted character format.
564 bool readFile(IFileReadCallBack* callback)
565 {
566 long size = callback->getSize();
567 if (size<0)
568 return false;
569 size += 4; // We need four terminating 0's at the end.
570 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
571
572 char* data8 = new char[size];
573
574 if (!callback->read(data8, size-4))
575 {
576 delete [] data8;
577 return false;
578 }
579
580 // add zeros at end
581
582 memset(data8+size-4, 0, 4);
583
584 char16* data16 = reinterpret_cast<char16*>(data8);
585 char32* data32 = reinterpret_cast<char32*>(data8);
586
587 // now we need to convert the data to the desired target format
588 // based on the byte order mark.
589
590 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
591 const u16 UTF16_BE = 0xFFFE;
592 const u16 UTF16_LE = 0xFEFF;
593 const u32 UTF32_BE = 0xFFFE0000;
594 const u32 UTF32_LE = 0x0000FEFF;
595
596 // check source for all utf versions and convert to target data format
597
598 if (size >= 4 && data32[0] == static_cast<char32>(UTF32_BE))
599 {
600 // UTF-32, big endian
601 SourceFormat = ETF_UTF32_BE;
602 convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header
603 }
604 else
605 if (size >= 4 && data32[0] == static_cast<char32>(UTF32_LE))
606 {
607 // UTF-32, little endian
608 SourceFormat = ETF_UTF32_LE;
609 convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header
610 }
611 else
612 if (size >= 2 && data16[0] == UTF16_BE)
613 {
614 // UTF-16, big endian
615 SourceFormat = ETF_UTF16_BE;
616 convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header
617 }
618 else
619 if (size >= 2 && data16[0] == UTF16_LE)
620 {
621 // UTF-16, little endian
622 SourceFormat = ETF_UTF16_LE;
623 convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header
624 }
625 else
626 if (size >= 3 && memcmp(data8,UTF8,3)==0)
627 {
628 // UTF-8
629 SourceFormat = ETF_UTF8;
630 convertTextData(data8+3, data8, size-3); // data8+3 because we need to skip the header
631 }
632 else
633 {
634 // ASCII
635 SourceFormat = ETF_ASCII;
636 convertTextData(data8, data8, size);
637 }
638
639 return true;
640 }
641
642
643 //! converts the text file into the desired format.
644 /** \param source: begin of the text (without byte order mark)
645 \param pointerToStore: pointer to text data block which can be
646 stored or deleted based on the nesessary conversion.
647 \param sizeWithoutHeader: Text size in characters without header
648 */
649 template<class src_char_type>
650 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
651 {
652 // convert little to big endian if necessary
653 if (sizeof(src_char_type) > 1 &&
654 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
655 convertToLittleEndian(source);
656
657 // check if conversion is necessary:
658 if (sizeof(src_char_type) == sizeof(char_type))
659 {
660 // no need to convert
661 TextBegin = (char_type*)source;
662 TextData = (char_type*)pointerToStore;
663 TextSize = sizeWithoutHeader;
664 }
665 else
666 {
667 // convert source into target data format.
668 // TODO: implement a real conversion. This one just
669 // copies bytes. This is a problem when there are
670 // unicode symbols using more than one character.
671
672 TextData = new char_type[sizeWithoutHeader];
673
674 if ( sizeof(src_char_type) == 1 )
675 {
676 // we have to cast away negative numbers or results might add the sign instead of just doing a copy
677 for (int i=0; i<sizeWithoutHeader; ++i)
678 {
679 TextData[i] = static_cast<char_type>(static_cast<unsigned char>(source[i]));
680 }
681 }
682 else
683 {
684 for (int i=0; i<sizeWithoutHeader; ++i)
685 TextData[i] = static_cast<char_type>(source[i]);
686 }
687 TextBegin = TextData;
688 TextSize = sizeWithoutHeader;
689
690 // delete original data because no longer needed
691 delete [] pointerToStore;
692 }
693 }
694
//! Swaps the byte order of every character of a zero terminated text buffer.
/** Characters of 4 bytes get all four bytes reversed, every other width is
handled as 16 bit and gets its two bytes exchanged.
\param t: Zero terminated text, modified in place. */
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
	const bool is32Bit = (sizeof(src_char_type) == 4);

	for (; *t; ++t)
	{
		if (is32Bit)
		{
			*t = ((*t & 0xff000000) >> 24) |
				((*t & 0x00ff0000) >> 8) |
				((*t & 0x0000ff00) << 8) |
				((*t & 0x000000ff) << 24);
		}
		else
		{
			*t = (*t >> 8) | (*t << 8);
		}
	}
}
723
724 //! returns if a format is little endian
725 inline bool isLittleEndian(ETEXT_FORMAT f)
726 {
727 return f == ETF_ASCII ||
728 f == ETF_UTF8 ||
729 f == ETF_UTF16_LE ||
730 f == ETF_UTF32_LE;
731 }
732
733
734 //! returns true if a character is whitespace
735 inline bool isWhiteSpace(char_type c)
736 {
737 return (c==' ' || c=='\t' || c=='\n' || c=='\r');
738 }
739
740
741 //! generates a list with xml special characters
742 void createSpecialCharacterList()
743 {
744 // list of strings containing special symbols,
745 // the first character is the special character,
746 // the following is the symbol string without trailing &.
747
748 SpecialCharacters.push_back("&amp;");
749 SpecialCharacters.push_back("<lt;");
750 SpecialCharacters.push_back(">gt;");
751 SpecialCharacters.push_back("\"quot;");
752 SpecialCharacters.push_back("'apos;");
753
754 }
755
756
757 //! compares the first n characters of the strings
758 bool equalsn(const char_type* str1, const char_type* str2, int len)
759 {
760 int i;
761 for(i=0; str1[i] && str2[i] && i < len; ++i)
762 if (str1[i] != str2[i])
763 return false;
764
765 // if one (or both) of the strings was smaller then they
766 // are only equal if they have the same lenght
767 return (i == len) || (str1[i] == 0 && str2[i] == 0);
768 }
769
770
771 //! stores the target text format
772 void storeTargetFormat()
773 {
774 // get target format. We could have done this using template specialization,
775 // but VisualStudio 6 don't like it and we want to support it.
776
777 switch(sizeof(char_type))
778 {
779 case 1:
780 TargetFormat = ETF_UTF8;
781 break;
782 case 2:
783 TargetFormat = ETF_UTF16_LE;
784 break;
785 case 4:
786 TargetFormat = ETF_UTF32_LE;
787 break;
788 default:
789 TargetFormat = ETF_ASCII; // should never happen.
790 }
791 }
792
793
794 // instance variables:
795 bool IgnoreWhitespaceText; // do not return EXN_TEXT nodes for pure whitespace
796 char_type* TextData; // data block of the text file
797 char_type* P; // current point in text to parse
798 char_type* TextBegin; // start of text to parse
799 unsigned int TextSize; // size of text to parse in characters, not bytes
800
801 EXML_NODE CurrentNodeType; // type of the currently parsed node
802 ETEXT_FORMAT SourceFormat; // source format of the xml file
803 ETEXT_FORMAT TargetFormat; // output format of this parser
804
805 core::string<char_type> NodeName; // name of the node currently in - also used for text
806 core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods
807
808 bool IsEmptyElement; // is the currently parsed node empty?
809
810 core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()
811
812 core::array<SAttribute> Attributes; // attributes of current element
813
814}; // end CXMLReaderImpl
815
816
817} // end namespace
818} // end namespace
819
820#endif