aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h')
-rw-r--r--src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h821
1 files changed, 821 insertions, 0 deletions
diff --git a/src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h b/src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h
new file mode 100644
index 0000000..cfd89b0
--- /dev/null
+++ b/src/others/irrlicht-1.8.1/source/Irrlicht/CXMLReaderImpl.h
@@ -0,0 +1,821 @@
1// Copyright (C) 2002-2012 Nikolaus Gebhardt
2// This file is part of the "Irrlicht Engine" and the "irrXML" project.
3// For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
4
5#ifndef __ICXML_READER_IMPL_H_INCLUDED__
6#define __ICXML_READER_IMPL_H_INCLUDED__
7
8#include "irrXML.h"
9#include "irrString.h"
10#include "irrArray.h"
11#include "fast_atof.h"
12
// Debug-only trace helper. The argument is printed as plain text: it is passed
// through a fixed "%s" format so that a '%' inside the message is never
// interpreted as a conversion specification. The trailing semicolon inside the
// macro is kept so existing call sites (with or without their own ';') still compile.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf("%s", (x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
18
19
20namespace irr
21{
22namespace io
23{
24
25
26//! implementation of the IrrXMLReader
27template<class char_type, class superclass>
28class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
29{
30public:
31
32 //! Constructor
33 CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
34 : IgnoreWhitespaceText(true), TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
35 SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII), IsEmptyElement(false)
36 {
37 if (!callback)
38 return;
39
40 storeTargetFormat();
41
42 // read whole xml file
43
44 readFile(callback);
45
46 // clean up
47
48 if (deleteCallBack)
49 delete callback;
50
51 // create list with special characters
52
53 createSpecialCharacterList();
54
55 // set pointer to text begin
56 P = TextBegin;
57 }
58
59
60 //! Destructor
61 virtual ~CXMLReaderImpl()
62 {
63 delete [] TextData;
64 }
65
66
67 //! Reads forward to the next xml node.
68 //! \return Returns false, if there was no further node.
69 virtual bool read()
70 {
71 // if not end reached, parse the node
72 if (P && ((unsigned int)(P - TextBegin) < TextSize - 1) && (*P != 0))
73 {
74 return parseCurrentNode();
75 }
76
77 _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
78 return false;
79 }
80
81
82 //! Returns the type of the current XML node.
83 virtual EXML_NODE getNodeType() const
84 {
85 return CurrentNodeType;
86 }
87
88
89 //! Returns attribute count of the current XML node.
90 virtual unsigned int getAttributeCount() const
91 {
92 return Attributes.size();
93 }
94
95
96 //! Returns name of an attribute.
97 virtual const char_type* getAttributeName(int idx) const
98 {
99 if ((u32)idx >= Attributes.size())
100 return 0;
101
102 return Attributes[idx].Name.c_str();
103 }
104
105
106 //! Returns the value of an attribute.
107 virtual const char_type* getAttributeValue(int idx) const
108 {
109 if ((unsigned int)idx >= Attributes.size())
110 return 0;
111
112 return Attributes[idx].Value.c_str();
113 }
114
115
116 //! Returns the value of an attribute.
117 virtual const char_type* getAttributeValue(const char_type* name) const
118 {
119 const SAttribute* attr = getAttributeByName(name);
120 if (!attr)
121 return 0;
122
123 return attr->Value.c_str();
124 }
125
126
127 //! Returns the value of an attribute
128 virtual const char_type* getAttributeValueSafe(const char_type* name) const
129 {
130 const SAttribute* attr = getAttributeByName(name);
131 if (!attr)
132 return EmptyString.c_str();
133
134 return attr->Value.c_str();
135 }
136
137
138
139 //! Returns the value of an attribute as integer.
140 int getAttributeValueAsInt(const char_type* name) const
141 {
142 const SAttribute* attr = getAttributeByName(name);
143 if (!attr)
144 return 0;
145
146 core::stringc c(attr->Value.c_str());
147 return core::strtol10(c.c_str());
148 }
149
150
151 //! Returns the value of an attribute as integer.
152 int getAttributeValueAsInt(int idx) const
153 {
154 const char_type* attrvalue = getAttributeValue(idx);
155 if (!attrvalue)
156 return 0;
157
158 core::stringc c(attrvalue);
159 return core::strtol10(c.c_str());
160 }
161
162
163 //! Returns the value of an attribute as float.
164 float getAttributeValueAsFloat(const char_type* name) const
165 {
166 const SAttribute* attr = getAttributeByName(name);
167 if (!attr)
168 return 0;
169
170 core::stringc c = attr->Value.c_str();
171 return core::fast_atof(c.c_str());
172 }
173
174
175 //! Returns the value of an attribute as float.
176 float getAttributeValueAsFloat(int idx) const
177 {
178 const char_type* attrvalue = getAttributeValue(idx);
179 if (!attrvalue)
180 return 0;
181
182 core::stringc c = attrvalue;
183 return core::fast_atof(c.c_str());
184 }
185
186
187 //! Returns the name of the current node.
188 virtual const char_type* getNodeName() const
189 {
190 return NodeName.c_str();
191 }
192
193
194 //! Returns data of the current node.
195 virtual const char_type* getNodeData() const
196 {
197 return NodeName.c_str();
198 }
199
200
201 //! Returns if an element is an empty element, like <foo />
202 virtual bool isEmptyElement() const
203 {
204 return IsEmptyElement;
205 }
206
207 //! Returns format of the source xml file.
208 virtual ETEXT_FORMAT getSourceFormat() const
209 {
210 return SourceFormat;
211 }
212
213 //! Returns format of the strings returned by the parser.
214 virtual ETEXT_FORMAT getParserFormat() const
215 {
216 return TargetFormat;
217 }
218
219private:
220
221 // Reads the current xml node
222 // return false if no further node is found
223 bool parseCurrentNode()
224 {
225 char_type* start = P;
226
227 // more forward until '<' found
228 while(*P != L'<' && *P)
229 ++P;
230
231 // not a node, so return false
232 if (!*P)
233 return false;
234
235 if (P - start > 0)
236 {
237 // we found some text, store it
238 if (setText(start, P))
239 return true;
240 }
241
242 ++P;
243
244 // based on current token, parse and report next element
245 switch(*P)
246 {
247 case L'/':
248 parseClosingXMLElement();
249 break;
250 case L'?':
251 ignoreDefinition();
252 break;
253 case L'!':
254 if (!parseCDATA())
255 parseComment();
256 break;
257 default:
258 parseOpeningXMLElement();
259 break;
260 }
261 return true;
262 }
263
264
265 //! sets the state that text was found. Returns true if set should be set
266 bool setText(char_type* start, char_type* end)
267 {
268 // By default xml preserves all whitespace. But Irrlicht dropped some whitespace by default
269 // in the past which did lead to OS dependent behavior. We just ignore all whitespace for now
270 // as it's the closest to fixing behavior without breaking downward compatibility too much.
271 if ( IgnoreWhitespaceText )
272 {
273 char_type* p = start;
274 for(; p != end; ++p)
275 if (!isWhiteSpace(*p))
276 break;
277
278 if (p == end)
279 return false;
280 }
281
282 // set current text to the parsed text, and replace xml special characters
283 core::string<char_type> s(start, (int)(end - start));
284 NodeName = replaceSpecialCharacters(s);
285
286 // current XML node type is text
287 CurrentNodeType = EXN_TEXT;
288
289 return true;
290 }
291
292
293
294 //! ignores an xml definition like <?xml something />
295 void ignoreDefinition()
296 {
297 CurrentNodeType = EXN_UNKNOWN;
298
299 // move until end marked with '>' reached
300 while(*P != L'>')
301 ++P;
302
303 ++P;
304 }
305
306
307 //! parses a comment
308 void parseComment()
309 {
310 CurrentNodeType = EXN_COMMENT;
311 P += 1;
312
313 char_type *pCommentBegin = P;
314
315 int count = 1;
316
317 // move until end of comment reached
318 while(count)
319 {
320 if (*P == L'>')
321 --count;
322 else
323 if (*P == L'<')
324 ++count;
325
326 ++P;
327 }
328
329 P -= 3;
330 NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
331 P += 3;
332 }
333
334
335 //! parses an opening xml element and reads attributes
336 void parseOpeningXMLElement()
337 {
338 CurrentNodeType = EXN_ELEMENT;
339 IsEmptyElement = false;
340 Attributes.clear();
341
342 // find name
343 const char_type* startName = P;
344
345 // find end of element
346 while(*P != L'>' && !isWhiteSpace(*P))
347 ++P;
348
349 const char_type* endName = P;
350
351 // find Attributes
352 while(*P != L'>')
353 {
354 if (isWhiteSpace(*P))
355 ++P;
356 else
357 {
358 if (*P != L'/')
359 {
360 // we've got an attribute
361
362 // read the attribute names
363 const char_type* attributeNameBegin = P;
364
365 while(!isWhiteSpace(*P) && *P != L'=')
366 ++P;
367
368 const char_type* attributeNameEnd = P;
369 ++P;
370
371 // read the attribute value
372 // check for quotes and single quotes, thx to murphy
373 while( (*P != L'\"') && (*P != L'\'') && *P)
374 ++P;
375
376 if (!*P) // malformatted xml file
377 return;
378
379 const char_type attributeQuoteChar = *P;
380
381 ++P;
382 const char_type* attributeValueBegin = P;
383
384 while(*P != attributeQuoteChar && *P)
385 ++P;
386
387 if (!*P) // malformatted xml file
388 return;
389
390 const char_type* attributeValueEnd = P;
391 ++P;
392
393 SAttribute attr;
394 attr.Name = core::string<char_type>(attributeNameBegin,
395 (int)(attributeNameEnd - attributeNameBegin));
396
397 core::string<char_type> s(attributeValueBegin,
398 (int)(attributeValueEnd - attributeValueBegin));
399
400 attr.Value = replaceSpecialCharacters(s);
401 Attributes.push_back(attr);
402 }
403 else
404 {
405 // tag is closed directly
406 ++P;
407 IsEmptyElement = true;
408 break;
409 }
410 }
411 }
412
413 // check if this tag is closing directly
414 if (endName > startName && *(endName-1) == L'/')
415 {
416 // directly closing tag
417 IsEmptyElement = true;
418 endName--;
419 }
420
421 NodeName = core::string<char_type>(startName, (int)(endName - startName));
422
423 ++P;
424 }
425
426
427 //! parses an closing xml tag
428 void parseClosingXMLElement()
429 {
430 CurrentNodeType = EXN_ELEMENT_END;
431 IsEmptyElement = false;
432 Attributes.clear();
433
434 ++P;
435 const char_type* pBeginClose = P;
436
437 while(*P != L'>')
438 ++P;
439
440 NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
441 ++P;
442 }
443
444 //! parses a possible CDATA section, returns false if begin was not a CDATA section
445 bool parseCDATA()
446 {
447 if (*(P+1) != L'[')
448 return false;
449
450 CurrentNodeType = EXN_CDATA;
451
452 // skip '<![CDATA['
453 int count=0;
454 while( *P && count<8 )
455 {
456 ++P;
457 ++count;
458 }
459
460 if (!*P)
461 return true;
462
463 char_type *cDataBegin = P;
464 char_type *cDataEnd = 0;
465
466 // find end of CDATA
467 while(*P && !cDataEnd)
468 {
469 if (*P == L'>' &&
470 (*(P-1) == L']') &&
471 (*(P-2) == L']'))
472 {
473 cDataEnd = P - 2;
474 }
475
476 ++P;
477 }
478
479 if ( cDataEnd )
480 NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
481 else
482 NodeName = "";
483
484 return true;
485 }
486
487
488 // structure for storing attribute-name pairs
489 struct SAttribute
490 {
491 core::string<char_type> Name;
492 core::string<char_type> Value;
493 };
494
495 // finds a current attribute by name, returns 0 if not found
496 const SAttribute* getAttributeByName(const char_type* name) const
497 {
498 if (!name)
499 return 0;
500
501 core::string<char_type> n = name;
502
503 for (int i=0; i<(int)Attributes.size(); ++i)
504 if (Attributes[i].Name == n)
505 return &Attributes[i];
506
507 return 0;
508 }
509
510 // replaces xml special characters in a string and creates a new one
511 core::string<char_type> replaceSpecialCharacters(
512 core::string<char_type>& origstr)
513 {
514 int pos = origstr.findFirst(L'&');
515 int oldPos = 0;
516
517 if (pos == -1)
518 return origstr;
519
520 core::string<char_type> newstr;
521
522 while(pos != -1 && pos < (int)origstr.size()-2)
523 {
524 // check if it is one of the special characters
525
526 int specialChar = -1;
527 for (int i=0; i<(int)SpecialCharacters.size(); ++i)
528 {
529 const char_type* p = &origstr.c_str()[pos]+1;
530
531 if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
532 {
533 specialChar = i;
534 break;
535 }
536 }
537
538 if (specialChar != -1)
539 {
540 newstr.append(origstr.subString(oldPos, pos - oldPos));
541 newstr.append(SpecialCharacters[specialChar][0]);
542 pos += SpecialCharacters[specialChar].size();
543 }
544 else
545 {
546 newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
547 pos += 1;
548 }
549
550 // find next &
551 oldPos = pos;
552 pos = origstr.findNext(L'&', pos);
553 }
554
555 if (oldPos < (int)origstr.size()-1)
556 newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
557
558 return newstr;
559 }
560
561
562
563 //! reads the xml file and converts it into the wanted character format.
564 bool readFile(IFileReadCallBack* callback)
565 {
566 long size = callback->getSize();
567 if (size<0)
568 return false;
569 size += 4; // We need four terminating 0's at the end.
570 // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
571
572 char* data8 = new char[size];
573
574 if (!callback->read(data8, size-4))
575 {
576 delete [] data8;
577 return false;
578 }
579
580 // add zeros at end
581
582 memset(data8+size-4, 0, 4);
583
584 char16* data16 = reinterpret_cast<char16*>(data8);
585 char32* data32 = reinterpret_cast<char32*>(data8);
586
587 // now we need to convert the data to the desired target format
588 // based on the byte order mark.
589
590 const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
591 const u16 UTF16_BE = 0xFFFE;
592 const u16 UTF16_LE = 0xFEFF;
593 const u32 UTF32_BE = 0xFFFE0000;
594 const u32 UTF32_LE = 0x0000FEFF;
595
596 // check source for all utf versions and convert to target data format
597
598 if (size >= 4 && data32[0]
599 == static_cast<char32>(UTF32_BE))
600 {
601 // UTF-32, big endian
602 SourceFormat = ETF_UTF32_BE;
603 convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header
604 }
605 else
606 if (size >= 4 && data32[0] == static_cast<char32>(UTF32_LE))
607 {
608 // UTF-32, little endian
609 SourceFormat = ETF_UTF32_LE;
610 convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header
611 }
612 else
613 if (size >= 2 && data16[0] == UTF16_BE)
614 {
615 // UTF-16, big endian
616 SourceFormat = ETF_UTF16_BE;
617 convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header
618 }
619 else
620 if (size >= 2 && data16[0] == UTF16_LE)
621 {
622 // UTF-16, little endian
623 SourceFormat = ETF_UTF16_LE;
624 convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header
625 }
626 else
627 if (size >= 3 && memcmp(data8,UTF8,3)==0)
628 {
629 // UTF-8
630 SourceFormat = ETF_UTF8;
631 convertTextData(data8+3, data8, size-3); // data8+3 because we need to skip the header
632 }
633 else
634 {
635 // ASCII
636 SourceFormat = ETF_ASCII;
637 convertTextData(data8, data8, size);
638 }
639
640 return true;
641 }
642
643
644 //! converts the text file into the desired format.
645 /** \param source: begin of the text (without byte order mark)
646 \param pointerToStore: pointer to text data block which can be
647 stored or deleted based on the nesessary conversion.
648 \param sizeWithoutHeader: Text size in characters without header
649 */
650 template<class src_char_type>
651 void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
652 {
653 // convert little to big endian if necessary
654 if (sizeof(src_char_type) > 1 &&
655 isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
656 convertToLittleEndian(source);
657
658 // check if conversion is necessary:
659 if (sizeof(src_char_type) == sizeof(char_type))
660 {
661 // no need to convert
662 TextBegin = (char_type*)source;
663 TextData = (char_type*)pointerToStore;
664 TextSize = sizeWithoutHeader;
665 }
666 else
667 {
668 // convert source into target data format.
669 // TODO: implement a real conversion. This one just
670 // copies bytes. This is a problem when there are
671 // unicode symbols using more than one character.
672
673 TextData = new char_type[sizeWithoutHeader];
674
675 if ( sizeof(src_char_type) == 1 )
676 {
677 // we have to cast away negative numbers or results might add the sign instead of just doing a copy
678 for (int i=0; i<sizeWithoutHeader; ++i)
679 {
680 TextData[i] = static_cast<char_type>(static_cast<unsigned char>(source[i]));
681 }
682 }
683 else
684 {
685 for (int i=0; i<sizeWithoutHeader; ++i)
686 TextData[i] = static_cast<char_type>(source[i]);
687 }
688 TextBegin = TextData;
689 TextSize = sizeWithoutHeader;
690
691 // delete original data because no longer needed
692 delete [] pointerToStore;
693 }
694 }
695
696 //! converts whole text buffer to little endian
697 template<class src_char_type>
698 void convertToLittleEndian(src_char_type* t)
699 {
700 if (sizeof(src_char_type) == 4)
701 {
702 // 32 bit
703
704 while(*t)
705 {
706 *t = ((*t & 0xff000000) >> 24) |
707 ((*t & 0x00ff0000) >> 8) |
708 ((*t & 0x0000ff00) << 8) |
709 ((*t & 0x000000ff) << 24);
710 ++t;
711 }
712 }
713 else
714 {
715 // 16 bit
716
717 while(*t)
718 {
719 *t = (*t >> 8) | (*t << 8);
720 ++t;
721 }
722 }
723 }
724
725 //! returns if a format is little endian
726 inline bool isLittleEndian(ETEXT_FORMAT f)
727 {
728 return f == ETF_ASCII ||
729 f == ETF_UTF8 ||
730 f == ETF_UTF16_LE ||
731 f == ETF_UTF32_LE;
732 }
733
734
735 //! returns true if a character is whitespace
736 inline bool isWhiteSpace(char_type c)
737 {
738 return (c==' ' || c=='\t' || c=='\n' || c=='\r');
739 }
740
741
742 //! generates a list with xml special characters
743 void createSpecialCharacterList()
744 {
745 // list of strings containing special symbols,
746 // the first character is the special character,
747 // the following is the symbol string without trailing &.
748
749 SpecialCharacters.push_back("&amp;");
750 SpecialCharacters.push_back("<lt;");
751 SpecialCharacters.push_back(">gt;");
752 SpecialCharacters.push_back("\"quot;");
753 SpecialCharacters.push_back("'apos;");
754
755 }
756
757
758 //! compares the first n characters of the strings
759 bool equalsn(const char_type* str1, const char_type* str2, int len)
760 {
761 int i;
762 for(i=0; str1[i] && str2[i] && i < len; ++i)
763 if (str1[i] != str2[i])
764 return false;
765
766 // if one (or both) of the strings was smaller then they
767 // are only equal if they have the same lenght
768 return (i == len) || (str1[i] == 0 && str2[i] == 0);
769 }
770
771
772 //! stores the target text format
773 void storeTargetFormat()
774 {
775 // get target format. We could have done this using template specialization,
776 // but VisualStudio 6 don't like it and we want to support it.
777
778 switch(sizeof(char_type))
779 {
780 case 1:
781 TargetFormat = ETF_UTF8;
782 break;
783 case 2:
784 TargetFormat = ETF_UTF16_LE;
785 break;
786 case 4:
787 TargetFormat = ETF_UTF32_LE;
788 break;
789 default:
790 TargetFormat = ETF_ASCII; // should never happen.
791 }
792 }
793
794
795 // instance variables:
796 bool IgnoreWhitespaceText; // do not return EXN_TEXT nodes for pure whitespace
797 char_type* TextData; // data block of the text file
798 char_type* P; // current point in text to parse
799 char_type* TextBegin; // start of text to parse
800 unsigned int TextSize; // size of text to parse in characters, not bytes
801
802 EXML_NODE CurrentNodeType; // type of the currently parsed node
803 ETEXT_FORMAT SourceFormat; // source format of the xml file
804 ETEXT_FORMAT TargetFormat; // output format of this parser
805
806 core::string<char_type> NodeName; // name of the node currently in - also used for text
807 core::string<char_type> EmptyString; // empty string to be returned by getSafe() methods
808
809 bool IsEmptyElement; // is the currently parsed node empty?
810
811 core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()
812
813 core::array<SAttribute> Attributes; // attributes of current element
814
815}; // end CXMLReaderImpl
816
817
818} // end namespace
819} // end namespace
820
821#endif