diff options
Diffstat (limited to 'libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h')
-rw-r--r-- | libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h | 820 |
1 files changed, 820 insertions, 0 deletions
diff --git a/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h b/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h new file mode 100644 index 0000000..f87be65 --- /dev/null +++ b/libraries/irrlicht-1.8/source/Irrlicht/CXMLReaderImpl.h | |||
@@ -0,0 +1,820 @@ | |||
1 | // Copyright (C) 2002-2012 Nikolaus Gebhardt | ||
2 | // This file is part of the "Irrlicht Engine" and the "irrXML" project. | ||
3 | // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h | ||
4 | |||
5 | #ifndef __ICXML_READER_IMPL_H_INCLUDED__ | ||
6 | #define __ICXML_READER_IMPL_H_INCLUDED__ | ||
7 | |||
8 | #include "irrXML.h" | ||
9 | #include "irrString.h" | ||
10 | #include "irrArray.h" | ||
11 | #include "fast_atof.h" | ||
12 | |||
// Debug trace helper. In _DEBUG builds the argument is handed to printf as
// the format string (so it must not contain unintended '%' sequences); in all
// other builds the macro expands to nothing.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf((x));
#else // _DEBUG
#define IRR_DEBUGPRINT(x)
#endif // _DEBUG
18 | |||
19 | |||
20 | namespace irr | ||
21 | { | ||
22 | namespace io | ||
23 | { | ||
24 | |||
25 | |||
26 | //! implementation of the IrrXMLReader | ||
27 | template<class char_type, class superclass> | ||
28 | class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass> | ||
29 | { | ||
30 | public: | ||
31 | |||
32 | //! Constructor | ||
33 | CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true) | ||
34 | : IgnoreWhitespaceText(true), TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE), | ||
35 | SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII), IsEmptyElement(false) | ||
36 | { | ||
37 | if (!callback) | ||
38 | return; | ||
39 | |||
40 | storeTargetFormat(); | ||
41 | |||
42 | // read whole xml file | ||
43 | |||
44 | readFile(callback); | ||
45 | |||
46 | // clean up | ||
47 | |||
48 | if (deleteCallBack) | ||
49 | delete callback; | ||
50 | |||
51 | // create list with special characters | ||
52 | |||
53 | createSpecialCharacterList(); | ||
54 | |||
55 | // set pointer to text begin | ||
56 | P = TextBegin; | ||
57 | } | ||
58 | |||
59 | |||
60 | //! Destructor | ||
61 | virtual ~CXMLReaderImpl() | ||
62 | { | ||
63 | delete [] TextData; | ||
64 | } | ||
65 | |||
66 | |||
67 | //! Reads forward to the next xml node. | ||
68 | //! \return Returns false, if there was no further node. | ||
69 | virtual bool read() | ||
70 | { | ||
71 | // if not end reached, parse the node | ||
72 | if (P && ((unsigned int)(P - TextBegin) < TextSize - 1) && (*P != 0)) | ||
73 | { | ||
74 | return parseCurrentNode(); | ||
75 | } | ||
76 | |||
77 | _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX; | ||
78 | return false; | ||
79 | } | ||
80 | |||
81 | |||
82 | //! Returns the type of the current XML node. | ||
83 | virtual EXML_NODE getNodeType() const | ||
84 | { | ||
85 | return CurrentNodeType; | ||
86 | } | ||
87 | |||
88 | |||
89 | //! Returns attribute count of the current XML node. | ||
90 | virtual unsigned int getAttributeCount() const | ||
91 | { | ||
92 | return Attributes.size(); | ||
93 | } | ||
94 | |||
95 | |||
96 | //! Returns name of an attribute. | ||
97 | virtual const char_type* getAttributeName(int idx) const | ||
98 | { | ||
99 | if ((u32)idx >= Attributes.size()) | ||
100 | return 0; | ||
101 | |||
102 | return Attributes[idx].Name.c_str(); | ||
103 | } | ||
104 | |||
105 | |||
106 | //! Returns the value of an attribute. | ||
107 | virtual const char_type* getAttributeValue(int idx) const | ||
108 | { | ||
109 | if ((unsigned int)idx >= Attributes.size()) | ||
110 | return 0; | ||
111 | |||
112 | return Attributes[idx].Value.c_str(); | ||
113 | } | ||
114 | |||
115 | |||
116 | //! Returns the value of an attribute. | ||
117 | virtual const char_type* getAttributeValue(const char_type* name) const | ||
118 | { | ||
119 | const SAttribute* attr = getAttributeByName(name); | ||
120 | if (!attr) | ||
121 | return 0; | ||
122 | |||
123 | return attr->Value.c_str(); | ||
124 | } | ||
125 | |||
126 | |||
127 | //! Returns the value of an attribute | ||
128 | virtual const char_type* getAttributeValueSafe(const char_type* name) const | ||
129 | { | ||
130 | const SAttribute* attr = getAttributeByName(name); | ||
131 | if (!attr) | ||
132 | return EmptyString.c_str(); | ||
133 | |||
134 | return attr->Value.c_str(); | ||
135 | } | ||
136 | |||
137 | |||
138 | |||
139 | //! Returns the value of an attribute as integer. | ||
140 | int getAttributeValueAsInt(const char_type* name) const | ||
141 | { | ||
142 | const SAttribute* attr = getAttributeByName(name); | ||
143 | if (!attr) | ||
144 | return 0; | ||
145 | |||
146 | core::stringc c(attr->Value.c_str()); | ||
147 | return core::strtol10(c.c_str()); | ||
148 | } | ||
149 | |||
150 | |||
151 | //! Returns the value of an attribute as integer. | ||
152 | int getAttributeValueAsInt(int idx) const | ||
153 | { | ||
154 | const char_type* attrvalue = getAttributeValue(idx); | ||
155 | if (!attrvalue) | ||
156 | return 0; | ||
157 | |||
158 | core::stringc c(attrvalue); | ||
159 | return core::strtol10(c.c_str()); | ||
160 | } | ||
161 | |||
162 | |||
163 | //! Returns the value of an attribute as float. | ||
164 | float getAttributeValueAsFloat(const char_type* name) const | ||
165 | { | ||
166 | const SAttribute* attr = getAttributeByName(name); | ||
167 | if (!attr) | ||
168 | return 0; | ||
169 | |||
170 | core::stringc c = attr->Value.c_str(); | ||
171 | return core::fast_atof(c.c_str()); | ||
172 | } | ||
173 | |||
174 | |||
175 | //! Returns the value of an attribute as float. | ||
176 | float getAttributeValueAsFloat(int idx) const | ||
177 | { | ||
178 | const char_type* attrvalue = getAttributeValue(idx); | ||
179 | if (!attrvalue) | ||
180 | return 0; | ||
181 | |||
182 | core::stringc c = attrvalue; | ||
183 | return core::fast_atof(c.c_str()); | ||
184 | } | ||
185 | |||
186 | |||
187 | //! Returns the name of the current node. | ||
188 | virtual const char_type* getNodeName() const | ||
189 | { | ||
190 | return NodeName.c_str(); | ||
191 | } | ||
192 | |||
193 | |||
194 | //! Returns data of the current node. | ||
195 | virtual const char_type* getNodeData() const | ||
196 | { | ||
197 | return NodeName.c_str(); | ||
198 | } | ||
199 | |||
200 | |||
201 | //! Returns if an element is an empty element, like <foo /> | ||
202 | virtual bool isEmptyElement() const | ||
203 | { | ||
204 | return IsEmptyElement; | ||
205 | } | ||
206 | |||
207 | //! Returns format of the source xml file. | ||
208 | virtual ETEXT_FORMAT getSourceFormat() const | ||
209 | { | ||
210 | return SourceFormat; | ||
211 | } | ||
212 | |||
213 | //! Returns format of the strings returned by the parser. | ||
214 | virtual ETEXT_FORMAT getParserFormat() const | ||
215 | { | ||
216 | return TargetFormat; | ||
217 | } | ||
218 | |||
219 | private: | ||
220 | |||
221 | // Reads the current xml node | ||
222 | // return false if no further node is found | ||
223 | bool parseCurrentNode() | ||
224 | { | ||
225 | char_type* start = P; | ||
226 | |||
227 | // more forward until '<' found | ||
228 | while(*P != L'<' && *P) | ||
229 | ++P; | ||
230 | |||
231 | // not a node, so return false | ||
232 | if (!*P) | ||
233 | return false; | ||
234 | |||
235 | if (P - start > 0) | ||
236 | { | ||
237 | // we found some text, store it | ||
238 | if (setText(start, P)) | ||
239 | return true; | ||
240 | } | ||
241 | |||
242 | ++P; | ||
243 | |||
244 | // based on current token, parse and report next element | ||
245 | switch(*P) | ||
246 | { | ||
247 | case L'/': | ||
248 | parseClosingXMLElement(); | ||
249 | break; | ||
250 | case L'?': | ||
251 | ignoreDefinition(); | ||
252 | break; | ||
253 | case L'!': | ||
254 | if (!parseCDATA()) | ||
255 | parseComment(); | ||
256 | break; | ||
257 | default: | ||
258 | parseOpeningXMLElement(); | ||
259 | break; | ||
260 | } | ||
261 | return true; | ||
262 | } | ||
263 | |||
264 | |||
265 | //! sets the state that text was found. Returns true if set should be set | ||
266 | bool setText(char_type* start, char_type* end) | ||
267 | { | ||
268 | // By default xml preserves all whitespace. But Irrlicht dropped some whitespace by default | ||
269 | // in the past which did lead to OS dependent behavior. We just ignore all whitespace for now | ||
270 | // as it's the closest to fixing behavior without breaking downward compatibility too much. | ||
271 | if ( IgnoreWhitespaceText ) | ||
272 | { | ||
273 | char_type* p = start; | ||
274 | for(; p != end; ++p) | ||
275 | if (!isWhiteSpace(*p)) | ||
276 | break; | ||
277 | |||
278 | if (p == end) | ||
279 | return false; | ||
280 | } | ||
281 | |||
282 | // set current text to the parsed text, and replace xml special characters | ||
283 | core::string<char_type> s(start, (int)(end - start)); | ||
284 | NodeName = replaceSpecialCharacters(s); | ||
285 | |||
286 | // current XML node type is text | ||
287 | CurrentNodeType = EXN_TEXT; | ||
288 | |||
289 | return true; | ||
290 | } | ||
291 | |||
292 | |||
293 | |||
294 | //! ignores an xml definition like <?xml something /> | ||
295 | void ignoreDefinition() | ||
296 | { | ||
297 | CurrentNodeType = EXN_UNKNOWN; | ||
298 | |||
299 | // move until end marked with '>' reached | ||
300 | while(*P != L'>') | ||
301 | ++P; | ||
302 | |||
303 | ++P; | ||
304 | } | ||
305 | |||
306 | |||
307 | //! parses a comment | ||
308 | void parseComment() | ||
309 | { | ||
310 | CurrentNodeType = EXN_COMMENT; | ||
311 | P += 1; | ||
312 | |||
313 | char_type *pCommentBegin = P; | ||
314 | |||
315 | int count = 1; | ||
316 | |||
317 | // move until end of comment reached | ||
318 | while(count) | ||
319 | { | ||
320 | if (*P == L'>') | ||
321 | --count; | ||
322 | else | ||
323 | if (*P == L'<') | ||
324 | ++count; | ||
325 | |||
326 | ++P; | ||
327 | } | ||
328 | |||
329 | P -= 3; | ||
330 | NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2)); | ||
331 | P += 3; | ||
332 | } | ||
333 | |||
334 | |||
335 | //! parses an opening xml element and reads attributes | ||
336 | void parseOpeningXMLElement() | ||
337 | { | ||
338 | CurrentNodeType = EXN_ELEMENT; | ||
339 | IsEmptyElement = false; | ||
340 | Attributes.clear(); | ||
341 | |||
342 | // find name | ||
343 | const char_type* startName = P; | ||
344 | |||
345 | // find end of element | ||
346 | while(*P != L'>' && !isWhiteSpace(*P)) | ||
347 | ++P; | ||
348 | |||
349 | const char_type* endName = P; | ||
350 | |||
351 | // find Attributes | ||
352 | while(*P != L'>') | ||
353 | { | ||
354 | if (isWhiteSpace(*P)) | ||
355 | ++P; | ||
356 | else | ||
357 | { | ||
358 | if (*P != L'/') | ||
359 | { | ||
360 | // we've got an attribute | ||
361 | |||
362 | // read the attribute names | ||
363 | const char_type* attributeNameBegin = P; | ||
364 | |||
365 | while(!isWhiteSpace(*P) && *P != L'=') | ||
366 | ++P; | ||
367 | |||
368 | const char_type* attributeNameEnd = P; | ||
369 | ++P; | ||
370 | |||
371 | // read the attribute value | ||
372 | // check for quotes and single quotes, thx to murphy | ||
373 | while( (*P != L'\"') && (*P != L'\'') && *P) | ||
374 | ++P; | ||
375 | |||
376 | if (!*P) // malformatted xml file | ||
377 | return; | ||
378 | |||
379 | const char_type attributeQuoteChar = *P; | ||
380 | |||
381 | ++P; | ||
382 | const char_type* attributeValueBegin = P; | ||
383 | |||
384 | while(*P != attributeQuoteChar && *P) | ||
385 | ++P; | ||
386 | |||
387 | if (!*P) // malformatted xml file | ||
388 | return; | ||
389 | |||
390 | const char_type* attributeValueEnd = P; | ||
391 | ++P; | ||
392 | |||
393 | SAttribute attr; | ||
394 | attr.Name = core::string<char_type>(attributeNameBegin, | ||
395 | (int)(attributeNameEnd - attributeNameBegin)); | ||
396 | |||
397 | core::string<char_type> s(attributeValueBegin, | ||
398 | (int)(attributeValueEnd - attributeValueBegin)); | ||
399 | |||
400 | attr.Value = replaceSpecialCharacters(s); | ||
401 | Attributes.push_back(attr); | ||
402 | } | ||
403 | else | ||
404 | { | ||
405 | // tag is closed directly | ||
406 | ++P; | ||
407 | IsEmptyElement = true; | ||
408 | break; | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | |||
413 | // check if this tag is closing directly | ||
414 | if (endName > startName && *(endName-1) == L'/') | ||
415 | { | ||
416 | // directly closing tag | ||
417 | IsEmptyElement = true; | ||
418 | endName--; | ||
419 | } | ||
420 | |||
421 | NodeName = core::string<char_type>(startName, (int)(endName - startName)); | ||
422 | |||
423 | ++P; | ||
424 | } | ||
425 | |||
426 | |||
427 | //! parses an closing xml tag | ||
428 | void parseClosingXMLElement() | ||
429 | { | ||
430 | CurrentNodeType = EXN_ELEMENT_END; | ||
431 | IsEmptyElement = false; | ||
432 | Attributes.clear(); | ||
433 | |||
434 | ++P; | ||
435 | const char_type* pBeginClose = P; | ||
436 | |||
437 | while(*P != L'>') | ||
438 | ++P; | ||
439 | |||
440 | NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose)); | ||
441 | ++P; | ||
442 | } | ||
443 | |||
444 | //! parses a possible CDATA section, returns false if begin was not a CDATA section | ||
445 | bool parseCDATA() | ||
446 | { | ||
447 | if (*(P+1) != L'[') | ||
448 | return false; | ||
449 | |||
450 | CurrentNodeType = EXN_CDATA; | ||
451 | |||
452 | // skip '<![CDATA[' | ||
453 | int count=0; | ||
454 | while( *P && count<8 ) | ||
455 | { | ||
456 | ++P; | ||
457 | ++count; | ||
458 | } | ||
459 | |||
460 | if (!*P) | ||
461 | return true; | ||
462 | |||
463 | char_type *cDataBegin = P; | ||
464 | char_type *cDataEnd = 0; | ||
465 | |||
466 | // find end of CDATA | ||
467 | while(*P && !cDataEnd) | ||
468 | { | ||
469 | if (*P == L'>' && | ||
470 | (*(P-1) == L']') && | ||
471 | (*(P-2) == L']')) | ||
472 | { | ||
473 | cDataEnd = P - 2; | ||
474 | } | ||
475 | |||
476 | ++P; | ||
477 | } | ||
478 | |||
479 | if ( cDataEnd ) | ||
480 | NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin)); | ||
481 | else | ||
482 | NodeName = ""; | ||
483 | |||
484 | return true; | ||
485 | } | ||
486 | |||
487 | |||
// structure for storing one attribute of the current element
// as a name/value pair
struct SAttribute
{
    core::string<char_type> Name;   // attribute name as written in the tag
    core::string<char_type> Value;  // attribute value with xml special characters replaced
};
494 | |||
495 | // finds a current attribute by name, returns 0 if not found | ||
496 | const SAttribute* getAttributeByName(const char_type* name) const | ||
497 | { | ||
498 | if (!name) | ||
499 | return 0; | ||
500 | |||
501 | core::string<char_type> n = name; | ||
502 | |||
503 | for (int i=0; i<(int)Attributes.size(); ++i) | ||
504 | if (Attributes[i].Name == n) | ||
505 | return &Attributes[i]; | ||
506 | |||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | // replaces xml special characters in a string and creates a new one | ||
511 | core::string<char_type> replaceSpecialCharacters( | ||
512 | core::string<char_type>& origstr) | ||
513 | { | ||
514 | int pos = origstr.findFirst(L'&'); | ||
515 | int oldPos = 0; | ||
516 | |||
517 | if (pos == -1) | ||
518 | return origstr; | ||
519 | |||
520 | core::string<char_type> newstr; | ||
521 | |||
522 | while(pos != -1 && pos < (int)origstr.size()-2) | ||
523 | { | ||
524 | // check if it is one of the special characters | ||
525 | |||
526 | int specialChar = -1; | ||
527 | for (int i=0; i<(int)SpecialCharacters.size(); ++i) | ||
528 | { | ||
529 | const char_type* p = &origstr.c_str()[pos]+1; | ||
530 | |||
531 | if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1)) | ||
532 | { | ||
533 | specialChar = i; | ||
534 | break; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (specialChar != -1) | ||
539 | { | ||
540 | newstr.append(origstr.subString(oldPos, pos - oldPos)); | ||
541 | newstr.append(SpecialCharacters[specialChar][0]); | ||
542 | pos += SpecialCharacters[specialChar].size(); | ||
543 | } | ||
544 | else | ||
545 | { | ||
546 | newstr.append(origstr.subString(oldPos, pos - oldPos + 1)); | ||
547 | pos += 1; | ||
548 | } | ||
549 | |||
550 | // find next & | ||
551 | oldPos = pos; | ||
552 | pos = origstr.findNext(L'&', pos); | ||
553 | } | ||
554 | |||
555 | if (oldPos < (int)origstr.size()-1) | ||
556 | newstr.append(origstr.subString(oldPos, origstr.size()-oldPos)); | ||
557 | |||
558 | return newstr; | ||
559 | } | ||
560 | |||
561 | |||
562 | |||
563 | //! reads the xml file and converts it into the wanted character format. | ||
564 | bool readFile(IFileReadCallBack* callback) | ||
565 | { | ||
566 | long size = callback->getSize(); | ||
567 | if (size<0) | ||
568 | return false; | ||
569 | size += 4; // We need four terminating 0's at the end. | ||
570 | // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4. | ||
571 | |||
572 | char* data8 = new char[size]; | ||
573 | |||
574 | if (!callback->read(data8, size-4)) | ||
575 | { | ||
576 | delete [] data8; | ||
577 | return false; | ||
578 | } | ||
579 | |||
580 | // add zeros at end | ||
581 | |||
582 | memset(data8+size-4, 0, 4); | ||
583 | |||
584 | char16* data16 = reinterpret_cast<char16*>(data8); | ||
585 | char32* data32 = reinterpret_cast<char32*>(data8); | ||
586 | |||
587 | // now we need to convert the data to the desired target format | ||
588 | // based on the byte order mark. | ||
589 | |||
590 | const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF; | ||
591 | const u16 UTF16_BE = 0xFFFE; | ||
592 | const u16 UTF16_LE = 0xFEFF; | ||
593 | const u32 UTF32_BE = 0xFFFE0000; | ||
594 | const u32 UTF32_LE = 0x0000FEFF; | ||
595 | |||
596 | // check source for all utf versions and convert to target data format | ||
597 | |||
598 | if (size >= 4 && data32[0] == static_cast<char32>(UTF32_BE)) | ||
599 | { | ||
600 | // UTF-32, big endian | ||
601 | SourceFormat = ETF_UTF32_BE; | ||
602 | convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header | ||
603 | } | ||
604 | else | ||
605 | if (size >= 4 && data32[0] == static_cast<char32>(UTF32_LE)) | ||
606 | { | ||
607 | // UTF-32, little endian | ||
608 | SourceFormat = ETF_UTF32_LE; | ||
609 | convertTextData(data32+1, data8, (size/4)-1); // data32+1 because we need to skip the header | ||
610 | } | ||
611 | else | ||
612 | if (size >= 2 && data16[0] == UTF16_BE) | ||
613 | { | ||
614 | // UTF-16, big endian | ||
615 | SourceFormat = ETF_UTF16_BE; | ||
616 | convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header | ||
617 | } | ||
618 | else | ||
619 | if (size >= 2 && data16[0] == UTF16_LE) | ||
620 | { | ||
621 | // UTF-16, little endian | ||
622 | SourceFormat = ETF_UTF16_LE; | ||
623 | convertTextData(data16+1, data8, (size/2)-1); // data16+1 because we need to skip the header | ||
624 | } | ||
625 | else | ||
626 | if (size >= 3 && memcmp(data8,UTF8,3)==0) | ||
627 | { | ||
628 | // UTF-8 | ||
629 | SourceFormat = ETF_UTF8; | ||
630 | convertTextData(data8+3, data8, size-3); // data8+3 because we need to skip the header | ||
631 | } | ||
632 | else | ||
633 | { | ||
634 | // ASCII | ||
635 | SourceFormat = ETF_ASCII; | ||
636 | convertTextData(data8, data8, size); | ||
637 | } | ||
638 | |||
639 | return true; | ||
640 | } | ||
641 | |||
642 | |||
643 | //! converts the text file into the desired format. | ||
644 | /** \param source: begin of the text (without byte order mark) | ||
645 | \param pointerToStore: pointer to text data block which can be | ||
646 | stored or deleted based on the nesessary conversion. | ||
647 | \param sizeWithoutHeader: Text size in characters without header | ||
648 | */ | ||
649 | template<class src_char_type> | ||
650 | void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader) | ||
651 | { | ||
652 | // convert little to big endian if necessary | ||
653 | if (sizeof(src_char_type) > 1 && | ||
654 | isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat)) | ||
655 | convertToLittleEndian(source); | ||
656 | |||
657 | // check if conversion is necessary: | ||
658 | if (sizeof(src_char_type) == sizeof(char_type)) | ||
659 | { | ||
660 | // no need to convert | ||
661 | TextBegin = (char_type*)source; | ||
662 | TextData = (char_type*)pointerToStore; | ||
663 | TextSize = sizeWithoutHeader; | ||
664 | } | ||
665 | else | ||
666 | { | ||
667 | // convert source into target data format. | ||
668 | // TODO: implement a real conversion. This one just | ||
669 | // copies bytes. This is a problem when there are | ||
670 | // unicode symbols using more than one character. | ||
671 | |||
672 | TextData = new char_type[sizeWithoutHeader]; | ||
673 | |||
674 | if ( sizeof(src_char_type) == 1 ) | ||
675 | { | ||
676 | // we have to cast away negative numbers or results might add the sign instead of just doing a copy | ||
677 | for (int i=0; i<sizeWithoutHeader; ++i) | ||
678 | { | ||
679 | TextData[i] = static_cast<char_type>(static_cast<unsigned char>(source[i])); | ||
680 | } | ||
681 | } | ||
682 | else | ||
683 | { | ||
684 | for (int i=0; i<sizeWithoutHeader; ++i) | ||
685 | TextData[i] = static_cast<char_type>(source[i]); | ||
686 | } | ||
687 | TextBegin = TextData; | ||
688 | TextSize = sizeWithoutHeader; | ||
689 | |||
690 | // delete original data because no longer needed | ||
691 | delete [] pointerToStore; | ||
692 | } | ||
693 | } | ||
694 | |||
//! Swaps the byte order of every character in a zero terminated buffer.
//! Characters of 4 bytes get all four bytes reversed, every other width is
//! handled as 16 bit. Processing stops at the first zero character.
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    if (sizeof(src_char_type) != 4)
    {
        // 16 bit: exchange high and low byte
        for (; *t; ++t)
            *t = (*t >> 8) | (*t << 8);

        return;
    }

    // 32 bit: reverse all four bytes
    for (; *t; ++t)
    {
        *t = ((*t & 0xff000000) >> 24) |
             ((*t & 0x00ff0000) >> 8)  |
             ((*t & 0x0000ff00) << 8)  |
             ((*t & 0x000000ff) << 24);
    }
}
723 | |||
724 | //! returns if a format is little endian | ||
725 | inline bool isLittleEndian(ETEXT_FORMAT f) | ||
726 | { | ||
727 | return f == ETF_ASCII || | ||
728 | f == ETF_UTF8 || | ||
729 | f == ETF_UTF16_LE || | ||
730 | f == ETF_UTF32_LE; | ||
731 | } | ||
732 | |||
733 | |||
734 | //! returns true if a character is whitespace | ||
735 | inline bool isWhiteSpace(char_type c) | ||
736 | { | ||
737 | return (c==' ' || c=='\t' || c=='\n' || c=='\r'); | ||
738 | } | ||
739 | |||
740 | |||
741 | //! generates a list with xml special characters | ||
742 | void createSpecialCharacterList() | ||
743 | { | ||
744 | // list of strings containing special symbols, | ||
745 | // the first character is the special character, | ||
746 | // the following is the symbol string without trailing &. | ||
747 | |||
748 | SpecialCharacters.push_back("&"); | ||
749 | SpecialCharacters.push_back("<lt;"); | ||
750 | SpecialCharacters.push_back(">gt;"); | ||
751 | SpecialCharacters.push_back("\"quot;"); | ||
752 | SpecialCharacters.push_back("'apos;"); | ||
753 | |||
754 | } | ||
755 | |||
756 | |||
757 | //! compares the first n characters of the strings | ||
758 | bool equalsn(const char_type* str1, const char_type* str2, int len) | ||
759 | { | ||
760 | int i; | ||
761 | for(i=0; str1[i] && str2[i] && i < len; ++i) | ||
762 | if (str1[i] != str2[i]) | ||
763 | return false; | ||
764 | |||
765 | // if one (or both) of the strings was smaller then they | ||
766 | // are only equal if they have the same lenght | ||
767 | return (i == len) || (str1[i] == 0 && str2[i] == 0); | ||
768 | } | ||
769 | |||
770 | |||
771 | //! stores the target text format | ||
772 | void storeTargetFormat() | ||
773 | { | ||
774 | // get target format. We could have done this using template specialization, | ||
775 | // but VisualStudio 6 don't like it and we want to support it. | ||
776 | |||
777 | switch(sizeof(char_type)) | ||
778 | { | ||
779 | case 1: | ||
780 | TargetFormat = ETF_UTF8; | ||
781 | break; | ||
782 | case 2: | ||
783 | TargetFormat = ETF_UTF16_LE; | ||
784 | break; | ||
785 | case 4: | ||
786 | TargetFormat = ETF_UTF32_LE; | ||
787 | break; | ||
788 | default: | ||
789 | TargetFormat = ETF_ASCII; // should never happen. | ||
790 | } | ||
791 | } | ||
792 | |||
793 | |||
// instance variables:
bool IgnoreWhitespaceText;  // when set, setText() does not report EXN_TEXT nodes for pure whitespace
char_type* TextData;        // data block of the text file, released with delete [] in the destructor
char_type* P;               // current point in text to parse
char_type* TextBegin;       // start of text to parse (behind a byte order mark, if there was one)
unsigned int TextSize;      // size of text to parse in characters, not bytes

EXML_NODE CurrentNodeType;   // type of the currently parsed node
ETEXT_FORMAT SourceFormat;   // source format of the xml file, set by readFile()
ETEXT_FORMAT TargetFormat;   // output format of this parser, set by storeTargetFormat()

core::string<char_type> NodeName;    // name of the node currently in - also used for text, comment and CDATA content
core::string<char_type> EmptyString; // empty string returned by getAttributeValueSafe()

bool IsEmptyElement;       // is the currently parsed node empty, like <foo />?

core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()

core::array<SAttribute> Attributes; // attributes of current element
813 | |||
814 | }; // end CXMLReaderImpl | ||
815 | |||
816 | |||
817 | } // end namespace | ||
818 | } // end namespace | ||
819 | |||
820 | #endif | ||