summaryrefslogtreecommitdiff
path: root/foreign
diff options
context:
space:
mode:
Diffstat (limited to 'foreign')
-rw-r--r--foreign/xmlParser/xmlParser.cpp2923
-rw-r--r--foreign/xmlParser/xmlParser.hpp762
2 files changed, 0 insertions, 3685 deletions
diff --git a/foreign/xmlParser/xmlParser.cpp b/foreign/xmlParser/xmlParser.cpp
deleted file mode 100644
index ccb1e17..0000000
--- a/foreign/xmlParser/xmlParser.cpp
+++ /dev/null
@@ -1,2923 +0,0 @@
-/**
- ****************************************************************************
- * <P> XML.c - implementation file for basic XML parser written in ANSI C++
- * for portability. It works by using recursion and a node tree for breaking
- * down the elements of an XML document. </P>
- *
- * @version V2.39
- * @author Frank Vanden Berghen
- *
- * NOTE:
- *
- * If you add "#define STRICT_PARSING", on the first line of this file
- * the parser will see the following XML-stream:
- * <a><b>some text</b><b>other text </a>
- * as an error. Otherwise, this string will be equivalent to:
- * <a><b>some text</b><b>other text</b></a>
- *
- * NOTE:
- *
- * If you add "#define APPROXIMATE_PARSING" on the first line of this file
- * the parser will see the following XML-stream:
- * <data name="n1">
- * <data name="n2">
- * <data name="n3" />
- * as equivalent to the following XML-stream:
- * <data name="n1" />
- * <data name="n2" />
- * <data name="n3" />
- * This can be useful for badly-formed XML-streams but prevent the use
- * of the following XML-stream (problem is: tags at contiguous levels
- * have the same names):
- * <data name="n1">
- * <data name="n2">
- * <data name="n3" />
- * </data>
- * </data>
- *
- * NOTE:
- *
- * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
- * the "openFileHelper" function will always display error messages inside the
- * console instead of inside a message-box-window. Message-box-windows are
- * available on windows 9x/NT/2000/XP/Vista only.
- *
- * Copyright (c) 2002, Frank Vanden Berghen
- * All rights reserved.
- *
- * The following license terms apply to projects that are in some way related to
- * the "ZeroMQ project", including applications
- * using "ZeroMQ project" and tools developed
- * for enhancing "ZeroMQ project". All other projects
- * (not related to "ZeroMQ project") have to use this
- * code under the Aladdin Free Public License (AFPL)
- * See the file "AFPL-license.txt" for more information about the AFPL license.
- * (see http://www.artifex.com/downloads/doc/Public.htm for detailed AFPL terms)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Frank Vanden Berghen nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY Frank Vanden Berghen ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- ****************************************************************************
- */
-
-#if defined _MSC_VER
-#pragma warning (push)
-#pragma warning (disable:4996)
-#endif
-
-#ifndef _CRT_SECURE_NO_DEPRECATE
-#define _CRT_SECURE_NO_DEPRECATE
-#endif
-#include "xmlParser.hpp"
-#ifdef _XMLWINDOWS
-//#ifdef _DEBUG
-//#define _CRTDBG_MAP_ALLOC
-//#include <crtdbg.h>
-//#endif
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
- // to have "MessageBoxA" to display error messages for openFileHelper
-#endif
-
-#include <memory.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-// Returns the version tag of this parser implementation as a static string.
-XMLCSTR XMLNode::getVersion()
-{
-    return _CXML("v2.39");
-}
-
-// Releases a string previously handed out by the library.
-// Passing NULL is allowed: free(NULL) does nothing.
-void freeXMLString(XMLSTR t)
-{
-    free(t);
-}
-
-static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8;
-static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1;
-
-// Returns the smaller of the two integers (t2 when they are equal).
-inline int mmin( const int t1, const int t2 )
-{
-    if (t1 < t2) return t1;
-    return t2;
-}
-
-// You can modify the initialization of the variable "XMLClearTags" below
-// to change the clearTags that are currently recognized by the library.
-// The number in the second column is the length of the string inside the
-// first column. The "<!DOCTYPE" declaration must be the second in the list.
-// The "<!--" declaration must be the third in the list.
-typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
-static ALLXMLClearTag XMLClearTags[] =
-{
- { _CXML("<![CDATA["),9, _CXML("]]>") },
- { _CXML("<!DOCTYPE"),9, _CXML(">") },
- { _CXML("<!--") ,4, _CXML("-->") },
- { _CXML("<PRE>") ,5, _CXML("</PRE>") },
-// { _CXML("<Script>") ,8, _CXML("</Script>")},
- { NULL ,0, NULL }
-};
-
-// You can modify the initialization of the variable "XMLEntities" below
-// to change the character entities that are currently recognized by the library.
-// The number in the second column is the length of the string inside the
-// first column. Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
-typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
-static XMLCharacterEntity XMLEntities[] =
-{
- { _CXML("&amp;" ), 5, _CXML('&' )},
- { _CXML("&lt;" ), 4, _CXML('<' )},
- { _CXML("&gt;" ), 4, _CXML('>' )},
- { _CXML("&quot;"), 6, _CXML('\"')},
- { _CXML("&apos;"), 6, _CXML('\'')},
- { NULL , 0, '\0' }
-};
-
-// When rendering the XMLNode to a string (using the "createXMLString" function),
-// you can ask for a beautiful formatting. This formatting is using the
-// following indentation character:
-#define INDENTCHAR _CXML('\t')
-
-// The following function parses the XML errors into a user friendly string.
-// You can edit this to change the output language of the library to something else.
-XMLCSTR XMLNode::getError(XMLError xerror)
-{
- // Translates an XMLError code into a human-readable message.
- // Every return value is a string literal, so the caller must not free or modify it.
- // Codes without a matching case fall through to the "Unknown" message at the end.
- switch (xerror)
- {
- case eXMLErrorNone: return _CXML("No error");
- // Note: this message shares its wording with eXMLErrorUnmatchedEndTag below;
- // only the "Warning"/"Error" prefix tells the two apart.
- case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag");
- case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found");
- case eXMLErrorEmpty: return _CXML("Error: No XML data");
- case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name");
- case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name");
- case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag");
- case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end");
- case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found");
- case eXMLErrorNoElements: return _CXML("Error: No elements found");
- case eXMLErrorFileNotFound: return _CXML("Error: File not found");
- case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found");
- case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity");
- case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
- case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars");
- case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing");
- case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file");
-
- // Messages for the base64 helpers.
- case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4");
- case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated");
- case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character");
- case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small");
- };
- return _CXML("Unknown");
-}
-
-/////////////////////////////////////////////////////////////////////////
-// Here start the abstraction layer to be OS-independent //
-/////////////////////////////////////////////////////////////////////////
-
-// Here is an abstraction layer to access some common string manipulation functions.
-// The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
-// Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
-// If you plan to "port" the library to a new system/compiler, all you have to do is
-// to edit the following lines.
-// myIsTextWideChar: heuristic test telling whether the buffer "b" of "len" bytes
-// looks like wide-character (wchar_t) text. Returns TRUE or FALSE.
-// Three implementations are selected at compile time:
-//   * XML_NO_WIDE_CHAR: always answers FALSE;
-//   * Windows CE or non-Windows: the hand-written heuristic below;
-//   * other Windows builds: delegates to the Win32 IsTextUnicode function.
-#ifdef XML_NO_WIDE_CHAR
-char myIsTextWideChar(const void *b, int len) { return FALSE; }
-#else
- #if defined (UNDER_CE) || !defined(_XMLWINDOWS)
- char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
- {
-#ifdef sun
- // for SPARC processors: wchar_t* buffers must always be aligned, otherwise it's a char* buffer.
- if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
-#endif
- const wchar_t *s=(const wchar_t*)b;
-
- // buffer too small:
- if (len<(int)sizeof(wchar_t)) return FALSE;
-
- // odd length test
- if (len&1) return FALSE;
-
- /* only checks the first 256 characters */
- // From here on "len" is a count of wchar_t elements, no longer a count of bytes.
- len=mmin(256,len/sizeof(wchar_t));
-
- // Check for the special byte order:
- // (only the first 16 bits of the buffer are inspected, whatever sizeof(wchar_t) is)
- if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
- if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
-
- // checks for ASCII characters in the UNICODE stream
- // (wide text is assumed when more than half of the inspected elements are <= 255)
- int i,stats=0;
- for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
- if (stats>len/2) return TRUE;
-
- // Check for UNICODE NULL chars
- for (i=0; i<len; i++) if (!s[i]) return TRUE;
-
- return FALSE;
- }
- #else
- char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); }
- #endif
-#endif
-
-#ifdef _XMLWINDOWS
-// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
- #ifdef _XMLWIDECHAR
-    // Converts the NUL-terminated multi-byte string "s" into a newly allocated
-    // wide-character string. "ce" selects the source encoding: UTF-8 when it is
-    // char_encoding_UTF8, the ANSI code page otherwise.
-    // Returns NULL when "s" is NULL, when the conversion fails or when memory is
-    // exhausted. The caller owns the result and must release it with free().
-    wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
-    {
-        if (!s) return NULL;
-        const UINT  codePage=(ce==XMLNode::char_encoding_UTF8)?CP_UTF8:CP_ACP;
-        const DWORD flags   =(ce==XMLNode::char_encoding_UTF8)?0:MB_PRECOMPOSED;
-
-        // First call: ask for the required size (in wide chars, terminating NUL included).
-        // MultiByteToWideChar reports failure with 0, never with a negative value.
-        int i=(int)MultiByteToWideChar(codePage,flags,s,-1,NULL,0);
-        if (i<=0) return NULL;
-
-        wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
-        if (!d) return NULL;
-
-        // Second call: perform the actual conversion.
-        i=(int)MultiByteToWideChar(codePage,flags,s,-1,d,i);
-        if (i<=0) { free(d); return NULL; }
-        d[i]=0;
-        return d;
-    }
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); }
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
- #else
-    // Converts the NUL-terminated wide string "s" into a newly allocated multi-byte
-    // string, using UTF-8 when the global "characterEncoding" is char_encoding_UTF8
-    // and the ANSI code page otherwise.
-    // Returns NULL when "s" is NULL, when the conversion fails or when memory is
-    // exhausted. The caller owns the result and must release it with free().
-    char *myWideCharToMultiByte(const wchar_t *s)
-    {
-        if (!s) return NULL;
-        UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8;
-
-        // First call: ask for the required size (in bytes, terminating NUL included).
-        // WideCharToMultiByte reports failure with 0, never with a negative value.
-        int i=(int)WideCharToMultiByte(codePage, // code page
-                                       0,        // performance and mapping flags
-                                       s,        // wide-character string
-                                       -1,       // number of chars in string
-                                       NULL,     // buffer for new string
-                                       0,        // size of buffer
-                                       NULL,     // default for unmappable chars
-                                       NULL      // set when default char used
-                                       );
-        if (i<=0) return NULL;
-
-        char *d=(char*)malloc(i+1);
-        if (!d) return NULL;
-
-        // Second call: perform the actual conversion.
-        i=(int)WideCharToMultiByte(codePage, // code page
-                                   0,        // performance and mapping flags
-                                   s,        // wide-character string
-                                   -1,       // number of chars in string
-                                   d,        // buffer for new string
-                                   i,        // size of buffer
-                                   NULL,     // default for unmappable chars
-                                   NULL      // set when default char used
-                                   );
-        if (i<=0) { free(d); return NULL; }
-        d[i]=0;
-        return d;
-    }
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); }
- #ifdef __BORLANDC__
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); }
- #else
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
- #endif
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
- #endif
-#else
-// for gcc and CC
- #ifdef XML_NO_WIDE_CHAR
- char *myWideCharToMultiByte(const wchar_t *s) { return NULL; }
- #else
-    // Converts the NUL-terminated wide string "s" into a newly allocated multi-byte
-    // string, following the current C locale (wcsrtombs).
-    // Returns NULL when "s" is NULL, when a character cannot be converted or when
-    // memory is exhausted. The caller owns the result and must release it with free().
-    char *myWideCharToMultiByte(const wchar_t *s)
-    {
-        if (!s) return NULL;
-
-        // First call: measure the number of bytes needed (terminating NUL excluded).
-        // wcsrtombs returns (size_t)-1 on error, which becomes negative once cast to int.
-        const wchar_t *ss=s;
-        int i=(int)wcsrtombs(NULL,&ss,0,NULL);
-        if (i<0) return NULL;
-
-        char *d=(char *)malloc(i+1);
-        if (!d) return NULL;
-
-        // Second call: perform the actual conversion, then terminate explicitly.
-        wcsrtombs(d,&s,i,NULL);
-        d[i]=0;
-        return d;
-    }
- #endif
- #ifdef _XMLWIDECHAR
-    // Converts the NUL-terminated multi-byte string "s" into a newly allocated wide
-    // string, following the current C locale (mbsrtowcs). The "ce" argument is not
-    // used by this implementation.
-    // Returns NULL when "s" is NULL, when the input is not a valid multi-byte sequence
-    // or when memory is exhausted. The caller owns the result and must free() it.
-    wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
-    {
-        (void)ce;
-        if (!s) return NULL;
-
-        // First call: measure the number of wide chars needed (terminating NUL excluded).
-        // mbsrtowcs returns (size_t)-1 on error, which becomes negative once cast to int.
-        const char *ss=s;
-        int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
-        if (i<0) return NULL;
-
-        wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
-        if (!d) return NULL;
-
-        // Second call: perform the actual conversion, then terminate explicitly.
-        mbsrtowcs(d,&s,i,NULL);
-        d[i]=0;
-        return d;
-    }
- int xstrlen(XMLCSTR c) { return wcslen(c); }
- #ifdef sun
- // for CC
- #include <widec.h>
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
- #else
- // for gcc
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
- #endif
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
-    // Opens a file whose name is given as a wide string. The name is first converted
-    // to a multi-byte string, because fopen only accepts char* paths here.
-    // Only the first character of "mode" is looked at: 'r' opens with "rb", anything
-    // else opens with "wb".
-    // Returns NULL when the name cannot be converted or when fopen fails.
-    static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
-    {
-        char *filenameAscii=myWideCharToMultiByte(filename);
-        if (!filenameAscii) return NULL; // never hand a NULL path to fopen
-        FILE *f;
-        if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb");
-        else                     f=fopen(filenameAscii,"wb");
-        free(filenameAscii);
-        return f;
-    }
- #else
- static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
- static inline int xstrlen(XMLCSTR c) { return strlen(c); }
- static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
- static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
- static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
- static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
- static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
- #endif
- static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}
-#endif
-
-
-///////////////////////////////////////////////////////////////////////////////
-// the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions //
-///////////////////////////////////////////////////////////////////////////////
-// These 6 functions are not used inside the XMLparser.
-// They are only here as "convenience" functions for the user.
-// If you don't need them, you can delete them without any trouble.
-// Each xmltoX(t,v) converts the string "t" to the requested type and returns the
-// default "v" when "t" is NULL or empty.
-// NOTE(review): in the wide-char variants xmltob takes an "int" default while the
-// narrow variant takes a "char"; check this against the declaration in xmlParser.hpp.
-#ifdef _XMLWIDECHAR
-    #ifdef _XMLWINDOWS
-    // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
-    char    xmltob(XMLCSTR t,int     v){ if (t&&(*t)) return (char)_wtoi(t); return v; }
-    int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return _wtoi(t); return v; }
-    long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return _wtol(t); return v; }
-    // swscanf parses the string itself; "%lf" is the conversion for a double.
-    // "v" keeps the default when nothing can be parsed.
-    double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) swscanf(t, L"%lf", &v); return v; }
-    #else
-        #ifdef sun
-        // for CC
-           #include <widec.h>
-           char    xmltob(XMLCSTR t,int     v){ if (t&&(*t)) return (char)wstol(t,NULL,10); return v; }
-           int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return (int)wstol(t,NULL,10); return v; }
-           long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return wstol(t,NULL,10); return v; }
-        #else
-        // for gcc
-           char    xmltob(XMLCSTR t,int     v){ if (t&&(*t)) return (char)wcstol(t,NULL,10); return v; }
-           int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return (int)wcstol(t,NULL,10); return v; }
-           long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return wcstol(t,NULL,10); return v; }
-        #endif
-        // swscanf parses the string itself; "%lf" is the conversion for a double.
-        // "v" keeps the default when nothing can be parsed.
-        double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) swscanf(t, L"%lf", &v); return v; }
-    #endif
-#else
-    char    xmltob(XMLCSTR t,char    v){ if (t&&(*t)) return (char)atoi(t); return v; }
-    int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return atoi(t); return v; }
-    long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return atol(t); return v; }
-    double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) return atof(t); return v; }
-#endif
-// Returns "t" itself, or the default "v" when "t" is NULL.
-XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v)
-{
-    return t ? t : v;
-}
-
-// Returns the first character of "t", or the default "v" when "t" is NULL or empty.
-XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v)
-{
-    if ((!t)||(!*t)) return v;
-    return *t;
-}
-
-/////////////////////////////////////////////////////////////////////////
-// the "openFileHelper" function //
-/////////////////////////////////////////////////////////////////////////
-
-// Since each application has its own way to report and deal with errors, you should modify & rewrite
-// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
-// Convenience wrapper around parseFile: guesses the character encoding of the file,
-// parses it, and on any parsing error reports a message and terminates the process
-// with exit(255).
-//   filename: path of the XML file to load.
-//   tag:      name of the expected first tag; it is forwarded to parseFile and only
-//             printed when the error is eXMLErrorFirstTagNotFound.
-// Returns the parsed node when no error occurred.
-XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
-{
-    // guess the value of the global parameter "characterEncoding"
-    // (the guess is based on the first 200 bytes of the file).
-    FILE *f=xfopen(filename,_CXML("rb"));
-    if (f)
-    {
-        char bb[205];
-        int l=(int)fread(bb,1,200,f);
-        setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText);
-        fclose(f);
-    }
-
-    // parse the file
-    XMLResults pResults;
-    XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
-
-    // display error message (if any)
-    if (pResults.error != eXMLErrorNone)
-    {
-        // create message
-        char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML("");
-        if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
-        // "filename" and "tag" come from the caller and may be arbitrarily long.
-        // Their conversions carry an explicit precision so that the formatted text can
-        // never overflow "message". The wide limits are smaller because one wide
-        // character may expand to several bytes.
-        sprintf(message,
-#ifdef _XMLWIDECHAR
-            "XML Parsing error inside file '%.300S'.\n%S\nAt line %i, column %i.\n%s%.100S%s"
-#else
-            "XML Parsing error inside file '%.1200s'.\n%s\nAt line %i, column %i.\n%s%.400s%s"
-#endif
-            ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
-
-        // display message
-#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
-        MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
-#else
-        printf("%s",message);
-#endif
-        exit(255);
-    }
-    return xnode;
-}
-
-/////////////////////////////////////////////////////////////////////////
-// Here start the core implementation of the XMLParser library //
-/////////////////////////////////////////////////////////////////////////
-
-// You should normally not change anything below this point.
-
-#ifndef _XMLWIDECHAR
-// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.
-// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).
-// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).
-// This table is used as lookup-table to know the length of a character (in byte) based on the
-// content of the first byte of the character.
-// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
-static const char XML_utf8ByteTable[256] =
-{
- // 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
- 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
- 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
-};
-static const char XML_legacyByteTable[256] =
-{
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
-};
-static const char XML_sjisByteTable[256] =
-{
- // 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
-};
-static const char XML_gb2312ByteTable[256] =
-{
-// 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
- 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
-};
-static const char XML_gbk_big5_ByteTable[256] =
-{
- // 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
- 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
-};
-static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
-#endif
-
-
-XMLNode XMLNode::emptyXMLNode;
-XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
-XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
-
-// Enumeration used to decipher what type a token is
-typedef enum XMLTokenTypeTag
-{
- eTokenText = 0,
- eTokenQuotedText,
- eTokenTagStart, /* "<" */
- eTokenTagEnd, /* "</" */
- eTokenCloseTag, /* ">" */
- eTokenEquals, /* "=" */
- eTokenDeclaration, /* "<?" */
- eTokenShortHandClose, /* "/>" */
- eTokenClear,
- eTokenError
-} XMLTokenType;
-
-// Main structure used for parsing XML
-typedef struct XML
-{
- XMLCSTR lpXML;
- XMLCSTR lpszText;
- int nIndex,nIndexMissigEndTag;
- enum XMLError error;
- XMLCSTR lpEndTag;
- int cbEndTag;
- XMLCSTR lpNewElement;
- int cbNewElement;
- int nFirst;
-} XML;
-
-typedef struct
-{
- ALLXMLClearTag *pClr;
- XMLCSTR pStr;
-} NextToken;
-
-// Enumeration used when parsing attributes
-typedef enum Attrib
-{
- eAttribName = 0,
- eAttribEquals,
- eAttribValue
-} Attrib;
-
-// Enumeration used when parsing elements to dictate whether we are currently
-// inside a tag
-typedef enum Status
-{
- eInsideTag = 0,
- eOutsideTag
-} Status;
-
-// Renders this node with createXMLString and writes the result into "filename".
-//   filename: path of the file to create or overwrite.
-//   encoding: name put in the generated "<?xml ... encoding=...?>" header in char
-//             mode. It is overridden when the global characterEncoding is UTF-8 or
-//             ShiftJIS, and defaults to "ISO-8859-1" when NULL. Unused in wide mode.
-//   nFormat:  forwarded to createXMLString.
-// An XML declaration is only generated when the node is not itself a declaration
-// and either has a name or has a first child that is not a declaration.
-// Returns eXMLErrorNone on success (also when the node is empty),
-// eXMLErrorCannotOpenWriteFile when the file cannot be opened, and
-// eXMLErrorCannotWriteFile when any write, or the final close, fails.
-XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
-{
-    if (!d) return eXMLErrorNone;
-    FILE *f=xfopen(filename,_CXML("wb"));
-    if (!f) return eXMLErrorCannotOpenWriteFile;
-
-    // Everything below leaves through "cleanup", so that the file is closed and the
-    // rendered string is released on every path.
-    XMLError result=eXMLErrorCannotWriteFile;
-    XMLSTR t=NULL;
-    int i=0;
-#ifdef _XMLWIDECHAR
-    // byte-order mark written in front of the wide-char text
-    unsigned char h[2]={ 0xFF, 0xFE };
-    if (!fwrite(h,2,1,f)) goto cleanup;
-    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
-    {
-        // 40 is the number of characters of the literal below
-        if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f)) goto cleanup;
-    }
-#else
-    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
-    {
-        if (characterEncoding==char_encoding_UTF8)
-        {
-            // header so that windows recognize the file as UTF-8:
-            unsigned char h[3]={0xEF,0xBB,0xBF};
-            if (!fwrite(h,3,1,f)) goto cleanup;
-            encoding="utf-8";
-        } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";
-
-        if (!encoding) encoding="ISO-8859-1";
-        if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) goto cleanup;
-    } else
-    {
-        if (characterEncoding==char_encoding_UTF8)
-        {
-            unsigned char h[3]={0xEF,0xBB,0xBF};
-            if (!fwrite(h,3,1,f)) goto cleanup;
-        }
-    }
-#endif
-    t=createXMLString(nFormat,&i);
-    if (!t) goto cleanup;
-    // fwrite returns 0 for a zero-sized item, which must not be reported as a failure
-    if ((i>0)&&(!fwrite(t,sizeof(XMLCHAR)*i,1,f))) goto cleanup;
-    result=eXMLErrorNone;
-
-cleanup:
-    free(t);
-    // fclose flushes the buffered data: a failure here means the file is incomplete
-    if (fclose(f)!=0) result=eXMLErrorCannotWriteFile;
-    return result;
-}
-
-// Duplicate a given string.
-// Returns a newly allocated, NUL-terminated copy of the first "cbData" characters
-// of "lpszData". Passing cbData==-1 copies the whole string.
-// Returns NULL when "lpszData" is NULL, when "cbData" is negative (other than -1)
-// or when memory is exhausted. The caller must release the copy with free().
-XMLSTR stringDup(XMLCSTR lpszData, int cbData)
-{
-    if (lpszData==NULL) return NULL;
-
-    XMLSTR lpszNew;
-    if (cbData==-1) cbData=(int)xstrlen(lpszData);
-    if (cbData<0) return NULL; // a negative length would wrap around in malloc/memcpy
-    lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
-    if (lpszNew)
-    {
-        memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
-        lpszNew[cbData] = 0; // string terminator (a character, not the NULL pointer)
-    }
-    return lpszNew;
-}
-
-// Copies "source" into "dest", replacing every character listed in XMLEntities by
-// its entity text, and terminates "dest" with a NUL. Returns "dest".
-// "Unsafe" because no bound is checked: "dest" must be large enough for the escaped
-// text plus its terminator (toXML sizes its buffer with lengthXMLString).
-XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source)
-{
-    XMLSTR dd=dest;
-    XMLCHAR ch;
-    XMLCharacterEntity *entity;
-    while ((ch=*source))
-    {
-        entity=XMLEntities;
-        do
-        {
-            if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
-            entity++;
-        } while(entity->s);
-#ifdef _XMLWIDECHAR
-        *(dest++)=*(source++);
-#else
-        {
-            // Copy all the bytes of the current character. XML_ByteTable gives the
-            // length announced by the lead byte. The copy stops early at the string
-            // terminator, so a truncated multi-byte sequence at the end of "source"
-            // is never read past its NUL.
-            int n=XML_ByteTable[(unsigned char)ch];
-            while ((n-->0)&&(*source)) *(dest++)=*(source++);
-        }
-#endif
-out_of_loop1:
-        ;
-    }
-    *dest=0;
-    return dd;
-}
-
-// private (used while rendering):
-// Computes the number of characters (terminator excluded) that "source" occupies
-// once every character listed in XMLEntities is replaced by its entity text.
-int ToXMLStringTool::lengthXMLString(XMLCSTR source)
-{
-    int total=0;
-    XMLCHAR cur;
-    while ((cur=*source))
-    {
-        // Look the current character up in the entity table.
-        const XMLCharacterEntity *match=NULL;
-        for (const XMLCharacterEntity *e=XMLEntities; e->s; e++)
-        {
-            if (cur==e->c) { match=e; break; }
-        }
-
-        if (match)
-        {
-            // One source character becomes the whole entity text.
-            total+=match->l;
-            source++;
-            continue;
-        }
-#ifdef _XMLWIDECHAR
-        total++;
-        source++;
-#else
-        // A plain character counts for as many bytes as its lead byte announces.
-        const int width=XML_ByteTable[(unsigned char)cur];
-        total+=width;
-        source+=width;
-#endif
-    }
-    return total;
-}
-
-// The destructor releases the internal conversion buffer.
-ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
-
-// Releases the internal buffer and resets the tool to its empty state.
-void ToXMLStringTool::freeBuffer(){ free(buf); buf=NULL; buflen=0; }
-
-// Returns "source" with the characters listed in XMLEntities replaced by their
-// entity text. The result lives in the internal buffer of this object: it stays
-// valid until the next call to toXML or freeBuffer, or until destruction.
-// Returns NULL when the buffer cannot be enlarged; the previous buffer is kept.
-XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
-{
-    int l=lengthXMLString(source)+1;
-    if (l>buflen)
-    {
-        // Go through a temporary: on failure realloc leaves the old block untouched,
-        // and assigning its NULL result directly to "buf" would leak that block.
-        XMLSTR grown=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR));
-        if (!grown) return NULL;
-        buf=grown;
-        buflen=l;
-    }
-    return toXMLUnSafe(buf,source);
-}
-
-// private:
-XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
-{
-    // This function is the opposite of the function "toXMLString". It decodes the escape
-    // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replaces them by the characters
-    // &,",',<,>. Numeric references ("&#160;" and "&#xA0;") are decoded too.
-    // This function is used internally by the XML Parser. All the calls to
-    // the XML library will always give you back "decoded" strings.
-    //
-    // in:  string (s) and length (lo) of string
-    // out: newly allocated string converted from xml (to be released with free()).
-    //      NULL when "s" is NULL, when memory is exhausted, or when an entity is
-    //      invalid. Only in the last case is pXML->error set.
-    if (!s) return NULL;
-
-    int ll=0,j;
-    XMLSTR d;
-    XMLCSTR ss=s;
-    XMLCharacterEntity *entity;
-
-    // First pass: validate the entities and compute "ll", the length of the decoded string.
-    while ((lo>0)&&(*s))
-    {
-        if (*s==_CXML('&'))
-        {
-            if ((lo>2)&&(s[1]==_CXML('#')))
-            {
-                // numeric reference: skip up to the closing ';'
-                s+=2; lo-=2;
-                if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; }
-                while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++;
-                if (*s!=_CXML(';'))
-                {
-                    pXML->error=eXMLErrorUnknownCharacterEntity;
-                    return NULL;
-                }
-                s++; lo--;
-            } else
-            {
-                // named entity: it must be one of the entries of XMLEntities
-                entity=XMLEntities;
-                do
-                {
-                    if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
-                    entity++;
-                } while(entity->s);
-                if (!entity->s)
-                {
-                    pXML->error=eXMLErrorUnknownCharacterEntity;
-                    return NULL;
-                }
-            }
-        } else
-        {
-#ifdef _XMLWIDECHAR
-            s++; lo--;
-#else
-            j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
-#endif
-        }
-        ll++;
-    }
-
-    d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
-    if (!d) return NULL; // out of memory: nothing to decode into
-    // From here on "s" remembers the start of the output buffer while "d" advances.
-    s=d;
-
-    // Second pass: decode from "ss" into "d".
-    while (ll-->0)
-    {
-        if (*ss==_CXML('&'))
-        {
-            if (ss[1]==_CXML('#'))
-            {
-                ss+=2; j=0;
-                if ((*ss==_CXML('X'))||(*ss==_CXML('x')))
-                {
-                    // hexadecimal character code
-                    ss++;
-                    while (*ss!=_CXML(';'))
-                    {
-                        if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0');
-                        else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10;
-                        else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10;
-                        else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
-                        ss++;
-                    }
-                } else
-                {
-                    // decimal character code
-                    while (*ss!=_CXML(';'))
-                    {
-                        if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0');
-                        else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
-                        ss++;
-                    }
-                }
-#ifndef _XMLWIDECHAR
-                if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;}
-#endif
-                (*d++)=(XMLCHAR)j; ss++;
-            } else
-            {
-                entity=XMLEntities;
-                do
-                {
-                    if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
-                    entity++;
-                } while(entity->s);
-            }
-        } else
-        {
-#ifdef _XMLWIDECHAR
-            *(d++)=*(ss++);
-#else
-            // Copy every byte of a multi-byte character. The cases fall through on
-            // purpose; each extra byte was counted in "ll" during the first pass.
-            switch(XML_ByteTable[(unsigned char)*ss])
-            {
-            case 4: *(d++)=*(ss++); ll--;
-            case 3: *(d++)=*(ss++); ll--;
-            case 2: *(d++)=*(ss++); ll--;
-            case 1: *(d++)=*(ss++);
-            }
-#endif
-        }
-    }
-    *d=0;
-    return (XMLSTR)s;
-}
-
-// True when "ch" is a newline, space, tab or carriage return.
-// The argument is parenthesized so that any expression can be passed safely, but it
-// is still evaluated several times: do not pass an expression with side effects.
-#define XML_isSPACECHAR(ch) (((ch)==_CXML('\n'))||((ch)==_CXML(' '))||((ch)== _CXML('\t'))||((ch)==_CXML('\r')))
-
-// private:
-char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
-// Case-insensitive test: does "copen" start with the tag name "cclose", followed
-// directly by a character that ends a tag name?
-// !!!! WARNING strange convention:
-// return 0 if equals
-// return 1 if different (also when "cclose" is NULL)
-{
-    if (!cclose) return 1;
-
-    const int nameLen=(int)xstrlen(cclose);
-    if (xstrnicmp(cclose, copen, nameLen)!=0) return 1;
-
-    // The name only matches when the next character of "copen" is a delimiter.
-    const XMLCHAR next=copen[nameLen];
-    if (XML_isSPACECHAR(next)) return 0;
-    if (next==_CXML('/')) return 0;
-    if (next==_CXML('<')) return 0;
-    if (next==_CXML('>')) return 0;
-    if (next==_CXML('=')) return 0;
-    return 1;
-}
-
-// Obtain the next character from the string.
-// Returns the character at the current parsing position and moves the position
-// past it. At the end of the string the terminator is returned and the position
-// does not move.
-static inline XMLCHAR getNextChar(XML *pXML)
-{
-    const XMLCHAR current = pXML->lpXML[pXML->nIndex];
-#ifdef _XMLWIDECHAR
-    if (current) pXML->nIndex += 1;
-#else
-    // The table gives the byte length of the character starting with this lead byte.
-    const int width = XML_ByteTable[(unsigned char)current];
-    pXML->nIndex = pXML->nIndex + width;
-#endif
-    return current;
-}
-
-// Find the next token in a string.
-// pcbToken contains the number of characters that have been read.
-static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
-{
- NextToken result;
- XMLCHAR ch;
- XMLCHAR chTemp;
- int indexStart,nFoundMatch,nIsText=FALSE;
- result.pClr=NULL; // prevent warning
-
- // Find next non-white space character
- do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
-
- if (ch)
- {
- // Cache the current string pointer
- result.pStr = &pXML->lpXML[indexStart];
-
- // First check whether the token is in the clear tag list (meaning it
- // does not need formatting).
- ALLXMLClearTag *ctag=XMLClearTags;
- do
- {
- if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
- {
- result.pClr=ctag;
- pXML->nIndex+=ctag->openTagLen-1;
- *pType=eTokenClear;
- return result;
- }
- ctag++;
- } while(ctag->lpszOpen);
-
- // If we didn't find a clear tag then check for standard