diff options
Diffstat (limited to 'foreign')
-rw-r--r-- | foreign/xmlParser/xmlParser.cpp | 2923 | ||||
-rw-r--r-- | foreign/xmlParser/xmlParser.hpp | 762 |
2 files changed, 0 insertions, 3685 deletions
diff --git a/foreign/xmlParser/xmlParser.cpp b/foreign/xmlParser/xmlParser.cpp deleted file mode 100644 index ccb1e17..0000000 --- a/foreign/xmlParser/xmlParser.cpp +++ /dev/null @@ -1,2923 +0,0 @@ -/** - **************************************************************************** - * <P> XML.c - implementation file for basic XML parser written in ANSI C++ - * for portability. It works by using recursion and a node tree for breaking - * down the elements of an XML document. </P> - * - * @version V2.39 - * @author Frank Vanden Berghen - * - * NOTE: - * - * If you add "#define STRICT_PARSING", on the first line of this file - * the parser will see the following XML-stream: - * <a><b>some text</b><b>other text </a> - * as an error. Otherwise, this tring will be equivalent to: - * <a><b>some text</b><b>other text</b></a> - * - * NOTE: - * - * If you add "#define APPROXIMATE_PARSING" on the first line of this file - * the parser will see the following XML-stream: - * <data name="n1"> - * <data name="n2"> - * <data name="n3" /> - * as equivalent to the following XML-stream: - * <data name="n1" /> - * <data name="n2" /> - * <data name="n3" /> - * This can be useful for badly-formed XML-streams but prevent the use - * of the following XML-stream (problem is: tags at contiguous levels - * have the same names): - * <data name="n1"> - * <data name="n2"> - * <data name="n3" /> - * </data> - * </data> - * - * NOTE: - * - * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file - * the "openFileHelper" function will always display error messages inside the - * console instead of inside a message-box-window. Message-box-windows are - * available on windows 9x/NT/2000/XP/Vista only. - * - * Copyright (c) 2002, Frank Vanden Berghen - * All rights reserved. - * - * The following license terms apply to projects that are in some way related to - * the "ZeroMQ project", including applications - * using "ZeroMQ project" and tools developed - * for enhancing "ZeroMQ project". All other projects - * (not related to "ZeroMQ project") have to use this - * code under the Aladdin Free Public License (AFPL) - * See the file "AFPL-license.txt" for more informations about the AFPL license. - * (see http://www.artifex.com/downloads/doc/Public.htm for detailed AFPL terms) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Frank Vanden Berghen nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY Frank Vanden Berghen ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - **************************************************************************** - */ - -#if defined _MSC_VER -#pragma warning (push) -#pragma warning (disable:4996) -#endif - -#ifndef _CRT_SECURE_NO_DEPRECATE -#define _CRT_SECURE_NO_DEPRECATE -#endif -#include "xmlParser.hpp" -#ifdef _XMLWINDOWS -//#ifdef _DEBUG -//#define _CRTDBG_MAP_ALLOC -//#include <crtdbg.h> -//#endif -#define WIN32_LEAN_AND_MEAN -#include <windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files - // to have "MessageBoxA" to display error messages for openFilHelper -#endif - -#include <memory.h> -#include <assert.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); } -void freeXMLString(XMLSTR t){if(t)free(t);} - -static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; -static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; - -inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } - -// You can modify the initialization of the variable "XMLClearTags" below -// to change the clearTags that are currently recognized by the library. -// The number on the second columns is the length of the string inside the -// first column. The "<!DOCTYPE" declaration must be the second in the list. -// The "<!--" declaration must be the third in the list. -typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag; -static ALLXMLClearTag XMLClearTags[] = -{ - { _CXML("<![CDATA["),9, _CXML("]]>") }, - { _CXML("<!DOCTYPE"),9, _CXML(">") }, - { _CXML("<!--") ,4, _CXML("-->") }, - { _CXML("<PRE>") ,5, _CXML("</PRE>") }, -// { _CXML("<Script>") ,8, _CXML("</Script>")}, - { NULL ,0, NULL } -}; - -// You can modify the initialization of the variable "XMLEntities" below -// to change the character entities that are currently recognized by the library. -// The number on the second columns is the length of the string inside the -// first column. Additionally, the syntaxes " " and " " are recognized. -typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; -static XMLCharacterEntity XMLEntities[] = -{ - { _CXML("&" ), 5, _CXML('&' )}, - { _CXML("<" ), 4, _CXML('<' )}, - { _CXML(">" ), 4, _CXML('>' )}, - { _CXML("""), 6, _CXML('\"')}, - { _CXML("'"), 6, _CXML('\'')}, - { NULL , 0, '\0' } -}; - -// When rendering the XMLNode to a string (using the "createXMLString" function), -// you can ask for a beautiful formatting. This formatting is using the -// following indentation character: -#define INDENTCHAR _CXML('\t') - -// The following function parses the XML errors into a user friendly string. -// You can edit this to change the output language of the library to something else. -XMLCSTR XMLNode::getError(XMLError xerror) -{ - switch (xerror) - { - case eXMLErrorNone: return _CXML("No error"); - case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); - case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); - case eXMLErrorEmpty: return _CXML("Error: No XML data"); - case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); - case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); - case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); - case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); - case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); - case eXMLErrorNoElements: return _CXML("Error: No elements found"); - case eXMLErrorFileNotFound: return _CXML("Error: File not found"); - case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); - case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); - case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); - case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); - case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); - case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); - - case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); - case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); - case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); - case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); - }; - return _CXML("Unknown"); -} - -///////////////////////////////////////////////////////////////////////// -// Here start the abstraction layer to be OS-independent // -///////////////////////////////////////////////////////////////////////// - -// Here is an abstraction layer to access some common string manipulation functions. -// The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0, -// Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++. -// If you plan to "port" the library to a new system/compiler, all you have to do is -// to edit the following lines. -#ifdef XML_NO_WIDE_CHAR -char myIsTextWideChar(const void *b, int len) { return FALSE; } -#else - #if defined (UNDER_CE) || !defined(_XMLWINDOWS) - char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode - { -#ifdef sun - // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. - if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; -#endif - const wchar_t *s=(const wchar_t*)b; - - // buffer too small: - if (len<(int)sizeof(wchar_t)) return FALSE; - - // odd length test - if (len&1) return FALSE; - - /* only checks the first 256 characters */ - len=mmin(256,len/sizeof(wchar_t)); - - // Check for the special byte order: - if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; - if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE - - // checks for ASCII characters in the UNICODE stream - int i,stats=0; - for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++; - if (stats>len/2) return TRUE; - - // Check for UNICODE NULL chars - for (i=0; i<len; i++) if (!s[i]) return TRUE; - - return FALSE; - } - #else - char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); } - #endif -#endif - -#ifdef _XMLWINDOWS -// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 - #ifdef _XMLWIDECHAR - wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) - { - int i; - if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0); - else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0); - if (i<0) return NULL; - wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR)); - if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i); - else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i); - d[i]=0; - return d; - } - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); } - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); } - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } - #else - char *myWideCharToMultiByte(const wchar_t *s) - { - UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8; - int i=(int)WideCharToMultiByte(codePage, // code page - 0, // performance and mapping flags - s, // wide-character string - -1, // number of chars in string - NULL, // buffer for new string - 0, // size of buffer - NULL, // default for unmappable chars - NULL // set when default char used - ); - if (i<0) return NULL; - char *d=(char*)malloc(i+1); - WideCharToMultiByte(codePage, // code page - 0, // performance and mapping flags - s, // wide-character string - -1, // number of chars in string - d, // buffer for new string - i, // size of buffer - NULL, // default for unmappable chars - NULL // set when default char used - ); - d[i]=0; - return d; - } - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); } - #ifdef __BORLANDC__ - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); } - #else - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); } - #endif - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } - #endif -#else -// for gcc and CC - #ifdef XML_NO_WIDE_CHAR - char *myWideCharToMultiByte(const wchar_t *s) { return NULL; } - #else - char *myWideCharToMultiByte(const wchar_t *s) - { - const wchar_t *ss=s; - int i=(int)wcsrtombs(NULL,&ss,0,NULL); - if (i<0) return NULL; - char *d=(char *)malloc(i+1); - wcsrtombs(d,&s,i,NULL); - d[i]=0; - return d; - } - #endif - #ifdef _XMLWIDECHAR - wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) - { - const char *ss=s; - int i=(int)mbsrtowcs(NULL,&ss,0,NULL); - if (i<0) return NULL; - wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t)); - mbsrtowcs(d,&s,i,NULL); - d[i]=0; - return d; - } - int xstrlen(XMLCSTR c) { return wcslen(c); } - #ifdef sun - // for CC - #include <widec.h> - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } - #else - // for gcc - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } - #endif - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) - { - char *filenameAscii=myWideCharToMultiByte(filename); - FILE *f; - if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); - else f=fopen(filenameAscii,"wb"); - free(filenameAscii); - return f; - } - #else - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return strlen(c); } - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } - #endif - static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} -#endif - - -/////////////////////////////////////////////////////////////////////////////// -// the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions // -/////////////////////////////////////////////////////////////////////////////// -// These 6 functions are not used inside the XMLparser. -// There are only here as "convenience" functions for the user. -// If you don't need them, you can delete them without any trouble. -#ifdef _XMLWIDECHAR - #ifdef _XMLWINDOWS - // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 - char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #else - #ifdef sun - // for CC - #include <widec.h> - char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } - #else - // for gcc - char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } - #endif - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #endif -#else - char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } -#endif -XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; } -XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } - -///////////////////////////////////////////////////////////////////////// -// the "openFileHelper" function // -///////////////////////////////////////////////////////////////////////// - -// Since each application has its own way to report and deal with errors, you should modify & rewrite -// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. -XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) -{ - // guess the value of the global parameter "characterEncoding" - // (the guess is based on the first 200 bytes of the file). - FILE *f=xfopen(filename,_CXML("rb")); - if (f) - { - char bb[205]; - int l=(int)fread(bb,1,200,f); - setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); - fclose(f); - } - - // parse the file - XMLResults pResults; - XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); - - // display error message (if any) - if (pResults.error != eXMLErrorNone) - { - // create message - char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); - if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } - sprintf(message, -#ifdef _XMLWIDECHAR - "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" -#else - "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" -#endif - ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); - - // display message -#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) - MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); -#else - printf("%s",message); -#endif - exit(255); - } - return xnode; -} - -///////////////////////////////////////////////////////////////////////// -// Here start the core implementation of the XMLParser library // -///////////////////////////////////////////////////////////////////////// - -// You should normally not change anything below this point. - -#ifndef _XMLWIDECHAR -// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte. -// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes). -// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes). -// This table is used as lookup-table to know the length of a character (in byte) based on the -// content of the first byte of the character. -// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ). -static const char XML_utf8ByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte - 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid -}; -static const char XML_legacyByteTable[256] = -{ - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 -}; -static const char XML_sjisByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 -}; -static const char XML_gb2312ByteTable[256] = -{ -// 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0 -}; -static const char XML_gbk_big5_ByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0 -}; -static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8" -#endif - - -XMLNode XMLNode::emptyXMLNode; -XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL}; -XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL}; - -// Enumeration used to decipher what type a token is -typedef enum XMLTokenTypeTag -{ - eTokenText = 0, - eTokenQuotedText, - eTokenTagStart, /* "<" */ - eTokenTagEnd, /* "</" */ - eTokenCloseTag, /* ">" */ - eTokenEquals, /* "=" */ - eTokenDeclaration, /* "<?" */ - eTokenShortHandClose, /* "/>" */ - eTokenClear, - eTokenError -} XMLTokenType; - -// Main structure used for parsing XML -typedef struct XML -{ - XMLCSTR lpXML; - XMLCSTR lpszText; - int nIndex,nIndexMissigEndTag; - enum XMLError error; - XMLCSTR lpEndTag; - int cbEndTag; - XMLCSTR lpNewElement; - int cbNewElement; - int nFirst; -} XML; - -typedef struct -{ - ALLXMLClearTag *pClr; - XMLCSTR pStr; -} NextToken; - -// Enumeration used when parsing attributes -typedef enum Attrib -{ - eAttribName = 0, - eAttribEquals, - eAttribValue -} Attrib; - -// Enumeration used when parsing elements to dictate whether we are currently -// inside a tag -typedef enum Status -{ - eInsideTag = 0, - eOutsideTag -} Status; - -XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const -{ - if (!d) return eXMLErrorNone; - FILE *f=xfopen(filename,_CXML("wb")); - if (!f) return eXMLErrorCannotOpenWriteFile; -#ifdef _XMLWIDECHAR - unsigned char h[2]={ 0xFF, 0xFE }; - if (!fwrite(h,2,1,f)) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f)) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - } -#else - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (characterEncoding==char_encoding_UTF8) - { - // header so that windows recognize the file as UTF-8: - unsigned char h[3]={0xEF,0xBB,0xBF}; - if (!fwrite(h,3,1,f)) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - encoding="utf-8"; - } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS"; - - if (!encoding) encoding="ISO-8859-1"; - if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - } else - { - if (characterEncoding==char_encoding_UTF8) - { - unsigned char h[3]={0xEF,0xBB,0xBF}; - if (!fwrite(h,3,1,f)) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - } - } -#endif - int i; - XMLSTR t=createXMLString(nFormat,&i); - if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) - { - fclose(f); - return eXMLErrorCannotWriteFile; - } - if (fclose(f)!=0) return eXMLErrorCannotWriteFile; - free(t); - return eXMLErrorNone; -} - -// Duplicate a given string. -XMLSTR stringDup(XMLCSTR lpszData, int cbData) -{ - if (lpszData==NULL) return NULL; - - XMLSTR lpszNew; - if (cbData==-1) cbData=(int)xstrlen(lpszData); - lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); - if (lpszNew) - { - memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); - lpszNew[cbData] = (XMLCHAR)NULL; - } - return lpszNew; -} - -XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) -{ - XMLSTR dd=dest; - XMLCHAR ch; - XMLCharacterEntity *entity; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } - entity++; - } while(entity->s); -#ifdef _XMLWIDECHAR - *(dest++)=*(source++); -#else - switch(XML_ByteTable[(unsigned char)ch]) - { - case 4: *(dest++)=*(source++); - case 3: *(dest++)=*(source++); - case 2: *(dest++)=*(source++); - case 1: *(dest++)=*(source++); - } -#endif -out_of_loop1: - ; - } - *dest=0; - return dd; -} - -// private (used while rendering): -int ToXMLStringTool::lengthXMLString(XMLCSTR source) -{ - int r=0; - XMLCharacterEntity *entity; - XMLCHAR ch; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } - entity++; - } while(entity->s); -#ifdef _XMLWIDECHAR - r++; source++; -#else - ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; -#endif -out_of_loop1: - ; - } - return r; -} - -ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } -void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } -XMLSTR ToXMLStringTool::toXML(XMLCSTR source) -{ - int l=lengthXMLString(source)+1; - if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); } - return toXMLUnSafe(buf,source); -} - -// private: -XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) -{ - // This function is the opposite of the function "toXMLString". It decodes the escape - // sequences &, ", ', <, > and replace them by the characters - // &,",',<,>. This function is used internally by the XML Parser. All the calls to - // the XML library will always gives you back "decoded" strings. - // - // in: string (s) and length (lo) of string - // out: new allocated string converted from xml - if (!s) return NULL; - - int ll=0,j; - XMLSTR d; - XMLCSTR ss=s; - XMLCharacterEntity *entity; - while ((lo>0)&&(*s)) - { - if (*s==_CXML('&')) - { - if ((lo>2)&&(s[1]==_CXML('#'))) - { - s+=2; lo-=2; - if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } - while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; - if (*s!=_CXML(';')) - { - pXML->error=eXMLErrorUnknownCharacterEntity; - return NULL; - } - s++; lo--; - } else - { - entity=XMLEntities; - do - { - if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } - entity++; - } while(entity->s); - if (!entity->s) - { - pXML->error=eXMLErrorUnknownCharacterEntity; - return NULL; - } - } - } else - { -#ifdef _XMLWIDECHAR - s++; lo--; -#else - j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; -#endif - } - ll++; - } - - d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); - s=d; - while (ll-->0) - { - if (*ss==_CXML('&')) - { - if (ss[1]==_CXML('#')) - { - ss+=2; j=0; - if ((*ss==_CXML('X'))||(*ss==_CXML('x'))) - { - ss++; - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); - else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; - else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} - ss++; - } - } else - { - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} - ss++; - } - } -#ifndef _XMLWIDECHAR - if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} -#endif - (*d++)=(XMLCHAR)j; ss++; - } else - { - entity=XMLEntities; - do - { - if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } - entity++; - } while(entity->s); - } - } else - { -#ifdef _XMLWIDECHAR - *(d++)=*(ss++); -#else - switch(XML_ByteTable[(unsigned char)*ss]) - { - case 4: *(d++)=*(ss++); ll--; - case 3: *(d++)=*(ss++); ll--; - case 2: *(d++)=*(ss++); ll--; - case 1: *(d++)=*(ss++); - } -#endif - } - } - *d=0; - return (XMLSTR)s; -} - -#define XML_isSPACECHAR(ch) ((ch==_CXML('\n'))||(ch==_CXML(' '))||(ch== _CXML('\t'))||(ch==_CXML('\r'))) - -// private: -char myTagCompare(XMLCSTR cclose, XMLCSTR copen) -// !!!! WARNING strange convention&: -// return 0 if equals -// return 1 if different -{ - if (!cclose) return 1; - int l=(int)xstrlen(cclose); - if (xstrnicmp(cclose, copen, l)!=0) return 1; - const XMLCHAR c=copen[l]; - if (XML_isSPACECHAR(c)|| - (c==_CXML('/' ))|| - (c==_CXML('<' ))|| - (c==_CXML('>' ))|| - (c==_CXML('=' ))) return 0; - return 1; -} - -// Obtain the next character from the string. -static inline XMLCHAR getNextChar(XML *pXML) -{ - XMLCHAR ch = pXML->lpXML[pXML->nIndex]; -#ifdef _XMLWIDECHAR - if (ch!=0) pXML->nIndex++; -#else - pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; -#endif - return ch; -} - -// Find the next token in a string. -// pcbToken contains the number of characters that have been read. -static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) -{ - NextToken result; - XMLCHAR ch; - XMLCHAR chTemp; - int indexStart,nFoundMatch,nIsText=FALSE; - result.pClr=NULL; // prevent warning - - // Find next non-white space character - do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); - - if (ch) - { - // Cache the current string pointer - result.pStr = &pXML->lpXML[indexStart]; - - // First check whether the token is in the clear tag list (meaning it - // does not need formatting). - ALLXMLClearTag *ctag=XMLClearTags; - do - { - if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0) - { - result.pClr=ctag; - pXML->nIndex+=ctag->openTagLen-1; - *pType=eTokenClear; - return result; - } - ctag++; - } while(ctag->lpszOpen); - - // If we didn't find a clear tag then check for standard |