修改tinyxml让其支持解析特殊字符

修改tinyxmlparser.cpp文件中的部分代码如下:

// Table of the entities that GetEntity() replaces; rows are tried in order.
// Each row is { entity text, strlen(entity text), replacement bytes }.
// The replacement is a short string rather than a single char so that a
// multi-byte character can be stored. The bytes stored for the Greek letters
// are whatever the compiler emits for these literals.
// NOTE(review): confirm the source file encoding matches the encoding of the
// documents being parsed.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
{
    { "&amp;",  5, "&" },   // was "&" with length 5: never matched "&amp;"
    { "&lt;",   4, "<" },
    { "&gt;",   4, ">" },
    { "&quot;", 6, "\"" },
    { "&apos;", 6, "\'" },
    { "&#955;", 6, "λ" },   // '#' needs no escaping inside a string literal
    { "&#934;", 6, "Φ" },
    { "&#951;", 6, "η" }
};

// Decodes the entity that starts at p (p points at the leading '&').
//
// p        - input positioned on the '&' of the candidate entity.
// value    - output buffer receiving the decoded bytes. For table matches
//            exactly *length bytes are copied and no terminator is added.
// length   - receives the number of bytes written to value; it stays 0 when
//            the '&' does not start a recognized entity.
// encoding - with TIXML_ENCODING_UTF8 a numeric reference is converted via
//            ConvertUTF32ToUTF8; otherwise the code point is truncated to a
//            single char.
//
// Returns a pointer just past the consumed input, or 0 when a numeric
// reference has no terminating ';' or contains a non-digit character.
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
{
	*length = 0;

	// Try the entity table first, so entries such as "&#955;" take priority
	// over the generic numeric-reference decoding below.
	for( int i=0; i<NUM_ENTITY; ++i )
	{
		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
		{
			assert( strlen( entity[i].str ) == entity[i].strLength );

			// Overwrite the buffer instead of appending to it: strncat would
			// depend on the caller having zeroed value beforehand and could
			// write past the end of a small buffer otherwise.
			const size_t chrLength = strlen( entity[i].chr );
			memcpy( value, entity[i].chr, chrLength );
			*length = static_cast<int>( chrLength );
			return ( p + entity[i].strLength );
		}
	}

	// Numeric character reference: "&#NNN;" or "&#xHHH;".
	if ( *(p+1) == '#' && *(p+2) )
	{
		unsigned long ucs = 0;
		ptrdiff_t delta = 0;
		unsigned mult = 1;

		if ( *(p+2) == 'x' )
		{
			// Hexadecimal.
			if ( !*(p+3) ) return 0;

			const char* q = strchr( p+3, ';' );
			if ( !q ) return 0;

			delta = q-p;
			--q;

			// Accumulate digits from least to most significant, walking
			// backwards from the ';' until the 'x' marker is reached.
			while ( *q != 'x' )
			{
				if ( *q >= '0' && *q <= '9' )
					ucs += mult * (*q - '0');
				else if ( *q >= 'a' && *q <= 'f' )
					ucs += mult * (*q - 'a' + 10);
				else if ( *q >= 'A' && *q <= 'F' )
					ucs += mult * (*q - 'A' + 10 );
				else 
					return 0;
				mult *= 16;
				--q;
			}
		}
		else
		{
			// Decimal.
			const char* q = strchr( p+2, ';' );
			if ( !q ) return 0;

			delta = q-p;
			--q;

			// Same backwards walk, stopping at the '#' marker.
			while ( *q != '#' )
			{
				if ( *q >= '0' && *q <= '9' )
					ucs += mult * (*q - '0');
				else 
					return 0;
				mult *= 10;
				--q;
			}
		}
		if ( encoding == TIXML_ENCODING_UTF8 )
		{
			// convert the UCS to UTF-8
			ConvertUTF32ToUTF8( ucs, value, length );
		}
		else
		{
			*value = static_cast<char>( ucs );
			*length = 1;
		}
		return p + delta + 1;
	}

	// So it wasn't an entity, its unrecognized, or something like that.
	*value = *p;	// Don't put back the last one, since we return it!
	//*length = 1;	// Leave unrecognized entities - this doesn't really work.
					// Just writes strange XML.
	return p+1;
}



修改tinyxml.h文件中的部分代码如下:

// One row of the entity table: the entity text, its length, and the bytes
// that replace it.
struct Entity
{
    // Entity text to match against the input, e.g. "&lt;".
    const char* str;

    // Number of characters in str.
    unsigned strLength;

    // Replacement character stored as a short C string. Special characters
    // are generally at most two bytes long, hence a 3-byte array.
    char chr[3];
};
// Sizing constants for the entity table.
enum
{
    // Number of entries in the entity table; adjust when rows are added or removed.
    NUM_ENTITY = 8,

    MAX_ENTITY_LENGTH = 6
};




你可能感兴趣的:(修改tinyxml让其支持解析特殊字符)