# coding: gbk
# Minimal Python 2 reproduction: build a two-level tree whose attribute value
# and text are non-ASCII byte strings, then serialize the tree as GBK.
import xml.etree.ElementTree as ET

rootelem = ET.Element("SystemList")

organization = ET.SubElement(rootelem, "Organization")
organization.set("label", "<空>")
organization.set("id", "-1")
organization.text = "测试"

print(ET.tostring(rootelem, encoding="gbk"))
输出:
def _encode_entity(text, pattern=_escape):
    """Return *text* encoded as ASCII with matched characters as entities.

    Every substring matched by *pattern* is rewritten character by
    character: a character present in the escape map is replaced by its
    mapped value, any other character becomes a decimal character
    reference of the form ``&#NNN;``.  The substituted text is then passed
    through ``_encode(..., "ascii")``.  A ``TypeError`` raised along the way
    is reported through ``_raise_serialization_error``.
    """

    def escape_entities(m, map=_escape_map):
        # Translate one regex match into its escaped replacement string.
        pieces = []
        for char in m.group():
            replacement = map.get(char)
            if replacement is None:
                replacement = "&#%d;" % ord(char)
            pieces.append(replacement)
        return string.join(pieces, "")

    try:
        escaped = pattern.sub(escape_entities, text)
        return _encode(escaped, "ascii")
    except TypeError:
        _raise_serialization_error(text)
在_escape_attrib()函数里,会调用上面的_encode_entity()函数:
def _escape_attrib(text, encoding=None, replace=string.replace):
    """Escape *text* so it can be written as an XML attribute value.

    If *encoding* is given, *text* is first passed through ``_encode``.  When
    that raises ``UnicodeError`` the function returns the result of
    ``_encode_entity(text)`` instead and skips the replacements below.
    Otherwise the five XML special characters are replaced by their
    predefined entities and the escaped text is returned.

    A ``TypeError`` or ``AttributeError`` raised while escaping is reported
    through ``_raise_serialization_error``.
    """
    try:
        if encoding:
            try:
                text = _encode(text, encoding)
            except UnicodeError:
                # Encoding failed: fall back to the entity-based encoder.
                return _encode_entity(text)
        # "&" must be handled first so the entities inserted by the later
        # replacements are not escaped a second time.
        text = replace(text, "&", "&amp;")
        text = replace(text, "'", "&apos;")  # FIXME: overkill
        text = replace(text, "\"", "&quot;")
        text = replace(text, "<", "&lt;")
        text = replace(text, ">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
而ElementTree类的_write()函数里,会调用上面这个_escape_attrib()函数,所以导致了&#191;&#213;(即¿Õ)这些字符的出现。
def _encode(s, encoding): try: return s.encode(encoding) except AttributeError: return s # 1.5.2: assume the string uses the right encoding
s="<空>",类型是str,encoding="gbk"
# Switch the interpreter-wide default string encoding to GBK (Python 2).
# According to the surrounding text, this lets the implicit str -> unicode
# conversion of "<空>" succeed instead of failing.
# NOTE(review): reload(sys) is presumably what makes setdefaultencoding
# callable again in Python 2 -- confirm before relying on this.
import sys
reload(sys)
sys.setdefaultencoding("gbk")
所以"<空>"在转换到unicode时,不会失败。
def _encode(s, encoding): try: if isinstance(s, str): return s return s.encode(encoding) except AttributeError: return s # 1.5.2: assume the string uses the right encoding
可以把上面这个函数定义在自己的py文件里,然后在