diff options
Diffstat (limited to 'xml2obj.py')
| -rw-r--r-- | xml2obj.py | 84 |
1 files changed, 84 insertions, 0 deletions
# Reconstructed from the unified diff that adds xml2obj.py
# (diff --git a/xml2obj.py b/xml2obj.py, new file mode 100644,
# index 0000000..5668ec8).
import re
import xml.sax.handler


def xml2obj(src):
    """Convert XML data into a native Python object.

    Each element that carries XML attributes or child elements becomes a
    ``DataNode``; its attributes and children are reachable as Python
    attributes (``node.child``) or by string key (``node['child']``).
    An element with neither attributes nor children is represented by its
    stripped text (``''`` when empty).  Names that occur more than once
    under the same parent are collected into a list.  Characters that are
    not valid in a Python identifier are replaced by ``_`` in names.

    Args:
        src: XML text (a string / bytes object), which is parsed with
            ``xml.sax.parseString``; any other value is handed to
            ``xml.sax.parse`` unchanged (e.g. an open file object).

    Returns:
        The object built for the document (top-level) element: a
        ``DataNode``, or a plain string for a text-only document element.

    Raises:
        xml.sax.SAXParseException: propagated from the SAX parser when
            the input is not well-formed XML.
    """
    # ``basestring`` only exists on Python 2; on Python 3 both text and
    # bytes count as "XML data given inline".
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str, bytes)

    non_id_char = re.compile(r'[^_0-9a-zA-Z]')

    def _name_mangle(name):
        """Replace every non-identifier character in *name* with ``_``."""
        return non_id_char.sub('_', name)

    class DataNode(object):
        """One XML element with its attributes, children and text."""

        def __init__(self):
            self._attrs = {}    # XML attributes and child elements
            self.data = None    # child text data

        def __len__(self):
            # Treat a single element as a list of 1, so callers can handle
            # "one child" and "many children" uniformly.
            return 1

        def __getitem__(self, key):
            # String keys look up attributes/children; anything else
            # (ints, slices) indexes the one-element list ``[self]``.
            if isinstance(key, string_types):
                return self._attrs.get(key)
            return [self][key]

        def __contains__(self, name):
            return name in self._attrs

        def __nonzero__(self):
            return bool(self._attrs or self.data)

        # Python 3 spells the truth-value hook ``__bool__``; without this
        # alias truthiness would fall back to ``__len__`` (always 1).
        __bool__ = __nonzero__

        def __getattr__(self, name):
            if name.startswith('__'):
                # Special-method lookups must fail normally instead of
                # yielding None, otherwise protocol probes are confused.
                raise AttributeError(name)
            # Unknown attributes/children read as None rather than raising.
            return self._attrs.get(name)

        def _add_xml_attr(self, name, value):
            """Store *value* under *name*, promoting repeats to a list."""
            if name not in self._attrs:
                self._attrs[name] = value
                return
            # Multiple entries of the same name are represented by a list.
            children = self._attrs[name]
            if not isinstance(children, list):
                children = [children]
                self._attrs[name] = children
            children.append(value)

        def __str__(self):
            return self.data or ''

        def __repr__(self):
            items = sorted(self._attrs.items())
            if self.data:
                items.append(('data', self.data))
            return '{%s}' % ', '.join('%s:%r' % (k, v) for k, v in items)

    class TreeBuilder(xml.sax.handler.ContentHandler):
        """SAX handler that assembles the DataNode tree."""

        def __init__(self):
            self.stack = []             # (parent node, parent text parts)
            self.root = DataNode()      # synthetic holder of the document
            self.current = self.root
            self.text_parts = []

        def startElement(self, name, attrs):
            # Suspend the parent's state and start a fresh node.
            self.stack.append((self.current, self.text_parts))
            self.current = DataNode()
            self.text_parts = []
            # XML attributes --> Python attributes
            for k, v in attrs.items():
                self.current._add_xml_attr(_name_mangle(k), v)

        def endElement(self, name):
            text = ''.join(self.text_parts).strip()
            if text:
                self.current.data = text
            if self.current._attrs:
                obj = self.current
            else:
                # A text-only node is simply represented by the string.
                obj = text or ''
            # Resume the parent and attach the finished element to it.
            self.current, self.text_parts = self.stack.pop()
            self.current._add_xml_attr(_name_mangle(name), obj)

        def characters(self, content):
            # SAX may deliver text in several chunks; join them later.
            self.text_parts.append(content)

    builder = TreeBuilder()
    if isinstance(src, string_types):
        xml.sax.parseString(src, builder)
    else:
        xml.sax.parse(src, builder)
    # The synthetic root holds exactly one entry: the document element.
    # (dict views are not indexable on Python 3, hence next(iter(...)).)
    return next(iter(builder.root._attrs.values()))
\ No newline at end of file |
