summaryrefslogtreecommitdiff
path: root/xml2obj.py
diff options
context:
space:
mode:
Diffstat (limited to 'xml2obj.py')
-rw-r--r--xml2obj.py84
1 files changed, 84 insertions, 0 deletions
diff --git a/xml2obj.py b/xml2obj.py
new file mode 100644
index 0000000..5668ec8
--- /dev/null
+++ b/xml2obj.py
@@ -0,0 +1,84 @@
+import re
+import xml.sax.handler
+
def xml2obj(src):
    """Convert XML data into a tree of native Python objects.

    ``src`` is either a string (text or bytes) holding the XML document, or
    anything ``xml.sax.parse`` accepts (a filename or a file-like object).

    The return value represents the document's root element:

    * An element that has XML attributes and/or child elements becomes a
      ``DataNode``.  Attributes and children are reachable as Python
      attributes (``node.name``) or by string key (``node['name']``); a
      missing name yields ``None``.  Characters that are not valid in a
      Python identifier are replaced by ``_`` in the name.
    * Several attributes/children sharing one name are collected in a list.
    * An element with neither attributes nor children is represented by its
      stripped text (``''`` when empty).
    * The stripped text of a ``DataNode`` is stored in ``node.data``.

    Works on both Python 2 and Python 3.  Parse errors raised by
    ``xml.sax`` propagate to the caller.
    """

    # ``basestring`` only exists on Python 2; on Python 3 accept both text
    # and bytes, since xml.sax.parseString handles either.
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str, bytes)

    non_id_char = re.compile(r'[^_0-9a-zA-Z]')

    def _name_mangle(name):
        # Turn an XML name into something usable as a Python attribute name.
        return non_id_char.sub('_', name)

    class DataNode(object):
        """One XML element that carries attributes and/or child elements."""

        def __init__(self):
            self._attrs = {}    # XML attributes and child elements
            self.data = None    # child text data

        def __len__(self):
            # Treat a single element as a list of 1, so callers can handle
            # "one child" and "many children" uniformly.
            return 1

        def __getitem__(self, key):
            if isinstance(key, string_types):
                return self._attrs.get(key, None)
            # Integer / slice access behaves like a one-element list.
            return [self][key]

        def __contains__(self, name):
            return name in self._attrs

        def __nonzero__(self):
            return bool(self._attrs or self.data)

        # Python 3 looks up __bool__; without it truthiness would fall back
        # to __len__ and every node would be true.
        __bool__ = __nonzero__

        def __getattr__(self, name):
            if name.startswith('__'):
                # Let Python's special-method probing (copy, pickle, ...)
                # see a real AttributeError instead of a silent None.
                raise AttributeError(name)
            return self._attrs.get(name, None)

        def _add_xml_attr(self, name, value):
            if name in self._attrs:
                # Multiple entries of the same name are represented by a list.
                children = self._attrs[name]
                if not isinstance(children, list):
                    children = [children]
                    self._attrs[name] = children
                children.append(value)
            else:
                self._attrs[name] = value

        def __str__(self):
            return self.data or ''

        def __repr__(self):
            items = sorted(self._attrs.items())
            if self.data:
                items.append(('data', self.data))
            return u'{%s}' % ', '.join(
                [u'%s:%s' % (k, repr(v)) for k, v in items])

    class TreeBuilder(xml.sax.handler.ContentHandler):
        """SAX handler that assembles the DataNode tree."""

        def __init__(self):
            self.stack = []
            self.root = DataNode()
            self.current = self.root
            self.text_parts = []

        def startElement(self, name, attrs):
            # Save the parent's state, then start collecting for the child.
            self.stack.append((self.current, self.text_parts))
            self.current = DataNode()
            self.text_parts = []
            # XML attributes --> Python attributes
            for k, v in attrs.items():
                self.current._add_xml_attr(_name_mangle(k), v)

        def endElement(self, name):
            text = ''.join(self.text_parts).strip()
            if text:
                self.current.data = text
            if self.current._attrs:
                obj = self.current
            else:
                # A text-only node is simply represented by the string.
                obj = text or ''
            self.current, self.text_parts = self.stack.pop()
            self.current._add_xml_attr(_name_mangle(name), obj)

        def characters(self, content):
            # SAX may deliver text in several chunks; join them at endElement.
            self.text_parts.append(content)

    builder = TreeBuilder()
    if isinstance(src, string_types):
        xml.sax.parseString(src, builder)
    else:
        xml.sax.parse(src, builder)
    # The synthetic root holds exactly one entry: the document element.
    # list() is needed because Python 3 dict views are not subscriptable.
    return list(builder.root._attrs.values())[0]