[PATCH] elementtree 1.2.6 update

Fredrik Lundh fredrik at pythonware.com
Fri Apr 15 22:23:42 BST 2005


> this patch updates the ElementTree module to version 1.2.6.  fixes
> that are relevant for bzr include:
>   - expat interface optimizations for recent Python versions
>   - correct ASCII conversion, even when a non-standard default encoding is set
> 
> enjoy /F 

*** modified file 'elementtree/ElementTree.py'
--- elementtree/ElementTree.py 
+++ elementtree/ElementTree.py 
@@ -1,16 +1,8 @@
 #
 # ElementTree
-# $Id: ElementTree.py 1862 2004-06-18 07:31:02Z Fredrik $
+# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
 #
 # light-weight XML support for Python 1.5.2 and later.
-#
-# this is a stripped-down version of Secret Labs' effDOM library (part
-# of xmlToolkit).  compared to effDOM, this implementation has:
-#
-# - no support for observers
-# - no html-specific extensions (e.g. entity preload)
-# - no custom entities, doctypes, etc
-# - no accelerator module
 #
 # history:
 # 2001-10-20 fl   created (from various sources)
@@ -38,8 +30,11 @@
 # 2004-03-28 fl   added XMLID helper
 # 2004-06-02 fl   added default support to findtext
 # 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
-#
-# Copyright (c) 1999-2004 by Fredrik Lundh.  All rights reserved.
+# 2004-08-23 fl   take advantage of post-2.1 expat features
+# 2005-02-01 fl   added iterparse implementation
+# 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
+#
+# Copyright (c) 1999-2005 by Fredrik Lundh.  All rights reserved.
 #
 # fredrik at pythonware.com
 # http://www.pythonware.com
@@ -47,7 +42,7 @@
 # --------------------------------------------------------------------
 # The ElementTree toolkit is
 #
-# Copyright (c) 1999-2004 by Fredrik Lundh
+# Copyright (c) 1999-2005 by Fredrik Lundh
 #
 # By obtaining, using, and/or copying this software and/or its
 # associated documentation, you agree that you have read, understood,
@@ -78,7 +73,7 @@
     "dump",
     "Element", "ElementTree",
     "fromstring",
-    "iselement",
+    "iselement", "iterparse",
     "parse",
     "PI", "ProcessingInstruction",
     "QName",
@@ -143,7 +138,7 @@
 # TODO: add support for custom namespace resolvers/default namespaces
 # TODO: add improved support for incremental parsing
 
-VERSION = "1.2"
+VERSION = "1.2.6"
 
 ##
 # Internal element class.  This class defines the Element interface,
@@ -701,7 +696,7 @@
                 for k, v in xmlns_items:
                     file.write(" %s=\"%s\"" % (_encode(k, encoding),
                                                _escape_attrib(v, encoding)))
-            if node.text or node:
+            if node.text or len(node):
                 file.write(">")
                 if node.text:
                     file.write(_escape_cdata(node.text, encoding))
@@ -865,6 +860,94 @@
     return tree
 
 ##
+# Parses an XML document into an element tree incrementally, and reports
+# what's going on to the user.
+#
+# @param source A filename or file object containing XML data.
+# @param events A list of events to report back.  If omitted, only "end"
+#     events are reported.
+# @return A (event, elem) iterator.
+
+class iterparse:
+
+    def __init__(self, source, events=None):
+        if not hasattr(source, "read"):
+            source = open(source, "rb")
+        self._file = source
+        self._events = []
+        self._index = 0
+        self.root = self._root = None
+        self._parser = XMLTreeBuilder()
+        # wire up the parser for event reporting
+        parser = self._parser._parser
+        append = self._events.append
+        if events is None:
+            events = ["end"]
+        for event in events:
+            if event == "start":
+                try:
+                    parser.ordered_attributes = 1
+                    parser.specified_attributes = 1
+                    def handler(tag, attrib_in, event=event, append=append,
+                                start=self._parser._start_list):
+                        append((event, start(tag, attrib_in)))
+                    parser.StartElementHandler = handler
+                except AttributeError:
+                    def handler(tag, attrib_in, event=event, append=append,
+                                start=self._parser._start):
+                        append((event, start(tag, attrib_in)))
+                    parser.StartElementHandler = handler
+            elif event == "end":
+                def handler(tag, event=event, append=append,
+                            end=self._parser._end):
+                    append((event, end(tag)))
+                parser.EndElementHandler = handler
+            elif event == "start-ns":
+                def handler(prefix, uri, event=event, append=append):
+                    try:
+                        uri = _encode(uri, "ascii")
+                    except UnicodeError:
+                        pass
+                    append((event, (prefix or "", uri)))
+                parser.StartNamespaceDeclHandler = handler
+            elif event == "end-ns":
+                def handler(prefix, event=event, append=append):
+                    append((event, None))
+                parser.EndNamespaceDeclHandler = handler
+
+    def next(self):
+        while 1:
+            try:
+                item = self._events[self._index]
+            except IndexError:
+                if self._parser is None:
+                    self.root = self._root
+                    try:
+                        raise StopIteration
+                    except NameError:
+                        raise IndexError
+                # load event buffer
+                del self._events[:]
+                self._index = 0
+                data = self._file.read(16384)
+                if data:
+                    self._parser.feed(data)
+                else:
+                    self._root = self._parser.close()
+                    self._parser = None
+            else:
+                self._index = self._index + 1
+                return item
+
+    try:
+        iter
+        def __iter__(self):
+            return self
+    except NameError:
+        def __getitem__(self, index):
+            return self.next()
+
+##
 # Parses an XML document from a string constant.  This function can
 # be used to embed "XML literals" in Python code.
 #
@@ -1025,16 +1108,34 @@
 class XMLTreeBuilder:
 
     def __init__(self, html=0, target=None):
-        from xml.parsers import expat
+        try:
+            from xml.parsers import expat
+        except ImportError:
+            raise ImportError(
+                "No module named expat; use SimpleXMLTreeBuilder instead"
+                )
         self._parser = parser = expat.ParserCreate(None, "}")
         if target is None:
             target = TreeBuilder()
         self._target = target
         self._names = {} # name memo cache
-        parser.DefaultHandler = self._default
+        # callbacks
+        parser.DefaultHandlerExpand = self._default
         parser.StartElementHandler = self._start
         parser.EndElementHandler = self._end
         parser.CharacterDataHandler = self._data
+        # let expat do the buffering, if supported
+        try:
+            self._parser.buffer_text = 1
+        except AttributeError:
+            pass
+        # use new-style attribute handling, if supported
+        try:
+            self._parser.ordered_attributes = 1
+            self._parser.specified_attributes = 1
+            parser.StartElementHandler = self._start_list
+        except AttributeError:
+            pass
         encoding = None
         if not parser.returns_unicode:
             encoding = "utf-8"
@@ -1045,7 +1146,7 @@
     def _fixtext(self, text):
         # convert text string to ascii, if possible
         try:
-            return str(text) # what if the default encoding is changed?
+            return _encode(text, "ascii")
         except UnicodeError:
             return text
 
@@ -1066,6 +1167,15 @@
         attrib = {}
         for key, value in attrib_in.items():
             attrib[fixname(key)] = self._fixtext(value)
+        return self._target.start(tag, attrib)
+
+    def _start_list(self, tag, attrib_in):
+        fixname = self._fixname
+        tag = fixname(tag)
+        attrib = {}
+        if attrib_in:
+            for i in range(0, len(attrib_in), 2):
+                attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
         return self._target.start(tag, attrib)
 
     def _data(self, text):







More information about the bazaar mailing list