Rev 47: Make the DAV parsing more robust. in http://bazaar.launchpad.net/%7Ebzr/bzr.webdav/webdav

Vincent Ladeuil v.ladeuil+lp at free.fr
Sat Jun 7 18:31:07 BST 2008


At http://bazaar.launchpad.net/%7Ebzr/bzr.webdav/webdav

------------------------------------------------------------
revno: 47
revision-id: v.ladeuil+lp at free.fr-20080607173105-g5au9vt8txid83ee
parent: v.ladeuil+lp at free.fr-20080607152020-65i1q0ac9f88fvpo
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: webdav
timestamp: Sat 2008-06-07 19:31:05 +0200
message:
  Make the DAV parsing more robust.
  
  * test_webdav.py:
  (TestDavSaxParser): AAdd tests.
  
  * webdav.py:
  (DavResponseHandler.endElement): Be more strict in href
  recognition.
  (DavResponseParser): New class.
modified:
  test_webdav.py                 test_webdav.py-20060823130244-qvg4wqdodnmf5nhs-1
  webdav.py                      webdav.py-20060816232542-enpjxth2743ttqpq-3
-------------- next part --------------
=== modified file 'test_webdav.py'
--- a/test_webdav.py	2008-06-07 15:13:23 +0000
+++ b/test_webdav.py	2008-06-07 17:31:05 +0000
@@ -41,6 +41,7 @@
 
 
 from bzrlib import (
+    errors,
     tests,
     trace,
     )
@@ -422,19 +423,16 @@
 
 class TestDavSaxParser(tests.TestCase):
 
-    def _get_handler(self):
-        return webdav.DavResponseHandler()
-
-    def _get_parser(self, handler):
-        parser = xml.sax.make_parser()
-        parser.setContentHandler(handler)
-        return parser
-
-    def _parse_string(self, str):
-        handler = self._get_handler()
-        parser = self._get_parser(handler)
-        parser.parse(StringIO(str))
-        return handler
+    def _get_parser(self, handler=None):
+        if handler is None:
+            handler = webdav.DavResponseHandler()
+        return webdav.DavResponseParser(handler)
+
+    def _parse_string(self, str, url):
+        parser = self._get_parser()
+        parser.handler.set_url(url)
+        parser.parse(StringIO(str), url)
+        return parser.handler
 
     def test_apache2_example(self):
         example = """<?xml version="1.0" encoding="utf-8"?>
@@ -472,7 +470,7 @@
         </D:propstat>
     </D:response>
 </D:multistatus>"""
-        handler = self._parse_string(example)
+        handler = self._parse_string(example, 'http://localhost/blah')
         self.assertEqual(['a', 'b', 'c'], handler.get_dir_content())
 
     def test_lighttpd_example(self):
@@ -488,6 +486,42 @@
 <D:href>http://localhost/toto</D:href>
 </D:response>
 </D:multistatus>"""
-        handler = self._parse_string(example)
+        handler = self._parse_string(example, 'http://localhost/blah')
         self.assertEqual(['titi', 'toto'], handler.get_dir_content())
 
+    def test_malformed_response(self):
+        # Invalid xml, neither multistatus nor response are properly closed
+        example = """<?xml version="1.0" encoding="utf-8"?>
+<D:multistatus xmlns:D="DAV:" xmlns:ns0="urn:uuid:c2f41010-65b3-11d1-a29f-00aa00c14882/">
+<D:response>
+<D:href>http://localhost/</D:href>"""
+        self.assertRaises(errors.InvalidHttpResponse,
+                          self._parse_string, example,
+                          'http://localhost/blah')
+
+    def test_unkown_format_response(self):
+        # Valid but unrelated xml
+        example = """<document/>"""
+        self.assertRaises(errors.InvalidHttpResponse,
+                          self._parse_string, example,
+                          'http://localhost/blah')
+
+    def test_incomplete_format_response(self):
+        # The minimal information is present but doesn't conform to RFC 2518
+        # (well, as I understand it since the reference servers disagree on
+        # more than details).
+
+        # The last href below is not enclosed in a response element and is
+        # therefore ignored.
+        example = """<?xml version="1.0" encoding="utf-8"?>
+<D:multistatus xmlns:D="DAV:" xmlns:ns0="urn:uuid:c2f41010-65b3-11d1-a29f-00aa00c14882/">
+<D:response>
+<D:href>http://localhost/</D:href>
+</D:response>
+<D:response>
+<D:href>http://localhost/titi</D:href>
+</D:response>
+<D:href>http://localhost/toto</D:href>
+</D:multistatus>"""
+        handler = self._parse_string(example, 'http://localhost/blah')
+        self.assertEqual(['titi'], handler.get_dir_content())

=== modified file 'webdav.py'
--- a/webdav.py	2008-06-07 15:20:20 +0000
+++ b/webdav.py	2008-06-07 17:31:05 +0000
@@ -87,18 +87,19 @@
 import xml.sax
 import xml.sax.handler
 
+
 from bzrlib import (
     errors,
     osutils,
     trace,
     transport,
     )
-
 from bzrlib.transport.http import (
     _urllib,
     _urllib2_wrappers,
     )
 
+
 class DavResponseHandler(xml.sax.handler.ContentHandler):
     """Handle a mutli-status DAV response.
 
@@ -108,18 +109,22 @@
     """
 
     def __init__(self):
+        self.url = None
         self.dir_content = None
         self.elt_stack = None
         self.chars = None
 
+    def set_url(self, url):
+        """Set the url used for error reporting when handling a response."""
+        self.url = url
+
     def startDocument(self):
-        self.dir_content = []
         self.elt_stack = []
 
     def endDocument(self):
-        if self.elt_stack is not None and len(self.elt_stack):
-            # Some xml element wasn't closed properly, the response is invalid
-            self.dir_content = None
+        if self.dir_content is None:
+            raise errors.InvalidHttpResponse(self.url,
+                                             msg='Unknown xml response')
 
     def startElement(self, name, attrs):
         self.elt_stack.append(name)
@@ -127,9 +132,15 @@
             self.chars = []
 
     def endElement(self, name):
-        if name == 'D:href':
+        st = self.elt_stack
+        if (len(st) == 3
+            and st[0] == 'D:multistatus'
+            and st[1] == 'D:response'
+            and name == 'D:href'): # sax guarantees that st[2] is also D:href
+            if self.dir_content is None:
+                self.dir_content = []
             self.dir_content.append(''.join(self.chars))
-            self.chars = None
+        self.chars = None
         self.elt_stack.pop()
 
     def characters(self, chrs):
@@ -166,6 +177,34 @@
         return elements
 
 
+class DavResponseParser(object):
+    """A parser for DAV responses.
+
+    The main aim is to encapsulate sax house keeping and translate exceptions.
+    """
+
+    def __init__(self, handler=None):
+        if handler is None:
+            handler = DavResponseHandler()
+        self.handler = handler
+        self.parser = None
+
+    def parse(self, infile, url):
+        p = self._get_parser()
+        try:
+            p.parse(infile)
+        except xml.sax.SAXParseException, e:
+            raise errors.InvalidHttpResponse(
+                url, msg='Malformed xml response: %s' % e)
+
+    def _get_parser(self):
+        if self.parser is None:
+            parser = xml.sax.make_parser()
+            parser.setContentHandler(self.handler)
+            self.parser = parser
+        return self.parser
+
+
 class PUTRequest(_urllib2_wrappers.Request):
 
     def __init__(self, url, data, more_headers={}, accepted_errors=None):



More information about the bazaar-commits mailing list