Rev 47: Make the DAV parsing more robust. in http://bazaar.launchpad.net/%7Ebzr/bzr.webdav/webdav
Vincent Ladeuil
v.ladeuil+lp at free.fr
Sat Jun 7 18:31:07 BST 2008
At http://bazaar.launchpad.net/%7Ebzr/bzr.webdav/webdav
------------------------------------------------------------
revno: 47
revision-id: v.ladeuil+lp at free.fr-20080607173105-g5au9vt8txid83ee
parent: v.ladeuil+lp at free.fr-20080607152020-65i1q0ac9f88fvpo
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: webdav
timestamp: Sat 2008-06-07 19:31:05 +0200
message:
Make the DAV parsing more robust.
* test_webdav.py:
(TestDavSaxParser): AAdd tests.
* webdav.py:
(DavResponseHandler.endElement): Be more strict in href
recognition.
(DavResponseParser): New class.
modified:
test_webdav.py test_webdav.py-20060823130244-qvg4wqdodnmf5nhs-1
webdav.py webdav.py-20060816232542-enpjxth2743ttqpq-3
-------------- next part --------------
=== modified file 'test_webdav.py'
--- a/test_webdav.py 2008-06-07 15:13:23 +0000
+++ b/test_webdav.py 2008-06-07 17:31:05 +0000
@@ -41,6 +41,7 @@
from bzrlib import (
+ errors,
tests,
trace,
)
@@ -422,19 +423,16 @@
class TestDavSaxParser(tests.TestCase):
- def _get_handler(self):
- return webdav.DavResponseHandler()
-
- def _get_parser(self, handler):
- parser = xml.sax.make_parser()
- parser.setContentHandler(handler)
- return parser
-
- def _parse_string(self, str):
- handler = self._get_handler()
- parser = self._get_parser(handler)
- parser.parse(StringIO(str))
- return handler
+ def _get_parser(self, handler=None):
+ if handler is None:
+ handler = webdav.DavResponseHandler()
+ return webdav.DavResponseParser(handler)
+
+ def _parse_string(self, str, url):
+ parser = self._get_parser()
+ parser.handler.set_url(url)
+ parser.parse(StringIO(str), url)
+ return parser.handler
def test_apache2_example(self):
example = """<?xml version="1.0" encoding="utf-8"?>
@@ -472,7 +470,7 @@
</D:propstat>
</D:response>
</D:multistatus>"""
- handler = self._parse_string(example)
+ handler = self._parse_string(example, 'http://localhost/blah')
self.assertEqual(['a', 'b', 'c'], handler.get_dir_content())
def test_lighttpd_example(self):
@@ -488,6 +486,42 @@
<D:href>http://localhost/toto</D:href>
</D:response>
</D:multistatus>"""
- handler = self._parse_string(example)
+ handler = self._parse_string(example, 'http://localhost/blah')
self.assertEqual(['titi', 'toto'], handler.get_dir_content())
+ def test_malformed_response(self):
+ # Invalid xml, neither multistatus nor response are properly closed
+ example = """<?xml version="1.0" encoding="utf-8"?>
+<D:multistatus xmlns:D="DAV:" xmlns:ns0="urn:uuid:c2f41010-65b3-11d1-a29f-00aa00c14882/">
+<D:response>
+<D:href>http://localhost/</D:href>"""
+ self.assertRaises(errors.InvalidHttpResponse,
+ self._parse_string, example,
+ 'http://localhost/blah')
+
+ def test_unkown_format_response(self):
+ # Valid but unrelated xml
+ example = """<document/>"""
+ self.assertRaises(errors.InvalidHttpResponse,
+ self._parse_string, example,
+ 'http://localhost/blah')
+
+ def test_incomplete_format_response(self):
+ # The minimal information is present but doesn't conform to RFC 2518
+ # (well, as I understand it since the reference servers disagree on
+ # more than details).
+
+ # The last href below is not enclosed in a response element and is
+ # therefore ignored.
+ example = """<?xml version="1.0" encoding="utf-8"?>
+<D:multistatus xmlns:D="DAV:" xmlns:ns0="urn:uuid:c2f41010-65b3-11d1-a29f-00aa00c14882/">
+<D:response>
+<D:href>http://localhost/</D:href>
+</D:response>
+<D:response>
+<D:href>http://localhost/titi</D:href>
+</D:response>
+<D:href>http://localhost/toto</D:href>
+</D:multistatus>"""
+ handler = self._parse_string(example, 'http://localhost/blah')
+ self.assertEqual(['titi'], handler.get_dir_content())
=== modified file 'webdav.py'
--- a/webdav.py 2008-06-07 15:20:20 +0000
+++ b/webdav.py 2008-06-07 17:31:05 +0000
@@ -87,18 +87,19 @@
import xml.sax
import xml.sax.handler
+
from bzrlib import (
errors,
osutils,
trace,
transport,
)
-
from bzrlib.transport.http import (
_urllib,
_urllib2_wrappers,
)
+
class DavResponseHandler(xml.sax.handler.ContentHandler):
"""Handle a mutli-status DAV response.
@@ -108,18 +109,22 @@
"""
def __init__(self):
+ self.url = None
self.dir_content = None
self.elt_stack = None
self.chars = None
+ def set_url(self, url):
+ """Set the url used for error reporting when handling a response."""
+ self.url = url
+
def startDocument(self):
- self.dir_content = []
self.elt_stack = []
def endDocument(self):
- if self.elt_stack is not None and len(self.elt_stack):
- # Some xml element wasn't closed properly, the response is invalid
- self.dir_content = None
+ if self.dir_content is None:
+ raise errors.InvalidHttpResponse(self.url,
+ msg='Unknown xml response')
def startElement(self, name, attrs):
self.elt_stack.append(name)
@@ -127,9 +132,15 @@
self.chars = []
def endElement(self, name):
- if name == 'D:href':
+ st = self.elt_stack
+ if (len(st) == 3
+ and st[0] == 'D:multistatus'
+ and st[1] == 'D:response'
+ and name == 'D:href'): # sax guarantees that st[2] is also D:href
+ if self.dir_content is None:
+ self.dir_content = []
self.dir_content.append(''.join(self.chars))
- self.chars = None
+ self.chars = None
self.elt_stack.pop()
def characters(self, chrs):
@@ -166,6 +177,34 @@
return elements
+class DavResponseParser(object):
+ """A parser for DAV responses.
+
+ The main aim is to encapsulate sax house keeping and translate exceptions.
+ """
+
+ def __init__(self, handler=None):
+ if handler is None:
+ handler = DavResponseHandler()
+ self.handler = handler
+ self.parser = None
+
+ def parse(self, infile, url):
+ p = self._get_parser()
+ try:
+ p.parse(infile)
+ except xml.sax.SAXParseException, e:
+ raise errors.InvalidHttpResponse(
+ url, msg='Malformed xml response: %s' % e)
+
+ def _get_parser(self):
+ if self.parser is None:
+ parser = xml.sax.make_parser()
+ parser.setContentHandler(self.handler)
+ self.parser = parser
+ return self.parser
+
+
class PUTRequest(_urllib2_wrappers.Request):
def __init__(self, url, data, more_headers={}, accepted_errors=None):
More information about the bazaar-commits
mailing list