Rev 2380: urlutils improvements from hpss. in file:///home/robertc/source/baz/hpss-urlutils/

Robert Collins robertc at robertcollins.net
Tue Mar 27 22:43:22 BST 2007


At file:///home/robertc/source/baz/hpss-urlutils/

------------------------------------------------------------
revno: 2380
revision-id: robertc at robertcollins.net-20070327214317-2hqvbmbcztm9ycqb
parent: pqm at pqm.ubuntu.com-20070327081802-271be0d343108f4f
committer: Robert Collins <robertc at robertcollins.net>
branch nick: hpss-urlutils
timestamp: Wed 2007-03-28 07:43:17 +1000
message:
  urlutils improvements from hpss.
modified:
  bzrlib/tests/test_urlutils.py  test_urlutils.py-20060502192900-46b1f9579987cf9c
  bzrlib/urlutils.py             urlutils.py-20060502195429-e8a161ecf8fac004
=== modified file 'bzrlib/tests/test_urlutils.py'
--- a/bzrlib/tests/test_urlutils.py	2007-02-14 10:11:12 +0000
+++ b/bzrlib/tests/test_urlutils.py	2007-03-27 21:43:17 +0000
@@ -197,21 +197,24 @@
             joined = urlutils.join(*args)
             self.assertEqual(expected, joined)
 
-        # Test a single element
-        test('foo', 'foo')
-
         # Test relative path joining
+        test('foo', 'foo') # relative fragment with nothing is preserved.
         test('foo/bar', 'foo', 'bar')
         test('http://foo/bar', 'http://foo', 'bar')
         test('http://foo/bar', 'http://foo', '.', 'bar')
         test('http://foo/baz', 'http://foo', 'bar', '../baz')
         test('http://foo/bar/baz', 'http://foo', 'bar/baz')
         test('http://foo/baz', 'http://foo', 'bar/../baz')
+        test('http://foo/baz', 'http://foo/bar/', '../baz')
 
         # Absolute paths
+        test('http://foo', 'http://foo') # abs url with nothing is preserved.
         test('http://bar', 'http://foo', 'http://bar')
         test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
         test('file:///bar', 'foo', 'file:///bar')
+        test('http://bar/', 'http://foo', 'http://bar/')
+        test('http://bar/a', 'http://foo', 'http://bar/a')
+        test('http://bar/a/', 'http://foo', 'http://bar/a/')
 
         # From a base path
         test('file:///foo', 'file:///', 'foo')
@@ -221,8 +224,47 @@
         
         # Invalid joinings
         # Cannot go above root
+        # Implicitly at root:
         self.assertRaises(InvalidURLJoin, urlutils.join,
                 'http://foo', '../baz')
+        self.assertRaises(InvalidURLJoin, urlutils.join,
+                'http://foo', '/..')
+        # Joining from a path explicitly under the root.
+        self.assertRaises(InvalidURLJoin, urlutils.join,
+                'http://foo/a', '../../b')
+
+    def test_joinpath(self):
+        def test(expected, *args):
+            joined = urlutils.joinpath(*args)
+            self.assertEqual(expected, joined)
+
+        # Test a single element
+        test('foo', 'foo')
+
+        # Test relative path joining
+        test('foo/bar', 'foo', 'bar')
+        test('foo/bar', 'foo', '.', 'bar')
+        test('foo/baz', 'foo', 'bar', '../baz')
+        test('foo/bar/baz', 'foo', 'bar/baz')
+        test('foo/baz', 'foo', 'bar/../baz')
+
+        # Test joining to an absolute path
+        test('/foo', '/foo')
+        test('/foo', '/foo', '.')
+        test('/foo/bar', '/foo', 'bar')
+        test('/', '/foo', '..')
+
+        # Test joining with an absolute path
+        test('/bar', 'foo', '/bar')
+
+        # Test joining to a path with a trailing slash
+        test('foo/bar', 'foo/', 'bar')
+        
+        # Invalid joinings
+        # Cannot go above root
+        self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '../baz')
+        self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '..')
+        self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '/..')
 
     def test_function_type(self):
         if sys.platform == 'win32':

=== modified file 'bzrlib/urlutils.py'
--- a/bzrlib/urlutils.py	2007-02-14 10:11:12 +0000
+++ b/bzrlib/urlutils.py	2007-03-27 21:43:17 +0000
@@ -117,11 +117,13 @@
         join('http://foo', 'bar') => 'http://foo/bar'
         join('http://foo', 'bar', '../baz') => 'http://foo/baz'
     """
-    m = _url_scheme_re.match(base)
+    if not args:
+        return base
+    match = _url_scheme_re.match(base)
     scheme = None
-    if m:
-        scheme = m.group('scheme')
-        path = m.group('path').split('/')
+    if match:
+        scheme = match.group('scheme')
+        path = match.group('path').split('/')
         if path[-1:] == ['']:
             # Strip off a trailing slash
             # This helps both when we are at the root, and when
@@ -130,33 +132,89 @@
     else:
         path = base.split('/')
 
+    if scheme is not None and len(path) >= 1:
+        host = path[:1]
+        # the path should be represented as an abs path.
+        # we know this must be absolute because of the presence of a URL scheme.
+        remove_root = True
+        path = [''] + path[1:]
+    else:
+        # create an empty host, but dont alter the path - this might be a
+        # relative url fragment.
+        host = []
+        remove_root = False
+
     for arg in args:
-        m = _url_scheme_re.match(arg)
-        if m:
+        match = _url_scheme_re.match(arg)
+        if match:
             # Absolute URL
-            scheme = m.group('scheme')
+            scheme = match.group('scheme')
             # this skips .. normalisation, making http://host/../../..
             # be rather strange.
-            path = m.group('path').split('/')
+            path = match.group('path').split('/')
+            # set the host and path according to new absolute URL, discarding
+            # any previous values.
+            # XXX: duplicates mess from earlier in this function.  This URL
+            # manipulation code needs some cleaning up.
+            if scheme is not None and len(path) >= 1:
+                host = path[:1]
+                path = path[1:]
+                # url scheme implies absolute path.
+                path = [''] + path
+            else:
+                # no url scheme we take the path as is.
+                host = []
         else:
-            for chunk in arg.split('/'):
-                if chunk == '.':
-                    continue
-                elif chunk == '..':
-                    if len(path) >= 2:
-                        # Don't pop off the host portion
-                        path.pop()
-                    else:
-                        raise errors.InvalidURLJoin('Cannot go above root',
-                                base, args)
-                else:
-                    path.append(chunk)
+            path = '/'.join(path)
+            path = joinpath(path, arg)
+            path = path.split('/')
+    if remove_root and path[0:1] == ['']:
+        del path[0]
+    if host:
+        # Remove the leading slash from the path, so long as it isn't also the
+        # trailing slash, which we want to keep if present.
+        if path and path[0] == '' and len(path) > 1:
+            del path[0]
+        path = host + path
 
     if scheme is None:
         return '/'.join(path)
     return scheme + '://' + '/'.join(path)
 
 
+def joinpath(base, *args):
+    """Join URL path segments to a URL path segment.
+    
+    This is somewhat like osutils.joinpath, but intended for URLs.
+
+    XXX: this duplicates some normalisation logic, and also duplicates a lot of
+    path handling logic that already exists in some Transport implementations.
+    We really should try to have exactly one place in the code base responsible
+    for combining paths of URLs.
+    """
+    path = base.split('/')
+    if len(path) > 1 and path[-1] == '':
+        #If the path ends in a trailing /, remove it.
+        path.pop()
+    for arg in args:
+        if arg.startswith('/'):
+            path = []
+        for chunk in arg.split('/'):
+            if chunk == '.':
+                continue
+            elif chunk == '..':
+                if path == ['']:
+                    raise errors.InvalidURLJoin('Cannot go above root',
+                            base, args)
+                path.pop()
+            else:
+                path.append(chunk)
+    if path == ['']:
+        return '/'
+    else:
+        return '/'.join(path)
+
+
 # jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
 def _posix_local_path_from_url(url):
     """Convert a url like file:///path/to/foo into /path/to/foo"""



More information about the bazaar-commits mailing list