Rev 2380: urlutils improvements from hpss. in file:///home/robertc/source/baz/hpss-urlutils/
Robert Collins
robertc at robertcollins.net
Tue Mar 27 22:43:22 BST 2007
At file:///home/robertc/source/baz/hpss-urlutils/
------------------------------------------------------------
revno: 2380
revision-id: robertc at robertcollins.net-20070327214317-2hqvbmbcztm9ycqb
parent: pqm at pqm.ubuntu.com-20070327081802-271be0d343108f4f
committer: Robert Collins <robertc at robertcollins.net>
branch nick: hpss-urlutils
timestamp: Wed 2007-03-28 07:43:17 +1000
message:
urlutils improvements from hpss.
modified:
bzrlib/tests/test_urlutils.py test_urlutils.py-20060502192900-46b1f9579987cf9c
bzrlib/urlutils.py urlutils.py-20060502195429-e8a161ecf8fac004
=== modified file 'bzrlib/tests/test_urlutils.py'
--- a/bzrlib/tests/test_urlutils.py 2007-02-14 10:11:12 +0000
+++ b/bzrlib/tests/test_urlutils.py 2007-03-27 21:43:17 +0000
@@ -197,21 +197,24 @@
joined = urlutils.join(*args)
self.assertEqual(expected, joined)
- # Test a single element
- test('foo', 'foo')
-
# Test relative path joining
+ test('foo', 'foo') # relative fragment with nothing is preserved.
test('foo/bar', 'foo', 'bar')
test('http://foo/bar', 'http://foo', 'bar')
test('http://foo/bar', 'http://foo', '.', 'bar')
test('http://foo/baz', 'http://foo', 'bar', '../baz')
test('http://foo/bar/baz', 'http://foo', 'bar/baz')
test('http://foo/baz', 'http://foo', 'bar/../baz')
+ test('http://foo/baz', 'http://foo/bar/', '../baz')
# Absolute paths
+ test('http://foo', 'http://foo') # abs url with nothing is preserved.
test('http://bar', 'http://foo', 'http://bar')
test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
test('file:///bar', 'foo', 'file:///bar')
+ test('http://bar/', 'http://foo', 'http://bar/')
+ test('http://bar/a', 'http://foo', 'http://bar/a')
+ test('http://bar/a/', 'http://foo', 'http://bar/a/')
# From a base path
test('file:///foo', 'file:///', 'foo')
@@ -221,8 +224,47 @@
# Invalid joinings
# Cannot go above root
+ # Implicitly at root:
self.assertRaises(InvalidURLJoin, urlutils.join,
'http://foo', '../baz')
+ self.assertRaises(InvalidURLJoin, urlutils.join,
+ 'http://foo', '/..')
+ # Joining from a path explicitly under the root.
+ self.assertRaises(InvalidURLJoin, urlutils.join,
+ 'http://foo/a', '../../b')
+
+ def test_joinpath(self):
+ def test(expected, *args):
+ joined = urlutils.joinpath(*args)
+ self.assertEqual(expected, joined)
+
+ # Test a single element
+ test('foo', 'foo')
+
+ # Test relative path joining
+ test('foo/bar', 'foo', 'bar')
+ test('foo/bar', 'foo', '.', 'bar')
+ test('foo/baz', 'foo', 'bar', '../baz')
+ test('foo/bar/baz', 'foo', 'bar/baz')
+ test('foo/baz', 'foo', 'bar/../baz')
+
+ # Test joining to an absolute path
+ test('/foo', '/foo')
+ test('/foo', '/foo', '.')
+ test('/foo/bar', '/foo', 'bar')
+ test('/', '/foo', '..')
+
+ # Test joining with an absolute path
+ test('/bar', 'foo', '/bar')
+
+ # Test joining to a path with a trailing slash
+ test('foo/bar', 'foo/', 'bar')
+
+ # Invalid joinings
+ # Cannot go above root
+ self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '../baz')
+ self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '..')
+ self.assertRaises(InvalidURLJoin, urlutils.joinpath, '/', '/..')
def test_function_type(self):
if sys.platform == 'win32':
=== modified file 'bzrlib/urlutils.py'
--- a/bzrlib/urlutils.py 2007-02-14 10:11:12 +0000
+++ b/bzrlib/urlutils.py 2007-03-27 21:43:17 +0000
@@ -117,11 +117,13 @@
join('http://foo', 'bar') => 'http://foo/bar'
join('http://foo', 'bar', '../baz') => 'http://foo/baz'
"""
- m = _url_scheme_re.match(base)
+ if not args:
+ return base
+ match = _url_scheme_re.match(base)
scheme = None
- if m:
- scheme = m.group('scheme')
- path = m.group('path').split('/')
+ if match:
+ scheme = match.group('scheme')
+ path = match.group('path').split('/')
if path[-1:] == ['']:
# Strip off a trailing slash
# This helps both when we are at the root, and when
@@ -130,33 +132,89 @@
else:
path = base.split('/')
+ if scheme is not None and len(path) >= 1:
+ host = path[:1]
+ # the path should be represented as an abs path.
+ # we know this must be absolute because of the presence of a URL scheme.
+ remove_root = True
+ path = [''] + path[1:]
+ else:
+ # create an empty host, but dont alter the path - this might be a
+ # relative url fragment.
+ host = []
+ remove_root = False
+
for arg in args:
- m = _url_scheme_re.match(arg)
- if m:
+ match = _url_scheme_re.match(arg)
+ if match:
# Absolute URL
- scheme = m.group('scheme')
+ scheme = match.group('scheme')
# this skips .. normalisation, making http://host/../../..
# be rather strange.
- path = m.group('path').split('/')
+ path = match.group('path').split('/')
+ # set the host and path according to new absolute URL, discarding
+ # any previous values.
+ # XXX: duplicates mess from earlier in this function. This URL
+ # manipulation code needs some cleaning up.
+ if scheme is not None and len(path) >= 1:
+ host = path[:1]
+ path = path[1:]
+ # url scheme implies absolute path.
+ path = [''] + path
+ else:
+ # no url scheme we take the path as is.
+ host = []
else:
- for chunk in arg.split('/'):
- if chunk == '.':
- continue
- elif chunk == '..':
- if len(path) >= 2:
- # Don't pop off the host portion
- path.pop()
- else:
- raise errors.InvalidURLJoin('Cannot go above root',
- base, args)
- else:
- path.append(chunk)
+ path = '/'.join(path)
+ path = joinpath(path, arg)
+ path = path.split('/')
+ if remove_root and path[0:1] == ['']:
+ del path[0]
+ if host:
+ # Remove the leading slash from the path, so long as it isn't also the
+ # trailing slash, which we want to keep if present.
+ if path and path[0] == '' and len(path) > 1:
+ del path[0]
+ path = host + path
if scheme is None:
return '/'.join(path)
return scheme + '://' + '/'.join(path)
+def joinpath(base, *args):
+ """Join URL path segments to a URL path segment.
+
+ This is somewhat like osutils.joinpath, but intended for URLs.
+
+ XXX: this duplicates some normalisation logic, and also duplicates a lot of
+ path handling logic that already exists in some Transport implementations.
+ We really should try to have exactly one place in the code base responsible
+ for combining paths of URLs.
+ """
+ path = base.split('/')
+ if len(path) > 1 and path[-1] == '':
+ #If the path ends in a trailing /, remove it.
+ path.pop()
+ for arg in args:
+ if arg.startswith('/'):
+ path = []
+ for chunk in arg.split('/'):
+ if chunk == '.':
+ continue
+ elif chunk == '..':
+ if path == ['']:
+ raise errors.InvalidURLJoin('Cannot go above root',
+ base, args)
+ path.pop()
+ else:
+ path.append(chunk)
+ if path == ['']:
+ return '/'
+ else:
+ return '/'.join(path)
+
+
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
def _posix_local_path_from_url(url):
"""Convert a url like file:///path/to/foo into /path/to/foo"""
More information about the bazaar-commits
mailing list