Rev 5292: Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Jun 14 18:58:29 BST 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5292 [merge]
revision-id: pqm at pqm.ubuntu.com-20100614175824-nq51rf1uetnut04t
parent: pqm at pqm.ubuntu.com-20100614152210-e4n3ahrxpmwcw6mx
parent: gordon at doxxx.net-20100608013134-xp0vr3g6zy062rrh
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2010-06-14 18:58:24 +0100
message:
Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/test_urlutils.py test_urlutils.py-20060502192900-46b1f9579987cf9c
bzrlib/urlutils.py urlutils.py-20060502195429-e8a161ecf8fac004
=== modified file 'NEWS'
--- a/NEWS 2010-06-11 08:02:42 +0000
+++ b/NEWS 2010-06-14 17:58:24 +0000
@@ -14,6 +14,11 @@
Compatibility Breaks
********************
+* URLs like ``foo:bar/baz`` are now always parsed as a URL with scheme "foo"
+ and path "bar/baz", even if bzr does not recognize "foo" as a known URL
+ scheme. Previously these URLs would be treated as local paths.
+ (Gordon Tyler)
+
New Features
************
@@ -78,6 +83,8 @@
Internals
*********
+* Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs. (Gordon Tyler)
+
Testing
*******
=== modified file 'bzrlib/tests/test_urlutils.py'
--- a/bzrlib/tests/test_urlutils.py 2010-05-27 22:10:42 +0000
+++ b/bzrlib/tests/test_urlutils.py 2010-06-08 01:31:34 +0000
@@ -156,7 +156,7 @@
# Weird stuff
# Can't have slashes or colons in the scheme
test_one('/path/to/://foo', None)
- test_one('path:path://foo', None)
+ test_one('scheme:stuff://foo', ('scheme', 'stuff://foo'))
# Must have more than one character for scheme
test_one('C://foo', None)
test_one('ab://foo', ('ab', 'foo'))
@@ -210,6 +210,8 @@
test('http://foo/bar/baz', 'http://foo', 'bar/baz')
test('http://foo/baz', 'http://foo', 'bar/../baz')
test('http://foo/baz', 'http://foo/bar/', '../baz')
+ test('lp:foo/bar', 'lp:foo', 'bar')
+ test('lp:foo/bar/baz', 'lp:foo', 'bar/baz')
# Absolute paths
test('http://foo', 'http://foo') # abs url with nothing is preserved.
@@ -219,6 +221,9 @@
test('http://bar/', 'http://foo', 'http://bar/')
test('http://bar/a', 'http://foo', 'http://bar/a')
test('http://bar/a/', 'http://foo', 'http://bar/a/')
+ test('lp:bar', 'http://foo', 'lp:bar')
+ test('lp:bar', 'lp:foo', 'lp:bar')
+ test('file:///stuff', 'lp:foo', 'file:///stuff')
# From a base path
test('file:///foo', 'file:///', 'foo')
=== modified file 'bzrlib/urlutils.py'
--- a/bzrlib/urlutils.py 2010-06-02 05:03:31 +0000
+++ b/bzrlib/urlutils.py 2010-06-14 17:58:24 +0000
@@ -101,7 +101,7 @@
first_path_slash = path.find('/')
if first_path_slash == -1:
return len(scheme), None
- return len(scheme), first_path_slash+len(scheme)+3
+ return len(scheme), first_path_slash+m.start('path')
def join(base, *args):
@@ -118,67 +118,26 @@
"""
if not args:
return base
- match = _url_scheme_re.match(base)
- scheme = None
- if match:
- scheme = match.group('scheme')
- path = match.group('path').split('/')
- if path[-1:] == ['']:
- # Strip off a trailing slash
- # This helps both when we are at the root, and when
- # 'base' has an extra slash at the end
- path = path[:-1]
- else:
- path = base.split('/')
-
- if scheme is not None and len(path) >= 1:
- host = path[:1]
- # the path should be represented as an abs path.
- # we know this must be absolute because of the presence of a URL scheme.
- remove_root = True
- path = [''] + path[1:]
- else:
- # create an empty host, but dont alter the path - this might be a
- # relative url fragment.
- host = []
- remove_root = False
-
+ scheme_end, path_start = _find_scheme_and_separator(base)
+ if scheme_end is None and path_start is None:
+ path_start = 0
+ elif path_start is None:
+ path_start = len(base)
+ path = base[path_start:]
for arg in args:
- match = _url_scheme_re.match(arg)
- if match:
- # Absolute URL
- scheme = match.group('scheme')
- # this skips .. normalisation, making http://host/../../..
- # be rather strange.
- path = match.group('path').split('/')
- # set the host and path according to new absolute URL, discarding
- # any previous values.
- # XXX: duplicates mess from earlier in this function. This URL
- # manipulation code needs some cleaning up.
- if scheme is not None and len(path) >= 1:
- host = path[:1]
- path = path[1:]
- # url scheme implies absolute path.
- path = [''] + path
- else:
- # no url scheme we take the path as is.
- host = []
+ arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)
+ if arg_scheme_end is None and arg_path_start is None:
+ arg_path_start = 0
+ elif arg_path_start is None:
+ arg_path_start = len(arg)
+ if arg_scheme_end is not None:
+ base = arg
+ path = arg[arg_path_start:]
+ scheme_end = arg_scheme_end
+ path_start = arg_path_start
else:
- path = '/'.join(path)
path = joinpath(path, arg)
- path = path.split('/')
- if remove_root and path[0:1] == ['']:
- del path[0]
- if host:
- # Remove the leading slash from the path, so long as it isn't also the
- # trailing slash, which we want to keep if present.
- if path and path[0] == '' and len(path) > 1:
- del path[0]
- path = host + path
-
- if scheme is None:
- return '/'.join(path)
- return scheme + '://' + '/'.join(path)
+ return base[:path_start] + path
def joinpath(base, *args):
@@ -303,7 +262,7 @@
MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
-_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
+_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
@@ -339,18 +298,18 @@
:param url: Either a hybrid URL or a local path
:return: A normalized URL which only includes 7-bit ASCII characters.
"""
- m = _url_scheme_re.match(url)
- if not m:
+ scheme_end, path_start = _find_scheme_and_separator(url)
+ if scheme_end is None:
return local_path_to_url(url)
- scheme = m.group('scheme')
- path = m.group('path')
+ prefix = url[:path_start]
+ path = url[path_start:]
if not isinstance(url, unicode):
for c in url:
if c not in _url_safe_characters:
raise errors.InvalidURL(url, 'URLs can only contain specific'
' safe characters (not %r)' % c)
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
- return str(scheme + '://' + ''.join(path))
+ return str(prefix + ''.join(path))
# We have a unicode (hybrid) url
path_chars = list(path)
@@ -362,7 +321,7 @@
['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
path = ''.join(path_chars)
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
- return str(scheme + '://' + path)
+ return str(prefix + path)
def relative_url(base, other):
More information about the bazaar-commits mailing list