Rev 5292: Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Mon Jun 14 18:58:29 BST 2010

At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5292 [merge]
revision-id: pqm at pqm.ubuntu.com-20100614175824-nq51rf1uetnut04t
parent: pqm at pqm.ubuntu.com-20100614152210-e4n3ahrxpmwcw6mx
parent: gordon at doxxx.net-20100608013134-xp0vr3g6zy062rrh
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2010-06-14 18:58:24 +0100
message:
  Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/tests/test_urlutils.py  test_urlutils.py-20060502192900-46b1f9579987cf9c
  bzrlib/urlutils.py             urlutils.py-20060502195429-e8a161ecf8fac004
=== modified file 'NEWS'

--- a/NEWS	2010-06-11 08:02:42 +0000
+++ b/NEWS	2010-06-14 17:58:24 +0000
@@ -14,6 +14,11 @@
 Compatibility Breaks
 ********************
 
+* URLs like ``foo:bar/baz`` are now always parsed as a URL with scheme "foo"
+  and path "bar/baz", even if bzr does not recognize "foo" as a known URL
+  scheme.  Previously these URLs would be treated as local paths.
+  (Gordon Tyler)
+
 New Features
 ************
 
@@ -78,6 +83,8 @@
 Internals
 *********
 
+* Improved ``bzrlib.urlutils`` to handle lp:foo/bar URLs. (Gordon Tyler)
+
 Testing
 *******
 

=== modified file 'bzrlib/tests/test_urlutils.py'
--- a/bzrlib/tests/test_urlutils.py	2010-05-27 22:10:42 +0000
+++ b/bzrlib/tests/test_urlutils.py	2010-06-08 01:31:34 +0000
@@ -156,7 +156,7 @@
         # Weird stuff
         # Can't have slashes or colons in the scheme
         test_one('/path/to/://foo', None)
-        test_one('path:path://foo', None)
+        test_one('scheme:stuff://foo', ('scheme', 'stuff://foo'))
         # Must have more than one character for scheme
         test_one('C://foo', None)
         test_one('ab://foo', ('ab', 'foo'))
@@ -210,6 +210,8 @@
         test('http://foo/bar/baz', 'http://foo', 'bar/baz')
         test('http://foo/baz', 'http://foo', 'bar/../baz')
         test('http://foo/baz', 'http://foo/bar/', '../baz')
+        test('lp:foo/bar', 'lp:foo', 'bar')
+        test('lp:foo/bar/baz', 'lp:foo', 'bar/baz')
 
         # Absolute paths
         test('http://foo', 'http://foo') # abs url with nothing is preserved.
@@ -219,6 +221,9 @@
         test('http://bar/', 'http://foo', 'http://bar/')
         test('http://bar/a', 'http://foo', 'http://bar/a')
         test('http://bar/a/', 'http://foo', 'http://bar/a/')
+        test('lp:bar', 'http://foo', 'lp:bar')
+        test('lp:bar', 'lp:foo', 'lp:bar')
+        test('file:///stuff', 'lp:foo', 'file:///stuff')
 
         # From a base path
         test('file:///foo', 'file:///', 'foo')

=== modified file 'bzrlib/urlutils.py'
--- a/bzrlib/urlutils.py	2010-06-02 05:03:31 +0000
+++ b/bzrlib/urlutils.py	2010-06-14 17:58:24 +0000
@@ -101,7 +101,7 @@
     first_path_slash = path.find('/')
     if first_path_slash == -1:
         return len(scheme), None
-    return len(scheme), first_path_slash+len(scheme)+3
+    return len(scheme), first_path_slash+m.start('path')
 
 
 def join(base, *args):
@@ -118,67 +118,26 @@
     """
     if not args:
         return base
-    match = _url_scheme_re.match(base)
-    scheme = None
-    if match:
-        scheme = match.group('scheme')
-        path = match.group('path').split('/')
-        if path[-1:] == ['']:
-            # Strip off a trailing slash
-            # This helps both when we are at the root, and when
-            # 'base' has an extra slash at the end
-            path = path[:-1]
-    else:
-        path = base.split('/')
-
-    if scheme is not None and len(path) >= 1:
-        host = path[:1]
-        # the path should be represented as an abs path.
-        # we know this must be absolute because of the presence of a URL scheme.
-        remove_root = True
-        path = [''] + path[1:]
-    else:
-        # create an empty host, but dont alter the path - this might be a
-        # relative url fragment.
-        host = []
-        remove_root = False
-
+    scheme_end, path_start = _find_scheme_and_separator(base)
+    if scheme_end is None and path_start is None:
+        path_start = 0
+    elif path_start is None:
+        path_start = len(base)
+    path = base[path_start:]
     for arg in args:
-        match = _url_scheme_re.match(arg)
-        if match:
-            # Absolute URL
-            scheme = match.group('scheme')
-            # this skips .. normalisation, making http://host/../../..
-            # be rather strange.
-            path = match.group('path').split('/')
-            # set the host and path according to new absolute URL, discarding
-            # any previous values.
-            # XXX: duplicates mess from earlier in this function.  This URL
-            # manipulation code needs some cleaning up.
-            if scheme is not None and len(path) >= 1:
-                host = path[:1]
-                path = path[1:]
-                # url scheme implies absolute path.
-                path = [''] + path
-            else:
-                # no url scheme we take the path as is.
-                host = []
+        arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)
+        if arg_scheme_end is None and arg_path_start is None:
+            arg_path_start = 0
+        elif arg_path_start is None:
+            arg_path_start = len(arg)
+        if arg_scheme_end is not None:
+            base = arg
+            path = arg[arg_path_start:]
+            scheme_end = arg_scheme_end
+            path_start = arg_path_start
         else:
-            path = '/'.join(path)
             path = joinpath(path, arg)
-            path = path.split('/')
-    if remove_root and path[0:1] == ['']:
-        del path[0]
-    if host:
-        # Remove the leading slash from the path, so long as it isn't also the
-        # trailing slash, which we want to keep if present.
-        if path and path[0] == '' and len(path) > 1:
-            del path[0]
-        path = host + path
-
-    if scheme is None:
-        return '/'.join(path)
-    return scheme + '://' + '/'.join(path)
+    return base[:path_start] + path
 
 
 def joinpath(base, *args):
@@ -303,7 +262,7 @@
     MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
 
 
-_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
+_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
 _url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
 
 
@@ -339,18 +298,18 @@
     :param url: Either a hybrid URL or a local path
     :return: A normalized URL which only includes 7-bit ASCII characters.
     """
-    m = _url_scheme_re.match(url)
-    if not m:
+    scheme_end, path_start = _find_scheme_and_separator(url)
+    if scheme_end is None:
         return local_path_to_url(url)
-    scheme = m.group('scheme')
-    path = m.group('path')
+    prefix = url[:path_start]
+    path = url[path_start:]
     if not isinstance(url, unicode):
         for c in url:
             if c not in _url_safe_characters:
                 raise errors.InvalidURL(url, 'URLs can only contain specific'
                                             ' safe characters (not %r)' % c)
         path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
-        return str(scheme + '://' + ''.join(path))
+        return str(prefix + ''.join(path))
 
     # We have a unicode (hybrid) url
     path_chars = list(path)
@@ -362,7 +321,7 @@
                 ['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
     path = ''.join(path_chars)
     path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
-    return str(scheme + '://' + path)
+    return str(prefix + path)
 
 
 def relative_url(base, other):