Rev 3880: Fix bug #265070 by providing a finer sieve for accepted redirections. in lp:~vila/bzr/303959-redirection

Thu Dec 4 11:25:59 GMT 2008

At lp:~vila/bzr/303959-redirection

------------------------------------------------------------
revno: 3880
revision-id: v.ladeuil+lp at free.fr-20081204112556-g3i9tf396wygm1b7
parent: v.ladeuil+lp at free.fr-20081203164038-5ghthsgdei3ank1j
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 303959-redirection
timestamp: Thu 2008-12-04 12:25:56 +0100
message:
  Fix bug #265070 by providing a finer sieve for accepted redirections.
  
  * bzrlib/transport/http/_urllib.py:
  (HttpTransport_urllib.__init__): Provides the implementation name.
  
  * bzrlib/transport/http/_pycurl.py:
  (PyCurlTransport.__init__): Provides the implementation name.
  
  * bzrlib/transport/http/__init__.py:
  (HttpTransportBase.__init__): Make the implementation a mandatory
  parameter.
  (HttpTransportBase._redirected_to.relpath): We need a dedicated
  relpath method to be able to cope with slight differences between
  the url sent and the url received from the server.
  (HttpTransportBase._redirected_to): Fix bug #265070 by accepting
  more redirection kinds and handling them: http <-> https and user
  specified in the url.
  
  * bzrlib/tests/test_http.py:
  (Test_redirected_to): More complete tests for _redirected_to().
modified:
  bzrlib/tests/test_http.py      testhttp.py-20051018020158-b2eef6e867c514d9
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/http/_pycurl.py pycurlhttp.py-20060110060940-4e2a705911af77a6
  bzrlib/transport/http/_urllib.py _urlgrabber.py-20060113083826-0bbf7d992fbf090c
-------------- next part --------------
=== modified file 'bzrlib/tests/test_http.py'

--- a/bzrlib/tests/test_http.py	2008-12-03 16:40:38 +0000
+++ b/bzrlib/tests/test_http.py	2008-12-04 11:25:56 +0000
@@ -120,6 +120,7 @@
     t_adapter = TransportAdapter()
     t_classes= (TestHttpTransportRegistration,
                 TestHttpTransportUrls,
+                Test_redirected_to,
                 )
     is_testing_for_transports = tests.condition_isinstance(t_classes)
 
@@ -1743,3 +1744,47 @@
                           t.get_smart_medium().send_http_smart_request,
                           'whatever')
 
+class Test_redirected_to(tests.TestCase):
+
+    def test_redirected_to_subdir(self):
+        t = self._transport('http://www.example.com/foo')
+        e = errors.RedirectRequested('http://www.example.com/foo',
+                                     'http://www.example.com/foo/subdir',
+                                     self._qualified_prefix)
+        r = t._redirected_to(e)
+        self.assertIsInstance(r, type(t))
+        # Both transports share the some connection
+        self.assertEquals(t._get_connection(), r._get_connection())
+
+    def test_redirected_to_host(self):
+        t = self._transport('http://www.example.com/foo')
+        e = errors.RedirectRequested('http://www.example.com/foo',
+                                     'http://foo.example.com/foo/subdir',
+                                     self._qualified_prefix)
+        r = t._redirected_to(e)
+        self.assertIsInstance(r, type(t))
+
+    def test_redirected_to_same_host_sibling_protocol(self):
+        t = self._transport('http://www.example.com/foo')
+        e = errors.RedirectRequested('http://www.example.com/foo',
+                                     'https://www.example.com/foo',
+                                     self._qualified_prefix)
+        r = t._redirected_to(e)
+        self.assertIsInstance(r, type(t))
+
+    def test_redirected_to_same_host_different_protocol(self):
+        t = self._transport('http://www.example.com/foo')
+        e = errors.RedirectRequested('http://www.example.com/foo',
+                                     'ftp://www.example.com/foo',
+                                     self._qualified_prefix)
+        r = t._redirected_to(e)
+        self.assertNotEquals(type(r), type(t))
+
+    def test_redirected_to_different_host_same_user(self):
+        t = self._transport('http://joe@www.example.com/foo')
+        e = errors.RedirectRequested('http://www.example.com/foo',
+                                     'https://foo.example.com/foo',
+                                     self._qualified_prefix)
+        r = t._redirected_to(e)
+        self.assertIsInstance(r, type(t))
+        self.assertEquals(t._user, r._user)

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2008-12-03 16:40:38 +0000
+++ b/bzrlib/transport/http/__init__.py	2008-12-04 11:25:56 +0000
@@ -93,16 +93,13 @@
     # _unqualified_scheme: "http" or "https"
     # _scheme: may have "+pycurl", etc
 
-    def __init__(self, base, _from_transport=None):
+    def __init__(self, base, _impl_name, _from_transport=None):
         """Set the base path where files will be stored."""
         proto_match = re.match(r'^(https?)(\+\w+)?://', base)
         if not proto_match:
             raise AssertionError("not a http url: %r" % base)
         self._unqualified_scheme = proto_match.group(1)
-        impl_name = proto_match.group(2)
-        if impl_name:
-            impl_name = impl_name[1:]
-        self._impl_name = impl_name
+        self._impl_name = _impl_name
         super(HttpTransportBase, self).__init__(base,
                                                 _from_transport=_from_transport)
         self._medium = None
@@ -517,7 +514,23 @@
 
         :returns: A transport or None.
         """
-        relpath = self.relpath(exception.get_source_url())
+        def relpath(abspath):
+            """Returns the path relative to our base.
+
+            The constraints are weaker than the real relpath method because the
+            abspath is coming from the server and may slightly differ from our
+            base. We don't check the scheme, host, port, user, password parts,
+            relying on the caller to give us a proper url (i.e. one returned by
+            the server mirroring the one we sent).
+            """
+            (scheme,
+             user, password,
+             host, port,
+             path) = self._split_url(abspath)
+            pl = len(self._path)
+            return path[pl:].strip('/')
+
+        relpath = relpath(exception.source)
         if not exception.target.endswith(relpath):
             # The final part of the url has been renamed, we can't handle the
             # redirection.
@@ -531,18 +544,23 @@
         # redirected tranport will be used to re-try whatever request was
         # redirected, we end up with the same url
         base_path = path[:-len(relpath)]
-        if scheme == self._unqualified_scheme:
+        if scheme in ('http', 'https'):
             # Same protocol (i.e. http[s])
-            if (host == self._host
+            if (scheme == self._unqualified_scheme
+                and host == self._host
                 and port == self._port
                 and (user is None or user == self._user)):
                 # If a user is specified, it should match, we don't care about
                 # passwords, wrong passwords will be rejected anyway.
                 new_transport = self.clone(base_path)
             else:
-                # Rebuild the url preserving the qualified scheme
-                new_url = self._unsplit_url(self._scheme,
-                                            user, password,
+                # Rebuild the url preserving the scheme qualification and the
+                # credentials (if they don't apply, the redirected to server
+                # will tell us, but if they do apply, we avoid prompting the
+                # user)
+                redir_scheme = scheme + '+' + self._impl_name
+                new_url = self._unsplit_url(redir_scheme,
+                                            self._user, self._password,
                                             host, port,
                                             base_path)
                 new_transport = get_transport(new_url)

=== modified file 'bzrlib/transport/http/_pycurl.py'
--- a/bzrlib/transport/http/_pycurl.py	2008-08-29 07:40:35 +0000
+++ b/bzrlib/transport/http/_pycurl.py	2008-12-04 11:25:56 +0000
@@ -106,8 +106,9 @@
 
     def __init__(self, base, _from_transport=None):
         super(PyCurlTransport, self).__init__(base,
-                                              _from_transport=_from_transport)
-        if base.startswith('https'):
+                                              _from_transport=_from_transport,
+                                              _impl_name='pycurl')
+        if self._unqualified_scheme == 'https':
             # Check availability of https into pycurl supported
             # protocols
             supported = pycurl.version_info()[8]

=== modified file 'bzrlib/transport/http/_urllib.py'
--- a/bzrlib/transport/http/_urllib.py	2008-05-29 06:43:43 +0000
+++ b/bzrlib/transport/http/_urllib.py	2008-12-04 11:25:56 +0000
@@ -43,7 +43,7 @@
 
     def __init__(self, base, _from_transport=None):
         super(HttpTransport_urllib, self).__init__(
-            base, _from_transport=_from_transport)
+            base, _from_transport=_from_transport, _impl_name='urllib')
         if _from_transport is not None:
             self._opener = _from_transport._opener
         else: