[MERGE] HTTP redirections at first branch or bundle access

Vincent Ladeuil v.ladeuil+lp at free.fr
Wed Mar 14 17:38:06 GMT 2007


# Bazaar merge directive format 1
# revision_id: v.ladeuil+lp at free.fr-20070314170125-ywny47g3rz2u4nba
# target_branch: http://bazaar-vcs.org/bzr/bzr.dev/
# testament_sha1: 9ed584970e3740edfcc77420dea1e95e09205dde
# timestamp: 2007-03-14 18:37:57 +0100
# source_branch: http://bazaar.launchpad.net/~bzr/bzr\
#   /bzr.http.redirection
# message: [MERGE] HTTP redirections at first branch or bundle access
# 
=== modified file 'NEWS'
--- NEWS	2007-03-14 15:22:51 +0000
+++ NEWS	2007-03-14 16:57:31 +0000
@@ -16,6 +16,17 @@
 
     * Added support for Putty's SSH implementation. (Dmitry Vasiliev)
 
+    * HTTP redirections are now taken into account when a branch (or a
+      bundle) is accessed for the first time. A message is issued at each
+      redirection to inform the user. In the past, http redirections were
+      silently followed for each request which significantly degraded the
+      performances. The http redirections are not followed anymore by
+      default, instead a RedirectRequested exception is raised. For bzrlib
+      users needing to follow http redirections anyway,
+      bzrlib.transport.do_catching_redirections provide an easy transition
+      path.  
+      (vila)
+
     * Added ``bzr status --versioned`` to report only versioned files, 
       not unknowns. (Kent Gibson)
 
@@ -46,6 +57,9 @@
     * Switch the ``bzr init-repo`` default from --no-trees to --trees. 
       (Wouter van Heyst, #53483)
 
+    * Correctly handles mutiple permanent http redirections.
+     (vila, #88780)
+
     * Take smtp_server from user config into account.
       (vila, #92195)
  

=== modified file 'bzrlib/bundle/__init__.py'
--- bzrlib/bundle/__init__.py	2006-12-07 15:30:26 +0000
+++ bzrlib/bundle/__init__.py	2007-03-01 21:26:57 +0000
@@ -21,8 +21,12 @@
     urlutils,
     )
 from bzrlib.bundle import serializer as _serializer
-from bzrlib.transport import get_transport as _get_transport
+from bzrlib.transport import (
+    do_catching_redirections,
+    get_transport,
+    )
 """)
+from bzrlib.trace import note
 
 
 def read_bundle_from_url(url):
@@ -42,8 +46,25 @@
     # Some transports cannot detect that we are trying to read a
     # directory until we actually issue read() on the handle.
     try:
-        t = _get_transport(url)
-        f = t.get(filename)
+        transport = get_transport(url)
+
+        def get_bundle(transport):
+            return transport.get(filename)
+
+        def redirected_transport(transport, exception, redirection_notice):
+            note(redirection_notice)
+            url, filename = urlutils.split(exception.target,
+                                           exclude_trailing_slash=False)
+            if not filename:
+                raise errors.NotABundle('A directory cannot be a bundle')
+            return get_transport(url)
+
+        try:
+            f = do_catching_redirections(get_bundle, transport,
+                                         redirected_transport)
+        except errors.TooManyRedirections:
+            raise errors.NotABundle(str(url))
+
         return _serializer.read_bundle(f)
     except (errors.TransportError, errors.PathError), e:
         raise errors.NotABundle(str(e))

=== modified file 'bzrlib/bzrdir.py'
--- bzrlib/bzrdir.py	2007-03-14 04:43:45 +0000
+++ bzrlib/bzrdir.py	2007-03-14 16:57:31 +0000
@@ -61,11 +61,17 @@
 from bzrlib.store.text import TextStore
 from bzrlib.store.versioned import WeaveStore
 from bzrlib.transactions import WriteTransaction
-from bzrlib.transport import get_transport
+from bzrlib.transport import (
+    do_catching_redirections,
+    get_transport,
+    )
 from bzrlib.weave import Weave
 """)
 
-from bzrlib.trace import mutter, note
+from bzrlib.trace import (
+    mutter,
+    note,
+    )
 from bzrlib.transport.local import LocalTransport
 
 
@@ -526,7 +532,36 @@
         :param transport: Transport containing the bzrdir.
         :param _unsupported: private.
         """
-        format = BzrDirFormat.find_format(transport)
+        base = transport.base
+
+        def find_format(transport):
+            return transport, BzrDirFormat.find_format(transport)
+
+        def redirected(transport, e, redirection_notice):
+            qualified_source = e.get_source_url()
+            relpath = transport.relpath(qualified_source)
+            if not e.target.endswith(relpath):
+                # Not redirected to a branch-format, not a branch
+                raise errors.NotBranchError(path=e.target)
+            target = e.target[:-len(relpath)]
+            note('%s is%s redirected to %s',
+                 transport.base, e.permanently, target)
+            # Let's try with a new transport
+            qualified_target = e.get_target_url()[:-len(relpath)]
+            # FIXME: If 'transport' has a qualifier, this should
+            # be applied again to the new transport *iff* the
+            # schemes used are the same. It's a bit tricky to
+            # verify, so I'll punt for now
+            # -- vila20070212
+            return get_transport(target)
+
+        try:
+            transport, format = do_catching_redirections(find_format,
+                                                         transport,
+                                                         redirected)
+        except errors.TooManyRedirections:
+            raise errors.NotBranchError(base)
+
         BzrDir._check_supported(format, _unsupported)
         return format.open(transport, _found=True)
 
@@ -1306,7 +1341,7 @@
 
     @classmethod
     def register_control_format(klass, format):
-        """Register a format that does not use '.bzrdir' for its control dir.
+        """Register a format that does not use '.bzr' for its control dir.
 
         TODO: This should be pulled up into a 'ControlDirFormat' base class
         which BzrDirFormat can inherit from, and renamed to register_format 
@@ -1338,10 +1373,6 @@
         klass._control_formats.remove(format)
 
 
-# register BzrDirFormat as a control format
-BzrDirFormat.register_control_format(BzrDirFormat)
-
-
 class BzrDirFormat4(BzrDirFormat):
     """Bzr dir format 4.
 
@@ -1598,6 +1629,10 @@
                                   __set_workingtree_format)
 
 
+# Register bzr control format
+BzrDirFormat.register_control_format(BzrDirFormat)
+
+# Register bzr formats
 BzrDirFormat.register_format(BzrDirFormat4())
 BzrDirFormat.register_format(BzrDirFormat5())
 BzrDirFormat.register_format(BzrDirFormat6())

=== modified file 'bzrlib/errors.py'
--- bzrlib/errors.py	2007-03-13 20:18:05 +0000
+++ bzrlib/errors.py	2007-03-14 16:57:31 +0000
@@ -1275,6 +1275,59 @@
         InvalidHttpResponse.__init__(self, path, msg)
 
 
+class RedirectRequested(TransportError):
+
+    _fmt = '%(source)s is%(permanently)s redirected to %(target)s'
+
+    def __init__(self, source, target, is_permament=False, qual_proto=None):
+        self.source = source
+        self.target = target
+        if is_permament:
+            self.permanently = ' permanently'
+        else:
+            self.permanently = ''
+        self.is_permament = is_permament
+        self._qualified_proto = qual_proto
+        TransportError.__init__(self)
+
+    def _requalify_url(self, url):
+        """Restore the qualified proto in front of the url"""
+        # When this exception is raised, source and target are in
+        # user readable format. But some transports may use a
+        # different proto (http+urllib:// will present http:// to
+        # the user. If a qualified proto is specified, the code
+        # trapping the exception can get the qualified urls to
+        # properly handle the redirection themself (creating a
+        # new transport object from the target url for example).
+        # But checking that the scheme of the original and
+        # redirected urls are the same can be tricky. (see the
+        # FIXME in BzrDir.open_from_transport for the unique use
+        # case so far).
+        if self._qualified_proto is None:
+            return url
+
+        # The TODO related to NotBranchError mention that doing
+        # that kind of manipulation on the urls may not be the
+        # exception object job. On the other hand, this object is
+        # the interface between the code and the user so
+        # presenting the urls in different ways is indeed its
+        # job...
+        import urlparse
+        proto, netloc, path, query, fragment = urlparse.urlsplit(url)
+        return urlparse.urlunsplit((self._qualified_proto, netloc, path,
+                                   query, fragment))
+
+    def get_source_url(self):
+        return self._requalify_url(self.source)
+
+    def get_target_url(self):
+        return self._requalify_url(self.target)
+
+
+class TooManyRedirections(TransportError):
+
+    _fmt = "Too many redirections"
+
 class ConflictsInTree(BzrError):
 
     _fmt = "Working tree has conflicts."

=== modified file 'bzrlib/tests/HTTPTestUtil.py'
--- bzrlib/tests/HTTPTestUtil.py	2006-12-22 09:54:52 +0000
+++ bzrlib/tests/HTTPTestUtil.py	2007-03-14 16:39:24 +0000
@@ -17,6 +17,7 @@
 from cStringIO import StringIO
 import errno
 from SimpleHTTPServer import SimpleHTTPRequestHandler
+import re
 import socket
 import urlparse
 
@@ -120,7 +121,7 @@
         """Hand the request off to a smart server instance."""
         self.send_response(200)
         self.send_header("Content-type", "application/octet-stream")
-        transport = get_transport(self.server.test_case._home_dir)
+        transport = get_transport(self.server.test_case_server._home_dir)
         # TODO: We might like to support streaming responses.  1.0 allows no
         # Content-length in this case, so for integrity we should perform our
         # own chunking within the stream.
@@ -177,9 +178,9 @@
 
 
 class TestCaseWithTwoWebservers(TestCaseWithWebserver):
-    """A support class providinf readonly urls (on two servers) that are http://.
+    """A support class providing readonly urls on two servers that are http://.
 
-    We setup two webservers to allows various tests involving
+    We set up two webservers to allows various tests involving
     proxies or redirections from one server to the other.
     """
     def setUp(self):
@@ -225,4 +226,86 @@
         return TestingHTTPRequestHandler.translate_path(self, path)
 
 
+class RedirectRequestHandler(TestingHTTPRequestHandler):
+    """Redirect all request to the specified server"""
+
+    def parse_request(self):
+        """Redirect a single HTTP request to another host"""
+        valid = TestingHTTPRequestHandler.parse_request(self)
+        if valid:
+            tcs = self.server.test_case_server
+            code, target = tcs.is_redirected(self.path)
+            if code is not None and target is not None:
+                # Redirect as instructed
+                self.send_response(code)
+                self.send_header('Location', target)
+                self.end_headers()
+                return False # The job is done
+            else:
+                # We leave the parent class serve the request
+                pass
+        return valid
+
+
+class HTTPServerRedirecting(HttpServer):
+    """An HttpServer redirecting to another server """
+
+    def __init__(self, request_handler=RedirectRequestHandler):
+        HttpServer.__init__(self, request_handler)
+        # redirections is a list of tuples (source, target, code)
+        # - source is a regexp for the paths requested
+        # - target is a replacement for re.sub describing where
+        #   the request will be redirected
+        # - code is the http error code associated to the
+        #   redirection (301 permanent, 302 temporarry, etc
+        self.redirections = []
+
+    def redirect_to(self, host, port):
+        """Redirect all requests to a specific host:port"""
+        self.redirections = [('(.*)',
+                              r'http://%s:%s\1' % (host, port) ,
+                              301)]
+
+    def is_redirected(self, path):
+        """Is the path redirected by this server.
+
+        :param path: the requested relative path
+
+        :returns: a tuple (code, target) if a matching
+             redirection is found, (None, None) otherwise.
+        """
+        code = None
+        target = None
+        for (rsource, rtarget, rcode) in self.redirections:
+            target, match = re.subn(rsource, rtarget, path)
+            if match:
+                code = rcode
+                break # The first match wins
+            else:
+                target = None
+        return code, target
+
+
+class TestCaseWithRedirectedWebserver(TestCaseWithTwoWebservers):
+   """A support class providing redirections from one server to another.
+
+   We set up two webservers to allows various tests involving
+   redirections.
+   The 'old' server is redirected to the 'new' server.
+   """
+
+   def create_transport_secondary_server(self):
+       """Create the secondary server redirecting to the primary server"""
+       new = self.get_readonly_server()
+       redirecting = HTTPServerRedirecting()
+       redirecting.redirect_to(new.host, new.port)
+       return redirecting
+
+   def setUp(self):
+       super(TestCaseWithRedirectedWebserver, self).setUp()
+       # The redirections will point to the new server
+       self.new_server = self.get_readonly_server()
+       # The requests to the old server will be redirected
+       self.old_server = self.get_secondary_server()
+
 

=== modified file 'bzrlib/tests/HttpServer.py'
--- bzrlib/tests/HttpServer.py	2006-12-13 17:21:18 +0000
+++ bzrlib/tests/HttpServer.py	2007-03-14 16:39:24 +0000
@@ -43,12 +43,13 @@
 class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
 
     def log_message(self, format, *args):
-        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
-                                  self.address_string(),
-                                  self.log_date_time_string(),
-                                  format % args,
-                                  self.headers.get('referer', '-'),
-                                  self.headers.get('user-agent', '-'))
+        tcs = self.server.test_case_server
+        tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
+                self.address_string(),
+                self.log_date_time_string(),
+                format % args,
+                self.headers.get('referer', '-'),
+                self.headers.get('user-agent', '-'))
 
     def handle_one_request(self):
         """Handle a single HTTP request.
@@ -153,7 +154,6 @@
             self.end_headers()
             self.send_range_content(file, start, end - start + 1)
             self.wfile.write("--%s\r\n" % boundary)
-            pass
 
     def do_GET(self):
         """Serve a GET request.
@@ -248,10 +248,16 @@
 
 
 class TestingHTTPServer(BaseHTTPServer.HTTPServer):
-    def __init__(self, server_address, RequestHandlerClass, test_case):
+
+    def __init__(self, server_address, RequestHandlerClass,
+                 test_case_server):
         BaseHTTPServer.HTTPServer.__init__(self, server_address,
-                                                RequestHandlerClass)
-        self.test_case = test_case
+                                           RequestHandlerClass)
+        # test_case_server can be used to communicate between the
+        # tests and the server (or the request handler and the
+        # server), allowing dynamic behaviors to be defined from
+        # the tests cases.
+        self.test_case_server = test_case_server
 
 
 class HttpServer(Server):
@@ -267,18 +273,23 @@
     def __init__(self, request_handler=TestingHTTPRequestHandler):
         Server.__init__(self)
         self.request_handler = request_handler
+        self.host = 'localhost'
+        self.port = 0
+        self._httpd = None
 
     def _get_httpd(self):
-        return TestingHTTPServer(('localhost', 0),
-                                  self.request_handler,
-                                  self)
+        if self._httpd is None:
+            self._httpd = TestingHTTPServer((self.host, self.port),
+                                            self.request_handler,
+                                            self)
+            host, self.port = self._httpd.socket.getsockname()
+        return self._httpd
 
     def _http_start(self):
-        httpd = None
         httpd = self._get_httpd()
-        host, self.port = httpd.socket.getsockname()
-        self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
-                                                      self.port)
+        self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
+                                               self.host,
+                                               self.port)
         self._http_starting.release()
         httpd.socket.settimeout(0.1)
 

=== modified file 'bzrlib/tests/test_bzrdir.py'
--- bzrlib/tests/test_bzrdir.py	2007-03-14 04:43:45 +0000
+++ bzrlib/tests/test_bzrdir.py	2007-03-14 16:57:31 +0000
@@ -36,9 +36,19 @@
                            UnknownFormatError,
                            UnsupportedFormatError,
                            )
-from bzrlib.tests import TestCase, TestCaseWithTransport, test_sftp_transport
+from bzrlib.tests import (
+    TestCase,
+    TestCaseWithTransport,
+    test_sftp_transport
+    )
 from bzrlib.tests.HttpServer import HttpServer
+from bzrlib.tests.HTTPTestUtil import (
+    TestCaseWithTwoWebservers,
+    HTTPServerRedirecting,
+    )
+from bzrlib.tests.test_http import TestWithTransport_pycurl
 from bzrlib.transport import get_transport
+from bzrlib.transport.http._urllib import HttpTransport_urllib
 from bzrlib.transport.memory import MemoryServer
 from bzrlib.repofmt import knitrepo, weaverepo
 
@@ -765,3 +775,72 @@
     def test_open_containing_tree_or_branch(self):
         tree = self.make_branch_and_tree('tree')
         bzrdir.BzrDir.open_containing_tree_or_branch(self.get_url('tree'))
+
+
+class TestHTTPRedirectionLoop(object):
+    """Test redirection loop between two http servers.
+
+    This MUST be used by daughter classes that also inherit from
+    TestCaseWithTwoWebservers.
+
+    We can't inherit directly from TestCaseWithTwoWebservers or the
+    test framework will try to create an instance which cannot
+    run, its implementation being incomplete. 
+    """
+
+    # Should be defined by daughter classes to ensure redirection
+    # still use the same transport implementation (not currently
+    # enforced as it's a bit tricky to get right (see the FIXME
+    # in BzrDir.open_from_transport for the unique use case so
+    # far)
+    _qualifier = None
+
+    def create_transport_readonly_server(self):
+        return HTTPServerRedirecting()
+
+    def create_transport_secondary_server(self):
+        return HTTPServerRedirecting()
+
+    def setUp(self):
+        # Both servers redirect to each server creating a loop
+        super(TestHTTPRedirectionLoop, self).setUp()
+        # The redirections will point to the new server
+        self.new_server = self.get_readonly_server()
+        # The requests to the old server will be redirected
+        self.old_server = self.get_secondary_server()
+        # Configure the redirections
+        self.old_server.redirect_to(self.new_server.host, self.new_server.port)
+        self.new_server.redirect_to(self.old_server.host, self.old_server.port)
+
+    def _qualified_url(self, host, port):
+        return 'http+%s://%s:%s' % (self._qualifier, host, port)
+
+    def test_loop(self):
+        # Starting from either server should loop
+        old_url = self._qualified_url(self.old_server.host, 
+                                      self.old_server.port)
+        oldt = self._transport(old_url)
+        self.assertRaises(errors.NotBranchError,
+                          bzrdir.BzrDir.open_from_transport, oldt)
+        new_url = self._qualified_url(self.new_server.host, 
+                                      self.new_server.port)
+        newt = self._transport(new_url)
+        self.assertRaises(errors.NotBranchError,
+                          bzrdir.BzrDir.open_from_transport, newt)
+
+
+class TestHTTPRedirections_urllib(TestHTTPRedirectionLoop,
+                                  TestCaseWithTwoWebservers):
+    """Tests redirections for urllib implementation"""
+
+    _qualifier = 'urllib'
+    _transport = HttpTransport_urllib
+
+
+
+class TestHTTPRedirections_pycurl(TestWithTransport_pycurl,
+                                  TestHTTPRedirectionLoop,
+                                  TestCaseWithTwoWebservers):
+    """Tests redirections for pycurl implementation"""
+
+    _qualifier = 'pycurl'

=== modified file 'bzrlib/tests/test_http.py'
--- bzrlib/tests/test_http.py	2007-03-05 20:51:42 +0000
+++ bzrlib/tests/test_http.py	2007-03-14 16:39:24 +0000
@@ -26,8 +26,11 @@
 import threading
 
 import bzrlib
-from bzrlib import errors
-from bzrlib import osutils
+from bzrlib import (
+    errors,
+    osutils,
+    urlutils,
+    )
 from bzrlib.tests import (
     TestCase,
     TestSkipped,
@@ -42,20 +45,24 @@
     BadStatusRequestHandler,
     FakeProxyRequestHandler,
     ForbiddenRequestHandler,
+    HTTPServerRedirecting,
     InvalidStatusRequestHandler,
     NoRangeRequestHandler,
     SingleRangeRequestHandler,
+    TestCaseWithRedirectedWebserver,
     TestCaseWithTwoWebservers,
     TestCaseWithWebserver,
     WallRequestHandler,
     )
 from bzrlib.transport import (
+    do_catching_redirections,
     get_transport,
     Transport,
     )
 from bzrlib.transport.http import (
     extract_auth,
     HttpTransportBase,
+    _urllib2_wrappers,
     )
 from bzrlib.transport.http._urllib import HttpTransport_urllib
 
@@ -946,3 +953,195 @@
                         TestRanges,
                         TestCaseWithWebserver):
     """Test the Range header in GET methods for pycurl implementation"""
+
+
+class TestHTTPRedirections(object):
+    """Test redirection between http servers.
+
+    This MUST be used by daughter classes that also inherit from
+    TestCaseWithRedirectedWebserver.
+
+    We can't inherit directly from TestCaseWithTwoWebservers or the
+    test framework will try to create an instance which cannot
+    run, its implementation being incomplete. 
+    """
+
+    def create_transport_secondary_server(self):
+        """Create the secondary server redirecting to the primary server"""
+        new = self.get_readonly_server()
+
+        redirecting = HTTPServerRedirecting()
+        redirecting.redirect_to(new.host, new.port)
+        return redirecting
+
+    def setUp(self):
+        super(TestHTTPRedirections, self).setUp()
+        self.build_tree_contents([('a', '0123456789'),
+                                  ('bundle',
+                                  '# Bazaar revision bundle v0.9\n#\n')
+                                  ],)
+
+        self.old_transport = self._transport(self.old_server.get_url())
+
+    def test_redirected(self):
+        self.assertRaises(errors.RedirectRequested, self.old_transport.get, 'a')
+        t = self._transport(self.new_server.get_url())
+        self.assertEqual('0123456789', t.get('a').read())
+
+    def test_read_redirected_bundle_from_url(self):
+        from bzrlib.bundle import read_bundle_from_url
+        url = self.old_transport.abspath('bundle')
+        bundle = read_bundle_from_url(url)
+        # If read_bundle_from_url was successful we get an empty bundle
+        self.assertEqual([], bundle.revisions)
+
+
+class TestHTTPRedirections_urllib(TestHTTPRedirections,
+                                  TestCaseWithRedirectedWebserver):
+    """Tests redirections for urllib implementation"""
+
+    _transport = HttpTransport_urllib
+
+
+
+class TestHTTPRedirections_pycurl(TestWithTransport_pycurl,
+                                  TestHTTPRedirections,
+                                  TestCaseWithRedirectedWebserver):
+    """Tests redirections for pycurl implementation"""
+
+
+class RedirectedRequest(_urllib2_wrappers.Request):
+    """Request following redirections"""
+
+    init_orig = _urllib2_wrappers.Request.__init__
+
+    def __init__(self, method, url, *args, **kwargs):
+        RedirectedRequest.init_orig(self, method, url, args, kwargs)
+        self.follow_redirections = True
+
+
+class TestHTTPSilentRedirections_urllib(TestCaseWithRedirectedWebserver):
+    """Test redirections provided by urllib.
+
+    http implementations do not redirect silently anymore (they
+    do not redirect at all in fact). The mechanism is still in
+    place at the _urllib2_wrappers.Request level and these tests
+    exercise it.
+
+    For the pycurl implementation
+    the redirection have been deleted as we may deprecate pycurl
+    and I have no place to keep a working implementation.
+    -- vila 20070212
+    """
+
+    _transport = HttpTransport_urllib
+
+    def setUp(self):
+        super(TestHTTPSilentRedirections_urllib, self).setUp()
+        self.setup_redirected_request()
+        self.addCleanup(self.cleanup_redirected_request)
+        self.build_tree_contents([('a','a'),
+                                  ('1/',),
+                                  ('1/a', 'redirected once'),
+                                  ('2/',),
+                                  ('2/a', 'redirected twice'),
+                                  ('3/',),
+                                  ('3/a', 'redirected thrice'),
+                                  ('4/',),
+                                  ('4/a', 'redirected 4 times'),
+                                  ('5/',),
+                                  ('5/a', 'redirected 5 times'),
+                                  ],)
+
+        self.old_transport = self._transport(self.old_server.get_url())
+
+    def setup_redirected_request(self):
+        self.original_class = _urllib2_wrappers.Request
+        _urllib2_wrappers.Request = RedirectedRequest
+
+    def cleanup_redirected_request(self):
+        _urllib2_wrappers.Request = self.original_class
+
+    def create_transport_secondary_server(self):
+        """Create the secondary server, redirections are defined in the tests"""
+        return HTTPServerRedirecting()
+
+    def test_one_redirection(self):
+        t = self.old_transport
+
+        req = RedirectedRequest('GET', t.abspath('a'))
+        req.follow_redirections = True
+        new_prefix = 'http://%s:%s' % (self.new_server.host,
+                                       self.new_server.port)
+        self.old_server.redirections = \
+            [('(.*)', r'%s/1\1' % (new_prefix), 301),]
+        self.assertEquals('redirected once',t._perform(req).read())
+
+    def test_five_redirections(self):
+        t = self.old_transport
+
+        req = RedirectedRequest('GET', t.abspath('a'))
+        req.follow_redirections = True
+        old_prefix = 'http://%s:%s' % (self.old_server.host,
+                                       self.old_server.port)
+        new_prefix = 'http://%s:%s' % (self.new_server.host,
+                                       self.new_server.port)
+        self.old_server.redirections = \
+            [('/1(.*)', r'%s/2\1' % (old_prefix), 302),
+             ('/2(.*)', r'%s/3\1' % (old_prefix), 303),
+             ('/3(.*)', r'%s/4\1' % (old_prefix), 307),
+             ('/4(.*)', r'%s/5\1' % (new_prefix), 301),
+             ('(/[^/]+)', r'%s/1\1' % (old_prefix), 301),
+             ]
+        self.assertEquals('redirected 5 times',t._perform(req).read())
+
+
+class TestDoCatchRedirections(TestCaseWithRedirectedWebserver):
+    """Test transport.do_catching_redirections.
+
+    We arbitrarily choose to use urllib transports
+    """
+
+    _transport = HttpTransport_urllib
+
+    def setUp(self):
+        super(TestDoCatchRedirections, self).setUp()
+        self.build_tree_contents([('a', '0123456789'),],)
+
+        self.old_transport = self._transport(self.old_server.get_url())
+
+    def get_a(self, transport):
+        return transport.get('a')
+
+    def test_no_redirection(self):
+        t = self._transport(self.new_server.get_url())
+
+        # We use None for redirected so that we fail if redirected
+        self.assertEquals('0123456789',
+                          do_catching_redirections(self.get_a, t, None).read())
+
+    def test_one_redirection(self):
+        self.redirections = 0
+
+        def redirected(transport, exception, redirection_notice):
+            self.redirections += 1
+            dir, file = urlutils.split(exception.target)
+            return self._transport(dir)
+
+        self.assertEquals('0123456789',
+                          do_catching_redirections(self.get_a,
+                                                   self.old_transport,
+                                                   redirected
+                                                   ).read())
+        self.assertEquals(1, self.redirections)
+
+    def test_redirection_loop(self):
+
+        def redirected(transport, exception, redirection_notice):
+            # By using the redirected url as a base dir for the
+            # *old* transport, we create a loop: a => a/a =>
+            # a/a/a
+            return self.old_transport.clone(exception.target)
+
+        self.assertRaises(errors.TooManyRedirections, do_catching_redirections,
+                          self.get_a, self.old_transport, redirected)

=== modified file 'bzrlib/tests/test_smart_transport.py'
--- bzrlib/tests/test_smart_transport.py	2006-11-21 01:08:52 +0000
+++ bzrlib/tests/test_smart_transport.py	2007-02-08 14:52:19 +0000
@@ -1478,8 +1478,12 @@
             'Content-Length: 6\r\n'
             '\r\n'
             'hello\n')
-        request_handler = SmartRequestHandler(
-            socket, ('localhost', 80), httpd)
+        # Beware: the ('localhost', 80) below is the
+        # client_address parameter, but we don't have one because
+        # we have defined a socket which is not bound to an
+        # address. The test framework never uses this client
+        # address, so far...
+        request_handler = SmartRequestHandler(socket, ('localhost', 80), httpd)
         response = socket.writefile.getvalue()
         self.assertStartsWith(response, 'HTTP/1.0 200 ')
         # This includes the end of the HTTP headers, and all the body.

=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- bzrlib/tests/test_transport_implementations.py	2007-03-13 05:27:12 +0000
+++ bzrlib/tests/test_transport_implementations.py	2007-03-14 16:57:31 +0000
@@ -42,15 +42,6 @@
 import bzrlib.transport
 
 
-def _append(fn, txt):
-    """Append the given text (file-like object) to the supplied filename."""
-    f = open(fn, 'ab')
-    try:
-        f.write(txt.read())
-    finally:
-        f.close()
-
-
 class TransportTests(TestTransportImplementation):
 
     def check_transport_contents(self, content, transport, relpath):

=== modified file 'bzrlib/transport/__init__.py'
--- bzrlib/transport/__init__.py	2007-01-26 01:23:38 +0000
+++ bzrlib/transport/__init__.py	2007-03-13 15:38:37 +0000
@@ -58,7 +58,11 @@
         zero_eight,
         zero_eleven,
         )
-from bzrlib.trace import mutter, warning
+from bzrlib.trace import (
+    note,
+    mutter,
+    warning,
+    )
 
 # {prefix: [transport_classes]}
 # Transports are inserted onto the list LIFO and tried in order; as a result
@@ -133,7 +137,7 @@
         for factory in factory_list:
             if factory.__module__ == "bzrlib.transport":
                 # this is a lazy load transport, because no real ones
-                # are directlry in bzrlib.transport
+                # are directly in bzrlib.transport
                 modules.add(factory.module)
             else:
                 modules.add(factory.__module__)
@@ -1044,6 +1048,48 @@
     return _try_transport_factories(base, _protocol_handlers[None])[0]
 
 
+def do_catching_redirections(action, transport, redirected):
+    """Execute an action with given transport catching redirections.
+
+    This is a facility provided for callers needing to follow redirections
+    silently. The silence is relative: it is the caller responsability to
+    inform the user about each redirection or only inform the user of a user
+    via the exception parameter.
+
+    :param action: A callable, what the caller want to do while catching
+                  redirections.
+    :param transport: The initial transport used.
+    :param redirected: A callable receiving the redirected transport and the 
+                  RedirectRequested exception.
+
+    :return: Whatever 'action' returns
+    """
+    MAX_REDIRECTIONS = 8
+
+    # If a loop occurs, there is little we can do. So we don't try to detect
+    # them, just getting out if too much redirections occurs. The solution
+    # is outside: where the loop is defined.
+    for redirections in range(MAX_REDIRECTIONS):
+        try:
+            return action(transport)
+        except errors.RedirectRequested, e:
+            redirection_notice = '%s is%s redirected to %s' % (
+                e.source, e.permanently, e.target)
+            transport = redirected(transport, e, redirection_notice)
+    else:
+        # Loop exited without resolving redirect ? Either the
+        # user has kept a very very very old reference or a loop
+        # occurred in the redirections.  Nothing we can cure here:
+        # tell the user. Note that as the user has been informed
+        # about each redirection (it is the caller responsibility
+        # to do that in redirected via the provided
+        # redirection_notice). The caller may provide more
+        # information if needed (like what file or directory we
+        # were trying to act upon when the redirection loop
+        # occurred).
+        raise errors.TooManyRedirections
+
+
 def _try_transport_factories(base, factory_list):
     last_err = None
     for factory in factory_list:
@@ -1165,6 +1211,7 @@
 register_lazy_transport(None, 'bzrlib.transport.local', 'LocalTransport')
 register_lazy_transport('file://', 'bzrlib.transport.local', 'LocalTransport')
 register_lazy_transport('sftp://', 'bzrlib.transport.sftp', 'SFTPTransport')
+# Decorated http transport
 register_lazy_transport('http+urllib://', 'bzrlib.transport.http._urllib',
                         'HttpTransport_urllib')
 register_lazy_transport('https+urllib://', 'bzrlib.transport.http._urllib',
@@ -1173,6 +1220,7 @@
                         'PyCurlTransport')
 register_lazy_transport('https+pycurl://', 'bzrlib.transport.http._pycurl',
                         'PyCurlTransport')
+# Default http transports (last declared wins (if it can be imported))
 register_lazy_transport('http://', 'bzrlib.transport.http._urllib',
                         'HttpTransport_urllib')
 register_lazy_transport('https://', 'bzrlib.transport.http._urllib',

=== modified file 'bzrlib/transport/http/__init__.py'
--- bzrlib/transport/http/__init__.py	2006-12-13 05:07:19 +0000
+++ bzrlib/transport/http/__init__.py	2007-03-13 16:27:30 +0000
@@ -143,7 +143,7 @@
             self._query, self._fragment) = urlparse.urlparse(self.base)
         self._qualified_proto = apparent_proto
         # range hint is handled dynamically throughout the life
-        # of the object. We start by trying mulri-range requests
+        # of the object. We start by trying multi-range requests
         # and if the server returns bougs results, we retry with
         # single range requests and, finally, we forget about
         # range if the server really can't understand. Once
@@ -226,17 +226,15 @@
         code, response_file = self._get(relpath, None)
         return response_file
 
-    def _get(self, relpath, ranges):
+    def _get(self, relpath, ranges, tail_amount=0):
         """Get a file, or part of a file.
 
         :param relpath: Path relative to transport base URL
-        :param byte_range: None to get the whole file;
-            or [(start,end)] to fetch parts of a file.
+        :param ranges: None to get the whole file;
+            or [(start,end)+], a list of tuples to fetch parts of a file.
+        :param tail_amount: The amount to get from the end of the file.
 
         :returns: (http_code, result_file)
-
-        Note that the current http implementations can only fetch one range at
-        a time through this call.
         """
         raise NotImplementedError(self._get)
 

=== modified file 'bzrlib/transport/http/_pycurl.py'
--- bzrlib/transport/http/_pycurl.py	2007-03-01 03:08:30 +0000
+++ bzrlib/transport/http/_pycurl.py	2007-03-13 17:00:20 +0000
@@ -23,6 +23,14 @@
 # whether we expect a particular file will be modified after it's committed.
 # It's probably safer to just always revalidate.  mbp 20060321
 
+# TODO: Some refactoring could be done to avoid the strange idiom
+# used to capture data and headers while setting up the request
+# (and having to pass 'header' to _curl_perform to handle
+# redirections) . This could be achieved by creating a
+# specialized Curl object and returning code, headers and data
+# from _curl_perform.  Not done because we may deprecate pycurl in the
+# future -- vila 20070212
+
 import os
 from cStringIO import StringIO
 import sys
@@ -111,16 +119,19 @@
         # don't want the body - ie just do a HEAD request
         # This means "NO BODY" not 'nobody'
         curl.setopt(pycurl.NOBODY, 1)
+        # But we need headers to handle redirections
+        header = StringIO()
+        curl.setopt(pycurl.HEADERFUNCTION, header.write)
         # In some erroneous cases, pycurl will emit text on
         # stdout if we don't catch it (see InvalidStatus tests
         # for one such occurrence).
         blackhole = StringIO()
         curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
-        self._curl_perform(curl)
+        self._curl_perform(curl, header)
         code = curl.getinfo(pycurl.HTTP_CODE)
         if code == 404: # not found
             return False
-        elif code in (200, 302): # "ok", "found"
+        elif code == 200: # "ok"
             return True
         else:
             self._raise_curl_http_error(curl)
@@ -165,7 +176,7 @@
         """Make a request for the entire file"""
         curl = self._curl
         abspath, data, header = self._setup_get_request(curl, relpath)
-        self._curl_perform(curl)
+        self._curl_perform(curl, header)
 
         code = curl.getinfo(pycurl.HTTP_CODE)
         data.seek(0)
@@ -188,8 +199,9 @@
             # Forget ranges, the server can't handle them
             return self._get_full(relpath)
 
-        self._curl_perform(curl, ['Range: bytes=%s'
-                                  % self.range_header(ranges, tail_amount)])
+        self._curl_perform(curl, header,
+                           ['Range: bytes=%s'
+                            % self.range_header(ranges, tail_amount)])
         data.seek(0)
 
         code = curl.getinfo(pycurl.HTTP_CODE)
@@ -210,7 +222,7 @@
         abspath, data, header = self._setup_request(curl, '.bzr/smart')
         # We override the Expect: header so that pycurl will send the POST
         # body immediately.
-        self._curl_perform(curl,['Expect: '])
+        self._curl_perform(curl, header, ['Expect: '])
         data.seek(0)
         code = curl.getinfo(pycurl.HTTP_CODE)
         headers = _extract_headers(header.getvalue(), abspath)
@@ -238,11 +250,10 @@
         # TODO: maybe include a summary of the pycurl version
         ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
         curl.setopt(pycurl.USERAGENT, ua_str)
-        curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
         if self.cabundle:
             curl.setopt(pycurl.CAINFO, self.cabundle)
 
-    def _curl_perform(self, curl, more_headers=[]):
+    def _curl_perform(self, curl, header, more_headers=[]):
         """Perform curl operation and translate exceptions."""
         try:
             # There's no way in http/1.0 to say "must
@@ -275,6 +286,15 @@
             # jam 20060713 The code didn't use to re-raise the exception here,
             # but that seemed bogus
             raise
+        code = curl.getinfo(pycurl.HTTP_CODE)
+        if code in (301, 302, 303, 307):
+            url = curl.getinfo(pycurl.EFFECTIVE_URL)
+            headers = _extract_headers(header.getvalue(), url)
+            redirected_to = headers['Location']
+            raise errors.RedirectRequested(url,
+                                           redirected_to,
+                                           is_permament=(code == 301),
+                                           qual_proto=self._qualified_proto)
 
 
 def get_test_permutations():

=== modified file 'bzrlib/transport/http/_urllib.py'
--- bzrlib/transport/http/_urllib.py	2006-12-21 13:37:29 +0000
+++ bzrlib/transport/http/_urllib.py	2007-02-12 14:02:01 +0000
@@ -16,8 +16,10 @@
 
 from cStringIO import StringIO
 
-from bzrlib import ui
-from bzrlib.errors import NoSuchFile
+from bzrlib import (
+    ui,
+    errors,
+    )
 from bzrlib.trace import mutter
 from bzrlib.transport import register_urlparse_netloc_protocol
 from bzrlib.transport.http import HttpTransportBase
@@ -107,9 +109,15 @@
             self._user = request.user
             self._password = request.password
 
+        code = response.code
+        if request.follow_redirections is False \
+                and code in (301, 302, 303, 307):
+            raise errors.RedirectRequested(request.get_full_url(),
+                                           request.redirected_to,
+                                           is_permament=(code == 301),
+                                           qual_proto=self._qualified_proto)
+
         if request.redirected_to is not None:
-            # TODO: Update the transport so that subsequent
-            # requests goes directly to the right host
             mutter('redirected from: %s to: %s' % (request.get_full_url(),
                                                    request.redirected_to))
 
@@ -132,7 +140,7 @@
         code = response.code
         if code == 404: # not found
             self._connection.fake_close()
-            raise NoSuchFile(abspath)
+            raise errors.NoSuchFile(abspath)
 
         data = handle_response(abspath, code, response.headers, response)
         # Close response to free the httplib.HTTPConnection pipeline
@@ -171,12 +179,10 @@
         response = self._head(relpath)
 
         code = response.code
-        # FIXME: 302 MAY have been already processed by the
-        # redirection handler
-        if code in (200, 302): # "ok", "found"
+        if code == 200: # "ok",
             return True
         else:
-            assert(code == 404, 'Only 200, 404 or may be 302 are correct')
+            assert(code == 404, 'Only 200 or 404 are correct')
             return False
 
 

=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- bzrlib/transport/http/_urllib2_wrappers.py	2007-02-25 12:10:53 +0000
+++ bzrlib/transport/http/_urllib2_wrappers.py	2007-03-14 16:39:24 +0000
@@ -156,6 +156,8 @@
         # To handle redirections
         self.parent = parent
         self.redirected_to = None
+        # Unless told otherwise, redirections are not followed
+        self.follow_redirections = False
 
     def extract_auth(self, url):
         """Extracts authentification information from url.
@@ -501,11 +503,11 @@
         # of creating a new one, but the urllib2.Request object
         # has a too complicated creation process to provide a
         # simple enough equivalent update process. Instead, when
-        # redirecting, we only update the original request with a
-        # reference to the following request in the redirect
-        # chain.
+        # redirecting, we only update the following request in
+        # the redirect chain with a reference to the parent
+        # request .
 
-        # Some codes make no sense on out context and are treated
+        # Some codes make no sense in our context and are treated
         # as errors:
 
         # 300: Multiple choices for different representations of
@@ -520,11 +522,10 @@
 
         # 306: Unused (if the RFC says so...)
 
-        # FIXME: If the code is 302 and the request is HEAD, we
-        # MAY avoid following the redirections if the intent is
-        # to check the existence, we have a hint that the file
-        # exist, now if we want to be sure, we must follow the
-        # redirection. Let's do that for now.
+        # If the code is 302 and the request is HEAD, some may
+        # think that it is a sufficent hint that the file exists
+        # and that we MAY avoid following the redirections. But
+        # if we want to be sure, we MUST follow them.
 
         if code in (301, 302, 303, 307):
             return Request(req.get_method(),newurl,
@@ -541,7 +542,7 @@
         else:
             raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
 
-    def http_error_30x(self, req, fp, code, msg, headers):
+    def http_error_302(self, req, fp, code, msg, headers):
         """Requests the redirected to URI.
 
         Copied from urllib2 to be able to fake_close the
@@ -561,7 +562,12 @@
         else:
             return
         if self._debuglevel > 0:
-            print 'Redirected to: %s' % newurl
+            print 'Redirected to: %s (followed: %r)' % (newurl,
+                                                        req.follow_redirections)
+        if req.follow_redirections is False:
+            req.redirected_to = newurl
+            return fp
+
         newurl = urlparse.urljoin(req.get_full_url(), newurl)
 
         # This call succeeds or raise an error. urllib2 returns
@@ -590,23 +596,7 @@
 
         return self.parent.open(redirected_req)
 
-    http_error_302 = http_error_303 = http_error_307 = http_error_30x
-
-    def http_error_301(self, req, fp, code, msg, headers):
-        response = self.http_error_30x(req, fp, code, msg, headers)
-        # If one or several 301 response occur during the
-        # redirection chain, we MUST update the original request
-        # to indicate where the URI where finally found.
-
-        original_req = req
-        while original_req.parent is not None:
-            original_req = original_req.parent
-            if original_req.redirected_to is None:
-                # Only the last occurring 301 should be taken
-                # into account i.e. the first occurring here when
-                # redirected_to has not yet been set.
-                original_req.redirected_to = redirected_url
-        return response
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
 
 
 class ProxyHandler(urllib2.ProxyHandler):




More information about the bazaar mailing list