[MERGE] HTTP redirections at first branch or bundle access
Vincent Ladeuil
v.ladeuil+lp at free.fr
Wed Mar 14 17:38:06 GMT 2007
# Bazaar merge directive format 1
# revision_id: v.ladeuil+lp at free.fr-20070314170125-ywny47g3rz2u4nba
# target_branch: http://bazaar-vcs.org/bzr/bzr.dev/
# testament_sha1: 9ed584970e3740edfcc77420dea1e95e09205dde
# timestamp: 2007-03-14 18:37:57 +0100
# source_branch: http://bazaar.launchpad.net/~bzr/bzr\
# /bzr.http.redirection
# message: [MERGE] HTTP redirections at first branch or bundle access
#
=== modified file 'NEWS'
--- NEWS 2007-03-14 15:22:51 +0000
+++ NEWS 2007-03-14 16:57:31 +0000
@@ -16,6 +16,17 @@
* Added support for Putty's SSH implementation. (Dmitry Vasiliev)
+ * HTTP redirections are now taken into account when a branch (or a
+ bundle) is accessed for the first time. A message is issued at each
+ redirection to inform the user. In the past, http redirections were
+ silently followed for each request which significantly degraded the
+ performances. The http redirections are not followed anymore by
+ default, instead a RedirectRequested exception is raised. For bzrlib
+ users needing to follow http redirections anyway,
+ bzrlib.transport.do_catching_redirections provide an easy transition
+ path.
+ (vila)
+
* Added ``bzr status --versioned`` to report only versioned files,
not unknowns. (Kent Gibson)
@@ -46,6 +57,9 @@
* Switch the ``bzr init-repo`` default from --no-trees to --trees.
(Wouter van Heyst, #53483)
+ * Correctly handles mutiple permanent http redirections.
+ (vila, #88780)
+
* Take smtp_server from user config into account.
(vila, #92195)
=== modified file 'bzrlib/bundle/__init__.py'
--- bzrlib/bundle/__init__.py 2006-12-07 15:30:26 +0000
+++ bzrlib/bundle/__init__.py 2007-03-01 21:26:57 +0000
@@ -21,8 +21,12 @@
urlutils,
)
from bzrlib.bundle import serializer as _serializer
-from bzrlib.transport import get_transport as _get_transport
+from bzrlib.transport import (
+ do_catching_redirections,
+ get_transport,
+ )
""")
+from bzrlib.trace import note
def read_bundle_from_url(url):
@@ -42,8 +46,25 @@
# Some transports cannot detect that we are trying to read a
# directory until we actually issue read() on the handle.
try:
- t = _get_transport(url)
- f = t.get(filename)
+ transport = get_transport(url)
+
+ def get_bundle(transport):
+ return transport.get(filename)
+
+ def redirected_transport(transport, exception, redirection_notice):
+ note(redirection_notice)
+ url, filename = urlutils.split(exception.target,
+ exclude_trailing_slash=False)
+ if not filename:
+ raise errors.NotABundle('A directory cannot be a bundle')
+ return get_transport(url)
+
+ try:
+ f = do_catching_redirections(get_bundle, transport,
+ redirected_transport)
+ except errors.TooManyRedirections:
+ raise errors.NotABundle(str(url))
+
return _serializer.read_bundle(f)
except (errors.TransportError, errors.PathError), e:
raise errors.NotABundle(str(e))
=== modified file 'bzrlib/bzrdir.py'
--- bzrlib/bzrdir.py 2007-03-14 04:43:45 +0000
+++ bzrlib/bzrdir.py 2007-03-14 16:57:31 +0000
@@ -61,11 +61,17 @@
from bzrlib.store.text import TextStore
from bzrlib.store.versioned import WeaveStore
from bzrlib.transactions import WriteTransaction
-from bzrlib.transport import get_transport
+from bzrlib.transport import (
+ do_catching_redirections,
+ get_transport,
+ )
from bzrlib.weave import Weave
""")
-from bzrlib.trace import mutter, note
+from bzrlib.trace import (
+ mutter,
+ note,
+ )
from bzrlib.transport.local import LocalTransport
@@ -526,7 +532,36 @@
:param transport: Transport containing the bzrdir.
:param _unsupported: private.
"""
- format = BzrDirFormat.find_format(transport)
+ base = transport.base
+
+ def find_format(transport):
+ return transport, BzrDirFormat.find_format(transport)
+
+ def redirected(transport, e, redirection_notice):
+ qualified_source = e.get_source_url()
+ relpath = transport.relpath(qualified_source)
+ if not e.target.endswith(relpath):
+ # Not redirected to a branch-format, not a branch
+ raise errors.NotBranchError(path=e.target)
+ target = e.target[:-len(relpath)]
+ note('%s is%s redirected to %s',
+ transport.base, e.permanently, target)
+ # Let's try with a new transport
+ qualified_target = e.get_target_url()[:-len(relpath)]
+ # FIXME: If 'transport' has a qualifier, this should
+ # be applied again to the new transport *iff* the
+ # schemes used are the same. It's a bit tricky to
+ # verify, so I'll punt for now
+ # -- vila20070212
+ return get_transport(target)
+
+ try:
+ transport, format = do_catching_redirections(find_format,
+ transport,
+ redirected)
+ except errors.TooManyRedirections:
+ raise errors.NotBranchError(base)
+
BzrDir._check_supported(format, _unsupported)
return format.open(transport, _found=True)
@@ -1306,7 +1341,7 @@
@classmethod
def register_control_format(klass, format):
- """Register a format that does not use '.bzrdir' for its control dir.
+ """Register a format that does not use '.bzr' for its control dir.
TODO: This should be pulled up into a 'ControlDirFormat' base class
which BzrDirFormat can inherit from, and renamed to register_format
@@ -1338,10 +1373,6 @@
klass._control_formats.remove(format)
-# register BzrDirFormat as a control format
-BzrDirFormat.register_control_format(BzrDirFormat)
-
-
class BzrDirFormat4(BzrDirFormat):
"""Bzr dir format 4.
@@ -1598,6 +1629,10 @@
__set_workingtree_format)
+# Register bzr control format
+BzrDirFormat.register_control_format(BzrDirFormat)
+
+# Register bzr formats
BzrDirFormat.register_format(BzrDirFormat4())
BzrDirFormat.register_format(BzrDirFormat5())
BzrDirFormat.register_format(BzrDirFormat6())
=== modified file 'bzrlib/errors.py'
--- bzrlib/errors.py 2007-03-13 20:18:05 +0000
+++ bzrlib/errors.py 2007-03-14 16:57:31 +0000
@@ -1275,6 +1275,59 @@
InvalidHttpResponse.__init__(self, path, msg)
+class RedirectRequested(TransportError):
+
+ _fmt = '%(source)s is%(permanently)s redirected to %(target)s'
+
+ def __init__(self, source, target, is_permament=False, qual_proto=None):
+ self.source = source
+ self.target = target
+ if is_permament:
+ self.permanently = ' permanently'
+ else:
+ self.permanently = ''
+ self.is_permament = is_permament
+ self._qualified_proto = qual_proto
+ TransportError.__init__(self)
+
+ def _requalify_url(self, url):
+ """Restore the qualified proto in front of the url"""
+ # When this exception is raised, source and target are in
+ # user readable format. But some transports may use a
+ # different proto (http+urllib:// will present http:// to
+ # the user. If a qualified proto is specified, the code
+ # trapping the exception can get the qualified urls to
+ # properly handle the redirection themself (creating a
+ # new transport object from the target url for example).
+ # But checking that the scheme of the original and
+ # redirected urls are the same can be tricky. (see the
+ # FIXME in BzrDir.open_from_transport for the unique use
+ # case so far).
+ if self._qualified_proto is None:
+ return url
+
+ # The TODO related to NotBranchError mention that doing
+ # that kind of manipulation on the urls may not be the
+ # exception object job. On the other hand, this object is
+ # the interface between the code and the user so
+ # presenting the urls in different ways is indeed its
+ # job...
+ import urlparse
+ proto, netloc, path, query, fragment = urlparse.urlsplit(url)
+ return urlparse.urlunsplit((self._qualified_proto, netloc, path,
+ query, fragment))
+
+ def get_source_url(self):
+ return self._requalify_url(self.source)
+
+ def get_target_url(self):
+ return self._requalify_url(self.target)
+
+
+class TooManyRedirections(TransportError):
+
+ _fmt = "Too many redirections"
+
class ConflictsInTree(BzrError):
_fmt = "Working tree has conflicts."
=== modified file 'bzrlib/tests/HTTPTestUtil.py'
--- bzrlib/tests/HTTPTestUtil.py 2006-12-22 09:54:52 +0000
+++ bzrlib/tests/HTTPTestUtil.py 2007-03-14 16:39:24 +0000
@@ -17,6 +17,7 @@
from cStringIO import StringIO
import errno
from SimpleHTTPServer import SimpleHTTPRequestHandler
+import re
import socket
import urlparse
@@ -120,7 +121,7 @@
"""Hand the request off to a smart server instance."""
self.send_response(200)
self.send_header("Content-type", "application/octet-stream")
- transport = get_transport(self.server.test_case._home_dir)
+ transport = get_transport(self.server.test_case_server._home_dir)
# TODO: We might like to support streaming responses. 1.0 allows no
# Content-length in this case, so for integrity we should perform our
# own chunking within the stream.
@@ -177,9 +178,9 @@
class TestCaseWithTwoWebservers(TestCaseWithWebserver):
- """A support class providinf readonly urls (on two servers) that are http://.
+ """A support class providing readonly urls on two servers that are http://.
- We setup two webservers to allows various tests involving
+ We set up two webservers to allows various tests involving
proxies or redirections from one server to the other.
"""
def setUp(self):
@@ -225,4 +226,86 @@
return TestingHTTPRequestHandler.translate_path(self, path)
+class RedirectRequestHandler(TestingHTTPRequestHandler):
+ """Redirect all request to the specified server"""
+
+ def parse_request(self):
+ """Redirect a single HTTP request to another host"""
+ valid = TestingHTTPRequestHandler.parse_request(self)
+ if valid:
+ tcs = self.server.test_case_server
+ code, target = tcs.is_redirected(self.path)
+ if code is not None and target is not None:
+ # Redirect as instructed
+ self.send_response(code)
+ self.send_header('Location', target)
+ self.end_headers()
+ return False # The job is done
+ else:
+ # We leave the parent class serve the request
+ pass
+ return valid
+
+
+class HTTPServerRedirecting(HttpServer):
+ """An HttpServer redirecting to another server """
+
+ def __init__(self, request_handler=RedirectRequestHandler):
+ HttpServer.__init__(self, request_handler)
+ # redirections is a list of tuples (source, target, code)
+ # - source is a regexp for the paths requested
+ # - target is a replacement for re.sub describing where
+ # the request will be redirected
+ # - code is the http error code associated to the
+ # redirection (301 permanent, 302 temporarry, etc
+ self.redirections = []
+
+ def redirect_to(self, host, port):
+ """Redirect all requests to a specific host:port"""
+ self.redirections = [('(.*)',
+ r'http://%s:%s\1' % (host, port) ,
+ 301)]
+
+ def is_redirected(self, path):
+ """Is the path redirected by this server.
+
+ :param path: the requested relative path
+
+ :returns: a tuple (code, target) if a matching
+ redirection is found, (None, None) otherwise.
+ """
+ code = None
+ target = None
+ for (rsource, rtarget, rcode) in self.redirections:
+ target, match = re.subn(rsource, rtarget, path)
+ if match:
+ code = rcode
+ break # The first match wins
+ else:
+ target = None
+ return code, target
+
+
+class TestCaseWithRedirectedWebserver(TestCaseWithTwoWebservers):
+ """A support class providing redirections from one server to another.
+
+ We set up two webservers to allows various tests involving
+ redirections.
+ The 'old' server is redirected to the 'new' server.
+ """
+
+ def create_transport_secondary_server(self):
+ """Create the secondary server redirecting to the primary server"""
+ new = self.get_readonly_server()
+ redirecting = HTTPServerRedirecting()
+ redirecting.redirect_to(new.host, new.port)
+ return redirecting
+
+ def setUp(self):
+ super(TestCaseWithRedirectedWebserver, self).setUp()
+ # The redirections will point to the new server
+ self.new_server = self.get_readonly_server()
+ # The requests to the old server will be redirected
+ self.old_server = self.get_secondary_server()
+
=== modified file 'bzrlib/tests/HttpServer.py'
--- bzrlib/tests/HttpServer.py 2006-12-13 17:21:18 +0000
+++ bzrlib/tests/HttpServer.py 2007-03-14 16:39:24 +0000
@@ -43,12 +43,13 @@
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
def log_message(self, format, *args):
- self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
- self.address_string(),
- self.log_date_time_string(),
- format % args,
- self.headers.get('referer', '-'),
- self.headers.get('user-agent', '-'))
+ tcs = self.server.test_case_server
+ tcs.log('webserver - %s - - [%s] %s "%s" "%s"',
+ self.address_string(),
+ self.log_date_time_string(),
+ format % args,
+ self.headers.get('referer', '-'),
+ self.headers.get('user-agent', '-'))
def handle_one_request(self):
"""Handle a single HTTP request.
@@ -153,7 +154,6 @@
self.end_headers()
self.send_range_content(file, start, end - start + 1)
self.wfile.write("--%s\r\n" % boundary)
- pass
def do_GET(self):
"""Serve a GET request.
@@ -248,10 +248,16 @@
class TestingHTTPServer(BaseHTTPServer.HTTPServer):
- def __init__(self, server_address, RequestHandlerClass, test_case):
+
+ def __init__(self, server_address, RequestHandlerClass,
+ test_case_server):
BaseHTTPServer.HTTPServer.__init__(self, server_address,
- RequestHandlerClass)
- self.test_case = test_case
+ RequestHandlerClass)
+ # test_case_server can be used to communicate between the
+ # tests and the server (or the request handler and the
+ # server), allowing dynamic behaviors to be defined from
+ # the tests cases.
+ self.test_case_server = test_case_server
class HttpServer(Server):
@@ -267,18 +273,23 @@
def __init__(self, request_handler=TestingHTTPRequestHandler):
Server.__init__(self)
self.request_handler = request_handler
+ self.host = 'localhost'
+ self.port = 0
+ self._httpd = None
def _get_httpd(self):
- return TestingHTTPServer(('localhost', 0),
- self.request_handler,
- self)
+ if self._httpd is None:
+ self._httpd = TestingHTTPServer((self.host, self.port),
+ self.request_handler,
+ self)
+ host, self.port = self._httpd.socket.getsockname()
+ return self._httpd
def _http_start(self):
- httpd = None
httpd = self._get_httpd()
- host, self.port = httpd.socket.getsockname()
- self._http_base_url = '%s://localhost:%s/' % (self._url_protocol,
- self.port)
+ self._http_base_url = '%s://%s:%s/' % (self._url_protocol,
+ self.host,
+ self.port)
self._http_starting.release()
httpd.socket.settimeout(0.1)
=== modified file 'bzrlib/tests/test_bzrdir.py'
--- bzrlib/tests/test_bzrdir.py 2007-03-14 04:43:45 +0000
+++ bzrlib/tests/test_bzrdir.py 2007-03-14 16:57:31 +0000
@@ -36,9 +36,19 @@
UnknownFormatError,
UnsupportedFormatError,
)
-from bzrlib.tests import TestCase, TestCaseWithTransport, test_sftp_transport
+from bzrlib.tests import (
+ TestCase,
+ TestCaseWithTransport,
+ test_sftp_transport
+ )
from bzrlib.tests.HttpServer import HttpServer
+from bzrlib.tests.HTTPTestUtil import (
+ TestCaseWithTwoWebservers,
+ HTTPServerRedirecting,
+ )
+from bzrlib.tests.test_http import TestWithTransport_pycurl
from bzrlib.transport import get_transport
+from bzrlib.transport.http._urllib import HttpTransport_urllib
from bzrlib.transport.memory import MemoryServer
from bzrlib.repofmt import knitrepo, weaverepo
@@ -765,3 +775,72 @@
def test_open_containing_tree_or_branch(self):
tree = self.make_branch_and_tree('tree')
bzrdir.BzrDir.open_containing_tree_or_branch(self.get_url('tree'))
+
+
+class TestHTTPRedirectionLoop(object):
+ """Test redirection loop between two http servers.
+
+ This MUST be used by daughter classes that also inherit from
+ TestCaseWithTwoWebservers.
+
+ We can't inherit directly from TestCaseWithTwoWebservers or the
+ test framework will try to create an instance which cannot
+ run, its implementation being incomplete.
+ """
+
+ # Should be defined by daughter classes to ensure redirection
+ # still use the same transport implementation (not currently
+ # enforced as it's a bit tricky to get right (see the FIXME
+ # in BzrDir.open_from_transport for the unique use case so
+ # far)
+ _qualifier = None
+
+ def create_transport_readonly_server(self):
+ return HTTPServerRedirecting()
+
+ def create_transport_secondary_server(self):
+ return HTTPServerRedirecting()
+
+ def setUp(self):
+ # Both servers redirect to each server creating a loop
+ super(TestHTTPRedirectionLoop, self).setUp()
+ # The redirections will point to the new server
+ self.new_server = self.get_readonly_server()
+ # The requests to the old server will be redirected
+ self.old_server = self.get_secondary_server()
+ # Configure the redirections
+ self.old_server.redirect_to(self.new_server.host, self.new_server.port)
+ self.new_server.redirect_to(self.old_server.host, self.old_server.port)
+
+ def _qualified_url(self, host, port):
+ return 'http+%s://%s:%s' % (self._qualifier, host, port)
+
+ def test_loop(self):
+ # Starting from either server should loop
+ old_url = self._qualified_url(self.old_server.host,
+ self.old_server.port)
+ oldt = self._transport(old_url)
+ self.assertRaises(errors.NotBranchError,
+ bzrdir.BzrDir.open_from_transport, oldt)
+ new_url = self._qualified_url(self.new_server.host,
+ self.new_server.port)
+ newt = self._transport(new_url)
+ self.assertRaises(errors.NotBranchError,
+ bzrdir.BzrDir.open_from_transport, newt)
+
+
+class TestHTTPRedirections_urllib(TestHTTPRedirectionLoop,
+ TestCaseWithTwoWebservers):
+ """Tests redirections for urllib implementation"""
+
+ _qualifier = 'urllib'
+ _transport = HttpTransport_urllib
+
+
+
+class TestHTTPRedirections_pycurl(TestWithTransport_pycurl,
+ TestHTTPRedirectionLoop,
+ TestCaseWithTwoWebservers):
+ """Tests redirections for pycurl implementation"""
+
+ _qualifier = 'pycurl'
=== modified file 'bzrlib/tests/test_http.py'
--- bzrlib/tests/test_http.py 2007-03-05 20:51:42 +0000
+++ bzrlib/tests/test_http.py 2007-03-14 16:39:24 +0000
@@ -26,8 +26,11 @@
import threading
import bzrlib
-from bzrlib import errors
-from bzrlib import osutils
+from bzrlib import (
+ errors,
+ osutils,
+ urlutils,
+ )
from bzrlib.tests import (
TestCase,
TestSkipped,
@@ -42,20 +45,24 @@
BadStatusRequestHandler,
FakeProxyRequestHandler,
ForbiddenRequestHandler,
+ HTTPServerRedirecting,
InvalidStatusRequestHandler,
NoRangeRequestHandler,
SingleRangeRequestHandler,
+ TestCaseWithRedirectedWebserver,
TestCaseWithTwoWebservers,
TestCaseWithWebserver,
WallRequestHandler,
)
from bzrlib.transport import (
+ do_catching_redirections,
get_transport,
Transport,
)
from bzrlib.transport.http import (
extract_auth,
HttpTransportBase,
+ _urllib2_wrappers,
)
from bzrlib.transport.http._urllib import HttpTransport_urllib
@@ -946,3 +953,195 @@
TestRanges,
TestCaseWithWebserver):
"""Test the Range header in GET methods for pycurl implementation"""
+
+
+class TestHTTPRedirections(object):
+ """Test redirection between http servers.
+
+ This MUST be used by daughter classes that also inherit from
+ TestCaseWithRedirectedWebserver.
+
+ We can't inherit directly from TestCaseWithTwoWebservers or the
+ test framework will try to create an instance which cannot
+ run, its implementation being incomplete.
+ """
+
+ def create_transport_secondary_server(self):
+ """Create the secondary server redirecting to the primary server"""
+ new = self.get_readonly_server()
+
+ redirecting = HTTPServerRedirecting()
+ redirecting.redirect_to(new.host, new.port)
+ return redirecting
+
+ def setUp(self):
+ super(TestHTTPRedirections, self).setUp()
+ self.build_tree_contents([('a', '0123456789'),
+ ('bundle',
+ '# Bazaar revision bundle v0.9\n#\n')
+ ],)
+
+ self.old_transport = self._transport(self.old_server.get_url())
+
+ def test_redirected(self):
+ self.assertRaises(errors.RedirectRequested, self.old_transport.get, 'a')
+ t = self._transport(self.new_server.get_url())
+ self.assertEqual('0123456789', t.get('a').read())
+
+ def test_read_redirected_bundle_from_url(self):
+ from bzrlib.bundle import read_bundle_from_url
+ url = self.old_transport.abspath('bundle')
+ bundle = read_bundle_from_url(url)
+ # If read_bundle_from_url was successful we get an empty bundle
+ self.assertEqual([], bundle.revisions)
+
+
+class TestHTTPRedirections_urllib(TestHTTPRedirections,
+ TestCaseWithRedirectedWebserver):
+ """Tests redirections for urllib implementation"""
+
+ _transport = HttpTransport_urllib
+
+
+
+class TestHTTPRedirections_pycurl(TestWithTransport_pycurl,
+ TestHTTPRedirections,
+ TestCaseWithRedirectedWebserver):
+ """Tests redirections for pycurl implementation"""
+
+
+class RedirectedRequest(_urllib2_wrappers.Request):
+ """Request following redirections"""
+
+ init_orig = _urllib2_wrappers.Request.__init__
+
+ def __init__(self, method, url, *args, **kwargs):
+ RedirectedRequest.init_orig(self, method, url, args, kwargs)
+ self.follow_redirections = True
+
+
+class TestHTTPSilentRedirections_urllib(TestCaseWithRedirectedWebserver):
+ """Test redirections provided by urllib.
+
+ http implementations do not redirect silently anymore (they
+ do not redirect at all in fact). The mechanism is still in
+ place at the _urllib2_wrappers.Request level and these tests
+ exercise it.
+
+ For the pycurl implementation
+ the redirection have been deleted as we may deprecate pycurl
+ and I have no place to keep a working implementation.
+ -- vila 20070212
+ """
+
+ _transport = HttpTransport_urllib
+
+ def setUp(self):
+ super(TestHTTPSilentRedirections_urllib, self).setUp()
+ self.setup_redirected_request()
+ self.addCleanup(self.cleanup_redirected_request)
+ self.build_tree_contents([('a','a'),
+ ('1/',),
+ ('1/a', 'redirected once'),
+ ('2/',),
+ ('2/a', 'redirected twice'),
+ ('3/',),
+ ('3/a', 'redirected thrice'),
+ ('4/',),
+ ('4/a', 'redirected 4 times'),
+ ('5/',),
+ ('5/a', 'redirected 5 times'),
+ ],)
+
+ self.old_transport = self._transport(self.old_server.get_url())
+
+ def setup_redirected_request(self):
+ self.original_class = _urllib2_wrappers.Request
+ _urllib2_wrappers.Request = RedirectedRequest
+
+ def cleanup_redirected_request(self):
+ _urllib2_wrappers.Request = self.original_class
+
+ def create_transport_secondary_server(self):
+ """Create the secondary server, redirections are defined in the tests"""
+ return HTTPServerRedirecting()
+
+ def test_one_redirection(self):
+ t = self.old_transport
+
+ req = RedirectedRequest('GET', t.abspath('a'))
+ req.follow_redirections = True
+ new_prefix = 'http://%s:%s' % (self.new_server.host,
+ self.new_server.port)
+ self.old_server.redirections = \
+ [('(.*)', r'%s/1\1' % (new_prefix), 301),]
+ self.assertEquals('redirected once',t._perform(req).read())
+
+ def test_five_redirections(self):
+ t = self.old_transport
+
+ req = RedirectedRequest('GET', t.abspath('a'))
+ req.follow_redirections = True
+ old_prefix = 'http://%s:%s' % (self.old_server.host,
+ self.old_server.port)
+ new_prefix = 'http://%s:%s' % (self.new_server.host,
+ self.new_server.port)
+ self.old_server.redirections = \
+ [('/1(.*)', r'%s/2\1' % (old_prefix), 302),
+ ('/2(.*)', r'%s/3\1' % (old_prefix), 303),
+ ('/3(.*)', r'%s/4\1' % (old_prefix), 307),
+ ('/4(.*)', r'%s/5\1' % (new_prefix), 301),
+ ('(/[^/]+)', r'%s/1\1' % (old_prefix), 301),
+ ]
+ self.assertEquals('redirected 5 times',t._perform(req).read())
+
+
+class TestDoCatchRedirections(TestCaseWithRedirectedWebserver):
+ """Test transport.do_catching_redirections.
+
+ We arbitrarily choose to use urllib transports
+ """
+
+ _transport = HttpTransport_urllib
+
+ def setUp(self):
+ super(TestDoCatchRedirections, self).setUp()
+ self.build_tree_contents([('a', '0123456789'),],)
+
+ self.old_transport = self._transport(self.old_server.get_url())
+
+ def get_a(self, transport):
+ return transport.get('a')
+
+ def test_no_redirection(self):
+ t = self._transport(self.new_server.get_url())
+
+ # We use None for redirected so that we fail if redirected
+ self.assertEquals('0123456789',
+ do_catching_redirections(self.get_a, t, None).read())
+
+ def test_one_redirection(self):
+ self.redirections = 0
+
+ def redirected(transport, exception, redirection_notice):
+ self.redirections += 1
+ dir, file = urlutils.split(exception.target)
+ return self._transport(dir)
+
+ self.assertEquals('0123456789',
+ do_catching_redirections(self.get_a,
+ self.old_transport,
+ redirected
+ ).read())
+ self.assertEquals(1, self.redirections)
+
+ def test_redirection_loop(self):
+
+ def redirected(transport, exception, redirection_notice):
+ # By using the redirected url as a base dir for the
+ # *old* transport, we create a loop: a => a/a =>
+ # a/a/a
+ return self.old_transport.clone(exception.target)
+
+ self.assertRaises(errors.TooManyRedirections, do_catching_redirections,
+ self.get_a, self.old_transport, redirected)
=== modified file 'bzrlib/tests/test_smart_transport.py'
--- bzrlib/tests/test_smart_transport.py 2006-11-21 01:08:52 +0000
+++ bzrlib/tests/test_smart_transport.py 2007-02-08 14:52:19 +0000
@@ -1478,8 +1478,12 @@
'Content-Length: 6\r\n'
'\r\n'
'hello\n')
- request_handler = SmartRequestHandler(
- socket, ('localhost', 80), httpd)
+ # Beware: the ('localhost', 80) below is the
+ # client_address parameter, but we don't have one because
+ # we have defined a socket which is not bound to an
+ # address. The test framework never uses this client
+ # address, so far...
+ request_handler = SmartRequestHandler(socket, ('localhost', 80), httpd)
response = socket.writefile.getvalue()
self.assertStartsWith(response, 'HTTP/1.0 200 ')
# This includes the end of the HTTP headers, and all the body.
=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- bzrlib/tests/test_transport_implementations.py 2007-03-13 05:27:12 +0000
+++ bzrlib/tests/test_transport_implementations.py 2007-03-14 16:57:31 +0000
@@ -42,15 +42,6 @@
import bzrlib.transport
-def _append(fn, txt):
- """Append the given text (file-like object) to the supplied filename."""
- f = open(fn, 'ab')
- try:
- f.write(txt.read())
- finally:
- f.close()
-
-
class TransportTests(TestTransportImplementation):
def check_transport_contents(self, content, transport, relpath):
=== modified file 'bzrlib/transport/__init__.py'
--- bzrlib/transport/__init__.py 2007-01-26 01:23:38 +0000
+++ bzrlib/transport/__init__.py 2007-03-13 15:38:37 +0000
@@ -58,7 +58,11 @@
zero_eight,
zero_eleven,
)
-from bzrlib.trace import mutter, warning
+from bzrlib.trace import (
+ note,
+ mutter,
+ warning,
+ )
# {prefix: [transport_classes]}
# Transports are inserted onto the list LIFO and tried in order; as a result
@@ -133,7 +137,7 @@
for factory in factory_list:
if factory.__module__ == "bzrlib.transport":
# this is a lazy load transport, because no real ones
- # are directlry in bzrlib.transport
+ # are directly in bzrlib.transport
modules.add(factory.module)
else:
modules.add(factory.__module__)
@@ -1044,6 +1048,48 @@
return _try_transport_factories(base, _protocol_handlers[None])[0]
+def do_catching_redirections(action, transport, redirected):
+ """Execute an action with given transport catching redirections.
+
+ This is a facility provided for callers needing to follow redirections
+ silently. The silence is relative: it is the caller responsability to
+ inform the user about each redirection or only inform the user of a user
+ via the exception parameter.
+
+ :param action: A callable, what the caller want to do while catching
+ redirections.
+ :param transport: The initial transport used.
+ :param redirected: A callable receiving the redirected transport and the
+ RedirectRequested exception.
+
+ :return: Whatever 'action' returns
+ """
+ MAX_REDIRECTIONS = 8
+
+ # If a loop occurs, there is little we can do. So we don't try to detect
+ # them, just getting out if too much redirections occurs. The solution
+ # is outside: where the loop is defined.
+ for redirections in range(MAX_REDIRECTIONS):
+ try:
+ return action(transport)
+ except errors.RedirectRequested, e:
+ redirection_notice = '%s is%s redirected to %s' % (
+ e.source, e.permanently, e.target)
+ transport = redirected(transport, e, redirection_notice)
+ else:
+ # Loop exited without resolving redirect ? Either the
+ # user has kept a very very very old reference or a loop
+ # occurred in the redirections. Nothing we can cure here:
+ # tell the user. Note that as the user has been informed
+ # about each redirection (it is the caller responsibility
+ # to do that in redirected via the provided
+ # redirection_notice). The caller may provide more
+ # information if needed (like what file or directory we
+ # were trying to act upon when the redirection loop
+ # occurred).
+ raise errors.TooManyRedirections
+
+
def _try_transport_factories(base, factory_list):
last_err = None
for factory in factory_list:
@@ -1165,6 +1211,7 @@
register_lazy_transport(None, 'bzrlib.transport.local', 'LocalTransport')
register_lazy_transport('file://', 'bzrlib.transport.local', 'LocalTransport')
register_lazy_transport('sftp://', 'bzrlib.transport.sftp', 'SFTPTransport')
+# Decorated http transport
register_lazy_transport('http+urllib://', 'bzrlib.transport.http._urllib',
'HttpTransport_urllib')
register_lazy_transport('https+urllib://', 'bzrlib.transport.http._urllib',
@@ -1173,6 +1220,7 @@
'PyCurlTransport')
register_lazy_transport('https+pycurl://', 'bzrlib.transport.http._pycurl',
'PyCurlTransport')
+# Default http transports (last declared wins (if it can be imported))
register_lazy_transport('http://', 'bzrlib.transport.http._urllib',
'HttpTransport_urllib')
register_lazy_transport('https://', 'bzrlib.transport.http._urllib',
=== modified file 'bzrlib/transport/http/__init__.py'
--- bzrlib/transport/http/__init__.py 2006-12-13 05:07:19 +0000
+++ bzrlib/transport/http/__init__.py 2007-03-13 16:27:30 +0000
@@ -143,7 +143,7 @@
self._query, self._fragment) = urlparse.urlparse(self.base)
self._qualified_proto = apparent_proto
# range hint is handled dynamically throughout the life
- # of the object. We start by trying mulri-range requests
+ # of the object. We start by trying multi-range requests
# and if the server returns bougs results, we retry with
# single range requests and, finally, we forget about
# range if the server really can't understand. Once
@@ -226,17 +226,15 @@
code, response_file = self._get(relpath, None)
return response_file
- def _get(self, relpath, ranges):
+ def _get(self, relpath, ranges, tail_amount=0):
"""Get a file, or part of a file.
:param relpath: Path relative to transport base URL
- :param byte_range: None to get the whole file;
- or [(start,end)] to fetch parts of a file.
+ :param ranges: None to get the whole file;
+ or [(start,end)+], a list of tuples to fetch parts of a file.
+ :param tail_amount: The amount to get from the end of the file.
:returns: (http_code, result_file)
-
- Note that the current http implementations can only fetch one range at
- a time through this call.
"""
raise NotImplementedError(self._get)
=== modified file 'bzrlib/transport/http/_pycurl.py'
--- bzrlib/transport/http/_pycurl.py 2007-03-01 03:08:30 +0000
+++ bzrlib/transport/http/_pycurl.py 2007-03-13 17:00:20 +0000
@@ -23,6 +23,14 @@
# whether we expect a particular file will be modified after it's committed.
# It's probably safer to just always revalidate. mbp 20060321
+# TODO: Some refactoring could be done to avoid the strange idiom
+# used to capture data and headers while setting up the request
+# (and having to pass 'header' to _curl_perform to handle
+# redirections) . This could be achieved by creating a
+# specialized Curl object and returning code, headers and data
+# from _curl_perform. Not done because we may deprecate pycurl in the
+# future -- vila 20070212
+
import os
from cStringIO import StringIO
import sys
@@ -111,16 +119,19 @@
# don't want the body - ie just do a HEAD request
# This means "NO BODY" not 'nobody'
curl.setopt(pycurl.NOBODY, 1)
+ # But we need headers to handle redirections
+ header = StringIO()
+ curl.setopt(pycurl.HEADERFUNCTION, header.write)
# In some erroneous cases, pycurl will emit text on
# stdout if we don't catch it (see InvalidStatus tests
# for one such occurrence).
blackhole = StringIO()
curl.setopt(pycurl.WRITEFUNCTION, blackhole.write)
- self._curl_perform(curl)
+ self._curl_perform(curl, header)
code = curl.getinfo(pycurl.HTTP_CODE)
if code == 404: # not found
return False
- elif code in (200, 302): # "ok", "found"
+ elif code == 200: # "ok"
return True
else:
self._raise_curl_http_error(curl)
@@ -165,7 +176,7 @@
"""Make a request for the entire file"""
curl = self._curl
abspath, data, header = self._setup_get_request(curl, relpath)
- self._curl_perform(curl)
+ self._curl_perform(curl, header)
code = curl.getinfo(pycurl.HTTP_CODE)
data.seek(0)
@@ -188,8 +199,9 @@
# Forget ranges, the server can't handle them
return self._get_full(relpath)
- self._curl_perform(curl, ['Range: bytes=%s'
- % self.range_header(ranges, tail_amount)])
+ self._curl_perform(curl, header,
+ ['Range: bytes=%s'
+ % self.range_header(ranges, tail_amount)])
data.seek(0)
code = curl.getinfo(pycurl.HTTP_CODE)
@@ -210,7 +222,7 @@
abspath, data, header = self._setup_request(curl, '.bzr/smart')
# We override the Expect: header so that pycurl will send the POST
# body immediately.
- self._curl_perform(curl,['Expect: '])
+ self._curl_perform(curl, header, ['Expect: '])
data.seek(0)
code = curl.getinfo(pycurl.HTTP_CODE)
headers = _extract_headers(header.getvalue(), abspath)
@@ -238,11 +250,10 @@
# TODO: maybe include a summary of the pycurl version
ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
curl.setopt(pycurl.USERAGENT, ua_str)
- curl.setopt(pycurl.FOLLOWLOCATION, 1) # follow redirect responses
if self.cabundle:
curl.setopt(pycurl.CAINFO, self.cabundle)
- def _curl_perform(self, curl, more_headers=[]):
+ def _curl_perform(self, curl, header, more_headers=[]):
"""Perform curl operation and translate exceptions."""
try:
# There's no way in http/1.0 to say "must
@@ -275,6 +286,15 @@
# jam 20060713 The code didn't use to re-raise the exception here,
# but that seemed bogus
raise
+ code = curl.getinfo(pycurl.HTTP_CODE)
+ if code in (301, 302, 303, 307):
+ url = curl.getinfo(pycurl.EFFECTIVE_URL)
+ headers = _extract_headers(header.getvalue(), url)
+ redirected_to = headers['Location']
+ raise errors.RedirectRequested(url,
+ redirected_to,
+ is_permament=(code == 301),
+ qual_proto=self._qualified_proto)
def get_test_permutations():
=== modified file 'bzrlib/transport/http/_urllib.py'
--- bzrlib/transport/http/_urllib.py 2006-12-21 13:37:29 +0000
+++ bzrlib/transport/http/_urllib.py 2007-02-12 14:02:01 +0000
@@ -16,8 +16,10 @@
from cStringIO import StringIO
-from bzrlib import ui
-from bzrlib.errors import NoSuchFile
+from bzrlib import (
+ ui,
+ errors,
+ )
from bzrlib.trace import mutter
from bzrlib.transport import register_urlparse_netloc_protocol
from bzrlib.transport.http import HttpTransportBase
@@ -107,9 +109,15 @@
self._user = request.user
self._password = request.password
+ code = response.code
+ if request.follow_redirections is False \
+ and code in (301, 302, 303, 307):
+ raise errors.RedirectRequested(request.get_full_url(),
+ request.redirected_to,
+ is_permament=(code == 301),
+ qual_proto=self._qualified_proto)
+
if request.redirected_to is not None:
- # TODO: Update the transport so that subsequent
- # requests goes directly to the right host
mutter('redirected from: %s to: %s' % (request.get_full_url(),
request.redirected_to))
@@ -132,7 +140,7 @@
code = response.code
if code == 404: # not found
self._connection.fake_close()
- raise NoSuchFile(abspath)
+ raise errors.NoSuchFile(abspath)
data = handle_response(abspath, code, response.headers, response)
# Close response to free the httplib.HTTPConnection pipeline
@@ -171,12 +179,10 @@
response = self._head(relpath)
code = response.code
- # FIXME: 302 MAY have been already processed by the
- # redirection handler
- if code in (200, 302): # "ok", "found"
+ if code == 200: # "ok",
return True
else:
- assert(code == 404, 'Only 200, 404 or may be 302 are correct')
+ assert(code == 404, 'Only 200 or 404 are correct')
return False
=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- bzrlib/transport/http/_urllib2_wrappers.py 2007-02-25 12:10:53 +0000
+++ bzrlib/transport/http/_urllib2_wrappers.py 2007-03-14 16:39:24 +0000
@@ -156,6 +156,8 @@
# To handle redirections
self.parent = parent
self.redirected_to = None
+ # Unless told otherwise, redirections are not followed
+ self.follow_redirections = False
def extract_auth(self, url):
"""Extracts authentification information from url.
@@ -501,11 +503,11 @@
# of creating a new one, but the urllib2.Request object
# has a too complicated creation process to provide a
# simple enough equivalent update process. Instead, when
- # redirecting, we only update the original request with a
- # reference to the following request in the redirect
- # chain.
+ # redirecting, we only update the following request in
+ # the redirect chain with a reference to the parent
+ # request .
- # Some codes make no sense on out context and are treated
+ # Some codes make no sense in our context and are treated
# as errors:
# 300: Multiple choices for different representations of
@@ -520,11 +522,10 @@
# 306: Unused (if the RFC says so...)
- # FIXME: If the code is 302 and the request is HEAD, we
- # MAY avoid following the redirections if the intent is
- # to check the existence, we have a hint that the file
- # exist, now if we want to be sure, we must follow the
- # redirection. Let's do that for now.
+ # If the code is 302 and the request is HEAD, some may
+ # think that it is a sufficent hint that the file exists
+ # and that we MAY avoid following the redirections. But
+ # if we want to be sure, we MUST follow them.
if code in (301, 302, 303, 307):
return Request(req.get_method(),newurl,
@@ -541,7 +542,7 @@
else:
raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
- def http_error_30x(self, req, fp, code, msg, headers):
+ def http_error_302(self, req, fp, code, msg, headers):
"""Requests the redirected to URI.
Copied from urllib2 to be able to fake_close the
@@ -561,7 +562,12 @@
else:
return
if self._debuglevel > 0:
- print 'Redirected to: %s' % newurl
+ print 'Redirected to: %s (followed: %r)' % (newurl,
+ req.follow_redirections)
+ if req.follow_redirections is False:
+ req.redirected_to = newurl
+ return fp
+
newurl = urlparse.urljoin(req.get_full_url(), newurl)
# This call succeeds or raise an error. urllib2 returns
@@ -590,23 +596,7 @@
return self.parent.open(redirected_req)
- http_error_302 = http_error_303 = http_error_307 = http_error_30x
-
- def http_error_301(self, req, fp, code, msg, headers):
- response = self.http_error_30x(req, fp, code, msg, headers)
- # If one or several 301 response occur during the
- # redirection chain, we MUST update the original request
- # to indicate where the URI where finally found.
-
- original_req = req
- while original_req.parent is not None:
- original_req = original_req.parent
- if original_req.redirected_to is None:
- # Only the last occurring 301 should be taken
- # into account i.e. the first occurring here when
- # redirected_to has not yet been set.
- original_req.redirected_to = redirected_url
- return response
+ http_error_301 = http_error_303 = http_error_307 = http_error_302
class ProxyHandler(urllib2.ProxyHandler):
More information about the bazaar
mailing list