Rev 3064: DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix). in file:///v/home/vila/src/bzr/bugs/173010/

Vincent Ladeuil v.ladeuil+lp at free.fr
Mon Dec 3 13:52:42 GMT 2007


At file:///v/home/vila/src/bzr/bugs/173010/

------------------------------------------------------------
revno: 3064
revision-id: v.ladeuil+lp at free.fr-20071203135236-eh1ia9y0aurc91gb
parent: v.ladeuil+lp at free.fr-20071203091407-en2nm0ktfs2f9sg2
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 173010
timestamp: Mon 2007-12-03 14:52:36 +0100
message:
  DAMN^64, the http test server is 1.0 not 1.1 :( Better pipe cleaning and less readv caching (since that's the point of the whole fix).
  
  * bzrlib/transport/http/_urllib2_wrappers.py:
  (Response.finish): Do the Right Thing to clean the pipe: most of
  the time we know exactly how many bytes remain to be read. And
  when we don't, we are not maintaining a persistent connection, so
  nothing will block the next request anyway (see the sketch after
  the file list below).
  (AbstractHTTPConnection.__init__): Keep track of the last response
  handled.
  (AbstractHTTPConnection._mutter_connect): New method issuing the
  traces.
  (AbstractHTTPConnection.getresponse): Capture each response.
  (AbstractHTTPConnection.cleanup_pipe): Delegate the cleaning to
  the response; it knows better.
  (HTTPConnection.__init__): Not the right place to reliably trace
  the connections issued.
  (HTTPConnection.connect, HTTPSConnection.connect): Right place to
  reliably trace the connections issued.
  (AbstractHTTPHandler.do_open): The debug output for the response
  was emitted in the wrong place, resulting in doubled traces when
  retrying on errors.
  
  * bzrlib/transport/http/__init__.py:
  (HttpTransportBase._readv): If the order of the coalesced offsets
  matches the order of the requested offsets, don't cache. Ftw (see
  the sketch after the first __init__.py hunk below).
modified:
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
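
As an illustration of the pipe-cleaning idea behind Response.finish
below, here is a minimal sketch, independent of bzrlib. It assumes an
httplib.HTTPResponse-like object whose .length attribute tracks the
bytes still unread (httplib maintains it on every read());
drain_response is a hypothetical helper name, not part of the patch:

    def drain_response(response):
        """Consume the unread part of a response body.

        Leftover body bytes would otherwise be parsed as the status
        line of the next response on the same persistent connection.
        """
        if not response.isclosed():
            # When Content-Length was received, .length says exactly
            # how many bytes are left. When it is None, read() goes to
            # EOF, but then the connection is not persistent anyway.
            response.read(response.length)
            response.close()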
-------------- next part --------------
=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2007-12-03 08:33:06 +0000
+++ b/bzrlib/transport/http/__init__.py	2007-12-03 13:52:36 +0000
@@ -259,7 +259,14 @@
                         if data_len != size:
                             raise errors.ShortReadvError(relpath, start, size,
                                                          actual=data_len)
-                        data_map[(start, size)] = data
+                        if (start, size) == cur_offset_and_size:
+                            # The offsets requested are sorted like the
+                            # coalesced ones, no need to cache. Win!
+                            yield cur_offset_and_size[0], data
+                            cur_offset_and_size = iter_offsets.next()
+                        else:
+                            # Different sorting. We need to cache.
+                            data_map[(start, size)] = data
 
                     # Yield everything we can
                     while cur_offset_and_size in data_map:

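The fast path above can be illustrated with plain data structures.
This is only a sketch of the technique, not bzrlib code;
serve_in_order is a hypothetical stand-in for the yield logic in
_readv:

    def serve_in_order(requested, chunks):
        """Yield (offset, data) pairs in the order given by 'requested'.

        'requested' is a list of (start, size) keys; 'chunks' yields
        ((start, size), data) pairs in whatever order the transport
        produced them.
        """
        pending = list(requested)
        pending.reverse()  # pop() from the end is the next expected key
        data_map = {}
        for key, data in chunks:
            if pending and key == pending[-1]:
                # Arrived in the requested order: no need to cache. Win!
                pending.pop()
                yield key[0], data
            else:
                data_map[key] = data
            # Drain any cached chunks whose turn has come.
            while pending and pending[-1] in data_map:
                key = pending.pop()
                yield key[0], data_map.pop(key)

For example, serve_in_order([(0, 2), (10, 3)], [((10, 3), 'bcd'),
((0, 2), 'aa')]) yields (0, 'aa') and then (10, 'bcd').
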
=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- a/bzrlib/transport/http/_urllib2_wrappers.py	2007-12-03 08:33:06 +0000
+++ b/bzrlib/transport/http/_urllib2_wrappers.py	2007-12-03 13:52:36 +0000
@@ -123,6 +123,20 @@
             # below we keep the socket with the server opened.
             self.will_close = False
 
+    def finish(self):
+        """Finish reading the bdy.
+
+        In some cases, the client may have leave some bytes to read in the
+        body. That will block the next request to succeed if we use a
+        persistent connection. If we don't use a persitent connection, well,
+        nothing will block the next request since a new connection will be
+        issued anyway.
+        """
+        if not self.isclosed():
+            # Make sure nothing was left to be read on the socket
+            data = self.read(self.length)
+            self.close()
+
 
 # Not inheriting from 'object' because httplib.HTTPConnection doesn't.
 class AbstractHTTPConnection:
@@ -131,11 +145,30 @@
     response_class = Response
     strict = 1 # We don't support HTTP/0.9
 
+    def __init__(self):
+        self._response = None
+
+    def _mutter_connect(self):
+        netloc = self.host
+        if self.port is not None:
+            netloc += ':%d' % self.port
+        if self.proxied_host is not None:
+            netloc += '(proxy for %s)' % self.proxied_host
+        trace.mutter('* About to connect() to %s' % netloc)
+
+    def getresponse(self):
+        """Capture the response to be able to cleanup"""
+        self._response = httplib.HTTPConnection.getresponse(self)
+        return self._response
+
     def cleanup_pipe(self):
         """Make the connection believes the response have been fully handled.
 
         That makes the httplib.HTTPConnection happy
         """
+        if self._response is not None:
+            self._response.finish()
+            self._response = None
         # Preserve our preciousss
         sock = self.sock
         self.sock = None
@@ -149,26 +182,28 @@
 
     # XXX: Needs refactoring at the caller level.
     def __init__(self, host, port=None, strict=None, proxied_host=None):
-        if 'http' in debug.debug_flags:
-            netloc = host
-            if port is not None:
-                netloc += '%d' % port
-            if proxied_host is not None:
-                netloc += '(proxy for %s)' % proxied_host
-            trace.mutter('* About to connect() to %s' % netloc)
+        AbstractHTTPConnection.__init__(self)
         httplib.HTTPConnection.__init__(self, host, port, strict)
         self.proxied_host = proxied_host
 
+    def connect(self):
+        if 'http' in debug.debug_flags:
+            self._mutter_connect()
+        httplib.HTTPConnection.connect(self)
+
 
 class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
 
     def __init__(self, host, port=None, key_file=None, cert_file=None,
                  strict=None, proxied_host=None):
+        AbstractHTTPConnection.__init__(self)
         httplib.HTTPSConnection.__init__(self, host, port,
                                          key_file, cert_file, strict)
         self.proxied_host = proxied_host
 
     def connect(self):
+        if 'http' in debug.debug_flags:
+            self._mutter_connect()
         httplib.HTTPConnection.connect(self)
         if self.proxied_host is None:
             self.connect_to_origin()
@@ -385,7 +420,6 @@
                     print '  Will retry, %s %r' % (method, url)
                 request.connection.close()
                 response = self.do_open(http_class, request, False)
-                convert_to_addinfourl = False
             else:
                 if self._debuglevel > 0:
                     print 'Received second exception: [%r]' % exc_val
@@ -421,7 +455,7 @@
                     print '  Failed again, %s %r' % (method, url)
                     print '  Will raise: [%r]' % my_exception
                 raise my_exception, None, exc_tb
-        return response, convert_to_addinfourl
+        return response
 
     def do_open(self, http_class, request, first_try=True):
         """See urllib2.AbstractHTTPHandler.do_open for the general idea.
@@ -455,9 +489,8 @@
             convert_to_addinfourl = True
         except (socket.gaierror, httplib.BadStatusLine, httplib.UnknownProtocol,
                 socket.error, httplib.HTTPException):
-            response, convert_to_addinfourl = self.retry_or_raise(http_class,
-                                                                  request,
-                                                                  first_try)
+            response = self.retry_or_raise(http_class, request, first_try)
+            convert_to_addinfourl = False
 
 # FIXME: HTTPConnection does not fully support 100-continue (the
 # server responses are just ignored)
@@ -471,19 +504,6 @@
 #            connection.send(body)
 #            response = connection.getresponse()
 
-        if 'http' in debug.debug_flags:
-            version = 'HTTP/%d.%d'
-            try:
-                version = version % (response.version / 10,
-                                     response.version % 10)
-            except:
-                version = 'HTTP/%r' % version
-            trace.mutter('< %s %s %s' % (version, response.status,
-                                            response.reason))
-            # Use the raw header lines instead of treating response.msg as a
-            # dict since we may miss duplicated headers otherwise.
-            hdrs = [h.rstrip('\r\n') for h in response.msg.headers]
-            trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
         if self._debuglevel > 0:
             print 'Receives response: %r' % response
             print '  For: %r(%r)' % (request.get_method(),
@@ -498,37 +518,28 @@
             resp = urllib2.addinfourl(fp, r.msg, req.get_full_url())
             resp.code = r.status
             resp.msg = r.reason
+            resp.version = r.version
             if self._debuglevel > 0:
                 print 'Create addinfourl: %r' % resp
                 print '  For: %r(%r)' % (request.get_method(),
                                          request.get_full_url())
+            if 'http' in debug.debug_flags:
+                version = 'HTTP/%d.%d'
+                try:
+                    version = version % (resp.version / 10,
+                                         resp.version % 10)
+                except:
+                    version = 'HTTP/%r' % resp.version
+                trace.mutter('< %s %s %s' % (version, resp.code,
+                                             resp.msg))
+                # Use the raw header lines instead of treating resp.info() as a
+                # dict since we may miss duplicated headers otherwise.
+                hdrs = [h.rstrip('\r\n') for h in resp.info().headers]
+                trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
         else:
             resp = response
         return resp
 
-#       # we need titled headers in a dict but
-#       # response.getheaders returns a list of (lower(header).
-#       # Let's title that because most of bzr handle titled
-#       # headers, but maybe we should switch to lowercased
-#       # headers...
-#        # jam 20060908: I think we actually expect the headers to
-#        #       be similar to mimetools.Message object, which uses
-#        #       case insensitive keys. It lowers() all requests.
-#        #       My concern is that the code may not do perfect title case.
-#        #       For example, it may use Content-type rather than Content-Type
-#
-#        # When we get rid of addinfourl, we must ensure that bzr
-#        # always use titled headers and that any header received
-#        # from server is also titled.
-#
-#        headers = {}
-#        for header, value in (response.getheaders()):
-#            headers[header.title()] = value
-#        # FIXME: Implements a secured .read method
-#        response.code = response.status
-#        response.headers = headers
-#        return response
-
 
 class HTTPHandler(AbstractHTTPHandler):
     """A custom handler that just thunks into HTTPConnection"""


