Rev 3055: Add -Dhttp support. in file:///v/home/vila/src/bzr/bugs/172701/

Vincent Ladeuil v.ladeuil+lp at free.fr
Fri Nov 30 09:51:26 GMT 2007


At file:///v/home/vila/src/bzr/bugs/172701/

------------------------------------------------------------
revno: 3055
revision-id:v.ladeuil+lp at free.fr-20071130095122-6xz845lluzjp7tvs
parent: v.ladeuil+lp at free.fr-20071129232201-thjmmlgo5ucbrwfn
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: 172701
timestamp: Fri 2007-11-30 10:51:22 +0100
message:
  Add -Dhttp support.
  
  * bzrlib/transport/http/_urllib2_wrappers.py:
  (HTTPConnection.__init__): Report the host we are about to connect
  to if -Dhttp is used.
  (AbstractHTTPHandler.do_open): Report requests and
  responses (including headers) if -Dhttp is used.
  
  * bzrlib/transport/http/_urllib.py: Fix some imports.
  (HttpTransport_urllib._perform): Delete one mutter call since
  -Dhttp provides better information.
  
  * bzrlib/transport/http/_pycurl.py:
  Fix some imports.
  (PyCurlTransport._set_curl_options): Activate verbose output if
  -Dhttp is used. Unfortunately this goes straight to stderr instead
  of .bzr.log (libcurl provides an option but pycurl does not
  implement it), but since we are debugging, I think it's
  acceptable.
  
  * bzrlib/transport/http/__init__.py:
  (HttpTransportBase._coalesce_readv): Add a comment about the
  servers that return the whole file ignoring the Ranges header.
  
  * bzrlib/help_topics.py:
  (_global_options): Add http.
  
  * bzrlib/debug.py: 
  Add 'http'.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/debug.py                debug.py-20061102062349-vdhrw9qdpck8cl35-1
  bzrlib/help_topics.py          help_topics.py-20060920210027-rnim90q9e0bwxvy4-1
  bzrlib/transport/http/__init__.py http_transport.py-20050711212304-506c5fd1059ace96
  bzrlib/transport/http/_pycurl.py pycurlhttp.py-20060110060940-4e2a705911af77a6
  bzrlib/transport/http/_urllib.py _urlgrabber.py-20060113083826-0bbf7d992fbf090c
  bzrlib/transport/http/_urllib2_wrappers.py _urllib2_wrappers.py-20060913231729-ha9ugi48ktx481ao-1
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2007-11-29 23:22:01 +0000
+++ b/NEWS	2007-11-30 09:51:22 +0000
@@ -71,6 +71,11 @@
      sorted chronologically instead of lexicographically with --sort=time.
      (Adeodato Simó, #120231)
 
+  INTERNALS:
+
+    * New -Dhttp debug option reports http connections, requests and responses.
+      (Vincent Ladeuil)
+
   DOCUMENTATION:
 
   BUG FIXES:

=== modified file 'bzrlib/debug.py'
--- a/bzrlib/debug.py	2007-11-04 15:29:17 +0000
+++ b/bzrlib/debug.py	2007-11-30 09:51:22 +0000
@@ -31,6 +31,7 @@
  * fetch - trace history copying between repositories
  * hooks - trace hook execution
  * hpss - trace smart protocol requests and responses
+ * http - trace http connections, requests and responses
  * index - trace major index operations
  * lock - trace when lockdir locks are taken or released
 

=== modified file 'bzrlib/help_topics.py'
--- a/bzrlib/help_topics.py	2007-11-06 07:01:07 +0000
+++ b/bzrlib/help_topics.py	2007-11-30 09:51:22 +0000
@@ -268,6 +268,7 @@
                operations.
 -Dhashcache    Log every time a working file is read to determine its hash.
 -Dhooks        Trace hook execution.
+-Dhttp         Trace http connections, requests and responses.
 -Dhpss         Trace smart protocol requests and responses.
 -Dindex        Trace major index operations.
 -Dlock         Trace when lockdir locks are taken or released.

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2007-11-29 23:22:01 +0000
+++ b/bzrlib/transport/http/__init__.py	2007-11-30 09:51:22 +0000
@@ -328,6 +328,9 @@
         else:
             # The whole file will be downloaded anyway
             max_ranges = total
+        # TODO: Some web servers may ignore the range requests and return the
+        # whole file, we may want to detect that and avoid further requests.
+        # Hint: test_readv_multiple_get_requests will fail in that case.
         for group in xrange(0, len(coalesced), max_ranges):
             ranges = coalesced[group:group+max_ranges]
             # Note that the following may raise errors.InvalidRange. It's the

=== modified file 'bzrlib/transport/http/_pycurl.py'
--- a/bzrlib/transport/http/_pycurl.py	2007-10-31 12:38:11 +0000
+++ b/bzrlib/transport/http/_pycurl.py	2007-11-30 09:51:22 +0000
@@ -36,13 +36,12 @@
 import sys
 
 from bzrlib import (
+    debug,
     errors,
+    trace,
     __version__ as bzrlib_version,
     )
 import bzrlib
-from bzrlib.errors import (NoSuchFile,
-                           ConnectionError,
-                           DependencyNotPresent)
 from bzrlib.trace import mutter
 from bzrlib.transport.http import (
     ca_bundle,
@@ -55,7 +54,7 @@
     import pycurl
 except ImportError, e:
     mutter("failed to import pycurl: %s", e)
-    raise DependencyNotPresent('pycurl', e)
+    raise errors.DependencyNotPresent('pycurl', e)
 
 try:
     # see if we can actually initialize PyCurl - sometimes it will load but
@@ -70,7 +69,7 @@
     pycurl.Curl()
 except pycurl.error, e:
     mutter("failed to initialize pycurl: %s", e)
-    raise DependencyNotPresent('pycurl', e)
+    raise errors.DependencyNotPresent('pycurl', e)
 
 
 
@@ -112,7 +111,7 @@
             # protocols
             supported = pycurl.version_info()[8]
             if 'https' not in supported:
-                raise DependencyNotPresent('pycurl', 'no https support')
+                raise errors.DependencyNotPresent('pycurl', 'no https support')
         self.cabundle = ca_bundle.get_ca_path()
 
     def _get_curl(self):
@@ -202,7 +201,7 @@
         data.seek(0)
 
         if code == 404:
-            raise NoSuchFile(abspath)
+            raise errors.NoSuchFile(abspath)
         if code != 200:
             self._raise_curl_http_error(
                 curl, 'expected 200 or 404 for full response.')
@@ -264,9 +263,12 @@
 
     def _set_curl_options(self, curl):
         """Set options for all requests"""
-        ## curl.setopt(pycurl.VERBOSE, 1)
-        # TODO: maybe include a summary of the pycurl version
-        ua_str = 'bzr/%s (pycurl)' % (bzrlib.__version__,)
+        if 'http' in debug.debug_flags:
+            curl.setopt(pycurl.VERBOSE, 1)
+            # pycurl doesn't implement the CURLOPT_STDERR option, so we can't
+            # do: curl.setopt(pycurl.STDERR, trace._trace_file)
+
+        ua_str = 'bzr/%s (pycurl: %s)' % (bzrlib.__version__, pycurl.version)
         curl.setopt(pycurl.USERAGENT, ua_str)
         if self.cabundle:
             curl.setopt(pycurl.CAINFO, self.cabundle)
@@ -292,8 +294,8 @@
                         CURLE_COULDNT_CONNECT,
                         CURLE_GOT_NOTHING,
                         CURLE_COULDNT_RESOLVE_PROXY,):
-                raise ConnectionError('curl connection error (%s)\non %s'
-                              % (e[1], url))
+                raise errors.ConnectionError(
+                    'curl connection error (%s)\non %s' % (e[1], url))
             elif e[0] == CURLE_PARTIAL_FILE:
                 # Pycurl itself has detected a short read.  We do
                 # not have all the information for the

=== modified file 'bzrlib/transport/http/_urllib.py'
--- a/bzrlib/transport/http/_urllib.py	2007-11-04 15:44:32 +0000
+++ b/bzrlib/transport/http/_urllib.py	2007-11-30 09:51:22 +0000
@@ -20,10 +20,10 @@
 
 from bzrlib import (
     errors,
+    trace,
     urlutils,
     )
-from bzrlib.trace import mutter
-from bzrlib.transport.http import HttpTransportBase
+from bzrlib.transport import http
 # TODO: handle_response should be integrated into the _urllib2_wrappers
 from bzrlib.transport.http.response import handle_response
 from bzrlib.transport.http._urllib2_wrappers import (
@@ -32,7 +32,7 @@
     )
 
 
-class HttpTransport_urllib(HttpTransportBase):
+class HttpTransport_urllib(http.HttpTransportBase):
     """Python urllib transport for http and https."""
 
     # In order to debug we have to issue our traces in sync with
@@ -85,7 +85,6 @@
         request.auth = auth
         request.proxy_auth = proxy_auth
 
-        mutter('%s: [%s]' % (request.method, request.get_full_url()))
         if self._debuglevel > 0:
             print 'perform: %s base: %s, url: %s' % (request.method, self.base,
                                                      request.get_full_url())
@@ -108,8 +107,8 @@
                                            qual_proto=self._scheme)
 
         if request.redirected_to is not None:
-            mutter('redirected from: %s to: %s' % (request.get_full_url(),
-                                                   request.redirected_to))
+            trace.mutter('redirected from: %s to: %s' % (request.get_full_url(),
+                                                         request.redirected_to))
 
         return response
 

=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- a/bzrlib/transport/http/_urllib2_wrappers.py	2007-11-29 15:43:33 +0000
+++ b/bzrlib/transport/http/_urllib2_wrappers.py	2007-11-30 09:51:22 +0000
@@ -32,6 +32,9 @@
 handle authentication schemes.
 """
 
+# TODO: now that we have -Dhttp most of the needs should be covered in a more
+# accessible way (i.e. no need to edit the source), if experience confirms
+# that, delete all DEBUG uses -- vila20071130 (happy birthday).
 DEBUG = 0
 
 # FIXME: Oversimplifying, two kind of exceptions should be
@@ -56,7 +59,9 @@
 from bzrlib import __version__ as bzrlib_version
 from bzrlib import (
     config,
+    debug,
     errors,
+    trace,
     transport,
     ui,
     )
@@ -144,6 +149,13 @@
 
     # XXX: Needs refactoring at the caller level.
     def __init__(self, host, port=None, strict=None, proxied_host=None):
+        if 'http' in debug.debug_flags:
+            netloc = host
+            if port is not None:
+                netloc += '%d' % port
+            if proxied_host is not None:
+                netloc += '(proxy for %s)' % proxied_host
+            trace.mutter('* About to connect() to %s' % netloc)
         httplib.HTTPConnection.__init__(self, host, port, strict)
         self.proxied_host = proxied_host
 
@@ -426,12 +438,17 @@
         headers.update(request.unredirected_hdrs)
 
         try:
-            connection._send_request(request.get_method(),
-                                     request.get_selector(),
+            method = request.get_method()
+            url = request.get_selector()
+            connection._send_request(method, url,
                                      # FIXME: implements 100-continue
                                      #None, # We don't send the body yet
                                      request.get_data(),
                                      headers)
+            if 'http' in debug.debug_flags:
+                trace.mutter('> %s %s' % (method, url))
+                hdrs = ['%s: %s' % (k, v) for k,v in headers.items()]
+                trace.mutter('> ' + '\n> '.join(hdrs) + '\n')
             if self._debuglevel > 0:
                 print 'Request sent: [%r]' % request
             response = connection.getresponse()
@@ -454,6 +471,17 @@
 #            connection.send(body)
 #            response = connection.getresponse()
 
+        if 'http' in debug.debug_flags:
+            version = 'HTTP/%d.%d'
+            try:
+                version = version % (response.version / 10,
+                                     response.version % 10)
+            except:
+                version = 'HTTP/%r' % version
+            trace.mutter('< %s %s %s' % (version, response.status,
+                                            response.reason))
+            hdrs = [h.rstrip('\r\n') for h in response.msg.headers]
+            trace.mutter('< ' + '\n< '.join(hdrs) + '\n')
         if self._debuglevel > 0:
             print 'Receives response: %r' % response
             print '  For: %r(%r)' % (request.get_method(),



More information about the bazaar-commits mailing list