Rev 3950: Start implementing http activity reporting at socket level. in file:///net/bigmamac/Volumes/home/vila/src/bzr/experimental/pb-http/

Vincent Ladeuil v.ladeuil+lp at free.fr
Thu Jan 29 14:27:30 GMT 2009


At file:///net/bigmamac/Volumes/home/vila/src/bzr/experimental/pb-http/

------------------------------------------------------------
revno: 3950
revision-id: v.ladeuil+lp at free.fr-20090129142728-1iu8017zgso23w0i
parent: v.ladeuil+lp at free.fr-20090126081302-ad8l8nhn2sscymxt
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: pb-http
timestamp: Thu 2009-01-29 15:27:28 +0100
message:
  Start implementing http activity reporting at socket level.
  
  * bzrlib/transport/http/_urllib2_wrappers.py:
  (_ReportingFileSocket, _ReportingSocket): Wrappers for
  socket._socket and socket_filesocket objects providing activity
  reporting.
  (Response.__init__): Deleted. Wrapping the file socket object is
  now done handled at connection time.
  (AbstractHTTPConnection._wrap_socket_for_reporting): Wrap the
  underlying socket into a reporting one.
  (HTTPConnection.__init__, HTTPSConnection.__init__): Accept a
  report_activity parameter.
  (HTTPSConnection.connect): Wrap the newly created socket.
  (HTTPSConnection.connect_to_origin): Wrap the newly created *or*
  connected ssl socket.
  (ConnectionHandler.__init__): Accept a report_activity parameter.
  (ConnectionHandler.create_connection): Pass the report_activity
  parameter to all created connections.
  (Opener.__init__): Accept a report_activity parameter.
  
  * bzrlib/transport/http/_urllib.py:
  (HttpTransport_urllib.__init__): Provide the report activity
  function to the opener.
  (HttpTransport_urllib._get, HttpTransport_urllib._post): Don't use
  the RangeFile report activity facility anymore.
  
  * bzrlib/transport/http/__init__.py:
  (HttpTransportBase.get_bytes): Deleted, this gratuitously add a
  useless level of buffering.
  
  * bzrlib/tests/test_http.py:
  (ActivityHttpServer, PreRecoredRequestHandler, TestActivity): Test
  activity reporting by http clients.
-------------- next part --------------
=== modified file 'BRANCH.TODO'
--- a/BRANCH.TODO	2008-07-03 04:03:02 +0000
+++ b/BRANCH.TODO	2009-01-29 14:27:28 +0000
@@ -3,3 +3,11 @@
 # 
 #
 
+* try to address pycurl failing activity tests or raise not
+   applicable otherwise
+
+* complete _ReportingFileSocket implementation for write
+   operations ?
+
+* triple check https activity reporting (including proxy
+   connection)
\ No newline at end of file

=== modified file 'bzrlib/tests/test_http.py'
--- a/bzrlib/tests/test_http.py	2009-01-08 16:57:10 +0000
+++ b/bzrlib/tests/test_http.py	2009-01-29 14:27:28 +0000
@@ -136,6 +136,7 @@
                  TestProxyHttpServer,
                  TestRanges,
                  TestSpecificRequestHandler,
+                 TestActivity,
                  )
     is_also_testing_for_protocols = tests.condition_isinstance(tp_classes)
 
@@ -1807,3 +1808,98 @@
                              'https://foo.example.com/foo')
         self.assertIsInstance(r, type(t))
         self.assertEquals(t._user, r._user)
+
+
+class ActivityHttpServer(http_server.HttpServer):
+
+    def __init__(self, request_handler=None,
+                 protocol_version=None):
+        super(ActivityHttpServer, self).__init__(
+            request_handler=request_handler, protocol_version=protocol_version)
+        # Bytes read and written by the server
+        self.bytes_read = 0
+        self.bytes_written = 0
+
+
+class PreRecoredRequestHandler(http_server.TestingHTTPRequestHandler):
+    """Pre-recorded request handler.
+
+
+    The only thing we care about here is how many bytes travel on the wire. But
+    since we want to measure it for a real http client, we have to send it
+    correct responses.
+
+    We expect to receive a *single* request nothing more (and we won't even
+    check what request it is, we just measure the bytes read until an empty
+    line.
+    """
+
+    canned_response = ''
+
+    def handle_one_request(self):
+        tcs = self.server.test_case_server
+        if not isinstance(tcs, ActivityHttpServer):
+            raise AssertionError(
+                'PreRecoredRequestHandler assumes a %r server, not %r'
+                % (ActivityHttpServer, tcs))
+        requestline = self.rfile.readline()
+        headers = self.MessageClass(self.rfile, 0)
+        # We just read: the request, the headers, an empty line indicating the
+        # end of the headers.
+        bytes_read = len(requestline)
+        for line in headers.headers:
+            bytes_read += len(line)
+        bytes_read += len('\r\n')
+        tcs.bytes_read = bytes_read
+        self.wfile.write(self.canned_response)
+        tcs.bytes_written = len(self.canned_response)
+
+
+class TestActivity(tests.TestCase):
+    """Test socket activity reporting.
+
+    We use a special purpose server to control the bytes sent and received and
+    be able to predict the activity on the client socket.
+    """
+
+    def test_http_get(self):
+        class MyRequestHandler(PreRecoredRequestHandler):
+
+            canned_response = '''HTTP/1.1 200 OK\r
+Date: Tue, 11 Jul 2006 04:32:56 GMT\r
+Server: Apache/2.0.54 (Fedora)\r
+Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r
+ETag: "56691-23-38e9ae00"\r
+Accept-Ranges: bytes\r
+Content-Length: 35\r
+Connection: close\r
+Content-Type: text/plain; charset=UTF-8\r
+\r
+Bazaar-NG meta directory, format 1
+'''
+
+        server = ActivityHttpServer(MyRequestHandler, self._protocol_version)
+        server.setUp()
+        self.addCleanup(server.tearDown)
+
+        activities = {}
+        def report_activity(t, bytes, direction):
+            count = activities.get(direction, 0)
+            count += bytes
+            activities[direction] = count
+
+        # We override at class level because constructors may propagate the
+        # bound method and render instance overriding ineffective (an
+        # alternative would be be to define a specific ui factory instead...)
+        orig_ra = self._transport._report_activity
+        def restore_report_activity():
+            self._transport._report_activity = orig_ra
+        self.addCleanup(restore_report_activity)
+        self._transport._report_activity = report_activity
+
+        t = self._transport(server.get_url())
+
+        self.assertEqual('Bazaar-NG meta directory, format 1\n',
+                         t.get('foo/bar').read())
+        self.assertEqual(server.bytes_read, activities.get('write', 0))
+        self.assertEqual(server.bytes_written, activities.get('read', 0))

=== modified file 'bzrlib/transport/http/__init__.py'
--- a/bzrlib/transport/http/__init__.py	2009-01-23 21:22:39 +0000
+++ b/bzrlib/transport/http/__init__.py	2009-01-29 14:27:28 +0000
@@ -123,17 +123,13 @@
 
         :param relpath: The relative path to the file
         """
+        code, response_file = self._get(relpath, None)
         # FIXME: some callers want an iterable... One step forward, three steps
         # backwards :-/ And not only an iterable, but an iterable that can be
         # seeked backwards, so we will never be able to do that.  One such
         # known client is bzrlib.bundle.serializer.v4.get_bundle_reader. At the
         # time of this writing it's even the only known client -- vila20071203
-        return StringIO(self.get_bytes(relpath))
-
-    def get_bytes(self, relpath):
-        """See Transport.get_bytes()."""
-        code, response_file = self._get(relpath, None)
-        return response_file.read()
+        return StringIO(response_file.read())
 
     def _get(self, relpath, ranges, tail_amount=0):
         """Get a file, or part of a file.

=== modified file 'bzrlib/transport/http/_urllib.py'
--- a/bzrlib/transport/http/_urllib.py	2009-01-23 21:22:39 +0000
+++ b/bzrlib/transport/http/_urllib.py	2009-01-29 14:27:28 +0000
@@ -47,7 +47,8 @@
         if _from_transport is not None:
             self._opener = _from_transport._opener
         else:
-            self._opener = self._opener_class()
+            self._opener = self._opener_class(
+                report_activity=self._report_activity)
 
     def _perform(self, request):
         """Send the request to the server and handles common errors.
@@ -100,7 +101,6 @@
 
     def _get(self, relpath, offsets, tail_amount=0):
         """See HttpTransport._get"""
-
         abspath = self._remote_path(relpath)
         headers = {}
         accepted_errors = [200, 404]
@@ -126,8 +126,7 @@
             raise errors.InvalidHttpRange(abspath, range_header,
                                           'Server return code %d' % code)
 
-        data = handle_response(abspath, code, response.info(), response,
-            report_activity=self._report_activity)
+        data = handle_response(abspath, code, response.info(), response)
         return code, data
 
     def _post(self, body_bytes):
@@ -137,8 +136,7 @@
         response = self._perform(Request('POST', abspath, body_bytes,
                                          accepted_errors=[200, 403]))
         code = response.code
-        data = handle_response(abspath, code, response.info(), response,
-            report_activity=self._report_activity)
+        data = handle_response(abspath, code, response.info(), response)
         return code, data
 
     def _head(self, relpath):

=== modified file 'bzrlib/transport/http/_urllib2_wrappers.py'
--- a/bzrlib/transport/http/_urllib2_wrappers.py	2009-01-08 16:57:10 +0000
+++ b/bzrlib/transport/http/_urllib2_wrappers.py	2009-01-29 14:27:28 +0000
@@ -67,13 +67,54 @@
     )
 
 
-class _BufferedMakefileSocket(object):
-
-    def __init__(self, sock):
+class _ReportingFileSocket(object):
+
+    def __init__(self, filesock, report_activity=None):
+        self.filesock = filesock
+        self._report_activity = report_activity
+
+
+    def read(self, size=1):
+        s = self.filesock.read(size)
+        self._report_activity(len(s), 'read')
+        return s
+
+    def readline(self, size=-1):
+        s = self.filesock.readline(size)
+        self._report_activity(len(s), 'read')
+        return s
+
+    def __getattr__(self, name):
+        return getattr(self.filesock, name)
+
+
+class _ReportingSocket(object):
+
+    def __init__(self, sock, report_activity=None):
         self.sock = sock
+        self._report_activity = report_activity
+
+    def send(self, s, *args):
+        self.sock.send(s, *args)
+        self._report_activity(len(s), 'write')
+
+    def sendall(self, s, *args):
+        self.sock.send(s, *args)
+        self._report_activity(len(s), 'write')
+
+    def recv(self, *args):
+        s = self.sock.recv(*args)
+        self._report_activity(len(s), 'read')
+        return s
 
     def makefile(self, mode='r', bufsize=-1):
-        return self.sock.makefile(mode, 65536)
+        # httplib creates a fileobject that doesn't do buffering, which
+        # makes fp.readline() very expensive because it only reads one byte
+        # at a time.  So we wrap the socket in an object that forces
+        # sock.makefile to make a buffered file.
+        fsock = self.sock.makefile(mode, 65536)
+        # And wrap that into a reporting kind of fileobject
+        return _ReportingFileSocket(fsock, self._report_activity)
 
     def __getattr__(self, name):
         return getattr(self.sock, name)
@@ -96,14 +137,6 @@
     # 8k chunks should be fine.
     _discarded_buf_size = 8192
 
-    def __init__(self, sock, *args, **kwargs):
-        # httplib creates a fileobject that doesn't do buffering, which
-        # makes fp.readline() very expensive because it only reads one byte
-        # at a time.  So we wrap the socket in an object that forces
-        # sock.makefile to make a buffered file.
-        sock = _BufferedMakefileSocket(sock)
-        httplib.HTTPResponse.__init__(self, sock, *args, **kwargs)
-
     def begin(self):
         """Begin to read the response from the server.
 
@@ -178,8 +211,10 @@
     # we want to warn. But not below a given thresold.
     _range_warning_thresold = 1024 * 1024
 
-    def __init__(self):
+    def __init__(self,
+                 report_activity=None):
         self._response = None
+        self._report_activity = report_activity
         self._ranges_received_whole_file = None
 
     def _mutter_connect(self):
@@ -216,12 +251,17 @@
         # Restore our preciousss
         self.sock = sock
 
+    def _wrap_socket_for_reporting(self, sock):
+        """Wrap the socket before anybody use it."""
+        self.sock = _ReportingSocket(sock, self._report_activity)
+
 
 class HTTPConnection(AbstractHTTPConnection, httplib.HTTPConnection):
 
     # XXX: Needs refactoring at the caller level.
-    def __init__(self, host, port=None, proxied_host=None):
-        AbstractHTTPConnection.__init__(self)
+    def __init__(self, host, port=None, proxied_host=None,
+                 report_activity=None):
+        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
         # Use strict=True since we don't support HTTP/0.9
         httplib.HTTPConnection.__init__(self, host, port, strict=True)
         self.proxied_host = proxied_host
@@ -230,6 +270,7 @@
         if 'http' in debug.debug_flags:
             self._mutter_connect()
         httplib.HTTPConnection.connect(self)
+        self._wrap_socket_for_reporting(self.sock)
 
 
 # Build the appropriate socket wrapper for ssl
@@ -248,8 +289,9 @@
 class HTTPSConnection(AbstractHTTPConnection, httplib.HTTPSConnection):
 
     def __init__(self, host, port=None, key_file=None, cert_file=None,
-                 proxied_host=None):
-        AbstractHTTPConnection.__init__(self)
+                 proxied_host=None,
+                 report_activity=None):
+        AbstractHTTPConnection.__init__(self, report_activity=report_activity)
         # Use strict=True since we don't support HTTP/0.9
         httplib.HTTPSConnection.__init__(self, host, port,
                                          key_file, cert_file, strict=True)
@@ -259,11 +301,14 @@
         if 'http' in debug.debug_flags:
             self._mutter_connect()
         httplib.HTTPConnection.connect(self)
+        self._wrap_socket_for_reporting(self.sock)
         if self.proxied_host is None:
             self.connect_to_origin()
 
     def connect_to_origin(self):
-        self.sock = _ssl_wrap_socket(self.sock, self.key_file, self.cert_file)
+        ssl_sock = _ssl_wrap_socket(self.sock, self.key_file, self.cert_file)
+        # Wrap the ssl socket before anybody use it
+        self._wrap_socket_for_reporting(ssl_sock)
 
 
 class Request(urllib2.Request):
@@ -355,6 +400,9 @@
 
     handler_order = 1000 # after all pre-processings
 
+    def __init__(self, report_activity=None):
+        self._report_activity = report_activity
+
     def create_connection(self, request, http_connection_class):
         host = request.get_host()
         if not host:
@@ -366,7 +414,8 @@
         # request is made)
         try:
             connection = http_connection_class(
-                host, proxied_host=request.proxied_host)
+                host, proxied_host=request.proxied_host,
+                report_activity=self._report_activity)
         except httplib.InvalidURL, exception:
             # There is only one occurrence of InvalidURL in httplib
             raise errors.InvalidURL(request.get_full_url(),
@@ -1370,9 +1419,11 @@
     def __init__(self,
                  connection=ConnectionHandler,
                  redirect=HTTPRedirectHandler,
-                 error=HTTPErrorProcessor,):
-        self._opener = urllib2.build_opener( \
-            connection, redirect, error,
+                 error=HTTPErrorProcessor,
+                 report_activity=None):
+        self._opener = urllib2.build_opener(
+            connection(report_activity=report_activity),
+            redirect, error,
             ProxyHandler(),
             HTTPBasicAuthHandler(),
             HTTPDigestAuthHandler(),



More information about the bazaar-commits mailing list