[rfc] [patch] pycurl transport

Wed Jan 11 04:13:12 GMT 2006

On 11 Jan 2006, Martin Pool <mbp at sourcefrog.net> wrote:
> This patch adds a new http/https transport which uses the PyCurl
> library.  <pycurl.sf.net>

*This* patch :)


-- 
Martin
-------------- next part --------------
=== added file 'bzrlib/transport/pycurlhttp.py'

--- /dev/null	
+++ bzrlib/transport/pycurlhttp.py	
@@ -0,0 +1,91 @@
+# Copyright (C) 2006 Canonical Ltd
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""http/https transport using pycurl"""
+
+# TODO: test reporting of http errors
+
+from StringIO import StringIO
+import pycurl
+
+from bzrlib.trace import mutter
+from bzrlib.errors import TransportError, NoSuchFile
+from bzrlib.transport import Transport
+from bzrlib.transport.http import HttpTransportBase
+
+
+class PyCurlTransport(HttpTransportBase):
+    """http/https transport that fetches files through a pycurl handle.
+
+    One pycurl.Curl object is created per transport and reused for every
+    request made through it.
+    """
+
+    def __init__(self, base):
+        """Create the transport and its curl handle.
+
+        :param base: Base url, passed unchanged to HttpTransportBase.
+        """
+        super(PyCurlTransport, self).__init__(base)
+        self.curl = pycurl.Curl()
+        mutter('imported pycurl %s' % pycurl.version)
+
+    def get(self, relpath, decode=False):
+        """Get the file at the given relative path.
+
+        :param relpath: The relative path to the file
+        :param decode: Not supported; a true value raises
+            NotImplementedError.
+        :return: A StringIO holding the response body, rewound to 0.
+        """
+        if decode:
+            raise NotImplementedError
+        return self._get_url(self.abspath(relpath))
+
+    def _get_url(self, abspath):
+        """Fetch abspath with curl and return the body as a StringIO.
+
+        :param abspath: Full url to fetch; if unicode it must be pure ascii.
+        :raises NoSuchFile: if the http status is 404.
+        :raises TransportError: for any other status outside 200-399.
+        """
+        sio = StringIO()
+        # pycurl needs plain ascii
+        if isinstance(abspath, unicode):
+            # XXX: HttpTransportBase.abspath should probably url-escape
+            # unicode characters if any in the path - domain name must be
+            # IDNA-escaped
+            abspath = abspath.encode('ascii')
+        self.curl.setopt(pycurl.URL, abspath)
+        ## self.curl.setopt(pycurl.VERBOSE, 1)
+        # The whole response body is accumulated in memory via sio.write.
+        self.curl.setopt(pycurl.WRITEFUNCTION, sio.write)
+        # NOTE(review): the empty 'Pragma:' presumably suppresses the Pragma
+        # header curl would otherwise send - confirm against libcurl docs.
+        headers = ['Cache-control: must-revalidate',
+                   'Pragma:']
+        self.curl.setopt(pycurl.HTTPHEADER, headers)
+        self.curl.perform()
+        code = self.curl.getinfo(pycurl.HTTP_CODE)
+        # NOTE(review): 3xx is accepted as success below, and FOLLOWLOCATION
+        # is never set here - confirm redirects are handled as intended.
+        if code == 404:
+            raise NoSuchFile(abspath)
+        elif not 200 <= code <= 399:
+            raise TransportError('http error %d accessing %s' %
+                    (code, self.curl.getinfo(pycurl.EFFECTIVE_URL)))
+        sio.seek(0)
+        return sio

=== modified file 'bzrlib/transport/__init__.py'
--- bzrlib/transport/__init__.py	
+++ bzrlib/transport/__init__.py	
@@ -13,6 +13,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
 """Transport is an abstraction layer to handle file access.
 
 The abstraction is to allow access from the local filesystem, as well
@@ -82,7 +83,7 @@
         using a subdirectory or parent directory. This allows connections 
         to be pooled, rather than a new one needed for each subdir.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.clone)
 
     def should_cache(self):
         """Return True if the data pulled across should be cached locally.
@@ -147,7 +148,7 @@
         XXX: Robert Collins 20051016 - is this really needed in the public
              interface ?
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.abspath)
 
     def relpath(self, abspath):
         """Return the local path portion from a given absolute path.
@@ -171,7 +172,7 @@
         Note that some transports MAY allow querying on directories, but this
         is not part of the protocol.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.has)
 
     def has_multi(self, relpaths, pb=None):
         """Return True/False for each entry in relpaths"""
@@ -195,14 +196,14 @@
         As with other listing functions, only some transports implement this,.
         you may check via is_listable to determine if it will.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.iter_files_recursive)
 
     def get(self, relpath):
         """Get the file at the given relative path.
 
         :param relpath: The relative path to the file
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.get)
 
     def get_multi(self, relpaths, pb=None):
         """Get a list of file-like objects, one for each entry in relpaths.
@@ -229,7 +230,7 @@
         :param mode: The mode for the newly created file, 
                      None means just use the default
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.put)
 
     def put_multi(self, files, mode=None, pb=None):
         """Put a set of files or strings into the location.
@@ -245,7 +246,7 @@
 
     def mkdir(self, relpath, mode=None):
         """Create a directory at the given path."""
-        raise NotImplementedError
+        raise NotImplementedError(self.mkdir)
 
     def mkdir_multi(self, relpaths, mode=None, pb=None):
         """Create a group of directories"""
@@ -257,7 +258,7 @@
         """Append the text in the file-like or string object to 
         the supplied location.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.append)
 
     def append_multi(self, files, pb=None):
         """Append the text in each file-like or string object to
@@ -270,7 +271,7 @@
 
     def copy(self, rel_from, rel_to):
         """Copy the item at rel_from to the location at rel_to"""
-        raise NotImplementedError
+        raise NotImplementedError(self.copy)
 
     def copy_multi(self, relpaths, pb=None):
         """Copy a bunch of entries.
@@ -298,7 +299,7 @@
 
     def move(self, rel_from, rel_to):
         """Move the item at rel_from to the location at rel_to"""
-        raise NotImplementedError
+        raise NotImplementedError(self.move)
 
     def move_multi(self, relpaths, pb=None):
         """Move a bunch of entries.
@@ -317,11 +318,11 @@
         """
         # This is not implemented, because you need to do special tricks to
         # extract the basename, and add it to rel_to
-        raise NotImplementedError
+        raise NotImplementedError(self.move_multi_to)
 
     def delete(self, relpath):
         """Delete the item at relpath"""
-        raise NotImplementedError
+        raise NotImplementedError(self.delete)
 
     def delete_multi(self, relpaths, pb=None):
         """Queue up a bunch of deletes to be done.
@@ -338,7 +339,7 @@
         ALSO NOTE: Stats of directories may not be supported on some 
         transports.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.stat)
 
     def stat_multi(self, relpaths, pb=None):
         """Stat multiple files and return the information.
@@ -354,7 +355,7 @@
 
     def listable(self):
         """Return True if this store supports listing."""
-        raise NotImplementedError
+        raise NotImplementedError(self.listable)
 
     def list_dir(self, relpath):
         """Return a list of all files at the given location.
@@ -370,7 +371,7 @@
 
         :return: A lock object, which should contain an unlock() function.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.lock_read)
 
     def lock_write(self, relpath):
         """Lock the given file for exclusive (write) access.
@@ -378,7 +379,7 @@
 
         :return: A lock object, which should contain an unlock() function.
         """
-        raise NotImplementedError
+        raise NotImplementedError(self.lock_write)
 
 
 def get_transport(base):
@@ -419,7 +420,9 @@
 register_lazy_transport(None, 'bzrlib.transport.local', 'LocalTransport')
 register_lazy_transport('file://', 'bzrlib.transport.local', 'LocalTransport')
 register_lazy_transport('sftp://', 'bzrlib.transport.sftp', 'SFTPTransport')
-register_lazy_transport('http://', 'bzrlib.transport.http', 'HttpTransport')
-register_lazy_transport('https://', 'bzrlib.transport.http', 'HttpTransport')
+## register_lazy_transport('http://', 'bzrlib.transport.http', 'HttpTransport')
+## register_lazy_transport('https://', 'bzrlib.transport.http', 'HttpTransport')
+register_lazy_transport('http://', 'bzrlib.transport.pycurlhttp', 'PyCurlTransport')
+register_lazy_transport('https://', 'bzrlib.transport.pycurlhttp', 'PyCurlTransport')
 register_lazy_transport('ftp://', 'bzrlib.transport.ftp', 'FtpTransport')
 register_lazy_transport('aftp://', 'bzrlib.transport.ftp', 'FtpTransport')

=== modified file 'bzrlib/transport/http.py'
--- bzrlib/transport/http.py	
+++ bzrlib/transport/http.py	
@@ -60,47 +60,18 @@
     url = scheme + '//' + host + port + path
     return url
     
-def get_url(url):
-    import urllib2
-    mutter("get_url %s" % url)
-    manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
-    url = extract_auth(url, manager)
-    auth_handler = urllib2.HTTPBasicAuthHandler(manager)
-    opener = urllib2.build_opener(auth_handler)
-    url_f = opener.open(url)
-    return url_f
-
-class HttpTransport(Transport):
-    """This is the transport agent for http:// access.
-    
-    TODO: Implement pipelined versions of all of the *_multi() functions.
-    """
-
+
+class HttpTransportBase(Transport):
+    """Base class for http implementations.
+
+    Does URL parsing, etc, but not any network IO."""
     def __init__(self, base):
         """Set the base path where files will be stored."""
         assert base.startswith('http://') or base.startswith('https://')
-        super(HttpTransport, self).__init__(base)
-        # In the future we might actually connect to the remote host
-        # rather than using get_url
-        # self._connection = None
+        super(HttpTransportBase, self).__init__(base)
         (self._proto, self._host,
             self._path, self._parameters,
             self._query, self._fragment) = urlparse.urlparse(self.base)
-
-    def should_cache(self):
-        """Return True if the data pulled across should be cached locally.
-        """
-        return True
-
-    def clone(self, offset=None):
-        """Return a new HttpTransport with root at self.base + offset
-        For now HttpTransport does not actually connect, so just return
-        a new HttpTransport object.
-        """
-        if offset is None:
-            return HttpTransport(self.base)
-        else:
-            return HttpTransport(self.abspath(offset))
 
     def abspath(self, relpath):
         """Return the full url to the given relative path.
@@ -140,6 +111,72 @@
         return urlparse.urlunparse((self._proto,
                 self._host, path, '', '', ''))
 
+    def stat(self, relpath):
+        """Return the stat information for a file.
+        """
+        raise TransportNotPossible('http does not support stat()')
+
+    def lock_read(self, relpath):
+        """Lock the given file for shared (read) access.
+        :return: A lock object, which should be passed to Transport.unlock()
+        """
+        # The old RemoteBranch ignored locks for reading, so we will
+        # continue that tradition and return a bogus lock object.
+        class BogusLock(object):
+            def __init__(self, path):
+                self.path = path
+            def unlock(self):
+                pass
+        return BogusLock(relpath)
+
+    def lock_write(self, relpath):
+        """Lock the given file for exclusive (write) access.
+        WARNING: many transports do not support this, so try to avoid using it
+
+        :return: A lock object, which should be passed to Transport.unlock()
+        """
+        raise TransportNotPossible('http does not support lock_write()')
+
+    def clone(self, offset=None):
+        """Return a new transport of this same class at self.base + offset
+        Nothing is shared with the original; this just builds a fresh
+        self.__class__ instance, so subclasses clone to their own type.
+        """
+        if offset is None:
+            return self.__class__(self.base)
+        else:
+            return self.__class__(self.abspath(offset))
+
+    def listable(self):
+        """Returns false - http has no reliable way to list directories."""
+        # well, we could try DAV...
+        return False
+
+
+class HttpTransport(HttpTransportBase):
+    """Python urllib transport for http and https.
+    
+    TODO: Implement pipelined versions of all of the *_multi() functions.
+    """
+
+    def __init__(self, base):
+        """Set the base path where files will be stored."""
+        super(HttpTransport, self).__init__(base)
+
+    def _get_url(self, url):
+        mutter("get_url %s" % url)
+        manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
+        url = extract_auth(url, manager)
+        auth_handler = urllib2.HTTPBasicAuthHandler(manager)
+        opener = urllib2.build_opener(auth_handler)
+        url_f = opener.open(url)
+        return url_f
+
+    def should_cache(self):
+        """Return True if the data pulled across should be cached locally.
+        """
+        return True
+
     def has(self, relpath):
         """Does the target location exist?
 
@@ -154,7 +191,7 @@
         path = relpath
         try:
             path = self.abspath(relpath)
-            f = get_url(path)
+            f = self._get_url(path)
             # Without the read and then close()
             # we tend to have busy sockets.
             f.read()
@@ -180,7 +217,7 @@
         path = relpath
         try:
             path = self.abspath(relpath)
-            return get_url(path)
+            return self._get_url(path)
         except urllib2.HTTPError, e:
             mutter('url error code: %s for has url: %r', e.code, path)
             if e.code == 404:
@@ -242,32 +279,3 @@
         """Delete the item at relpath"""
         raise TransportNotPossible('http does not support delete()')
 
-    def listable(self):
-        """See Transport.listable."""
-        return False
-
-    def stat(self, relpath):
-        """Return the stat information for a file.
-        """
-        raise TransportNotPossible('http does not support stat()')
-
-    def lock_read(self, relpath):
-        """Lock the given file for shared (read) access.
-        :return: A lock object, which should be passed to Transport.unlock()
-        """
-        # The old RemoteBranch ignore lock for reading, so we will
-        # continue that tradition and return a bogus lock object.
-        class BogusLock(object):
-            def __init__(self, path):
-                self.path = path
-            def unlock(self):
-                pass
-        return BogusLock(relpath)
-
-    def lock_write(self, relpath):
-        """Lock the given file for exclusive (write) access.
-        WARNING: many transports do not support this, so trying avoid using it
-
-        :return: A lock object, which should be passed to Transport.unlock()
-        """
-        raise TransportNotPossible('http does not support lock_write()')

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060111/6031d5d4/attachment.pgp