[rfc] [patch] pycurl transport
Martin Pool
mbp at sourcefrog.net
Wed Jan 11 04:13:12 GMT 2006
On 11 Jan 2006, Martin Pool <mbp at sourcefrog.net> wrote:
> This patch adds a new http/https transport which uses the PyCurl
> library. <pycurl.sf.net>
*This* patch :)
--
Martin
-------------- next part --------------
=== added file 'bzrlib/transport/pycurlhttp.py'
--- /dev/null
+++ bzrlib/transport/pycurlhttp.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2006 Canonical Ltd
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""http/https transport using pycurl"""
+
+# TODO: test reporting of http errors
+
+from StringIO import StringIO
+import pycurl
+
+from bzrlib.trace import mutter
+from bzrlib.errors import TransportError, NoSuchFile
+from bzrlib.transport import Transport
+from bzrlib.transport.http import HttpTransportBase
+
+class PyCurlTransport(HttpTransportBase):
+    """http/https transport that fetches files through a pycurl handle."""
+    def __init__(self, base):
+        """Set up the transport for `base` and create its curl handle."""
+        super(PyCurlTransport, self).__init__(base)
+        self.curl = pycurl.Curl()
+        mutter('imported pycurl %s' % pycurl.version)
+
+    def get(self, relpath, decode=False):
+        """Return a file-like object for relpath; decode=True is unsupported."""
+        if decode:
+            raise NotImplementedError
+        return self._get_url(self.abspath(relpath))
+
+    def _get_url(self, abspath):
+        """GET abspath into a StringIO; raise on 404 or status outside 200-399."""
+        sio = StringIO()
+        # pycurl needs plain ascii.  XXX: HttpTransportBase.abspath should
+        # probably url-escape unicode in the path; the domain must be IDNA.
+        if isinstance(abspath, unicode):
+            abspath = abspath.encode('ascii')
+        self.curl.setopt(pycurl.URL, abspath)
+        self.curl.setopt(pycurl.WRITEFUNCTION, sio.write)
+        self.curl.setopt(pycurl.HTTPHEADER,
+                         ['Cache-control: must-revalidate', 'Pragma:'])
+        self.curl.perform()
+        code = self.curl.getinfo(pycurl.HTTP_CODE)
+        if code == 404:
+            raise NoSuchFile(abspath)
+        elif not 200 <= code <= 399:
+            raise TransportError('http error %d accessing %s' %
+                    (code, self.curl.getinfo(pycurl.EFFECTIVE_URL)))
+        sio.seek(0)
+        return sio
+
+
=== modified file 'bzrlib/transport/__init__.py'
--- bzrlib/transport/__init__.py
+++ bzrlib/transport/__init__.py
@@ -13,6 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
"""Transport is an abstraction layer to handle file access.
The abstraction is to allow access from the local filesystem, as well
@@ -82,7 +83,7 @@
using a subdirectory or parent directory. This allows connections
to be pooled, rather than a new one needed for each subdir.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.clone)
def should_cache(self):
"""Return True if the data pulled across should be cached locally.
@@ -147,7 +148,7 @@
XXX: Robert Collins 20051016 - is this really needed in the public
interface ?
"""
- raise NotImplementedError
+ raise NotImplementedError(self.abspath)
def relpath(self, abspath):
"""Return the local path portion from a given absolute path.
@@ -171,7 +172,7 @@
Note that some transports MAY allow querying on directories, but this
is not part of the protocol.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.has)
def has_multi(self, relpaths, pb=None):
"""Return True/False for each entry in relpaths"""
@@ -195,14 +196,14 @@
As with other listing functions, only some transports implement this,.
you may check via is_listable to determine if it will.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.iter_files_recursive)
def get(self, relpath):
"""Get the file at the given relative path.
:param relpath: The relative path to the file
"""
- raise NotImplementedError
+ raise NotImplementedError(self.get)
def get_multi(self, relpaths, pb=None):
"""Get a list of file-like objects, one for each entry in relpaths.
@@ -229,7 +230,7 @@
:param mode: The mode for the newly created file,
None means just use the default
"""
- raise NotImplementedError
+ raise NotImplementedError(self.put)
def put_multi(self, files, mode=None, pb=None):
"""Put a set of files or strings into the location.
@@ -245,7 +246,7 @@
def mkdir(self, relpath, mode=None):
"""Create a directory at the given path."""
- raise NotImplementedError
+ raise NotImplementedError(self.mkdir)
def mkdir_multi(self, relpaths, mode=None, pb=None):
"""Create a group of directories"""
@@ -257,7 +258,7 @@
"""Append the text in the file-like or string object to
the supplied location.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.append)
def append_multi(self, files, pb=None):
"""Append the text in each file-like or string object to
@@ -270,7 +271,7 @@
def copy(self, rel_from, rel_to):
"""Copy the item at rel_from to the location at rel_to"""
- raise NotImplementedError
+ raise NotImplementedError(self.copy)
def copy_multi(self, relpaths, pb=None):
"""Copy a bunch of entries.
@@ -298,7 +299,7 @@
def move(self, rel_from, rel_to):
"""Move the item at rel_from to the location at rel_to"""
- raise NotImplementedError
+ raise NotImplementedError(self.move)
def move_multi(self, relpaths, pb=None):
"""Move a bunch of entries.
@@ -317,11 +318,11 @@
"""
# This is not implemented, because you need to do special tricks to
# extract the basename, and add it to rel_to
- raise NotImplementedError
+ raise NotImplementedError(self.move_multi_to)
def delete(self, relpath):
"""Delete the item at relpath"""
- raise NotImplementedError
+ raise NotImplementedError(self.delete)
def delete_multi(self, relpaths, pb=None):
"""Queue up a bunch of deletes to be done.
@@ -338,7 +339,7 @@
ALSO NOTE: Stats of directories may not be supported on some
transports.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.stat)
def stat_multi(self, relpaths, pb=None):
"""Stat multiple files and return the information.
@@ -354,7 +355,7 @@
def listable(self):
"""Return True if this store supports listing."""
- raise NotImplementedError
+ raise NotImplementedError(self.listable)
def list_dir(self, relpath):
"""Return a list of all files at the given location.
@@ -370,7 +371,7 @@
:return: A lock object, which should contain an unlock() function.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.lock_read)
def lock_write(self, relpath):
"""Lock the given file for exclusive (write) access.
@@ -378,7 +379,7 @@
:return: A lock object, which should contain an unlock() function.
"""
- raise NotImplementedError
+ raise NotImplementedError(self.lock_write)
def get_transport(base):
@@ -419,7 +420,9 @@
register_lazy_transport(None, 'bzrlib.transport.local', 'LocalTransport')
register_lazy_transport('file://', 'bzrlib.transport.local', 'LocalTransport')
register_lazy_transport('sftp://', 'bzrlib.transport.sftp', 'SFTPTransport')
-register_lazy_transport('http://', 'bzrlib.transport.http', 'HttpTransport')
-register_lazy_transport('https://', 'bzrlib.transport.http', 'HttpTransport')
+## register_lazy_transport('http://', 'bzrlib.transport.http', 'HttpTransport')
+## register_lazy_transport('https://', 'bzrlib.transport.http', 'HttpTransport')
+register_lazy_transport('http://', 'bzrlib.transport.pycurlhttp', 'PyCurlTransport')
+register_lazy_transport('https://', 'bzrlib.transport.pycurlhttp', 'PyCurlTransport')
register_lazy_transport('ftp://', 'bzrlib.transport.ftp', 'FtpTransport')
register_lazy_transport('aftp://', 'bzrlib.transport.ftp', 'FtpTransport')
=== modified file 'bzrlib/transport/http.py'
--- bzrlib/transport/http.py
+++ bzrlib/transport/http.py
@@ -60,47 +60,18 @@
url = scheme + '//' + host + port + path
return url
-def get_url(url):
- import urllib2
- mutter("get_url %s" % url)
- manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
- url = extract_auth(url, manager)
- auth_handler = urllib2.HTTPBasicAuthHandler(manager)
- opener = urllib2.build_opener(auth_handler)
- url_f = opener.open(url)
- return url_f
-
-class HttpTransport(Transport):
- """This is the transport agent for http:// access.
-
- TODO: Implement pipelined versions of all of the *_multi() functions.
- """
-
+
+class HttpTransportBase(Transport):
+ """Base class for http implementations.
+
+ Does URL parsing, etc, but not any network IO."""
def __init__(self, base):
"""Set the base path where files will be stored."""
assert base.startswith('http://') or base.startswith('https://')
- super(HttpTransport, self).__init__(base)
- # In the future we might actually connect to the remote host
- # rather than using get_url
- # self._connection = None
+ super(HttpTransportBase, self).__init__(base)
(self._proto, self._host,
self._path, self._parameters,
self._query, self._fragment) = urlparse.urlparse(self.base)
-
- def should_cache(self):
- """Return True if the data pulled across should be cached locally.
- """
- return True
-
- def clone(self, offset=None):
- """Return a new HttpTransport with root at self.base + offset
- For now HttpTransport does not actually connect, so just return
- a new HttpTransport object.
- """
- if offset is None:
- return HttpTransport(self.base)
- else:
- return HttpTransport(self.abspath(offset))
def abspath(self, relpath):
"""Return the full url to the given relative path.
@@ -140,6 +111,72 @@
return urlparse.urlunparse((self._proto,
self._host, path, '', '', ''))
+ def stat(self, relpath):
+ """Always raise TransportNotPossible: http does not support stat().
+ """
+ raise TransportNotPossible('http does not support stat()')
+
+ def lock_read(self, relpath):
+ """Lock the given file for shared (read) access.
+ :return: A BogusLock object whose unlock() method does nothing.
+ """
+ # The old RemoteBranch ignored locks for reading, so we continue that
+ # tradition: no real lock is taken, only a bogus lock object is returned.
+ class BogusLock(object):
+ def __init__(self, path):
+ self.path = path
+ def unlock(self):
+ pass
+ return BogusLock(relpath)
+
+ def lock_write(self, relpath):
+ """Lock the given file for exclusive (write) access.
+ WARNING: many transports do not support this, so try to avoid using it.
+
+ :raises TransportNotPossible: always; http does not support lock_write().
+ """
+ raise TransportNotPossible('http does not support lock_write()')
+
+ def clone(self, offset=None):
+ """Return a new transport of the same class as self, rooted at offset.
+ The new object is built by calling self.__class__ on self.base when
+ offset is None, and on self.abspath(offset) otherwise.
+ """
+ if offset is None:
+ return self.__class__(self.base)
+ else:
+ return self.__class__(self.abspath(offset))
+
+ def listable(self):
+ """Return False: http has no reliable way to list directories."""
+ # NOTE: DAV might make listing possible, but it is not attempted here.
+ return False
+
+
+class HttpTransport(HttpTransportBase):
+ """Python urllib transport for http and https.
+
+ TODO: Implement pipelined versions of all of the *_multi() functions.
+ """
+
+ def __init__(self, base):
+ """Initialise the transport for the URL `base` via the parent class."""
+ super(HttpTransport, self).__init__(base)
+
+ def _get_url(self, url):
+ """Open url with a urllib2 basic-auth opener and return the opened object."""
+ mutter("get_url %s" % url)
+ manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
+ # presumably moves any credentials out of url and into manager -- TODO confirm
+ url = extract_auth(url, manager)
+ auth_handler = urllib2.HTTPBasicAuthHandler(manager)
+ opener = urllib2.build_opener(auth_handler)
+ url_f = opener.open(url)
+ return url_f
+
+ def should_cache(self):
+ """Always return True: data pulled across should be cached locally.
+ """
+ return True
+
def has(self, relpath):
"""Does the target location exist?
@@ -154,7 +191,7 @@
path = relpath
try:
path = self.abspath(relpath)
- f = get_url(path)
+ f = self._get_url(path)
# Without the read and then close()
# we tend to have busy sockets.
f.read()
@@ -180,7 +217,7 @@
path = relpath
try:
path = self.abspath(relpath)
- return get_url(path)
+ return self._get_url(path)
except urllib2.HTTPError, e:
mutter('url error code: %s for has url: %r', e.code, path)
if e.code == 404:
@@ -242,32 +279,3 @@
"""Delete the item at relpath"""
raise TransportNotPossible('http does not support delete()')
- def listable(self):
- """See Transport.listable."""
- return False
-
- def stat(self, relpath):
- """Return the stat information for a file.
- """
- raise TransportNotPossible('http does not support stat()')
-
- def lock_read(self, relpath):
- """Lock the given file for shared (read) access.
- :return: A lock object, which should be passed to Transport.unlock()
- """
- # The old RemoteBranch ignore lock for reading, so we will
- # continue that tradition and return a bogus lock object.
- class BogusLock(object):
- def __init__(self, path):
- self.path = path
- def unlock(self):
- pass
- return BogusLock(relpath)
-
- def lock_write(self, relpath):
- """Lock the given file for exclusive (write) access.
- WARNING: many transports do not support this, so trying avoid using it
-
- :return: A lock object, which should be passed to Transport.unlock()
- """
- raise TransportNotPossible('http does not support lock_write()')
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060111/6031d5d4/attachment.pgp
More information about the bazaar
mailing list