[PATCH 0/4] Speed improvement in fetch/clone

Goffredo Baroncelli kreijack at alice.it
Wed Jan 18 19:18:34 GMT 2006


On Wednesday 18 January 2006 05:00, Martin Pool wrote:
> On 15 Dec 2005, Goffredo Baroncelli <kreijack at alice.it> wrote:
> > Hi all,

> I'm just about to commit the merge of this into bzr.dev.  Robert has
> suggested that it should be in 0.7 too because of the relative
> simplicity of the change and the likelihood of a large performance gain,
> and I agree.
> 
> I'm just doing a branch of the main branch using this as a final manual
> check.  

Thanks for your great work!

With our changes and the small patch below (which implements multiple requests over a
single connection), I was able to copy the bzr.dev repository from a local server in
less than 2.5 minutes:

$ time ../bzr.dev/bzr clone 'http://127.0.0.1:8077/bazaar/bazaar-ng_stable_branch'
Branched 1522 revision(s).

real    2m25.821s
user    0m50.689s
sys     0m12.499s
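
For clarity, here is a minimal standalone sketch of the idea behind the patch
(illustrative only, not part of the patch itself): several GET requests reusing a
single httplib connection. The host, port and paths below are made-up examples.

import httplib

# Reuse one persistent connection for several GET requests (Python 2 httplib).
conn = httplib.HTTPConnection("127.0.0.1", 8077)
for path in ("/bazaar/example-branch/.bzr/branch-format",
             "/bazaar/example-branch/.bzr/revision-history"):
    conn.request("GET", path)
    resp = conn.getresponse()
    body = resp.read()   # read the whole body before issuing the next request
    print path, resp.status, len(body)
conn.close()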



> It does seem to have affected the progress bar display. (?) 
Yes, I will check why.

> -- 
> Martin
Goffredo

=== modified file 'bzrlib/transport/http.py'
--- bzrlib/transport/http.py
+++ bzrlib/transport/http.py
@@ -21,6 +21,8 @@
 import urllib, urllib2
 import urlparse
 from warnings import warn
+import httplib
+import cStringIO

 from bzrlib.transport import Transport, Server
 from bzrlib.errors import (TransportNotPossible, NoSuchFile,
@@ -60,7 +62,70 @@
             password_manager.add_password(None, host, username, password)
     url = scheme + '//' + host + port + path
     return url
-
+
+
+class http_get_error(StandardError):
+    def __init__(self, code, reason):
+        self.code = code
+        self.reason = reason
+    def __str__(self):
+        return "error=%s: %s" % (self.code, self.reason)
+
+class http_get:
+
+    def __init__(self):
+        self.conn = {}
+        self.lastt = 0
+
+    def get_url(self, url):
+
+        mutter("http_get.get_url %s" % url)
+
+        if not url.startswith("http://"):
+            raise ValueError("Unsupported protocol: %s" % url)
+
+        url = url[7:]
+
+        path_pos = url.find('/')
+        if path_pos >= 0:
+            site = url[:path_pos]
+            path = url[path_pos:]
+        else:
+            # no path component: default to the site root
+            site = url
+            path = '/'
+
+        if site not in self.conn:
+            self.conn[site] = httplib.HTTPConnection(site)
+
+        c = self.conn[site]
+
+        while True:
+            try:
+                res = None
+                c.request("GET", path)
+                res = c.getresponse()
+                if res.status != 200:
+                    res.read() # the body must be read before the connection can be reused
+                    raise http_get_error(res.status, "Path does not exist (site=%s, path=%s)" % (site, path))
+
+                return cStringIO.StringIO(res.read())
+
+            except httplib.BadStatusLine, e:
+                if e.line: raise # a non-empty status line is a genuine protocol error
+
+                # the server closed the idle connection: reconnect and retry
+                self.conn[site] = httplib.HTTPConnection(site)
+                c = self.conn[site]
+
+#def get_url(url):
+#    import urllib2
+#    mutter("get_url %s" % url)
+#    url_f = urllib2.urlopen(url)
+#    return url_f
+
+
+
 def get_url(url):
     import urllib2
     mutter("get_url %s" % url)
@@ -89,6 +154,8 @@
         (self._proto, self._host,
             self._path, self._parameters,
             self._query, self._fragment) = urlparse.urlparse(self.base)
+
+        self.http_get = http_get()

     def should_cache(self):
         """Return True if the data pulled across should be cached locally.
@@ -157,13 +224,16 @@
         path = relpath
         try:
             path = self.abspath(relpath)
-            f = get_url(path)
+            #f = get_url(path)
+
+            f = self.http_get.get_url(path)
+
             # Without the read and then close()
             # we tend to have busy sockets.
             f.read()
             f.close()
             return True
-        except urllib2.URLError, e:
+        except (http_get_error, urllib2.URLError), e:
             mutter('url error code: %s for has url: %r', e.code, path)
             if e.code == 404:
                 return False
@@ -183,8 +253,10 @@
         path = relpath
         try:
             path = self.abspath(relpath)
-            return get_url(path)
-        except urllib2.HTTPError, e:
+#            return get_url(path)
+#        except urllib2.HTTPError, e:
+            return self.http_get.get_url(path)
+        except (http_get_error, urllib2.HTTPError), e:
             mutter('url error code: %s for has url: %r', e.code, path)
             if e.code == 404:
                 raise NoSuchFile(path, extra=e)


----
 

-- 
gpg key@ keyserver.linux.it: Goffredo Baroncelli (ghigo) <kreijack at inwind.it>
Key fingerprint = CE3C 7E01 6782 30A3 5B87  87C0 BB86 505C 6B2A CFF9