Rev 4710: First cut at a possible fix for bug #343218 in http://bazaar.launchpad.net/~jameinel/bzr/2.0.4-faster-export-343218

John Arbash Meinel john at arbash-meinel.com
Tue Dec 15 16:40:34 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.0.4-faster-export-343218

------------------------------------------------------------
revno: 4710
revision-id: john at arbash-meinel.com-20091215164026-uuqi0rbvb61338bo
parent: pqm at pqm.ubuntu.com-20091214223015-02khbbtpzqbxlglm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0.4-faster-export-343218
timestamp: Tue 2009-12-15 10:40:26 -0600
message:
  First cut at a possible fix for bug #343218
  
  Basically, use iter_files_bytes() to grab all of the content for all files
  in one pass, rather than repeated calls to 'tree.get_file_lines()'.
  
  I haven't been able to test to completion, but against my local webserver
  the old code gets a networking error after 1-2 minutes, while the new code
  finishes in 25s.
-------------- next part --------------
=== modified file 'bzrlib/export/dir_exporter.py'
--- a/bzrlib/export/dir_exporter.py	2009-07-29 13:46:55 +0000
+++ b/bzrlib/export/dir_exporter.py	2009-12-15 16:40:26 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2005 Canonical Ltd
+# Copyright (C) 2005, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -52,21 +52,17 @@
                 raise errors.BzrError("Can't export tree to non-empty directory.")
         else:
             raise
+    # Iterate everything, building up the files we will want to export, and
+    # creating the directories and symlinks that we need.
+    # This tracks (file_id, (destination_path, executable))
+    # This matches the api that tree.iter_files_bytes() wants
+    # Note in the case of revision trees, this does trigger a double inventory
+    # lookup, hopefully it isn't too expensive.
+    to_fetch = []
     for dp, ie in _export_iter_entries(tree, subdir):
         fullpath = osutils.pathjoin(dest, dp)
         if ie.kind == "file":
-            if filtered:
-                chunks = tree.get_file_lines(ie.file_id)
-                filters = tree._content_filter_stack(dp)
-                context = ContentFilterContext(dp, tree, ie)
-                contents = filtered_output_bytes(chunks, filters, context)
-                content = ''.join(contents)
-                fileobj = StringIO.StringIO(content)
-            else:
-                fileobj = tree.get_file(ie.file_id)
-            osutils.pumpfile(fileobj, file(fullpath, 'wb'))
-            if tree.is_executable(ie.file_id):
-                os.chmod(fullpath, 0755)
+            to_fetch.append((ie.file_id, (dp, tree.is_executable(ie.file_id))))
         elif ie.kind == "directory":
             os.mkdir(fullpath)
         elif ie.kind == "symlink":
@@ -80,3 +76,18 @@
         else:
             raise errors.BzrError("don't know how to export {%s} of kind %r" %
                (ie.file_id, ie.kind))
+    # The data returned here can be in any order, but we've already created all
+    # the directories
+    for (relpath, executable), chunks in tree.iter_files_bytes(to_fetch):
+        if filtered:
+            filters = tree._content_filter_stack(relpath)
+            context = ContentFilterContext(relpath, tree, ie)
+            chunks = filtered_output_bytes(chunks, filters, context)
+        fullpath = osutils.pathjoin(dest, relpath)
+        out = open(fullpath, 'wb')
+        try:
+            out.writelines(chunks)
+        finally:
+            out.close()
+        if executable:
+            os.chmod(fullpath, 0755)



More information about the bazaar-commits mailing list