Rev 4901: Move the patch for dir exporter into a helper. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0rc1-faster-export-343218

John Arbash Meinel john at arbash-meinel.com
Tue Dec 15 17:39:39 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0rc1-faster-export-343218

------------------------------------------------------------
revno: 4901
revision-id: john at arbash-meinel.com-20091215173931-w03rohk6vpk3lahq
parent: john at arbash-meinel.com-20091215172713-6pf30cealfvh1l2f
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0rc1-faster-export-343218
timestamp: Tue 2009-12-15 11:39:31 -0600
message:
  Move the patch for dir exporter into a helper.
  
  This makes it easier for other exporters to be fast. Note that because
  the content is defined as being returned in arbitrary order, this may
  not actually be appropriate for the tar and zip exporters. People complained
  that 'bzr export foo.tar.gz' wasn't stable enough because of gz timestamps;
  an arbitrary member order inside the archive would be far worse.
-------------- next part --------------
=== modified file 'bzrlib/export/__init__.py'
--- a/bzrlib/export/__init__.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/export/__init__.py	2009-12-15 17:39:31 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2005 Canonical Ltd
+# Copyright (C) 2005, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -162,6 +162,44 @@
         yield entry
 
 
+def _export_entries_and_content(tree, subdir, filtered):
+    """Iter the entries for tree suitable for exporting.
+
+    Also, get the content for files, etc, in an efficient manner. To make this
+    efficient, we first create all directories and symlinks, and then we
+    return all of the file content in random order. (Defined as the most
+    efficient that the source can return them.)
+
+    :param tree: A Tree object (WT or RevisionTree is acceptable).
+    :param subdir: None, or a relative path to export.
+    :return: Yield (relpath, inv_entry, content)
+        For directories, content is None, for symlinks, content is the symlink
+        target, for files, content is ([chunks], exec) with 'chunks' of text
+        for the file and whether the file is executable. Also, if content
+        filters are active the chunks will be in convenient form.
+    """
+    to_fetch = []
+    for relpath, ie in _export_iter_entries(tree, subdir):
+        if ie.kind == "file":
+            to_fetch.append((ie.file_id,
+                             (relpath, ie, tree.is_executable(ie.file_id))))
+        elif ie.kind == "directory":
+            yield (relpath, ie, None)
+        elif ie.kind == "symlink":
+            yield (relpath, ie, tree.get_symlink_target(ie.file_id))
+        else:
+            raise errors.BzrError("don't know how to export {%s} of kind %r"
+                                  % (ie.file_id, ie.kind))
+    # The data returned here can be in any order, but we've already yielded all
+    # the directories
+    for (relpath, ie, executable), chunks in tree.iter_files_bytes(to_fetch):
+        if filtered:
+            filters = tree._content_filter_stack(relpath)
+            context = ContentFilterContext(relpath, tree, ie)
+            chunks = filtered_output_bytes(chunks, filters, context)
+        yield (relpath, ie, (chunks, executable))
+
+
 register_lazy_exporter(None, [], 'bzrlib.export.dir_exporter', 'dir_exporter')
 register_lazy_exporter('dir', [], 'bzrlib.export.dir_exporter', 'dir_exporter')
 register_lazy_exporter('tar', ['.tar'], 'bzrlib.export.tar_exporter', 'tar_exporter')

=== modified file 'bzrlib/export/dir_exporter.py'
--- a/bzrlib/export/dir_exporter.py	2009-12-15 17:20:40 +0000
+++ b/bzrlib/export/dir_exporter.py	2009-12-15 17:39:31 +0000
@@ -21,8 +21,11 @@
 import os
 import StringIO
 
-from bzrlib import errors, osutils
-from bzrlib.export import _export_iter_entries
+from bzrlib import (
+    errors,
+    export,
+    osutils,
+    )
 from bzrlib.filters import (
     ContentFilterContext,
     filtered_output_bytes,
@@ -54,20 +57,26 @@
             raise
     # Iterate everything, building up the files we will want to export, and
     # creating the directories and symlinks that we need.
-    # This tracks (file_id, (destination_path, executable))
-    # This matches the api that tree.iter_files_bytes() wants
-    # Note in the case of revision trees, this does trigger a double inventory
-    # lookup, hopefully it isn't too expensive.
-    to_fetch = []
-    for dp, ie in _export_iter_entries(tree, subdir):
-        fullpath = osutils.pathjoin(dest, dp)
+    for relpath, ie, content in export._export_entries_and_content(tree, subdir,
+                                                                   filtered):
+        # TODO: Shouldn't we be doing something with subdir as well as dest?
+        fullpath = osutils.pathjoin(dest, relpath)
         if ie.kind == "file":
-            to_fetch.append((ie.file_id, (dp, tree.is_executable(ie.file_id))))
+            # Note that files will come after all the directories, and will be
+            # returned in 'random' order.
+            chunks, executable = content
+            out = open(fullpath, 'wb')
+            try:
+                out.writelines(chunks)
+            finally:
+                out.close()
+            if executable:
+                os.chmod(fullpath, 0755)
         elif ie.kind == "directory":
             os.mkdir(fullpath)
         elif ie.kind == "symlink":
             try:
-                symlink_target = tree.get_symlink_target(ie.file_id)
+                symlink_target = content
                 os.symlink(symlink_target, fullpath)
             except OSError,e:
                 raise errors.BzrError(
@@ -76,18 +85,3 @@
         else:
             raise errors.BzrError("don't know how to export {%s} of kind %r" %
                (ie.file_id, ie.kind))
-    # The data returned here can be in any order, but we've already created all
-    # the directories
-    for (relpath, executable), chunks in tree.iter_files_bytes(to_fetch):
-        if filtered:
-            filters = tree._content_filter_stack(relpath)
-            context = ContentFilterContext(relpath, tree, ie)
-            chunks = filtered_output_bytes(chunks, filters, context)
-        fullpath = osutils.pathjoin(dest, relpath)
-        out = open(fullpath, 'wb')
-        try:
-            out.writelines(chunks)
-        finally:
-            out.close()
-        if executable:
-            os.chmod(fullpath, 0755)



More information about the bazaar-commits mailing list