Rev 4823: Factor out the common code into a helper so that smart streaming also benefits. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-convert-kg-heads

John Arbash Meinel john at arbash-meinel.com
Mon Nov 30 03:29:40 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-convert-kg-heads

------------------------------------------------------------
revno: 4823
revision-id: john at arbash-meinel.com-20091130032936-bla411rbg7c2ey4k
parent: john at arbash-meinel.com-20091130031622-qqr6g04iv5g8zl5n
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b4-convert-kg-heads
timestamp: Sun 2009-11-29 21:29:36 -0600
message:
  Factor out the common code into a helper so that smart streaming also benefits.
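
For context, both call sites follow the same pattern once the helper
exists.  The sketch below is illustrative only (the wrapper function and
its name are hypothetical, not part of the patch), but the ">100
revisions" guard and the hand-off to _new_root_data_stream() mirror what
the diff does:

    from bzrlib import fetch as _mod_fetch


    def _new_root_texts_stream(source_repo, revision_ids,
                               root_keys_to_create, rev_id_to_root_id,
                               parent_map):
        # Hypothetical wrapper, for illustration: build the heads()-capable
        # graph the same way both call sites in the diff do, then feed it
        # to the existing root-text stream generator.
        if len(revision_ids) > 100:
            # Building a KnownGraph over the ancestry only pays off for
            # large batches; small batches skip it and pass None.
            a_graph = _mod_fetch._get_rich_root_heads_graph(source_repo,
                                                            revision_ids)
        else:
            a_graph = None
        return _mod_fetch._new_root_data_stream(
            root_keys_to_create, rev_id_to_root_id, parent_map, source_repo,
            graph=a_graph)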
-------------- next part --------------
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2009-08-07 04:29:36 +0000
+++ b/bzrlib/fetch.py	2009-11-30 03:29:36 +0000
@@ -28,6 +28,8 @@
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
 from bzrlib import (
+    graph as _mod_graph,
+    static_tuple,
     tsort,
     versionedfile,
     )
@@ -36,10 +38,10 @@
 from bzrlib import (
     errors,
     symbol_versioning,
+    ui,
     )
 from bzrlib.revision import NULL_REVISION
 from bzrlib.trace import mutter
-import bzrlib.ui
 
 
 class RepoFetcher(object):
@@ -96,7 +98,7 @@
         # assert not missing
         self.count_total = 0
         self.file_ids_names = {}
-        pb = bzrlib.ui.ui_factory.nested_progress_bar()
+        pb = ui.ui_factory.nested_progress_bar()
         pb.show_pct = pb.show_count = False
         try:
             pb.update("Finding revisions", 0, 2)
@@ -123,7 +125,7 @@
             raise errors.IncompatibleRepositories(
                 self.from_repository, self.to_repository,
                 "different rich-root support")
-        pb = bzrlib.ui.ui_factory.nested_progress_bar()
+        pb = ui.ui_factory.nested_progress_bar()
         try:
             pb.update("Get stream source")
             source = self.from_repository._get_source(
@@ -251,13 +253,22 @@
         # yet, and are unlikely to in non-rich-root environments anyway.
         root_id_order.sort(key=operator.itemgetter(0))
         # Create a record stream containing the roots to create.
-        from bzrlib.graph import FrozenHeadsCache
-        graph = FrozenHeadsCache(graph)
+        if len(revs) > 100:
+            graph = _get_rich_root_heads_graph(self.source, revs)
         new_roots_stream = _new_root_data_stream(
             root_id_order, rev_id_to_root_id, parent_map, self.source, graph)
         return [('texts', new_roots_stream)]
 
 
+def _get_rich_root_heads_graph(source_repo, revision_ids):
+    """Get a Graph object suitable for asking heads() for new rich roots."""
+    st = static_tuple.StaticTuple
+    revision_keys = [st(r_id).intern() for r_id in revision_ids]
+    known_graph = source_repo.revisions.get_known_graph_ancestry(
+                    revision_keys)
+    return _mod_graph.GraphThunkIdsToKeys(known_graph)
+
+
 def _new_root_data_stream(
     root_keys_to_create, rev_id_to_root_id_map, parent_map, repo, graph=None):
     """Generate a texts substream of synthesised root entries.

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-11-22 05:36:26 +0000
+++ b/bzrlib/repository.py	2009-11-30 03:29:36 +0000
@@ -26,6 +26,7 @@
     chk_map,
     debug,
     errors,
+    fetch as _mod_fetch,
     fifo_cache,
     generate_ids,
     gpg,
@@ -38,7 +39,6 @@
     lru_cache,
     osutils,
     revision as _mod_revision,
-    static_tuple,
     symbol_versioning,
     tsort,
     ui,
@@ -3413,8 +3413,7 @@
                    provided a default one will be created.
         :return: None.
         """
-        from bzrlib.fetch import RepoFetcher
-        f = RepoFetcher(to_repository=self.target,
+        f = _mod_fetch.RepoFetcher(to_repository=self.target,
                                from_repository=self.source,
                                last_revision=revision_id,
                                fetch_spec=fetch_spec,
@@ -3820,13 +3819,15 @@
                 basis_id, delta, current_revision_id, parents_parents)
             cache[current_revision_id] = parent_tree
 
-    def _fetch_batch(self, revision_ids, basis_id, cache, graph=None):
+    def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
         """Fetch across a few revisions.
 
         :param revision_ids: The revisions to copy
         :param basis_id: The revision_id of a tree that must be in cache, used
             as a basis for delta when no other base is available
         :param cache: A cache of RevisionTrees that we can use.
+        :param a_graph: A Graph object used to determine the heads() of
+            the rich-root data stream.
         :return: The revision_id of the last converted tree. The RevisionTree
             for it will be in cache
         """
@@ -3896,10 +3897,9 @@
         from_texts = self.source.texts
         to_texts = self.target.texts
         if root_keys_to_create:
-            from bzrlib.fetch import _new_root_data_stream
-            root_stream = _new_root_data_stream(
+            root_stream = _mod_fetch._new_root_data_stream(
                 root_keys_to_create, self._revision_id_to_root_id, parent_map,
-                self.source, graph=graph)
+                self.source, graph=a_graph)
             to_texts.insert_record_stream(root_stream)
         to_texts.insert_record_stream(from_texts.get_record_stream(
             text_keys, self.target._format._fetch_order,
@@ -3962,21 +3962,11 @@
         cache[basis_id] = basis_tree
         del basis_tree # We don't want to hang on to it here
         hints = []
-        if self._converting_to_rich_root:
-            st = static_tuple.StaticTuple
-            revision_keys = [st(r_id).intern() for r_id in revision_ids]
-            known_graph = self.source.revisions.get_known_graph_ancestry(
-                            revision_keys)
-            class ThunkIdsToKeysHeads(object):
-                def __init__(self, kg):
-                    self.kg = kg
-                def heads(self, revision_ids):
-                    revision_keys = [(r,) for r in revision_ids]
-                    heads = self.kg.heads(revision_keys)
-                    return set([h[0] for h in heads])
-            graph = ThunkIdsToKeysHeads(known_graph)
+        if self._converting_to_rich_root and len(revision_ids) > 100:
+            a_graph = _mod_fetch._get_rich_root_heads_graph(self.source,
+                                                            revision_ids)
         else:
-            graph = None
+            a_graph = None
 
         for offset in range(0, len(revision_ids), batch_size):
             self.target.start_write_group()
@@ -3985,7 +3975,7 @@
                           len(revision_ids))
                 batch = revision_ids[offset:offset+batch_size]
                 basis_id = self._fetch_batch(batch, basis_id, cache,
-                                             graph=graph)
+                                             a_graph=a_graph)
             except:
                 self.target.abort_write_group()
                 raise
@@ -4464,8 +4454,7 @@
         fetching the inventory weave.
         """
         if self._rich_root_upgrade():
-            import bzrlib.fetch
-            return bzrlib.fetch.Inter1and2Helper(
+            return _mod_fetch.Inter1and2Helper(
                 self.from_repository).generate_root_texts(revs)
         else:
             return []
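
The inline ThunkIdsToKeysHeads class removed from repository.py above is
what the shared helper now supplies via bzrlib.graph.  As a reference, the
adapter behaves roughly like the following sketch, which just restates the
removed code (it is not the actual GraphThunkIdsToKeys implementation):

    class _IdsToKeysHeadsSketch(object):
        """Roughly what the removed inline ThunkIdsToKeysHeads did.

        KnownGraph.heads() works on key tuples such as ('rev-id',), while
        the rich-root stream code asks heads() with plain revision id
        strings, so the adapter re-keys the query and unwraps the answer.
        """

        def __init__(self, known_graph):
            self._known_graph = known_graph

        def heads(self, revision_ids):
            revision_keys = [(r,) for r in revision_ids]
            heads = self._known_graph.heads(revision_keys)
            return set([h[0] for h in heads])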


