Rev 132: Re-introduced --expand-all. in http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk

John Arbash Meinel john at arbash-meinel.com
Fri Oct 15 22:07:45 BST 2010


At http://bazaar.launchpad.net/~bzr/bzr-history-db/trunk

------------------------------------------------------------
revno: 132
revision-id: john at arbash-meinel.com-20101015210728-05zq6w7rq9p9129d
parent: john at arbash-meinel.com-20100707200738-g5o7qdqo46a49nwc
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Fri 2010-10-15 16:07:28 -0500
message:
  Re-introduced --expand-all.
  
  It turns out it will be useful for Aaron while testing, so I brought it
  back. This way we can determine the worst-case scaling of the data
  structure. It is O(C*N_branches) and it has a fair multiplier (probably C<100)
  However, that is much better than O(History*N_branches).
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py	2010-05-21 17:43:13 +0000
+++ b/__init__.py	2010-10-15 21:07:28 +0000
@@ -55,11 +55,14 @@
                         help='Consider this an incremental update.'),
                      option.Option('validate',
                         help='Do extra checks to ensure correctness.'),
+                     option.Option('expand-all',
+                        help='For all imported revisions,'
+                             ' expand them as branch tips'),
                     ]
     hidden = True
 
     def run(self, directory='.', db=None, incremental=False,
-            validate=False):
+            validate=False, expand_all=False):
         import pprint
         from bzrlib import branch
         b = branch.Branch.open(directory)
@@ -68,7 +71,7 @@
             db = _ensure_db_for_command(db, b)
             importer = _mod_history_db.Importer(db, b, incremental=incremental,
                                                 validate=validate)
-            importer.do_import()
+            importer.do_import(expand_all=expand_all)
         finally:
             b.unlock()
         trace.note('Stats:\n%s' % (pprint.pformat(dict(importer._stats)),))

=== modified file 'history_db.py'
--- a/history_db.py	2010-07-07 20:07:38 +0000
+++ b/history_db.py	2010-10-15 21:07:28 +0000
@@ -107,6 +107,7 @@
         self._stats = defaultdict(lambda: 0)
         # Map child_id => [parent_db_ids]
         self._db_parent_map = {}
+        self._suppress_progress = False
 
     def set_max_cache_size(self, size):
         """Tell SQLite how many megabytes to cache internally."""
@@ -208,11 +209,31 @@
                                  "  (child, parent, parent_idx)"
                                  "VALUES (?, ?, ?)", data)
 
-    def do_import(self):
+    def do_import(self, expand_all=False):
         if revision.is_null(self._branch_tip_rev_id):
             return
         merge_sorted = self._import_tip(self._branch_tip_rev_id)
         self._db_conn.commit()
+        if not expand_all:
+            return
+        # We know it will be incremental from here on out
+        self._incremental = True
+        pb = ui.ui_factory.nested_progress_bar()
+        newly_added_count = 0
+        # We walk from the tip, backwards, on the assumption that importing a
+        # given tip will end up importing all of its mainline revisions, and
+        # then there will be nothing to do when we reach the previous tip.
+        # At this point, we want to stop detailing the individual import steps,
+        # as we are going to be doing lots of them.
+        self._suppress_progress = True
+        for idx, node in enumerate(merge_sorted):
+            pb.update('expanding', idx, len(merge_sorted))
+            new_merged = self._import_tip(node.key[0])
+            newly_added_count += len(new_merged)
+            if newly_added_count > 1000:
+                self._db_conn.commit()
+                newly_added_count = 0
+        self._db_conn.commit()
 
     def _get_merge_sorted_tip(self, tip_revision_id):
         if self._incremental:
@@ -265,8 +286,8 @@
             merge_sorted = self._graph.merge_sort((tip_revision_id,))
         return merge_sorted
 
-    def _import_tip(self, tip_revision_id, suppress_progress_and_commit=False):
-        if suppress_progress_and_commit:
+    def _import_tip(self, tip_revision_id):
+        if self._suppress_progress:
             pb = None
         else:
             pb = ui.ui_factory.nested_progress_bar()
@@ -343,7 +364,9 @@
         pb = ui.ui_factory.nested_progress_bar()
         try:
             while needed:
-                pb.update('Finding ancestry', len(all_needed), len(all_needed))
+                if not self._suppress_progress:
+                    pb.update('Finding ancestry', len(all_needed),
+                                                  len(all_needed))
                 rev_id = needed.pop()
                 if rev_id in known:
                     # We may add particular parents multiple times, just ignore



More information about the bazaar-commits mailing list