Rev 22: Do indexing in groups of 5000, to limit peak memory use (in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk)

Robert Collins robertc at robertcollins.net
Wed Jun 11 13:10:25 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 22
revision-id: robertc at robertcollins.net-20080611121020-xoig9yvahbdg1gs8
parent: robertc at robertcollins.net-20080611082950-3abaodt5wpm4c5ac
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-06-11 22:10:20 +1000
message:
  Do indexing in groups of 5000, to limit peak memory use.
modified:
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'index.py'
--- a/index.py	2008-06-11 08:29:50 +0000
+++ b/index.py	2008-06-11 12:10:20 +0000
@@ -27,6 +27,7 @@
 from bzrlib.lockdir import LockDir
 from bzrlib.plugins.search import errors
 from bzrlib.revision import NULL_REVISION
+from bzrlib.tsort import topo_sort
 
 
 _FORMAT_1 = 'bzr-search search folder 1\n'
@@ -217,19 +218,25 @@
         # TODO: split into groups of <reasonable memory size> for which we
         # then:
         _ensure_regexes()
-        builder = ComponentIndexBuilder()
-        # here: index texts
-        # here: index inventory/paths
-        # here: index revisions
-        terms = self._terms_for_texts(locked_branch.repository,
-            revisions_to_index)
-        self._add_terms(builder, terms)
-        terms = self._terms_for_revs(locked_branch.repository,
-            revisions_to_index)
-        self._add_terms(builder, terms)
-        for rev_id in revisions_to_index:
-            builder.add_revision(rev_id)
-        self._add_index(builder)
+        graph = locked_branch.repository.get_graph()
+        parent_map = graph.get_parent_map(revisions_to_index)
+        order = topo_sort(parent_map)
+        # Assume 5000 revisions is tolerable for indexing:
+        for offset in range(len(order) / 5000 + 1):
+            revision_group = order[offset * 5000:(offset + 1) * 5000]
+            builder = ComponentIndexBuilder()
+            # here: index texts
+            # here: index inventory/paths
+            # here: index revisions
+            terms = self._terms_for_texts(locked_branch.repository,
+                revision_group)
+            self._add_terms(builder, terms)
+            terms = self._terms_for_revs(locked_branch.repository,
+                revision_group)
+            self._add_terms(builder, terms)
+            for rev_id in revision_group:
+                builder.add_revision(rev_id)
+            self._add_index(builder)
 
     def _add_index(self, builder):
         """Add a new component index to the list of indices."""




More information about the bazaar-commits mailing list