Rev 22: Do indexing in groups of 5000, to limit peak memory use (in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk)
Robert Collins
robertc at robertcollins.net
Wed Jun 11 13:10:25 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 22
revision-id: robertc at robertcollins.net-20080611121020-xoig9yvahbdg1gs8
parent: robertc at robertcollins.net-20080611082950-3abaodt5wpm4c5ac
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-06-11 22:10:20 +1000
message:
Do indexing in groups of 5000, to limit peak memory use.
modified:
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'index.py'
--- a/index.py 2008-06-11 08:29:50 +0000
+++ b/index.py 2008-06-11 12:10:20 +0000
@@ -27,6 +27,7 @@
from bzrlib.lockdir import LockDir
from bzrlib.plugins.search import errors
from bzrlib.revision import NULL_REVISION
+from bzrlib.tsort import topo_sort
_FORMAT_1 = 'bzr-search search folder 1\n'
@@ -217,19 +218,25 @@
# TODO: split into groups of <reasonable memory size> for which we
# then:
_ensure_regexes()
- builder = ComponentIndexBuilder()
- # here: index texts
- # here: index inventory/paths
- # here: index revisions
- terms = self._terms_for_texts(locked_branch.repository,
- revisions_to_index)
- self._add_terms(builder, terms)
- terms = self._terms_for_revs(locked_branch.repository,
- revisions_to_index)
- self._add_terms(builder, terms)
- for rev_id in revisions_to_index:
- builder.add_revision(rev_id)
- self._add_index(builder)
+ graph = locked_branch.repository.get_graph()
+ parent_map = graph.get_parent_map(revisions_to_index)
+ order = topo_sort(parent_map)
+ # Assume 5000 revisions is tolerable for indexing:
+ for offset in range(len(order) / 5000 + 1):
+ revision_group = order[offset * 5000:(offset + 1) * 5000]
+ builder = ComponentIndexBuilder()
+ # here: index texts
+ # here: index inventory/paths
+ # here: index revisions
+ terms = self._terms_for_texts(locked_branch.repository,
+ revision_group)
+ self._add_terms(builder, terms)
+ terms = self._terms_for_revs(locked_branch.repository,
+ revision_group)
+ self._add_terms(builder, terms)
+ for rev_id in revision_group:
+ builder.add_revision(rev_id)
+ self._add_index(builder)
def _add_index(self, builder):
"""Add a new component index to the list of indices."""
More information about the bazaar-commits
mailing list