Rev 2991: Add a new repositoy method _generate_text_key_index for use by reconcile/check. in http://people.ubuntu.com/~robertc/baz2.0/reconcile
Robert Collins
robertc at robertcollins.net
Thu Nov 15 02:07:51 GMT 2007
At http://people.ubuntu.com/~robertc/baz2.0/reconcile
------------------------------------------------------------
revno: 2991
revision-id:robertc at robertcollins.net-20071115020737-51o1kmv3jsf9x8aa
parent: robertc at robertcollins.net-20071114042424-ww2cu4nqfo5fv6c4
committer: Robert Collins <robertc at robertcollins.net>
branch nick: _generate_text_key_index
timestamp: Thu 2007-11-15 13:07:37 +1100
message:
Add a new repositoy method _generate_text_key_index for use by reconcile/check.
added:
bzrlib/tests/repository_implementations/test__generate_text_key_index.py test__generate_text_-20071114232121-00h9fd8qg8kjfa5k-1
modified:
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/repository_implementations/__init__.py __init__.py-20060131092037-9564957a7d4a841b
bzrlib/tests/repository_implementations/test_check_reconcile.py test_broken.py-20070928125406-62236394w0jpbpd6-2
bzrlib/tests/test_graph.py test_graph_walker.py-20070525030405-enq4r60hhi9xrujc-1
=== added file 'bzrlib/tests/repository_implementations/test__generate_text_key_index.py'
--- a/bzrlib/tests/repository_implementations/test__generate_text_key_index.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/repository_implementations/test__generate_text_key_index.py 2007-11-15 02:07:37 +0000
@@ -0,0 +1,30 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+"""Tests for the _generate_text_key_index API."""
+
+
+from bzrlib.tests.repository_implementations import TestCaseWithRepository
+
+
+class TestGenerateTextKeyIndex(TestCaseWithRepository):
+
+ def test_empty(self):
+ repo = self.make_repository('.')
+ repo.lock_read()
+ self.addCleanup(repo.unlock)
+ self.assertEqual({}, repo._generate_text_key_index())
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2007-10-24 06:48:13 +0000
+++ b/bzrlib/graph.py 2007-11-15 02:07:37 +0000
@@ -44,6 +44,17 @@
# 2. Since len(['D', 'E']) > 1, find_lca('D', 'E') => ['A']
+class DictParentsProvider(object):
+
+ def __init__(self, ancestry):
+ self.ancestry = ancestry
+
+ def __repr__(self):
+ return 'DictParentsProvider(%r)' % self.ancestry
+
+ def get_parents(self, revisions):
+ return [self.ancestry.get(r, None) for r in revisions]
+
class _StackedParentsProvider(object):
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2007-11-14 04:24:24 +0000
+++ b/bzrlib/remote.py 2007-11-15 02:07:37 +0000
@@ -319,6 +319,17 @@
self._ensure_real()
return self._real_repository.find_text_key_references()
+ def _generate_text_key_index(self):
+ """Generate a new text key index for the repository.
+
+ This is an expensive function that will take considerable time to run.
+
+ :return: A dict mapping (file_id, revision_id) tuples to a list of
+ parents, also (file_id, revision_id) tuples.
+ """
+ self._ensure_real()
+ return self._real_repository._generate_text_key_index()
+
def get_revision_graph(self, revision_id=None):
"""See Repository.get_revision_graph()."""
if revision_id is None:
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-11-14 04:24:24 +0000
+++ b/bzrlib/repository.py 2007-11-15 02:07:37 +0000
@@ -39,6 +39,7 @@
revision as _mod_revision,
symbol_versioning,
transactions,
+ tsort,
ui,
)
from bzrlib.bundle import serializer
@@ -1227,6 +1228,91 @@
raise errors.NoSuchIdInRepository(self, file_id)
yield callable_data, weave.get_lines(revision_id)
+ def _generate_text_key_index(self):
+ """Generate a new text key index for the repository.
+
+ This is an expensive function that will take considerable time to run.
+
+ :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
+ list of parents, also text keys. When a given key has no parents,
+ the parents list will be [NULL_REVISION].
+ """
+ # All revisions, to find inventory parents.
+ revision_graph = self.get_revision_graph_with_ghosts()
+ ancestors = revision_graph.get_ancestors()
+ revision_order = tsort.topo_sort(ancestors)
+ invalid_keys = set()
+ revision_keys = {}
+ for revision_id in revision_order:
+ revision_keys[revision_id] = set()
+ text_key_references = self.find_text_key_references()
+ text_count = len(text_key_references)
+ # a cache of the text keys to allow reuse; costs a dict of all the
+ # keys, but saves a 2-tuple for every child of a given key.
+ text_key_cache = {}
+ for text_key, valid in text_key_references.iteritems():
+ if not valid:
+ invalid_keys.add(text_key)
+ else:
+ revision_keys[text_key[1]].add(text_key)
+ text_key_cache[text_key] = text_key
+ del text_key_references
+ text_index = {}
+ text_graph = graph.Graph(graph.DictParentsProvider(text_index))
+ NULL_REVISION = _mod_revision.NULL_REVISION
+ pb = ui.ui_factory.nested_progress_bar()
+ batch_size = 10 # should be ~150MB on a 55K path tree
+ batch_count = len(revision_order) / batch_size + 1
+ processed_texts = 0
+ pb.update("Calculating text parents.", processed_texts, text_count)
+ for offset in xrange(batch_count):
+ to_query = revision_order[offset * batch_size:(offset + 1) *
+ batch_size]
+ if not to_query:
+ break
+ for rev_tree in self.revision_trees(to_query):
+ revision_id = rev_tree.get_revision_id()
+ parent_ids = ancestors[revision_id]
+ for text_key in revision_keys[revision_id]:
+ pb.update("Calculating text parents.", processed_texts)
+ processed_texts += 1
+ candidate_parents = []
+ for parent_id in parent_ids:
+ parent_text_key = (text_key[0], parent_id)
+ try:
+ check_parent = parent_text_key not in \
+ revision_keys[parent_id]
+ except KeyError:
+ # the parent parent_id is a ghost:
+ check_parent = False
+ # truncate the derived graph against this ghost.
+ parent_text_key = None
+ if check_parent:
+ # look at the parent commit details inventories to
+ # determine possible candidates in the per file graph.
+ # TODO: cache here.
+ parent_inv = self.revision_tree(parent_id).inventory
+ parent_entry = parent_inv._byid.get(
+ text_key[0], None)
+ if parent_entry is not None:
+ parent_text_key = (
+ text_key[0], parent_entry.revision)
+ else:
+ parent_text_key = None
+ if parent_text_key is not None:
+ candidate_parents.append(
+ text_key_cache[parent_text_key])
+ parent_heads = text_graph.heads(candidate_parents)
+ new_parents = list(parent_heads)
+ new_parents.sort(key=lambda x:candidate_parents.index(x))
+ if new_parents == []:
+ new_parents = [NULL_REVISION]
+ text_index[text_key] = new_parents
+
+ for text_key in invalid_keys:
+ text_index[text_key] = [NULL_REVISION]
+ return text_index
+
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
"""Get an iterable listing the keys of all the data introduced by a set
of revision IDs.
=== modified file 'bzrlib/tests/repository_implementations/__init__.py'
--- a/bzrlib/tests/repository_implementations/__init__.py 2007-11-14 04:24:24 +0000
+++ b/bzrlib/tests/repository_implementations/__init__.py 2007-11-15 02:07:37 +0000
@@ -27,6 +27,7 @@
from bzrlib import (
repository,
)
+from bzrlib.revision import NULL_REVISION
from bzrlib.repofmt import (
weaverepo,
)
@@ -143,6 +144,13 @@
def corrected_fulltexts(self):
return []
+ def repository_text_key_index(self):
+ result = {}
+ if self.versioned_root:
+ result.update(self.versioned_repository_text_keys())
+ result.update(self.repository_text_keys())
+ return result
+
class UndamagedRepositoryScenario(BrokenRepoScenario):
"""A scenario where the repository has no damage.
@@ -177,6 +185,12 @@
result.update({('a-file-id', 'rev1a'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'rev1a'):[NULL_REVISION]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'rev1a'):[NULL_REVISION]}
+
class FileParentIsNotInRevisionAncestryScenario(BrokenRepoScenario):
"""A scenario where a revision 'rev2' has 'a-file' with a
@@ -236,6 +250,14 @@
('a-file-id', 'rev2'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'rev1a'):[NULL_REVISION],
+ ('a-file-id', 'rev2'):[('a-file-id', 'rev1a')]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'rev1a'):[NULL_REVISION],
+ ('TREE_ROOT', 'rev2'):[('TREE_ROOT', 'rev1a')]}
+
class FileParentHasInaccessibleInventoryScenario(BrokenRepoScenario):
"""A scenario where a revision 'rev3' containing 'a-file' modified in
@@ -296,6 +318,14 @@
('a-file-id', 'rev3'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'rev2'):[NULL_REVISION],
+ ('a-file-id', 'rev3'):[NULL_REVISION]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'rev2'):[NULL_REVISION],
+ ('TREE_ROOT', 'rev3'):[NULL_REVISION]}
+
class FileParentsNotReferencedByAnyInventoryScenario(BrokenRepoScenario):
"""A scenario where a repository with file 'a-file' which has extra
@@ -433,6 +463,24 @@
('a-file-id', 'rev5'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'rev1a'): [NULL_REVISION],
+ ('a-file-id', 'rev2c'): [('a-file-id', 'rev1a')],
+ ('a-file-id', 'rev3'): [('a-file-id', 'rev1a')],
+ ('a-file-id', 'rev4'): [('a-file-id', 'rev1a')],
+ ('a-file-id', 'rev5'): [('a-file-id', 'rev2c')]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'rev1a'): [NULL_REVISION],
+ ('TREE_ROOT', 'rev2'): [('TREE_ROOT', 'rev1a')],
+ ('TREE_ROOT', 'rev2b'): [('TREE_ROOT', 'rev1a')],
+ ('TREE_ROOT', 'rev2c'): [('TREE_ROOT', 'rev1a')],
+ ('TREE_ROOT', 'rev3'): [('TREE_ROOT', 'rev1a')],
+ ('TREE_ROOT', 'rev4'):
+ [('TREE_ROOT', 'rev2'), ('TREE_ROOT', 'rev2b')],
+ ('TREE_ROOT', 'rev5'):
+ [('TREE_ROOT', 'rev2'), ('TREE_ROOT', 'rev2c')]}
+
class UnreferencedFileParentsFromNoOpMergeScenario(BrokenRepoScenario):
"""
@@ -516,6 +564,20 @@
('a-file-id', 'rev4'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'rev1a'): [NULL_REVISION],
+ ('a-file-id', 'rev1b'): [NULL_REVISION],
+ ('a-file-id', 'rev2'): [NULL_REVISION],
+ ('a-file-id', 'rev4'): [('a-file-id', 'rev2')]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'rev1a'): [NULL_REVISION],
+ ('TREE_ROOT', 'rev1b'): [NULL_REVISION],
+ ('TREE_ROOT', 'rev2'):
+ [('TREE_ROOT', 'rev1a'), ('TREE_ROOT', 'rev1b')],
+ ('TREE_ROOT', 'rev3'): [('TREE_ROOT', 'rev2')],
+ ('TREE_ROOT', 'rev4'): [('TREE_ROOT', 'rev3')]}
+
class TooManyParentsScenario(BrokenRepoScenario):
"""A scenario where 'broken-revision' of 'a-file' claims to have parents
@@ -577,6 +639,18 @@
('a-file-id', 'good-parent'): True})
return result
+ def repository_text_keys(self):
+ return {('a-file-id', 'bad-parent'): [NULL_REVISION],
+ ('a-file-id', 'broken-revision'):
+ [('a-file-id', 'good-parent')],
+ ('a-file-id', 'good-parent'): [('a-file-id', 'bad-parent')]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'bad-parent'): [NULL_REVISION],
+ ('TREE_ROOT', 'broken-revision'):
+ [('TREE_ROOT', 'good-parent')],
+ ('TREE_ROOT', 'good-parent'): [('TREE_ROOT', 'bad-parent')]}
+
class ClaimedFileParentDidNotModifyFileScenario(BrokenRepoScenario):
"""A scenario where the file parent is the same as the revision parent, but
@@ -646,6 +720,17 @@
result.update({('a-file-id', 'basis'): True,
('a-file-id', 'current'): True})
return result
+
+ def repository_text_keys(self):
+ return {('a-file-id', 'basis'): [NULL_REVISION],
+ ('a-file-id', 'current'): [('a-file-id', 'basis')]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'basis'): ['null:'],
+ ('TREE_ROOT', 'current'):
+ [('TREE_ROOT', 'modified-something-else')],
+ ('TREE_ROOT', 'modified-something-else'):
+ [('TREE_ROOT', 'basis')]}
class IncorrectlyOrderedParentsScenario(BrokenRepoScenario):
@@ -724,6 +809,22 @@
('a-file-id', 'parent-1'): True,
('a-file-id', 'parent-2'): True})
return result
+
+ def repository_text_keys(self):
+ return {('a-file-id', 'broken-revision-1-2'):
+ [('a-file-id', 'parent-1'), ('a-file-id', 'parent-2')],
+ ('a-file-id', 'broken-revision-2-1'):
+ [('a-file-id', 'parent-2'), ('a-file-id', 'parent-1')],
+ ('a-file-id', 'parent-1'): [NULL_REVISION],
+ ('a-file-id', 'parent-2'): [NULL_REVISION]}
+
+ def versioned_repository_text_keys(self):
+ return {('TREE_ROOT', 'broken-revision-1-2'):
+ [('TREE_ROOT', 'parent-1'), ('TREE_ROOT', 'parent-2')],
+ ('TREE_ROOT', 'broken-revision-2-1'):
+ [('TREE_ROOT', 'parent-2'), ('TREE_ROOT', 'parent-1')],
+ ('TREE_ROOT', 'parent-1'): [NULL_REVISION],
+ ('TREE_ROOT', 'parent-2'): [NULL_REVISION]}
all_broken_scenario_classes = [
@@ -770,6 +871,7 @@
'test_fetch',
'test_fileid_involved',
'test_find_text_key_references',
+ 'test__generate_text_key_index',
'test_has_same_location',
'test_is_write_locked',
'test_iter_reverse_revision_history',
=== modified file 'bzrlib/tests/repository_implementations/test_check_reconcile.py'
--- a/bzrlib/tests/repository_implementations/test_check_reconcile.py 2007-11-14 04:24:24 +0000
+++ b/bzrlib/tests/repository_implementations/test_check_reconcile.py 2007-11-15 02:07:37 +0000
@@ -206,3 +206,11 @@
self.addCleanup(repo.unlock)
self.assertEqual(scenario.repository_text_key_references(),
repo.find_text_key_references())
+
+ def test__generate_text_key_index(self):
+ """Test that the generated text key index has all entries."""
+ repo, scenario = self.prepare_test_repository()
+ repo.lock_read()
+ self.addCleanup(repo.unlock)
+ self.assertEqual(scenario.repository_text_key_index(),
+ repo._generate_text_key_index())
=== modified file 'bzrlib/tests/test_graph.py'
--- a/bzrlib/tests/test_graph.py 2007-10-22 05:44:49 +0000
+++ b/bzrlib/tests/test_graph.py 2007-11-15 02:07:37 +0000
@@ -145,18 +145,6 @@
return self._real_parents_provider.get_parents(nodes)
-class DictParentsProvider(object):
-
- def __init__(self, ancestry):
- self.ancestry = ancestry
-
- def __repr__(self):
- return 'DictParentsProvider(%r)' % self.ancestry
-
- def get_parents(self, revisions):
- return [self.ancestry.get(r, None) for r in revisions]
-
-
class TestGraph(TestCaseWithMemoryTransport):
def make_graph(self, ancestors):
@@ -303,8 +291,8 @@
def test_stacked_parents_provider(self):
- parents1 = DictParentsProvider({'rev2': ['rev3']})
- parents2 = DictParentsProvider({'rev1': ['rev4']})
+ parents1 = _mod_graph.DictParentsProvider({'rev2': ['rev3']})
+ parents2 = _mod_graph.DictParentsProvider({'rev1': ['rev4']})
stacked = _mod_graph._StackedParentsProvider([parents1, parents2])
self.assertEqual([['rev4',], ['rev3']],
stacked.get_parents(['rev1', 'rev2']))
More information about the bazaar-commits
mailing list