Rev 3000: implement a faster path for copying from packs back to knits, in http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/push_packs_to_knits
John Arbash Meinel
john at arbash-meinel.com
Sat Nov 17 00:16:15 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/0.93-dev/push_packs_to_knits
------------------------------------------------------------
revno: 3000
revision-id:john at arbash-meinel.com-20071117001455-abe9e9k8rb9pu239
parent: john at arbash-meinel.com-20071116235317-uymqhilped1rloqy
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: push_packs_to_knits
timestamp: Fri 2007-11-16 18:14:55 -0600
message:
implement a faster path for copying from packs back to knits.
Also include more interrepo tests for knit<=>knit and pack<=>knit.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-11-09 17:50:31 +0000
+++ b/bzrlib/knit.py 2007-11-17 00:14:55 +0000
@@ -74,6 +74,7 @@
lazy_import(globals(), """
from bzrlib import (
annotate,
+ lru_cache,
pack,
trace,
)
@@ -2224,6 +2225,41 @@
except AttributeError:
return False
+ def _text_by_text_join(self, pb, msg, version_ids, ignore_missing=False):
+ """Copy texts to the target by extracting and adding them one by one.
+
+ see join() for the parameter definitions.
+ """
+ version_ids = self._get_source_version_ids(version_ids, ignore_missing)
+ graph = self.source.get_graph(version_ids)
+ order = topo_sort(graph.items())
+
+ def size_of_content(content):
+ return sum(len(line) for line in content.text())
+ # Cache at most 10MB of parent texts
+ parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
+ compute_size=size_of_content)
+ # TODO: jam 20071116 It would be nice to have a streaming interface to
+ # get multiple texts from a source. The source could be smarter
+ # about how it handled intermediate stages.
+ # TODO: jam 20071116 Consider using 'get_line_list' instead of lots of
+ # calls to get_lines()
+ to_process = [version for version in order
+ if version not in self.target]
+ total = len(to_process)
+ pb = ui.ui_factory.nested_progress_bar()
+ try:
+ for index, version in enumerate(to_process):
+ pb.update('Converting versioned data', index, total)
+ sha1, num_bytes, parent_text = self.target.add_lines(version,
+ self.source.get_parents(version),
+ self.source.get_lines(version),
+ parent_texts=parent_cache)
+ parent_cache[version] = parent_text
+ finally:
+ pb.finished()
+ return total
+
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
"""See InterVersionedFile.join."""
assert isinstance(self.source, KnitVersionedFile)
@@ -2239,8 +2275,8 @@
# We're converting from a plain to an annotated knit. This requires
# building the annotations from scratch. The generic join code
# handles this implicitly so we delegate to it.
- return super(InterKnit, self).join(pb, msg, version_ids,
- ignore_missing)
+ return self._text_by_text_join(pb, msg, version_ids,
+ ignore_missing)
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
if not version_ids:
=== modified file 'bzrlib/tests/interrepository_implementations/__init__.py'
--- a/bzrlib/tests/interrepository_implementations/__init__.py 2007-10-30 17:39:11 +0000
+++ b/bzrlib/tests/interrepository_implementations/__init__.py 2007-11-17 00:14:55 +0000
@@ -26,9 +26,10 @@
"""
from bzrlib.repository import (
+ InterKnitRepo,
+ InterKnit1and2,
+ InterModel1and2,
InterRepository,
- InterModel1and2,
- InterKnit1and2,
)
from bzrlib.tests import (
adapt_modules,
@@ -75,7 +76,7 @@
@staticmethod
def default_test_list():
"""Generate the default list of interrepo permutations to test."""
- from bzrlib.repofmt import knitrepo, weaverepo
+ from bzrlib.repofmt import knitrepo, pack_repo, weaverepo
result = []
# test the default InterRepository between format 6 and the current
# default format.
@@ -100,6 +101,21 @@
result.append((InterKnit1and2,
knitrepo.RepositoryFormatKnit1(),
knitrepo.RepositoryFormatKnit3()))
+ result.append((InterKnitRepo,
+ knitrepo.RepositoryFormatKnit1(),
+ knitrepo.RepositoryFormatKnit1()))
+ result.append((InterKnitRepo,
+ knitrepo.RepositoryFormatKnit1(),
+ pack_repo.RepositoryFormatKnitPack1()))
+ result.append((InterKnitRepo,
+ pack_repo.RepositoryFormatKnitPack1(),
+ knitrepo.RepositoryFormatKnit1()))
+ result.append((InterKnitRepo,
+ knitrepo.RepositoryFormatKnit3(),
+ pack_repo.RepositoryFormatKnitPack3()))
+ result.append((InterKnitRepo,
+ pack_repo.RepositoryFormatKnitPack3(),
+ knitrepo.RepositoryFormatKnit3()))
return result
=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py 2007-10-30 17:39:11 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py 2007-11-17 00:14:55 +0000
@@ -368,6 +368,10 @@
to_repo.fetch(from_tree.branch.repository, tree_rev)
# to_repo should have a file_graph for from_tree.path2id('subtree') and
# revid tree_rev.
- file_vf = to_repo.weave_store.get_weave(
- from_tree.path2id('subtree'), to_repo.get_transaction())
- self.assertEqual([tree_rev], file_vf.get_ancestry([tree_rev]))
+ to_repo.lock_read()
+ try:
+ file_vf = to_repo.weave_store.get_weave(
+ from_tree.path2id('subtree'), to_repo.get_transaction())
+ self.assertEqual([tree_rev], file_vf.get_ancestry([tree_rev]))
+ finally:
+ to_repo.unlock()
More information about the bazaar-commits mailing list