Rev 2748: Split out _make_rev_pack_map to generalize use of inventories in http://sourcefrog.net/bzr/inv-split
Martin Pool
mbp at sourcefrog.net
Wed Aug 29 09:37:40 BST 2007
At http://sourcefrog.net/bzr/inv-split
------------------------------------------------------------
revno: 2748
revision-id: mbp at sourcefrog.net-20070829083739-xcjh0knlj56132rq
parent: mbp at sourcefrog.net-20070829080854-xwt7zhkap0nwgj74
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: inv-split
timestamp: Wed 2007-08-29 18:37:39 +1000
message:
Split out _make_rev_pack_map to generalize use of inventories
Add index of objects by hash.
modified:
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-08-29 08:08:54 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-08-29 08:37:39 +0000
@@ -66,6 +66,8 @@
from bzrlib.trace import mutter, note, warning
+_HASH_INDEX_SUFFIX = '.hix'
+
class Pack(object):
"""An in memory proxy for a .pack and its indices."""
@@ -761,7 +763,7 @@
if getattr(self.repo, '_revision_knit', None) is not None:
return self.repo._revision_knit
self.repo._packs.ensure_loaded()
- pack_map, indices = self._make_rev_pack_map()
+ pack_map, indices = self.repo._make_rev_pack_map('.rix')
if self.repo.is_in_write_group():
# allow writing: queue writes to a new index
indices.insert(0, self.repo._revision_write_index)
@@ -785,17 +787,6 @@
access_method=knit_access)
return self.repo._revision_knit
- def _make_rev_pack_map(self):
- indices = []
- pack_map = {}
- for name in self.repo._packs.names():
- # TODO: maybe this should expose size to us to allow
- # sorting of the indices for better performance ?
- index_name = self.name_to_revision_index_name(name)
- indices.append(GraphIndex(self.transport, index_name))
- pack_map[indices[-1]] = (self.repo._pack_tuple(name))
- return pack_map, indices
-
def get_signature_file(self, transaction):
"""Get the signature versioned file object."""
if getattr(self.repo, '_signature_knit', None) is not None:
@@ -850,7 +841,8 @@
# create a pack map for the autopack code - XXX finish
# making a clear managed list of packs, indices and use
# that in these mapping classes
- self.repo._revision_pack_map = self._make_rev_pack_map()[0]
+ self.repo._revision_pack_map = self.repo._make_rev_pack_map(
+ '.rix')[0]
else:
del self.repo._revision_pack_map[self.repo._revision_write_index]
self.repo._revision_write_index = None
@@ -1201,12 +1193,12 @@
for dir_hash, dir_bytes in split_inventory._iter_serialized_parts():
# TODO: look in the existing index; if this text is in there then
# don't write it again.
- self._add_content_by_hash(dir_hash, dir_bytes)
+ self._add_bytes_by_hash(dir_hash, dir_bytes)
# return the last hash, which should be the root - this will fail if
# absolutely nothing was written, which is reasonable.
return dir_hash
- def _add_content_by_hash(self, new_hash, new_bytes):
+ def _add_bytes_by_hash(self, new_hash, new_bytes):
"""Add content to the repository indexed by hash.
"""
offset, length = self._open_pack_writer.add_bytes_record(
@@ -1215,6 +1207,38 @@
value = "%d %d" % (offset, length)
self._hash_write_index.add_nodes([(key, value)])
+ def _get_bytes_by_hash(self, h):
+ """Return the byte string with the given hash."""
+ # TODO: don't load the overall index every time, just get it once and hold it
+ # during the lock duration
+ hash_all_indices = self._combined_hash_index()
+ hits = hash_all_indices.iter_entries([h])
+
+ def _combined_hash_index(self):
+ # TODO: include the currently active index writer if there is one?
+ indices = self._make_rev_pack_map(_HASH_INDEX_SUFFIX)
+ return CombinedGraphIndex(indices)
+
+ def _make_rev_pack_map(self, suffix):
+ """Return information on existing indexes.
+
+ :param suffix: Index suffix added to pack name.
+
+ :returns: (pack_map, indices) where indices is a list of GraphIndex
+ objects, and pack_map is a mapping from those objects to the
+ pack tuple they describe.
+ """
+ indices = []
+ pack_map = {}
+ self._packs.ensure_loaded()
+ for name in self._packs.names():
+ # TODO: maybe this should expose size to us to allow
+ # sorting of the indices for better performance ?
+ index_name = name + suffix
+ indices.append(GraphIndex(self._index_transport, index_name))
+ pack_map[indices[-1]] = (self._pack_tuple(name))
+ return pack_map, indices
+
def _start_hash_index(self):
self._hash_write_index = InMemoryGraphIndex(reference_lists=0)
@@ -1222,8 +1246,8 @@
del self._hash_write_index
def _commit_hash_index(self, new_pack_name):
- new_hash_index_name = new_pack_name + '.hix'
- self.transport.put_file(new_hash_index_name,
+ new_hash_index_name = new_pack_name + _HASH_INDEX_SUFFIX
+ self._index_transport.put_file(new_hash_index_name,
self._hash_write_index.finish())
del self._hash_write_index
@@ -1236,6 +1260,7 @@
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
_revision_store, control_store, text_store)
index_transport = control_files._transport.clone('indices')
+ self._index_transport = index_transport
self._packs = RepositoryPackCollection(self, control_files._transport)
self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
@@ -1358,6 +1383,7 @@
KnitRepository3.__init__(self, _format, a_bzrdir, control_files,
_revision_store, control_store, text_store)
index_transport = control_files._transport.clone('indices')
+ self._index_transport = index_transport
self._packs = RepositoryPackCollection(self, control_files._transport)
self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
=== modified file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py 2007-08-29 08:08:54 +0000
+++ b/bzrlib/tests/test_pack_repository.py 2007-08-29 08:37:39 +0000
@@ -25,10 +25,11 @@
workingtree,
)
from bzrlib.inventory_split import SplitInventory
+from bzrlib.repofmt import pack_repo
from bzrlib.repository import RepositoryFormat
from bzrlib.tests import TestCase, TestCaseWithTransport
from bzrlib.transport import get_transport
-from bzrlib.repofmt import pack_repo
+from bzrlib.util.bencode import bdecode
class TestSplitInventory(TestCaseWithTransport):
@@ -53,4 +54,8 @@
repo.unlock()
# check it's a plausible hash name
self.assertEquals(40, len(root_hash))
- # check we can retrieve it
+ # check we can retrieve it, and that it looks like a plausible
+ # inventory
+ root_bytes = repo._get_bytes_by_hash(root_hash)
+ ## root_obs = bdecode(root_bytes)
+
More information about the bazaar-commits
mailing list