Rev 2747: some support for storing inventories into packs in http://sourcefrog.net/bzr/inv-split
Martin Pool
mbp at sourcefrog.net
Wed Aug 29 09:08:55 BST 2007
At http://sourcefrog.net/bzr/inv-split
------------------------------------------------------------
revno: 2747
revision-id: mbp at sourcefrog.net-20070829080854-xwt7zhkap0nwgj74
parent: mbp at sourcefrog.net-20070829064742-7jfhlbl7y2d82os6
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: inv-split
timestamp: Wed 2007-08-29 18:08:54 +1000
message:
some support for storing inventories into packs
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/inventory_split.py inventory_lazy.py-20070822123225-v3guzmdkesxlfesa-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test_inventory_split.py test_inventory_lazy.-20070822123233-9yyaaq16ypoy6rpt-1
bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-08-24 22:36:01 +0000
+++ b/bzrlib/index.py 2007-08-29 08:08:54 +0000
@@ -598,7 +598,10 @@
def add_nodes(self, nodes):
"""Add nodes to the index.
- :param nodes: An iterable of (key, node_refs, value) entries to add.
+ :param nodes: An iterable of index entries to add. If this
+ index has references, then this is a sequence of
+ (key, value, node_refs) entries; otherwise of (key, value) -- the
+ empty reference list can (and must) be omitted.
"""
if self.reference_lists:
for (key, value, node_refs) in nodes:
=== modified file 'bzrlib/inventory_split.py'
--- a/bzrlib/inventory_split.py 2007-08-29 06:47:42 +0000
+++ b/bzrlib/inventory_split.py 2007-08-29 08:08:54 +0000
@@ -30,6 +30,7 @@
from bzrlib import (
errors,
+ osutils,
xml5,
)
from bzrlib.inventory import (
@@ -93,6 +94,9 @@
def _iter_serialized_parts(self):
"""Yield a sequence of serialized hunks for this inventory.
+ Yields a sequence of (dir_hash, dir_bytes), being the ascii hash of
+ the directory, and the byte contents of its representation.
+
Each of these needs to be inserted into the repository to
completely store the inventory.
"""
@@ -118,7 +122,8 @@
else:
raise NotImplementedError(
"don't know how to encode %r" % ie)
- yield bencode(tuples)
+ dir_bytes = bencode(tuples)
+ yield osutils.sha_string(dir_bytes), dir_bytes
# TODO: Index by path, as well as by id.
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-08-28 11:00:49 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-08-29 08:08:54 +0000
@@ -1153,7 +1153,8 @@
self.repo._inv_write_index = InMemoryGraphIndex(reference_lists=2)
# if we have created an inventory index, add the new write index to it
if getattr(self.repo, '_inv_all_indices', None) is not None:
- self.repo._inv_all_indices.insert_index(0, self.repo._inv_write_index)
+ self.repo._inv_all_indices.insert_index(0,
+ self.repo._inv_write_index)
# we don't bother updating the knit layer, because there is not
# defined interface for adding inventories that should need the
# existing knit to be changed - its all behind 'repo.add_inventory'.
@@ -1190,8 +1191,42 @@
self._inventory_add_lines(inv_vf, revision_id, parents,
osutils.split_lines(inv_text))
return inv_sha1
- import pdb;pdb.set_trace()
-
+
+ def _add_split_inventory(self, split_inventory):
+ """Add a SplitInventory part by part into the repository.
+
+ This must be called in a write group.
+ """
+ index_additions = []
+ for dir_hash, dir_bytes in split_inventory._iter_serialized_parts():
+ # TODO: look in the existing index; if this text is in there then
+ # don't write it again.
+ self._add_content_by_hash(dir_hash, dir_bytes)
+ # return the last hash, which should be the root - this will fail if
+ # absolutely nothing was written, which is reasonable.
+ return dir_hash
+
+ def _add_content_by_hash(self, new_hash, new_bytes):
+ """Add content to the repository indexed by hash.
+ """
+ offset, length = self._open_pack_writer.add_bytes_record(
+ new_bytes, [(new_hash,)])
+ key = (new_hash,)
+ value = "%d %d" % (offset, length)
+ self._hash_write_index.add_nodes([(key, value)])
+
+ def _start_hash_index(self):
+ self._hash_write_index = InMemoryGraphIndex(reference_lists=0)
+
+ def _abort_hash_index(self):
+ del self._hash_write_index
+
+ def _commit_hash_index(self, new_pack_name):
+ new_hash_index_name = new_pack_name + '.hix'
+ self.transport.put_file(new_hash_index_name,
+ self._hash_write_index.finish())
+ del self._hash_write_index
+
class GraphKnitRepository1(_GraphKnitRepositoryBase, KnitRepository):
"""Experimental graph-knit using repository."""
@@ -1242,6 +1277,7 @@
self._revision_store.setup()
self.weave_store.setup()
self._inv_thunk.setup()
+ self._start_hash_index()
def _commit_write_group(self):
data_inserted = (self._revision_store.data_inserted() or
@@ -1268,6 +1304,7 @@
self.weave_store.flush(new_name)
self._inv_thunk.flush(new_name)
self._revision_store.flush(new_name)
+ self._commit_hash_index(new_name)
self._write_stream.close()
self._upload_transport.rename(self._open_pack_tuple[1],
'../packs/' + new_name + '.pack')
@@ -1362,6 +1399,7 @@
self._revision_store.setup()
self.weave_store.setup()
self._inv_thunk.setup()
+ self._start_hash_index()
def _commit_write_group(self):
data_inserted = (self._revision_store.data_inserted() or
@@ -1380,6 +1418,7 @@
self.weave_store.flush(new_name)
self._inv_thunk.flush(new_name)
self._revision_store.flush(new_name)
+ self._commit_hash_index(new_name)
self._write_stream.close()
self._upload_transport.rename(self._open_pack_tuple[1],
'../packs/' + new_name + '.pack')
=== modified file 'bzrlib/tests/test_inventory_split.py'
--- a/bzrlib/tests/test_inventory_split.py 2007-08-29 06:47:42 +0000
+++ b/bzrlib/tests/test_inventory_split.py 2007-08-29 08:08:54 +0000
@@ -16,6 +16,7 @@
from bzrlib.inventory_split import SplitInventory
+from bzrlib.osutils import sha_string
from bzrlib.tests import (
KnownFailure,
TestCaseInTempDir,
@@ -43,8 +44,8 @@
# an inventory that contains nothing but the root returns just one
# empty directory entry.
self.assertEqual(1, len(parts))
- self.assertEqual('le', parts[0])
- self.assertEqualBencoded(
+ self.assertEqual((sha_string('le'), 'le'), parts[0])
+ self.checkDirText(
[],
parts[0])
@@ -53,11 +54,12 @@
inv.add_path('f', kind='file', file_id='f-id')
parts = list(inv._iter_serialized_parts())
self.assertEqual(1, len(parts))
- self.assertEqualBencoded([['.', 'f-id', 'f']],
+ self.checkDirText([['.', 'f-id', 'f']],
parts[0])
- def assertEqualBencoded(self, expected_obj, bencoded_actual):
- self.assertEqual(expected_obj, bdecode(bencoded_actual))
+ def checkDirText(self, expected_obj, (dir_hash, dir_bytes)):
+ self.assertEqual(expected_obj, bdecode(dir_bytes))
+ self.assertEqual(sha_string(dir_bytes), dir_hash)
# TODO: test that the returned inventory parts have just exactly the
# format that we expect
=== modified file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py 2007-08-29 06:47:42 +0000
+++ b/bzrlib/tests/test_pack_repository.py 2007-08-29 08:08:54 +0000
@@ -17,33 +17,40 @@
"""Tests specific to the packed repository format."""
-from bzrlib import symbol_versioning
-from bzrlib.errors import (NotBranchError,
- NoSuchFile,
- UnknownFormatError,
- UnsupportedFormatError,
- )
-from bzrlib.index import GraphIndex
-from bzrlib.repository import RepositoryFormat
-from bzrlib.tests import TestCase, TestCaseWithTransport
-from bzrlib.transport import get_transport
-from bzrlib.transport.memory import MemoryServer
+
from bzrlib import (
bzrdir,
errors,
repository,
- upgrade,
workingtree,
)
+from bzrlib.inventory_split import SplitInventory
+from bzrlib.repository import RepositoryFormat
+from bzrlib.tests import TestCase, TestCaseWithTransport
+from bzrlib.transport import get_transport
from bzrlib.repofmt import pack_repo
class TestSplitInventory(TestCaseWithTransport):
def get_format(self):
- # TODO: Update this when a permanent name is allocated
+ # XXX: Update this when a permanent name is allocated, or make one by
+ # hand to avoid naming
return bzrdir.format_registry.make_bzrdir('experimental')
def test_add_split_inventory(self):
branch = self.make_branch('t1', format=self.get_format())
repo = branch.repository
+ inv = SplitInventory('root-id')
+ inv.add_path('hello', 'file', 'hello-id')
+ # now try to add that inventory into the repository: normally this
+ # would be done from within commit
+ repo.lock_write()
+ repo.start_write_group()
+ root_hash = repo._add_split_inventory(inv)
+ repo.commit_write_group()
+ # check that it was stored
+ repo.unlock()
+ # check it's a plausible hash name
+ self.assertEquals(40, len(root_hash))
+ # check we can retrieve it
More information about the bazaar-commits
mailing list