Rev 2746: start adding split-up inventories into pack repositories in http://sourcefrog.net/bzr/inv-split
Martin Pool
mbp at sourcefrog.net
Wed Aug 29 07:47:43 BST 2007
At http://sourcefrog.net/bzr/inv-split
------------------------------------------------------------
revno: 2746
revision-id: mbp at sourcefrog.net-20070829064742-7jfhlbl7y2d82os6
parent: mbp at sourcefrog.net-20070828110119-0oys2thu3h9m4ryw
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: inv-split
timestamp: Wed 2007-08-29 16:47:42 +1000
message:
start adding split-up inventories into pack repositories
added:
bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
modified:
bzrlib/inventory_split.py inventory_lazy.py-20070822123225-v3guzmdkesxlfesa-1
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
bzrlib/tests/test_inventory_split.py test_inventory_lazy.-20070822123233-9yyaaq16ypoy6rpt-1
=== added file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_pack_repository.py 2007-08-29 06:47:42 +0000
@@ -0,0 +1,49 @@
+# Copyright (C) 2006, 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+"""Tests specific to the packed repository format."""
+
+from bzrlib import symbol_versioning
+from bzrlib.errors import (NotBranchError,
+ NoSuchFile,
+ UnknownFormatError,
+ UnsupportedFormatError,
+ )
+from bzrlib.index import GraphIndex
+from bzrlib.repository import RepositoryFormat
+from bzrlib.tests import TestCase, TestCaseWithTransport
+from bzrlib.transport import get_transport
+from bzrlib.transport.memory import MemoryServer
+from bzrlib import (
+ bzrdir,
+ errors,
+ repository,
+ upgrade,
+ workingtree,
+ )
+from bzrlib.repofmt import pack_repo
+
+
+class TestSplitInventory(TestCaseWithTransport):
+
+ def get_format(self):
+ # TODO: Update this when a permanent name is allocated
+ return bzrdir.format_registry.make_bzrdir('experimental')
+
+ def test_add_split_inventory(self):
+ branch = self.make_branch('t1', format=self.get_format())
+ repo = branch.repository
=== modified file 'bzrlib/inventory_split.py'
--- a/bzrlib/inventory_split.py 2007-08-28 11:01:19 +0000
+++ b/bzrlib/inventory_split.py 2007-08-29 06:47:42 +0000
@@ -17,13 +17,27 @@
"""SplitInventory implementation.
"""
+# This is stored in the repository with one text per directory, containing the
+# entries for that directory. Each directory is pointed-to by its sha-1.
+#
+# Directories are serialized as a bencoded list, with a sub-list for each
+# directory entry. The list begins with (kind, file-id, name) and is
+# continued in a different way depending on the kind.
+#
+# The revision object points to the root of the inventory by holding the root
+# file-id and the sha of its text.
+#
+
from bzrlib import (
- errors,
- xml5,
- )
+ errors,
+ xml5,
+ )
from bzrlib.inventory import (
- Inventory,
- )
+ Inventory,
+ )
+from bzrlib.util.bencode import (
+ bencode,
+ )
# possible values for SplitInventory._state:
_absent = '_absent'
@@ -42,8 +56,6 @@
# These inventories currently represent their contents as InventoryEntry
# objects (files, directories, symlinks, references). These are held in a
# global _byid dictionary indexed by file id.
- #
- # TODO: Index by path, as well as by id.
@staticmethod
def from_repository(repo, revision_id):
@@ -78,10 +90,42 @@
self.root = tmp_inv.root
self._state = _clean
- def has_id(self, file_id):
- self._ensure_loaded()
- return file_id in self._byid
-
- def path2id(self, name):
- self._ensure_loaded()
- return Inventory.path2id(self, name)
+ def _iter_serialized_parts(self):
+ """Yield a sequence of serialized hunks for this inventory.
+
+ Each of these needs to be inserted into the repository to
+ completely store the inventory.
+ """
+ # To commit, this needs to give back: the contents of all directories
+ # up to and including the root directory. Then the commit needs to
+ # end up storing the sha-1 and the root file id.
+ #
+ # for now, just iterate the whole thing as one big blob
+ #
+ # TODO: split it up by directory
+ #
+ # TODO: encode using bencode or something similar
+ #
+ # TODO: use our own iterator which doesn't need paths
+ tuples = []
+ for path, ie in self.iter_entries():
+ if ie is self.root:
+ continue
+ if ie.kind == 'directory':
+ tuples.append(('/', ie.file_id, ie.name))
+ elif ie.kind == 'file':
+ tuples.append(('.', ie.file_id, ie.name))
+ else:
+ raise NotImplementedError(
+ "don't know how to encode %r" % ie)
+ yield bencode(tuples)
+
+
+# TODO: Index by path, as well as by id.
+#
+# TODO: store split up by directory?
+#
+# TODO: split out code to serialize/deserialize
+#
+# TODO: what to do about the root directory? should it be stored directly? is
+# that a problem if we want to
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2007-08-28 11:01:19 +0000
+++ b/bzrlib/tests/__init__.py 2007-08-29 06:47:42 +0000
@@ -2411,6 +2411,7 @@
'bzrlib.tests.test_osutils',
'bzrlib.tests.test_osutils_encodings',
'bzrlib.tests.test_pack',
+ 'bzrlib.tests.test_pack_repository',
'bzrlib.tests.test_patch',
'bzrlib.tests.test_patches',
'bzrlib.tests.test_permissions',
=== modified file 'bzrlib/tests/test_inventory_split.py'
--- a/bzrlib/tests/test_inventory_split.py 2007-08-28 11:01:19 +0000
+++ b/bzrlib/tests/test_inventory_split.py 2007-08-29 06:47:42 +0000
@@ -22,7 +22,7 @@
TestCaseWithTransport,
TestCaseWithMemoryTransport,
)
-
+from bzrlib.util.bencode import bdecode
class TestInventoryFromRepository(TestCaseWithTransport):
# Test the new interface by which an inventory class pulls an instance out
@@ -31,23 +31,35 @@
def test_inventory_from_repository_no_access(self):
repo = self.make_repository('a')
- # you can construct an inv2 from a repository even if the repository
- # doesn't have the data, because it's not loaded until it's used.
+ # you can construct a split inventory from a repository even if the
+ # repository doesn't have the data, because it's not loaded until
+ # it's used.
inv = SplitInventory.from_repository(repo, 'rev-id')
- def test_inventory_from_repository_iterate(self):
- # we can get an inventory from a repository and do the usual
- # operations on it
- tree = self.make_branch_and_tree('foo')
- self.build_tree(['foo/file'])
- tree.add(['file'], ['file-id'])
- tree.commit(message='initial commit', rev_id='revid-1')
- repo = tree.branch.repository
- # now get a lazy inventory
- inv = Inventory2.from_repository(repo, 'revid-1')
- # look inside it
- self.assertTrue(inv.has_id('file-id'))
- self.assertEquals(inv.path2id('file'), 'file-id')
- # and compare that to the inventory's default format
- ## self.assertEquals(repo.get_inventory('revid-1'), inv)
-
+ def test_serialize_root_only(self):
+ inv = SplitInventory('root-id')
+ part_iter = inv._iter_serialized_parts()
+ parts = list(part_iter)
+ # an inventory that contains nothing but the root returns just one
+ # empty directory entry.
+ self.assertEqual(1, len(parts))
+ self.assertEqual('le', parts[0])
+ self.assertEqualBencoded(
+ [],
+ parts[0])
+
+ def test_serialize_with_root_contents(self):
+ inv = SplitInventory('root-id')
+ inv.add_path('f', kind='file', file_id='f-id')
+ parts = list(inv._iter_serialized_parts())
+ self.assertEqual(1, len(parts))
+ self.assertEqualBencoded([['.', 'f-id', 'f']],
+ parts[0])
+
+ def assertEqualBencoded(self, expected_obj, bencoded_actual):
+ self.assertEqual(expected_obj, bdecode(bencoded_actual))
+
+ # TODO: test that the returned inventory parts have just exactly the
+ # format that we expect
+ #
+ # TODO: test that the inventory can be round-tripped successfully
More information about the bazaar-commits
mailing list