Rev 2746: start adding split-up inventories into pack repositories in http://sourcefrog.net/bzr/inv-split

Wed Aug 29 07:47:43 BST 2007

At http://sourcefrog.net/bzr/inv-split

------------------------------------------------------------
revno: 2746
revision-id: mbp at sourcefrog.net-20070829064742-7jfhlbl7y2d82os6
parent: mbp at sourcefrog.net-20070828110119-0oys2thu3h9m4ryw
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: inv-split
timestamp: Wed 2007-08-29 16:47:42 +1000
message:
  start adding split-up inventories into pack repositories
added:
  bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
modified:
  bzrlib/inventory_split.py      inventory_lazy.py-20070822123225-v3guzmdkesxlfesa-1
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_inventory_split.py test_inventory_lazy.-20070822123233-9yyaaq16ypoy6rpt-1
=== added file 'bzrlib/tests/test_pack_repository.py'

--- a/bzrlib/tests/test_pack_repository.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_pack_repository.py	2007-08-29 06:47:42 +0000
@@ -0,0 +1,49 @@
+# Copyright (C) 2006, 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+"""Tests specific to the packed repository format."""
+
+from bzrlib import symbol_versioning
+from bzrlib.errors import (NotBranchError,
+                           NoSuchFile,
+                           UnknownFormatError,
+                           UnsupportedFormatError,
+                           )
+from bzrlib.index import GraphIndex
+from bzrlib.repository import RepositoryFormat
+from bzrlib.tests import TestCase, TestCaseWithTransport
+from bzrlib.transport import get_transport
+from bzrlib.transport.memory import MemoryServer
+from bzrlib import (
+    bzrdir,
+    errors,
+    repository,
+    upgrade,
+    workingtree,
+    )
+from bzrlib.repofmt import pack_repo
+
+
+class TestSplitInventory(TestCaseWithTransport):
+
+    def get_format(self):
+        # TODO: Update this when a permanent name is allocated
+        return bzrdir.format_registry.make_bzrdir('experimental')
+
+    def test_add_split_inventory(self):
+        branch = self.make_branch('t1', format=self.get_format())
+        repo = branch.repository

=== modified file 'bzrlib/inventory_split.py'
--- a/bzrlib/inventory_split.py	2007-08-28 11:01:19 +0000
+++ b/bzrlib/inventory_split.py	2007-08-29 06:47:42 +0000
@@ -17,13 +17,27 @@
 """SplitInventory implementation.
 """
 
+# This is stored in the repository with one text per directory, containing the
+# entries for that directory.  Each directory is pointed to by its sha-1.
+#
+# Directories are serialized as a bencoded list, with a sub-list for each
+# directory entry.  Each sub-list begins with (kind, file-id, name) and is
+# continued in a different way depending on the kind.
+#
+# The revision object points to the root of the inventory by holding the root
+# file-id and the sha of its text.
+#
+
 from bzrlib import (
-        errors,
-        xml5,
-        )
+    errors,
+    xml5,
+    )
 from bzrlib.inventory import (
-        Inventory,
-        )
+    Inventory,
+    )
+from bzrlib.util.bencode import (
+    bencode,
+    )
 
 # possible values for SplitInventory._state:
 _absent = '_absent'
@@ -42,8 +56,6 @@
     # These inventories currently represent their contents as InventoryEntry
     # objects (files, directories, symlinks, references).  These are held in a
     # global _byid dictionary indexed by file id.
-    #
-    # TODO: Index by path, as well as by id.
 
     @staticmethod
     def from_repository(repo, revision_id):
@@ -78,10 +90,42 @@
         self.root = tmp_inv.root
         self._state = _clean
 
-    def has_id(self, file_id):
-        self._ensure_loaded()
-        return file_id in self._byid
-
-    def path2id(self, name):
-        self._ensure_loaded()
-        return Inventory.path2id(self, name)
+    def _iter_serialized_parts(self):
+        """Yield a sequence of serialized hunks for this inventory.
+
+        Each of these needs to be inserted into the repository to 
+        completely store the inventory.
+        """
+        # To commit, this needs to give back: the contents of all directories
+        # up to and including the root directory.  Then the commit needs to
+        # end up storing the sha-1 and the root file id.
+        #
+        # for now, just iterate the whole thing as one big blob
+        #
+        # TODO: split it up by directory
+        #
+        # (Encoding is already done with bencode; see the yield below.)
+        #
+        # TODO: use our own iterator which doesn't need paths
+        tuples = []
+        for path, ie in self.iter_entries():
+            if ie is self.root:
+                continue
+            if ie.kind == 'directory':
+                tuples.append(('/', ie.file_id, ie.name))
+            elif ie.kind == 'file':
+                tuples.append(('.', ie.file_id, ie.name))
+            else:
+                raise NotImplementedError(
+                    "don't know how to encode %r" % ie)
+        yield bencode(tuples)
+
+
+# TODO: Index by path, as well as by id.
+# 
+# TODO: store split up by directory?
+#
+# TODO: split out code to serialize/deserialize
+#
+# TODO: what to do about the root directory?  Should it be stored directly? Is
+# that a problem if we want to ...?  (this question was left unfinished)

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-08-28 11:01:19 +0000
+++ b/bzrlib/tests/__init__.py	2007-08-29 06:47:42 +0000
@@ -2411,6 +2411,7 @@
                    'bzrlib.tests.test_osutils',
                    'bzrlib.tests.test_osutils_encodings',
                    'bzrlib.tests.test_pack',
+                   'bzrlib.tests.test_pack_repository',
                    'bzrlib.tests.test_patch',
                    'bzrlib.tests.test_patches',
                    'bzrlib.tests.test_permissions',

=== modified file 'bzrlib/tests/test_inventory_split.py'
--- a/bzrlib/tests/test_inventory_split.py	2007-08-28 11:01:19 +0000
+++ b/bzrlib/tests/test_inventory_split.py	2007-08-29 06:47:42 +0000
@@ -22,7 +22,7 @@
         TestCaseWithTransport,
         TestCaseWithMemoryTransport,
         )
-
+from bzrlib.util.bencode import bdecode
 
 class TestInventoryFromRepository(TestCaseWithTransport):
     # Test the new interface by which an inventory class pulls an instance out
@@ -31,23 +31,35 @@
 
     def test_inventory_from_repository_no_access(self):
         repo = self.make_repository('a')
-        # you can construct an inv2 from a repository even if the repository
-        # doesn't have the data, because it's not loaded until it's used.
+        # you can construct a split inventory from a repository even if the
+        # repository doesn't have the data, because it's not loaded until
+        # it's used.
         inv = SplitInventory.from_repository(repo, 'rev-id')
 
-    def test_inventory_from_repository_iterate(self):
-        # we can get an inventory from a repository and do the usual
-        # operations on it
-        tree = self.make_branch_and_tree('foo')
-        self.build_tree(['foo/file'])
-        tree.add(['file'], ['file-id'])
-        tree.commit(message='initial commit', rev_id='revid-1')
-        repo = tree.branch.repository
-        # now get a lazy inventory
-        inv = Inventory2.from_repository(repo, 'revid-1')
-        # look inside it
-        self.assertTrue(inv.has_id('file-id'))
-        self.assertEquals(inv.path2id('file'), 'file-id')
-        # and compare that to the inventory's default format
-        ## self.assertEquals(repo.get_inventory('revid-1'), inv)
-
+    def test_serialize_root_only(self):
+        inv = SplitInventory('root-id')
+        part_iter = inv._iter_serialized_parts()
+        parts = list(part_iter)
+        # an inventory that contains nothing but the root yields just one
+        # part: a bencoded empty list, since the root itself is skipped.
+        self.assertEqual(1, len(parts))
+        self.assertEqual('le', parts[0])
+        self.assertEqualBencoded(
+            [],
+            parts[0])
+
+    def test_serialize_with_root_contents(self):
+        inv = SplitInventory('root-id')
+        inv.add_path('f', kind='file', file_id='f-id')
+        parts = list(inv._iter_serialized_parts())
+        self.assertEqual(1, len(parts))
+        self.assertEqualBencoded([['.', 'f-id', 'f']],
+            parts[0])
+
+    def assertEqualBencoded(self, expected_obj, bencoded_actual):
+        self.assertEqual(expected_obj, bdecode(bencoded_actual))
+
+    # TODO: test that the returned inventory parts have just exactly the
+    # format that we expect
+    #
+    # TODO: test that the inventory can be round-tripped successfully