Rev 3748: Implement commit-via-deltas for split inventory repositories. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Oct 13 07:36:24 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 3748
revision-id: robertc at robertcollins.net-20081013063620-dos0y220vi51fose
parent: robertc at robertcollins.net-20081013045426-s3l3b0g6x8wogd6i
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2008-10-13 17:36:20 +1100
message:
  Implement commit-via-deltas for split inventory repositories.
modified:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_inv.py       testinv.py-20050722220913-1dc326138d1a5892
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-10-10 02:27:35 +0000
+++ b/bzrlib/chk_map.py	2008-10-13 06:36:20 +0000
@@ -94,6 +94,13 @@
                 bytes = self._read_bytes(key)
                 yield name, ValueNode.deserialise(bytes).value
 
+    def key(self):
+        """Return the key for this map."""
+        if isinstance(self._root_node, tuple):
+            return self._root_node
+        else:
+            return self._root_node._key
+
     def _map(self, key, value):
         """Map key to value."""
         self._ensure_root()

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2008-10-10 02:27:35 +0000
+++ b/bzrlib/inventory.py	2008-10-13 06:36:20 +0000
@@ -714,6 +714,20 @@
 class CommonInventory(object):
     """Basic inventory logic, defined in terms of primitives like has_id."""
 
+    def id2path(self, file_id):
+        """Return as a string the path to file_id.
+        
+        >>> i = Inventory()
+        >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
+        >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
+        >>> print i.id2path('foo-id')
+        src/foo.c
+        """
+        # get all names, skipping root
+        return '/'.join(reversed(
+            [parent.name for parent in 
+             self._iter_file_id_parents(file_id)][:-1]))
+            
     def iter_entries(self, from_dir=None):
         """Return (path, entry) pairs, in order by name."""
         if from_dir is None:
@@ -1187,20 +1201,6 @@
             p.insert(0, parent.file_id)
         return p
 
-    def id2path(self, file_id):
-        """Return as a string the path to file_id.
-        
-        >>> i = Inventory()
-        >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
-        >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
-        >>> print i.id2path('foo-id')
-        src/foo.c
-        """
-        # get all names, skipping root
-        return '/'.join(reversed(
-            [parent.name for parent in 
-             self._iter_file_id_parents(file_id)][:-1]))
-            
     def path2id(self, name):
         """Walk down through directories to return entry of last component.
 
@@ -1460,6 +1460,16 @@
         # Perhaps have an explicit 'contains' method on CHKMap ?
         return len(list(self.id_to_entry.iteritems([file_id]))) == 1
 
+    def _iter_file_id_parents(self, file_id):
+        """Yield the parents of file_id up to the root."""
+        while file_id is not None:
+            try:
+                ie = self[file_id]
+            except KeyError:
+                raise errors.NoSuchId(tree=None, file_id=file_id)
+            yield ie
+            file_id = ie.parent_id
+
     def __iter__(self):
         """Iterate over the entire inventory contents; size-of-tree - beware!."""
         for file_id, _ in self.id_to_entry.iteritems():
@@ -1470,12 +1480,43 @@
         # Might want to cache the length in the meta node.
         return len([item for item in self])
 
+    def path2id(self, name):
+        """Walk down through directories to return entry of last component.
+
+        name may be either a list of path components, or a single
+        string, in which case it is automatically split.
+
+        This returns the entry of the last component in the path,
+        which may be either a file or a directory.
+
+        Returns None IFF the path is not found.
+        """
+        if isinstance(name, basestring):
+            name = osutils.splitpath(name)
+
+        # mutter("lookup path %r" % name)
+
+        parent = self.root
+        if parent is None:
+            return None
+        for f in name:
+            try:
+                children = getattr(parent, 'children', None)
+                if children is None:
+                    return None
+                cie = children[f]
+                parent = cie
+            except KeyError:
+                # or raise an error?
+                return None
+        return parent.file_id
+
     def to_lines(self):
         """Serialise the inventory to lines."""
         lines = ["chkinventory:\n"]
         lines.append("revision_id: %s\n" % self.revision_id)
         lines.append("root_id: %s\n" % self.root_id)
-        lines.append("id_to_entry: %s\n" % self.id_to_entry._root_node._key)
+        lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
         return lines
 
     @property

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-10-13 04:54:26 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-10-13 06:36:20 +0000
@@ -2037,6 +2037,42 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
 
+    def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+        parents):
+        """Add a new inventory expressed as a delta against another revision.
+        
+        :param basis_revision_id: The inventory id the delta was created
+            against.
+        :param delta: The inventory delta (see Inventory.apply_delta for
+            details).
+        :param new_revision_id: The revision id that the inventory is being
+            added for.
+        :param parents: The revision ids of the parents that new_revision_id is
+            known to have and are in the repository already. These are supplied
+            for repositories that depend on the inventory graph for revision
+            graph access, as well as for those that pun ancestry with delta
+            compression.
+
+        :returns: The validator (which is a sha1 digest, though what is sha'd is
+            repository format specific) of the serialized inventory.
+        """
+        if basis_revision_id == _mod_revision.NULL_REVISION:
+            return KnitPackRepository.add_inventory_delta(self,
+                basis_revision_id, delta, new_revision_id, parents)
+        if not self.is_in_write_group():
+            raise AssertionError("%r not in write group" % (self,))
+        _mod_revision.check_not_reserved_id(new_revision_id)
+        basis_tree = self.revision_tree(basis_revision_id)
+        basis_tree.lock_read()
+        try:
+            basis_inv = basis_tree.inventory
+            result = basis_inv.create_by_apply_delta(delta, new_revision_id)
+            inv_lines = result.to_lines()
+            return self._inventory_add_lines(new_revision_id, parents,
+                inv_lines, check_content=False)
+        finally:
+            basis_tree.unlock()
+
     def _iter_inventories(self, revision_ids):
         """Iterate over many inventory objects."""
         keys = [(revision_id,) for revision_id in revision_ids]
@@ -2583,6 +2619,7 @@
     index_builder_class = BTreeBuilder
     index_class = BTreeGraphIndex
     supports_chks = True
+    _commit_inv_deltas = True
 
     def _get_matching_bzrdir(self):
         return bzrdir.format_registry.make_bzrdir('development3')
@@ -2623,6 +2660,7 @@
     index_builder_class = BTreeBuilder
     index_class = BTreeGraphIndex
     supports_chks = True
+    _commit_inv_deltas = True
 
     def _get_matching_bzrdir(self):
         return bzrdir.format_registry.make_bzrdir(

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-10-13 04:54:26 +0000
+++ b/bzrlib/repository.py	2008-10-13 06:36:20 +0000
@@ -152,8 +152,13 @@
         deserializing the inventory, while we already have a copy in
         memory.
         """
-        return RevisionTree(self.repository, self.new_inventory,
-                            self._new_revision_id)
+        if self.repository._format._commit_inv_deltas:
+            # incremental access repositories may not have a full cached
+            # inventory.
+            return self.repository.revision_tree(self._new_revision_id)
+        else:
+            return RevisionTree(self.repository, self.new_inventory,
+                                self._new_revision_id)
 
     def finish_inventory(self):
         """Tell the builder that the inventory is finished."""
@@ -162,11 +167,21 @@
                 ' record_entry_contents, as of bzr 0.10.')
             self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
         self.new_inventory.revision_id = self._new_revision_id
-        self.inv_sha1 = self.repository.add_inventory(
-            self._new_revision_id,
-            self.new_inventory,
-            self.parents
-            )
+        if (self.repository._format._commit_inv_deltas and
+            self._recording_deletes):
+            try:
+                basis_id = self.parents[0]
+            except IndexError:
+                basis_id = _mod_revision.NULL_REVISION
+            self.inv_sha1 = self.repository.add_inventory_delta(
+                basis_id, self.basis_delta, self._new_revision_id,
+                self.parents)
+        else:
+            self.inv_sha1 = self.repository.add_inventory(
+                self._new_revision_id,
+                self.new_inventory,
+                self.parents
+                )
 
     def _gen_revision_id(self):
         """Return new revision-id."""
@@ -1762,7 +1777,7 @@
         # TODO: refactor this to use an existing revision object
         # so we don't need to read it in twice.
         if revision_id == _mod_revision.NULL_REVISION:
-            return RevisionTree(self, Inventory(root_id=None), 
+            return RevisionTree(self, Inventory(root_id=None),
                                 _mod_revision.NULL_REVISION)
         else:
             inv = self.get_revision_inventory(revision_id)
@@ -2206,6 +2221,8 @@
     # Does this format support CHK bytestring lookups. Set to True or False in
     # derived classes.
     supports_chks = None
+    # Should commit add an inventory, or an inventory delta to the repository.
+    _commit_inv_deltas = True
 
     def __str__(self):
         return "<%s>" % self.__class__.__name__

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2008-10-10 02:27:35 +0000
+++ b/bzrlib/tests/test_inv.py	2008-10-13 06:36:20 +0000
@@ -331,6 +331,48 @@
         chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
         self.assertFalse(chk_inv.has_id('fileid'))
 
+    def test_id2path(self):
+        inv = Inventory()
+        inv.revision_id = "revid"
+        inv.root.revision = "rootrev"
+        direntry = InventoryDirectory("dirid", "dir", inv.root.file_id)
+        fileentry = InventoryFile("fileid", "file", "dirid")
+        inv.add(direntry)
+        inv.add(fileentry)
+        inv["fileid"].revision = "filerev"
+        inv["fileid"].executable = True
+        inv["fileid"].text_sha1 = "ffff"
+        inv["fileid"].text_size = 1
+        inv["dirid"].revision = "filerev"
+        chk_bytes = self.get_chk_bytes()
+        chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+        bytes = ''.join(chk_inv.to_lines())
+        new_inv = CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+        self.assertEqual('', new_inv.id2path(inv.root.file_id))
+        self.assertEqual('dir', new_inv.id2path('dirid'))
+        self.assertEqual('dir/file', new_inv.id2path('fileid'))
+
+    def test_path2id(self):
+        inv = Inventory()
+        inv.revision_id = "revid"
+        inv.root.revision = "rootrev"
+        direntry = InventoryDirectory("dirid", "dir", inv.root.file_id)
+        fileentry = InventoryFile("fileid", "file", "dirid")
+        inv.add(direntry)
+        inv.add(fileentry)
+        inv["fileid"].revision = "filerev"
+        inv["fileid"].executable = True
+        inv["fileid"].text_sha1 = "ffff"
+        inv["fileid"].text_size = 1
+        inv["dirid"].revision = "filerev"
+        chk_bytes = self.get_chk_bytes()
+        chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+        bytes = ''.join(chk_inv.to_lines())
+        new_inv = CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+        self.assertEqual(inv.root.file_id, new_inv.path2id(''))
+        self.assertEqual('dirid', new_inv.path2id('dir'))
+        self.assertEqual('fileid', new_inv.path2id('dir/file'))
+
     def test_create_by_apply_delta_empty_add_child(self):
         inv = Inventory()
         inv.revision_id = "revid"




More information about the bazaar-commits mailing list