Rev 3880: Merge in the add_inventory_delta code. in http://bzr.arbash-meinel.com/branches/bzr/1.11/differ_serializer

John Arbash Meinel john at arbash-meinel.com
Fri Dec 5 16:02:41 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/differ_serializer

------------------------------------------------------------
revno: 3880
revision-id: john at arbash-meinel.com-20081205160233-f4c61by1u3kf5sqj
parent: pqm at pqm.ubuntu.com-20081205135154-uwqcpl3lruah9fo3
parent: robertc at robertcollins.net-20081013044331-ibghe8j4no7uhb55
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: differ_serializer
timestamp: Fri 2008-12-05 10:02:33 -0600
message:
  Merge in the add_inventory_delta code.
added:
  bzrlib/tests/per_repository/test_add_inventory_delta.py test_add_inventory_d-20081013002626-rut81igtlqb4590z-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/__init__.py             __init__.py-20050309040759-33e65acf91bbcd5d
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/per_repository/__init__.py __init__.py-20060131092037-9564957a7d4a841b
  bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
  bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
    ------------------------------------------------------------
    revno: 3775.2.3
    revision-id: robertc at robertcollins.net-20081013044331-ibghe8j4no7uhb55
    parent: robertc at robertcollins.net-20081013043229-dn4s7hfg6h6zcobm
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-delta
    timestamp: Mon 2008-10-13 15:43:31 +1100
    message:
      Delegate basis inventory calculation during commit to the CommitBuilder object.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/__init__.py             __init__.py-20050309040759-33e65acf91bbcd5d
      bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
    ------------------------------------------------------------
    revno: 3775.2.2
    revision-id: robertc at robertcollins.net-20081013043229-dn4s7hfg6h6zcobm
    parent: robertc at robertcollins.net-20081013002817-xxxsr37afvuhbzdx
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-delta
    timestamp: Mon 2008-10-13 15:32:29 +1100
    message:
      Teach CommitBuilder to accumulate inventory deltas.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
    ------------------------------------------------------------
    revno: 3775.2.1
    revision-id: robertc at robertcollins.net-20081013002817-xxxsr37afvuhbzdx
    parent: pqm at pqm.ubuntu.com-20081012204951-j2dgh06nuzrak1ri
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: commit-delta
    timestamp: Mon 2008-10-13 11:28:17 +1100
    message:
      Create bzrlib.repository.Repository.add_inventory_delta for adding inventories via deltas.
    added:
      bzrlib/tests/per_repository/test_add_inventory_delta.py test_add_inventory_d-20081013002626-rut81igtlqb4590z-1
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/tests/per_repository/__init__.py __init__.py-20060131092037-9564957a7d4a841b
      bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2008-12-04 20:41:53 +0000
+++ b/NEWS	2008-12-05 16:02:33 +0000
@@ -32,6 +32,11 @@
 
   API CHANGES:
 
+    * The logic in commit now delegates inventory basis calculations to
+      the ``CommitBuilder`` object; this requires that the commit builder
+      in use has been updated to support the new ``recording_deletes`` and
+      ``record_delete`` methods. (Robert Collins)
+
   TESTING:
 
   INTERNALS:
@@ -42,6 +47,15 @@
       returned from each pack in turn, in forward I/O order.
       (John Arbash Meinel)
 
+    * New method ``bzrlib.repository.Repository.add_inventory_delta``
+      allows adding an inventory via an inventory delta, which can be
+      more efficient for some repository types. (Robert Collins)
+
+    * Repository ``CommitBuilder`` objects can now accumulate an inventory
+      delta. To enable this functionality call ``builder.recording_deletes``
+      and additionally call ``builder.record_delete`` when a delete
+      against the basis occurs. (Robert Collins)
+
 
 bzr 1.10rc1 2008-11-28
 ----------------------

=== modified file 'bzrlib/__init__.py'
--- a/bzrlib/__init__.py	2008-11-28 06:31:17 +0000
+++ b/bzrlib/__init__.py	2008-12-05 16:02:33 +0000
@@ -54,7 +54,7 @@
 
 
 # API compatibility version: bzrlib is currently API compatible with 1.7.
-api_minimum_version = (1, 7, 0)
+api_minimum_version = (1, 9, 0)
 
 
 def _format_version_tuple(version_info):

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2008-11-13 05:48:26 +0000
+++ b/bzrlib/commit.py	2008-12-05 16:02:33 +0000
@@ -285,9 +285,6 @@
         self.committer = committer
         self.strict = strict
         self.verbose = verbose
-        # accumulates an inventory delta to the basis entry, so we can make
-        # just the necessary updates to the workingtree's cached basis.
-        self._basis_delta = []
 
         self.work_tree.lock_write()
         self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
@@ -357,6 +354,7 @@
                 self.config, timestamp, timezone, committer, revprops, rev_id)
             
             try:
+                self.builder.recording_deletes()
                 # find the location being committed to
                 if self.bound_branch:
                     master_location = self.master_branch.base
@@ -414,7 +412,7 @@
             # Make the working tree up to date with the branch
             self._set_progress_stage("Updating the working tree")
             self.work_tree.update_basis_by_delta(self.rev_id,
-                 self._basis_delta)
+                 self.builder.basis_delta)
             self.reporter.completed(new_revno, self.rev_id)
             self._process_post_hooks(old_revno, new_revno)
         finally:
@@ -433,7 +431,7 @@
         # A merge with no effect on files
         if len(self.parents) > 1:
             return
-        # TODO: we could simplify this by using self._basis_delta.
+        # TODO: we could simplify this by using self.builder.basis_delta.
 
         # The initial commit adds a root directory, but this in itself is not
         # a worthwhile commit.
@@ -696,12 +694,10 @@
                 # required after that changes.
                 if len(self.parents) > 1:
                     ie.revision = None
-                delta, version_recorded, _ = self.builder.record_entry_contents(
+                _, version_recorded, _ = self.builder.record_entry_contents(
                     ie, self.parent_invs, path, self.basis_tree, None)
                 if version_recorded:
                     self.any_entries_changed = True
-                if delta:
-                    self._basis_delta.append(delta)
 
     def _report_and_accumulate_deletes(self):
         # XXX: Could the list of deleted paths and ids be instead taken from
@@ -726,7 +722,7 @@
             deleted.sort()
             # XXX: this is not quite directory-order sorting
             for path, file_id in deleted:
-                self._basis_delta.append((path, None, file_id, None))
+                self.builder.record_delete(path, file_id)
                 self.reporter.deleted(path)
 
     def _populate_from_inventory(self):
@@ -863,10 +859,8 @@
             ie.revision = None
         # For carried over entries we don't care about the fs hash - the repo
         # isn't generating a sha, so we're not saving computation time.
-        delta, version_recorded, fs_hash = self.builder.record_entry_contents(
+        _, version_recorded, fs_hash = self.builder.record_entry_contents(
             ie, self.parent_invs, path, self.work_tree, content_summary)
-        if delta:
-            self._basis_delta.append(delta)
         if version_recorded:
             self.any_entries_changed = True
         if report_changes:

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2008-12-01 23:50:52 +0000
+++ b/bzrlib/remote.py	2008-12-05 16:02:33 +0000
@@ -766,6 +766,12 @@
         self._ensure_real()
         return self._real_repository.add_inventory(revid, inv, parents)
 
+    def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+        parents):
+        self._ensure_real()
+        return self._real_repository.add_inventory_delta(basis_revision_id,
+            delta, new_revision_id, parents)
+
     def add_revision(self, rev_id, rev, inv=None, config=None):
         self._ensure_real()
         return self._real_repository.add_revision(

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-12-01 19:07:21 +0000
+++ b/bzrlib/repository.py	2008-12-05 16:02:33 +0000
@@ -119,6 +119,8 @@
 
         self._generate_revision_if_needed()
         self.__heads = graph.HeadsCache(repository.get_graph()).heads
+        self.basis_delta = []
+        self._recording_deletes = False
 
     def _validate_unicode_text(self, text, context):
         """Verify things like commit messages don't have bogus characters."""
@@ -230,15 +232,43 @@
         """Get a delta against the basis inventory for ie."""
         if ie.file_id not in basis_inv:
             # add
-            return (None, path, ie.file_id, ie)
+            result = (None, path, ie.file_id, ie)
+            self.basis_delta.append(result)
+            return result
         elif ie != basis_inv[ie.file_id]:
             # common but altered
             # TODO: avoid tis id2path call.
-            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
+            result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
+            self.basis_delta.append(result)
+            return result
         else:
             # common, unaltered
             return None
 
+    def record_delete(self, path, file_id):
+        """Record that a delete occured against a basis tree.
+
+        This is an optional API - when used it adds items to the basis_delta
+        being accumulated by the commit builder. It cannot be called unless the
+        method recording_deletes() has been called to inform the builder that a
+        delta is being supplied.
+
+        :param path: The path of the thing deleted.
+        :param file_id: The file id that was deleted.
+        """
+        if not self._recording_deletes:
+            raise AssertionError("recording deletes not activated.")
+        self.basis_delta.append((path, None, file_id, None))
+
+    def recording_deletes(self):
+        """Tell the commit builder that deletes are being notified.
+
+        This enables the accumulation of an inventory delta; for the resulting
+        commit to be valid deletes against the basis MUST be recorded via
+        builder.record_delete().
+        """
+        self._recording_deletes = True
+
     def record_entry_contents(self, ie, parent_invs, path, tree,
         content_summary):
         """Record the content of ie from tree into the commit if needed.
@@ -296,15 +326,19 @@
         if ie.revision is not None:
             if not self._versioned_root and path == '':
                 # repositories that do not version the root set the root's
-                # revision to the new commit even when no change occurs, and
-                # this masks when a change may have occurred against the basis,
-                # so calculate if one happened.
+                # revision to the new commit even when no change occurs (more
+                # specifically, they do not record a revision on the root; and
+                # the rev id is assigned to the root during deserialisation -
+                # this masks when a change may have occurred against the basis.
+                # To match this we always issue a delta, because the revision
+                # of the root will always be changing.
                 if ie.file_id in basis_inv:
                     delta = (basis_inv.id2path(ie.file_id), path,
                         ie.file_id, ie)
                 else:
                     # add
                     delta = (None, path, ie.file_id, ie)
+                self.basis_delta.append(delta)
                 return delta, False, None
             else:
                 # we don't need to commit this, because the caller already
@@ -614,6 +648,42 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
 
+    def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+        parents):
+        """Add a new inventory expressed as a delta against another revision.
+        
+        :param basis_revision_id: The inventory id the delta was created
+            against.
+        :param delta: The inventory delta (see Inventory.apply_delta for
+            details).
+        :param new_revision_id: The revision id that the inventory is being
+            added for.
+        :param parents: The revision ids of the parents that revision_id is
+            known to have and are in the repository already. These are supplied
+            for repositories that depend on the inventory graph for revision
+            graph access, as well as for those that pun ancestry with delta
+            compression.
+
+        :returns: The validator(which is a sha1 digest, though what is sha'd is
+            repository format specific) of the serialized inventory.
+        """
+        if not self.is_in_write_group():
+            raise AssertionError("%r not in write group" % (self,))
+        _mod_revision.check_not_reserved_id(new_revision_id)
+        basis_tree = self.revision_tree(basis_revision_id)
+        basis_tree.lock_read()
+        try:
+            # Note that this mutates the inventory of basis_tree, which not all
+            # inventory implementations may support: A better idiom would be to
+            # return a new inventory, but as there is no revision tree cache in
+            # repository this is safe for now - RBC 20081013
+            basis_inv = basis_tree.inventory
+            basis_inv.apply_delta(delta)
+            basis_inv.revision_id = new_revision_id
+            return self.add_inventory(new_revision_id, basis_inv, parents)
+        finally:
+            basis_tree.unlock()
+
     def _inventory_add_lines(self, revision_id, parents, lines,
         check_content=True):
         """Store lines in inv_vf and return the sha1 of the inventory."""

=== modified file 'bzrlib/tests/per_repository/__init__.py'
--- a/bzrlib/tests/per_repository/__init__.py	2008-11-12 03:56:51 +0000
+++ b/bzrlib/tests/per_repository/__init__.py	2008-12-05 16:02:33 +0000
@@ -858,6 +858,7 @@
     result.addTests(basic_tests)
     prefix = 'bzrlib.tests.per_repository.'
     test_repository_modules = [
+        'test_add_inventory_delta',
         'test_add_fallback_repository',
         'test_break_lock',
         'test_check',

=== added file 'bzrlib/tests/per_repository/test_add_inventory_delta.py'
--- a/bzrlib/tests/per_repository/test_add_inventory_delta.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/per_repository/test_add_inventory_delta.py	2008-10-13 00:28:17 +0000
@@ -0,0 +1,90 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for Repository.add_inventory_delta."""
+
+from bzrlib import errors, revision
+from bzrlib.tests.per_repository import TestCaseWithRepository
+
+
+class TestAddInventoryDelta(TestCaseWithRepository):
+
+    def _get_repo_in_write_group(self, path='repository'):
+        repo = self.make_repository(path)
+        repo.lock_write()
+        self.addCleanup(repo.unlock)
+        repo.start_write_group()
+        return repo
+    
+    def test_basis_missing_errors(self):
+        repo = self._get_repo_in_write_group()
+        try:
+            self.assertRaises(errors.NoSuchRevision,
+                repo.add_inventory_delta, "missing-revision", [], "new-revision",
+                ["missing-revision"])
+        finally:
+            repo.abort_write_group()
+
+    def test_not_in_write_group_errors(self):
+        repo = self.make_repository('repository')
+        repo.lock_write()
+        self.addCleanup(repo.unlock)
+        self.assertRaises(AssertionError, repo.add_inventory_delta,
+            "missing-revision", [], "new-revision", ["missing-revision"])
+
+    def make_inv_delta(self, old, new):
+        """Make an inventory delta from two inventories."""
+        old_ids = set(old._byid.iterkeys())
+        new_ids = set(new._byid.iterkeys())
+        adds = new_ids - old_ids
+        deletes = old_ids - new_ids
+        common = old_ids.intersection(new_ids)
+        delta = []
+        for file_id in deletes:
+            delta.append((old.id2path(file_id), None, file_id, None))
+        for file_id in adds:
+            delta.append((None, new.id2path(file_id), file_id, new[file_id]))
+        for file_id in common:
+            if old[file_id] != new[file_id]:
+                delta.append((old.id2path(file_id), new.id2path(file_id),
+                    file_id, new[file_id]))
+        return delta
+
+    def test_same_validator(self):
+        # Adding an inventory via delta or direct results in the same
+        # validator.
+        tree = self.make_branch_and_tree('tree')
+        revid = tree.commit("empty post")
+        revtree = tree.basis_tree()
+        revtree.lock_read()
+        self.addCleanup(revtree.unlock)
+        new_inv = revtree.inventory
+        delta = self.make_inv_delta(
+            tree.branch.repository.revision_tree(revision.NULL_REVISION).inventory,
+            new_inv)
+        repo_direct = self._get_repo_in_write_group('direct')
+        add_validator = repo_direct.add_inventory(revid, new_inv, [])
+        repo_direct.commit_write_group()
+        repo_delta = self._get_repo_in_write_group('delta')
+        try:
+            delta_validator = repo_delta.add_inventory_delta(revision.NULL_REVISION,
+                delta, revid, [])
+        except:
+            repo_delta.abort_write_group()
+            raise
+        else:
+            repo_delta.commit_write_group()
+        self.assertEqual(add_validator, delta_validator)

=== modified file 'bzrlib/tests/per_repository/test_commit_builder.py'
--- a/bzrlib/tests/per_repository/test_commit_builder.py	2008-11-12 07:38:57 +0000
+++ b/bzrlib/tests/per_repository/test_commit_builder.py	2008-12-05 16:02:33 +0000
@@ -162,6 +162,7 @@
                 self.assertEqual(
                     ('', '', ie.file_id, ie),
                     delta)
+                self.assertEqual(delta, builder.basis_delta[-1])
             else:
                 self.assertEqual(None, delta)
             # Directories do not get hashed.
@@ -191,6 +192,57 @@
         # but thats all the current contract guarantees anyway.
         self.assertEqual(rev_id, tree.branch.repository.get_inventory(rev_id).revision_id)
 
+    def test_record_delete(self):
+        tree = self.make_branch_and_tree(".")
+        self.build_tree(["foo"])
+        tree.add(["foo"], ["foo-id"])
+        rev_id = tree.commit("added foo")
+        # Remove the inventory details for foo-id, because
+        # record_entry_contents ends up copying root verbatim.
+        tree.unversion(["foo-id"])
+        tree.lock_write()
+        try:
+            basis = tree.branch.repository.revision_tree(rev_id)
+            builder = tree.branch.get_commit_builder([rev_id])
+            try:
+                builder.recording_deletes()
+                if builder.record_root_entry is True:
+                    parent_invs = [basis.inventory]
+                    del basis.inventory.root.children['foo']
+                    builder.record_entry_contents(basis.inventory.root,
+                        parent_invs, '', tree, tree.path_content_summary(''))
+                builder.record_delete("foo", "foo-id")
+                self.assertEqual(("foo", None, "foo-id", None),
+                    builder.basis_delta[-1])
+                builder.finish_inventory()
+                rev_id2 = builder.commit('delete foo')
+            except:
+                tree.branch.repository.abort_write_group()
+                raise
+        finally:
+            tree.unlock()
+        rev_tree = builder.revision_tree()
+        rev_tree.lock_read()
+        self.addCleanup(rev_tree.unlock)
+        self.assertFalse(rev_tree.path2id('foo'))
+
+    def test_record_delete_without_notification(self):
+        tree = self.make_branch_and_tree(".")
+        self.build_tree(["foo"])
+        tree.add(["foo"], ["foo-id"])
+        rev_id = tree.commit("added foo")
+        tree.lock_write()
+        try:
+            builder = tree.branch.get_commit_builder([rev_id])
+            try:
+                self.record_root(builder, tree)
+                self.assertRaises(AssertionError,
+                    builder.record_delete, "foo", "foo-id")
+            finally:
+                tree.branch.repository.abort_write_group()
+        finally:
+            tree.unlock()
+
     def test_revision_tree(self):
         tree = self.make_branch_and_tree(".")
         tree.lock_write()
@@ -424,6 +476,7 @@
             new_entry = builder.new_inventory[file_id]
             if delta_against_basis:
                 expected_delta = (name, new_name, file_id, new_entry)
+                self.assertEqual(expected_delta, builder.basis_delta[-1])
             else:
                 expected_delta = None
             self.assertEqual(expected_delta, delta)

=== modified file 'bzrlib/tests/per_repository/test_repository.py'
--- a/bzrlib/tests/per_repository/test_repository.py	2008-12-01 19:07:21 +0000
+++ b/bzrlib/tests/per_repository/test_repository.py	2008-12-05 16:02:33 +0000
@@ -1008,6 +1008,8 @@
         try:
             self.assertRaises(errors.ReservedId, repo.add_inventory, 'reserved:',
                               None, None)
+            self.assertRaises(errors.ReservedId, repo.add_inventory_delta,
+                "foo", [], 'reserved:', None)
             self.assertRaises(errors.ReservedId, repo.add_revision, 'reserved:',
                               None)
         finally:



More information about the bazaar-commits mailing list