Rev 3880: Merge in the add_inventory_delta code. in http://bzr.arbash-meinel.com/branches/bzr/1.11/differ_serializer
John Arbash Meinel
john at arbash-meinel.com
Fri Dec 5 16:02:41 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/differ_serializer
------------------------------------------------------------
revno: 3880
revision-id: john at arbash-meinel.com-20081205160233-f4c61by1u3kf5sqj
parent: pqm at pqm.ubuntu.com-20081205135154-uwqcpl3lruah9fo3
parent: robertc at robertcollins.net-20081013044331-ibghe8j4no7uhb55
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: differ_serializer
timestamp: Fri 2008-12-05 10:02:33 -0600
message:
Merge in the add_inventory_delta code.
added:
bzrlib/tests/per_repository/test_add_inventory_delta.py test_add_inventory_d-20081013002626-rut81igtlqb4590z-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/__init__.py __init__.py-20050309040759-33e65acf91bbcd5d
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/per_repository/__init__.py __init__.py-20060131092037-9564957a7d4a841b
bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
------------------------------------------------------------
revno: 3775.2.3
revision-id: robertc at robertcollins.net-20081013044331-ibghe8j4no7uhb55
parent: robertc at robertcollins.net-20081013043229-dn4s7hfg6h6zcobm
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-delta
timestamp: Mon 2008-10-13 15:43:31 +1100
message:
Delegate basis inventory calculation during commit to the CommitBuilder object.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/__init__.py __init__.py-20050309040759-33e65acf91bbcd5d
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
------------------------------------------------------------
revno: 3775.2.2
revision-id: robertc at robertcollins.net-20081013043229-dn4s7hfg6h6zcobm
parent: robertc at robertcollins.net-20081013002817-xxxsr37afvuhbzdx
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-delta
timestamp: Mon 2008-10-13 15:32:29 +1100
message:
Teach CommitBuilder to accumulate inventory deltas.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/per_repository/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
------------------------------------------------------------
revno: 3775.2.1
revision-id: robertc at robertcollins.net-20081013002817-xxxsr37afvuhbzdx
parent: pqm at pqm.ubuntu.com-20081012204951-j2dgh06nuzrak1ri
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit-delta
timestamp: Mon 2008-10-13 11:28:17 +1100
message:
Create bzrlib.repository.Repository.add_inventory_delta for adding inventories via deltas.
added:
bzrlib/tests/per_repository/test_add_inventory_delta.py test_add_inventory_d-20081013002626-rut81igtlqb4590z-1
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/per_repository/__init__.py __init__.py-20060131092037-9564957a7d4a841b
bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2008-12-04 20:41:53 +0000
+++ b/NEWS 2008-12-05 16:02:33 +0000
@@ -32,6 +32,11 @@
API CHANGES:
+ * The logic in commit now delegates inventory basis calculations to
+ the ``CommitBuilder`` object; this requires that the commit builder
+ in use has been updated to support the new ``recording_deletes`` and
+ ``record_delete`` methods. (Robert Collins)
+
TESTING:
INTERNALS:
@@ -42,6 +47,15 @@
returned from each pack in turn, in forward I/O order.
(John Arbash Meinel)
+ * New method ``bzrlib.repository.Repository.add_inventory_delta``
+ allows adding an inventory via an inventory delta, which can be
+ more efficient for some repository types. (Robert Collins)
+
+ * Repository ``CommitBuilder`` objects can now accumulate an inventory
+ delta. To enable this functionality call ``builder.recording_deletes``
+ and additionally call ``builder.record_delete`` when a delete
+ against the basis occurs. (Robert Collins)
+
bzr 1.10rc1 2008-11-28
----------------------
=== modified file 'bzrlib/__init__.py'
--- a/bzrlib/__init__.py 2008-11-28 06:31:17 +0000
+++ b/bzrlib/__init__.py 2008-12-05 16:02:33 +0000
@@ -54,7 +54,7 @@
# API compatibility version: bzrlib is currently API compatible with 1.7.
-api_minimum_version = (1, 7, 0)
+api_minimum_version = (1, 9, 0)
def _format_version_tuple(version_info):
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2008-11-13 05:48:26 +0000
+++ b/bzrlib/commit.py 2008-12-05 16:02:33 +0000
@@ -285,9 +285,6 @@
self.committer = committer
self.strict = strict
self.verbose = verbose
- # accumulates an inventory delta to the basis entry, so we can make
- # just the necessary updates to the workingtree's cached basis.
- self._basis_delta = []
self.work_tree.lock_write()
self.pb = bzrlib.ui.ui_factory.nested_progress_bar()
@@ -357,6 +354,7 @@
self.config, timestamp, timezone, committer, revprops, rev_id)
try:
+ self.builder.recording_deletes()
# find the location being committed to
if self.bound_branch:
master_location = self.master_branch.base
@@ -414,7 +412,7 @@
# Make the working tree up to date with the branch
self._set_progress_stage("Updating the working tree")
self.work_tree.update_basis_by_delta(self.rev_id,
- self._basis_delta)
+ self.builder.basis_delta)
self.reporter.completed(new_revno, self.rev_id)
self._process_post_hooks(old_revno, new_revno)
finally:
@@ -433,7 +431,7 @@
# A merge with no effect on files
if len(self.parents) > 1:
return
- # TODO: we could simplify this by using self._basis_delta.
+ # TODO: we could simplify this by using self.builder.basis_delta.
# The initial commit adds a root directory, but this in itself is not
# a worthwhile commit.
@@ -696,12 +694,10 @@
# required after that changes.
if len(self.parents) > 1:
ie.revision = None
- delta, version_recorded, _ = self.builder.record_entry_contents(
+ _, version_recorded, _ = self.builder.record_entry_contents(
ie, self.parent_invs, path, self.basis_tree, None)
if version_recorded:
self.any_entries_changed = True
- if delta:
- self._basis_delta.append(delta)
def _report_and_accumulate_deletes(self):
# XXX: Could the list of deleted paths and ids be instead taken from
@@ -726,7 +722,7 @@
deleted.sort()
# XXX: this is not quite directory-order sorting
for path, file_id in deleted:
- self._basis_delta.append((path, None, file_id, None))
+ self.builder.record_delete(path, file_id)
self.reporter.deleted(path)
def _populate_from_inventory(self):
@@ -863,10 +859,8 @@
ie.revision = None
# For carried over entries we don't care about the fs hash - the repo
# isn't generating a sha, so we're not saving computation time.
- delta, version_recorded, fs_hash = self.builder.record_entry_contents(
+ _, version_recorded, fs_hash = self.builder.record_entry_contents(
ie, self.parent_invs, path, self.work_tree, content_summary)
- if delta:
- self._basis_delta.append(delta)
if version_recorded:
self.any_entries_changed = True
if report_changes:
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2008-12-01 23:50:52 +0000
+++ b/bzrlib/remote.py 2008-12-05 16:02:33 +0000
@@ -766,6 +766,12 @@
self._ensure_real()
return self._real_repository.add_inventory(revid, inv, parents)
+ def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+ parents):
+ self._ensure_real()
+ return self._real_repository.add_inventory_delta(basis_revision_id,
+ delta, new_revision_id, parents)
+
def add_revision(self, rev_id, rev, inv=None, config=None):
self._ensure_real()
return self._real_repository.add_revision(
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-12-01 19:07:21 +0000
+++ b/bzrlib/repository.py 2008-12-05 16:02:33 +0000
@@ -119,6 +119,8 @@
self._generate_revision_if_needed()
self.__heads = graph.HeadsCache(repository.get_graph()).heads
+ self.basis_delta = []
+ self._recording_deletes = False
def _validate_unicode_text(self, text, context):
"""Verify things like commit messages don't have bogus characters."""
@@ -230,15 +232,43 @@
"""Get a delta against the basis inventory for ie."""
if ie.file_id not in basis_inv:
# add
- return (None, path, ie.file_id, ie)
+ result = (None, path, ie.file_id, ie)
+ self.basis_delta.append(result)
+ return result
elif ie != basis_inv[ie.file_id]:
# common but altered
# TODO: avoid tis id2path call.
- return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
+ result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
+ self.basis_delta.append(result)
+ return result
else:
# common, unaltered
return None
+ def record_delete(self, path, file_id):
+ """Record that a delete occured against a basis tree.
+
+ This is an optional API - when used it adds items to the basis_delta
+ being accumulated by the commit builder. It cannot be called unless the
+ method recording_deletes() has been called to inform the builder that a
+ delta is being supplied.
+
+ :param path: The path of the thing deleted.
+ :param file_id: The file id that was deleted.
+ """
+ if not self._recording_deletes:
+ raise AssertionError("recording deletes not activated.")
+ self.basis_delta.append((path, None, file_id, None))
+
+ def recording_deletes(self):
+ """Tell the commit builder that deletes are being notified.
+
+ This enables the accumulation of an inventory delta; for the resulting
+ commit to be valid deletes against the basis MUST be recorded via
+ builder.record_delete().
+ """
+ self._recording_deletes = True
+
def record_entry_contents(self, ie, parent_invs, path, tree,
content_summary):
"""Record the content of ie from tree into the commit if needed.
@@ -296,15 +326,19 @@
if ie.revision is not None:
if not self._versioned_root and path == '':
# repositories that do not version the root set the root's
- # revision to the new commit even when no change occurs, and
- # this masks when a change may have occurred against the basis,
- # so calculate if one happened.
+ # revision to the new commit even when no change occurs (more
+ # specifically, they do not record a revision on the root; and
+ # the rev id is assigned to the root during deserialisation -
+ # this masks when a change may have occurred against the basis.
+ # To match this we always issue a delta, because the revision
+ # of the root will always be changing.
if ie.file_id in basis_inv:
delta = (basis_inv.id2path(ie.file_id), path,
ie.file_id, ie)
else:
# add
delta = (None, path, ie.file_id, ie)
+ self.basis_delta.append(delta)
return delta, False, None
else:
# we don't need to commit this, because the caller already
@@ -614,6 +648,42 @@
return self._inventory_add_lines(revision_id, parents,
inv_lines, check_content=False)
+ def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+ parents):
+ """Add a new inventory expressed as a delta against another revision.
+
+ :param basis_revision_id: The inventory id the delta was created
+ against.
+ :param delta: The inventory delta (see Inventory.apply_delta for
+ details).
+ :param new_revision_id: The revision id that the inventory is being
+ added for.
+ :param parents: The revision ids of the parents that revision_id is
+ known to have and are in the repository already. These are supplied
+ for repositories that depend on the inventory graph for revision
+ graph access, as well as for those that pun ancestry with delta
+ compression.
+
+ :returns: The validator(which is a sha1 digest, though what is sha'd is
+ repository format specific) of the serialized inventory.
+ """
+ if not self.is_in_write_group():
+ raise AssertionError("%r not in write group" % (self,))
+ _mod_revision.check_not_reserved_id(new_revision_id)
+ basis_tree = self.revision_tree(basis_revision_id)
+ basis_tree.lock_read()
+ try:
+ # Note that this mutates the inventory of basis_tree, which not all
+ # inventory implementations may support: A better idiom would be to
+ # return a new inventory, but as there is no revision tree cache in
+ # repository this is safe for now - RBC 20081013
+ basis_inv = basis_tree.inventory
+ basis_inv.apply_delta(delta)
+ basis_inv.revision_id = new_revision_id
+ return self.add_inventory(new_revision_id, basis_inv, parents)
+ finally:
+ basis_tree.unlock()
+
def _inventory_add_lines(self, revision_id, parents, lines,
check_content=True):
"""Store lines in inv_vf and return the sha1 of the inventory."""
=== modified file 'bzrlib/tests/per_repository/__init__.py'
--- a/bzrlib/tests/per_repository/__init__.py 2008-11-12 03:56:51 +0000
+++ b/bzrlib/tests/per_repository/__init__.py 2008-12-05 16:02:33 +0000
@@ -858,6 +858,7 @@
result.addTests(basic_tests)
prefix = 'bzrlib.tests.per_repository.'
test_repository_modules = [
+ 'test_add_inventory_delta',
'test_add_fallback_repository',
'test_break_lock',
'test_check',
=== added file 'bzrlib/tests/per_repository/test_add_inventory_delta.py'
--- a/bzrlib/tests/per_repository/test_add_inventory_delta.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/per_repository/test_add_inventory_delta.py 2008-10-13 00:28:17 +0000
@@ -0,0 +1,90 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Tests for Repository.add_inventory_delta."""
+
+from bzrlib import errors, revision
+from bzrlib.tests.per_repository import TestCaseWithRepository
+
+
+class TestAddInventoryDelta(TestCaseWithRepository):
+
+ def _get_repo_in_write_group(self, path='repository'):
+ repo = self.make_repository(path)
+ repo.lock_write()
+ self.addCleanup(repo.unlock)
+ repo.start_write_group()
+ return repo
+
+ def test_basis_missing_errors(self):
+ repo = self._get_repo_in_write_group()
+ try:
+ self.assertRaises(errors.NoSuchRevision,
+ repo.add_inventory_delta, "missing-revision", [], "new-revision",
+ ["missing-revision"])
+ finally:
+ repo.abort_write_group()
+
+ def test_not_in_write_group_errors(self):
+ repo = self.make_repository('repository')
+ repo.lock_write()
+ self.addCleanup(repo.unlock)
+ self.assertRaises(AssertionError, repo.add_inventory_delta,
+ "missing-revision", [], "new-revision", ["missing-revision"])
+
+ def make_inv_delta(self, old, new):
+ """Make an inventory delta from two inventories."""
+ old_ids = set(old._byid.iterkeys())
+ new_ids = set(new._byid.iterkeys())
+ adds = new_ids - old_ids
+ deletes = old_ids - new_ids
+ common = old_ids.intersection(new_ids)
+ delta = []
+ for file_id in deletes:
+ delta.append((old.id2path(file_id), None, file_id, None))
+ for file_id in adds:
+ delta.append((None, new.id2path(file_id), file_id, new[file_id]))
+ for file_id in common:
+ if old[file_id] != new[file_id]:
+ delta.append((old.id2path(file_id), new.id2path(file_id),
+ file_id, new[file_id]))
+ return delta
+
+ def test_same_validator(self):
+ # Adding an inventory via delta or direct results in the same
+ # validator.
+ tree = self.make_branch_and_tree('tree')
+ revid = tree.commit("empty post")
+ revtree = tree.basis_tree()
+ revtree.lock_read()
+ self.addCleanup(revtree.unlock)
+ new_inv = revtree.inventory
+ delta = self.make_inv_delta(
+ tree.branch.repository.revision_tree(revision.NULL_REVISION).inventory,
+ new_inv)
+ repo_direct = self._get_repo_in_write_group('direct')
+ add_validator = repo_direct.add_inventory(revid, new_inv, [])
+ repo_direct.commit_write_group()
+ repo_delta = self._get_repo_in_write_group('delta')
+ try:
+ delta_validator = repo_delta.add_inventory_delta(revision.NULL_REVISION,
+ delta, revid, [])
+ except:
+ repo_delta.abort_write_group()
+ raise
+ else:
+ repo_delta.commit_write_group()
+ self.assertEqual(add_validator, delta_validator)
=== modified file 'bzrlib/tests/per_repository/test_commit_builder.py'
--- a/bzrlib/tests/per_repository/test_commit_builder.py 2008-11-12 07:38:57 +0000
+++ b/bzrlib/tests/per_repository/test_commit_builder.py 2008-12-05 16:02:33 +0000
@@ -162,6 +162,7 @@
self.assertEqual(
('', '', ie.file_id, ie),
delta)
+ self.assertEqual(delta, builder.basis_delta[-1])
else:
self.assertEqual(None, delta)
# Directories do not get hashed.
@@ -191,6 +192,57 @@
# but thats all the current contract guarantees anyway.
self.assertEqual(rev_id, tree.branch.repository.get_inventory(rev_id).revision_id)
+ def test_record_delete(self):
+ tree = self.make_branch_and_tree(".")
+ self.build_tree(["foo"])
+ tree.add(["foo"], ["foo-id"])
+ rev_id = tree.commit("added foo")
+ # Remove the inventory details for foo-id, because
+ # record_entry_contents ends up copying root verbatim.
+ tree.unversion(["foo-id"])
+ tree.lock_write()
+ try:
+ basis = tree.branch.repository.revision_tree(rev_id)
+ builder = tree.branch.get_commit_builder([rev_id])
+ try:
+ builder.recording_deletes()
+ if builder.record_root_entry is True:
+ parent_invs = [basis.inventory]
+ del basis.inventory.root.children['foo']
+ builder.record_entry_contents(basis.inventory.root,
+ parent_invs, '', tree, tree.path_content_summary(''))
+ builder.record_delete("foo", "foo-id")
+ self.assertEqual(("foo", None, "foo-id", None),
+ builder.basis_delta[-1])
+ builder.finish_inventory()
+ rev_id2 = builder.commit('delete foo')
+ except:
+ tree.branch.repository.abort_write_group()
+ raise
+ finally:
+ tree.unlock()
+ rev_tree = builder.revision_tree()
+ rev_tree.lock_read()
+ self.addCleanup(rev_tree.unlock)
+ self.assertFalse(rev_tree.path2id('foo'))
+
+ def test_record_delete_without_notification(self):
+ tree = self.make_branch_and_tree(".")
+ self.build_tree(["foo"])
+ tree.add(["foo"], ["foo-id"])
+ rev_id = tree.commit("added foo")
+ tree.lock_write()
+ try:
+ builder = tree.branch.get_commit_builder([rev_id])
+ try:
+ self.record_root(builder, tree)
+ self.assertRaises(AssertionError,
+ builder.record_delete, "foo", "foo-id")
+ finally:
+ tree.branch.repository.abort_write_group()
+ finally:
+ tree.unlock()
+
def test_revision_tree(self):
tree = self.make_branch_and_tree(".")
tree.lock_write()
@@ -424,6 +476,7 @@
new_entry = builder.new_inventory[file_id]
if delta_against_basis:
expected_delta = (name, new_name, file_id, new_entry)
+ self.assertEqual(expected_delta, builder.basis_delta[-1])
else:
expected_delta = None
self.assertEqual(expected_delta, delta)
=== modified file 'bzrlib/tests/per_repository/test_repository.py'
--- a/bzrlib/tests/per_repository/test_repository.py 2008-12-01 19:07:21 +0000
+++ b/bzrlib/tests/per_repository/test_repository.py 2008-12-05 16:02:33 +0000
@@ -1008,6 +1008,8 @@
try:
self.assertRaises(errors.ReservedId, repo.add_inventory, 'reserved:',
None, None)
+ self.assertRaises(errors.ReservedId, repo.add_inventory_delta,
+ "foo", [], 'reserved:', None)
self.assertRaises(errors.ReservedId, repo.add_revision, 'reserved:',
None)
finally:
More information about the bazaar-commits
mailing list