Rev 3748: Implement commit-via-deltas for split inventory repositories. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Mon Oct 13 07:36:24 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3748
revision-id: robertc at robertcollins.net-20081013063620-dos0y220vi51fose
parent: robertc at robertcollins.net-20081013045426-s3l3b0g6x8wogd6i
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2008-10-13 17:36:20 +1100
message:
Implement commit-via-deltas for split inventory repositories.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_inv.py testinv.py-20050722220913-1dc326138d1a5892
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2008-10-10 02:27:35 +0000
+++ b/bzrlib/chk_map.py 2008-10-13 06:36:20 +0000
@@ -94,6 +94,13 @@
bytes = self._read_bytes(key)
yield name, ValueNode.deserialise(bytes).value
+ def key(self):
+ """Return the key for this map."""
+ if isinstance(self._root_node, tuple):
+ return self._root_node
+ else:
+ return self._root_node._key
+
def _map(self, key, value):
"""Map key to value."""
self._ensure_root()
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2008-10-10 02:27:35 +0000
+++ b/bzrlib/inventory.py 2008-10-13 06:36:20 +0000
@@ -714,6 +714,20 @@
class CommonInventory(object):
"""Basic inventory logic, defined in terms of primitives like has_id."""
+ def id2path(self, file_id):
+ """Return as a string the path to file_id.
+
+ >>> i = Inventory()
+ >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
+ >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
+ >>> print i.id2path('foo-id')
+ src/foo.c
+ """
+ # get all names, skipping root
+ return '/'.join(reversed(
+ [parent.name for parent in
+ self._iter_file_id_parents(file_id)][:-1]))
+
def iter_entries(self, from_dir=None):
"""Return (path, entry) pairs, in order by name."""
if from_dir is None:
@@ -1187,20 +1201,6 @@
p.insert(0, parent.file_id)
return p
- def id2path(self, file_id):
- """Return as a string the path to file_id.
-
- >>> i = Inventory()
- >>> e = i.add(InventoryDirectory('src-id', 'src', ROOT_ID))
- >>> e = i.add(InventoryFile('foo-id', 'foo.c', parent_id='src-id'))
- >>> print i.id2path('foo-id')
- src/foo.c
- """
- # get all names, skipping root
- return '/'.join(reversed(
- [parent.name for parent in
- self._iter_file_id_parents(file_id)][:-1]))
-
def path2id(self, name):
"""Walk down through directories to return entry of last component.
@@ -1460,6 +1460,16 @@
# Perhaps have an explicit 'contains' method on CHKMap ?
return len(list(self.id_to_entry.iteritems([file_id]))) == 1
+ def _iter_file_id_parents(self, file_id):
+ """Yield the entry for file_id, then each of its parents up to the root."""
+ while file_id is not None:
+ try:
+ ie = self[file_id]
+ except KeyError:
+ raise errors.NoSuchId(tree=None, file_id=file_id)
+ yield ie
+ file_id = ie.parent_id
+
def __iter__(self):
"""Iterate over the entire inventory contents; size-of-tree - beware!."""
for file_id, _ in self.id_to_entry.iteritems():
@@ -1470,12 +1480,43 @@
# Might want to cache the length in the meta node.
return len([item for item in self])
+ def path2id(self, name):
+ """Walk down through directories to return the file id of the last component.
+
+ name may be either a list of path components, or a single
+ string, in which case it is automatically split.
+
+ This returns the file id of the last component in the path,
+ which may be either a file or a directory.
+
+ Returns None if and only if the path is not found.
+ """
+ if isinstance(name, basestring):
+ name = osutils.splitpath(name)
+
+ # mutter("lookup path %r" % name)
+
+ parent = self.root
+ if parent is None:
+ return None
+ for f in name:
+ try:
+ children = getattr(parent, 'children', None)
+ if children is None:
+ return None
+ cie = children[f]
+ parent = cie
+ except KeyError:
+ # or raise an error?
+ return None
+ return parent.file_id
+
def to_lines(self):
"""Serialise the inventory to lines."""
lines = ["chkinventory:\n"]
lines.append("revision_id: %s\n" % self.revision_id)
lines.append("root_id: %s\n" % self.root_id)
- lines.append("id_to_entry: %s\n" % self.id_to_entry._root_node._key)
+ lines.append("id_to_entry: %s\n" % self.id_to_entry.key())
return lines
@property
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-10-13 04:54:26 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-10-13 06:36:20 +0000
@@ -2037,6 +2037,42 @@
return self._inventory_add_lines(revision_id, parents,
inv_lines, check_content=False)
+ def add_inventory_delta(self, basis_revision_id, delta, new_revision_id,
+ parents):
+ """Add a new inventory expressed as a delta against another revision.
+
+ :param basis_revision_id: The inventory id the delta was created
+ against.
+ :param delta: The inventory delta (see Inventory.apply_delta for
+ details).
+ :param new_revision_id: The revision id that the inventory is being
+ added for.
+ :param parents: The revision ids of the parents that new_revision_id is
+ known to have and are in the repository already. These are supplied
+ for repositories that depend on the inventory graph for revision
+ graph access, as well as for those that pun ancestry with delta
+ compression.
+
+ :returns: The validator (which is a sha1 digest, though what is sha'd is
+ repository format specific) of the serialized inventory.
+ """
+ if basis_revision_id == _mod_revision.NULL_REVISION:
+ return KnitPackRepository.add_inventory_delta(self,
+ basis_revision_id, delta, new_revision_id, parents)
+ if not self.is_in_write_group():
+ raise AssertionError("%r not in write group" % (self,))
+ _mod_revision.check_not_reserved_id(new_revision_id)
+ basis_tree = self.revision_tree(basis_revision_id)
+ basis_tree.lock_read()
+ try:
+ basis_inv = basis_tree.inventory
+ result = basis_inv.create_by_apply_delta(delta, new_revision_id)
+ inv_lines = result.to_lines()
+ return self._inventory_add_lines(new_revision_id, parents,
+ inv_lines, check_content=False)
+ finally:
+ basis_tree.unlock()
+
def _iter_inventories(self, revision_ids):
"""Iterate over many inventory objects."""
keys = [(revision_id,) for revision_id in revision_ids]
@@ -2583,6 +2619,7 @@
index_builder_class = BTreeBuilder
index_class = BTreeGraphIndex
supports_chks = True
+ _commit_inv_deltas = True
def _get_matching_bzrdir(self):
return bzrdir.format_registry.make_bzrdir('development3')
@@ -2623,6 +2660,7 @@
index_builder_class = BTreeBuilder
index_class = BTreeGraphIndex
supports_chks = True
+ _commit_inv_deltas = True
def _get_matching_bzrdir(self):
return bzrdir.format_registry.make_bzrdir(
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-10-13 04:54:26 +0000
+++ b/bzrlib/repository.py 2008-10-13 06:36:20 +0000
@@ -152,8 +152,13 @@
deserializing the inventory, while we already have a copy in
memory.
"""
- return RevisionTree(self.repository, self.new_inventory,
- self._new_revision_id)
+ if self.repository._format._commit_inv_deltas:
+ # incremental access repositories may not have a full cached
+ # inventory.
+ return self.repository.revision_tree(self._new_revision_id)
+ else:
+ return RevisionTree(self.repository, self.new_inventory,
+ self._new_revision_id)
def finish_inventory(self):
"""Tell the builder that the inventory is finished."""
@@ -162,11 +167,21 @@
' record_entry_contents, as of bzr 0.10.')
self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
self.new_inventory.revision_id = self._new_revision_id
- self.inv_sha1 = self.repository.add_inventory(
- self._new_revision_id,
- self.new_inventory,
- self.parents
- )
+ if (self.repository._format._commit_inv_deltas and
+ self._recording_deletes):
+ try:
+ basis_id = self.parents[0]
+ except IndexError:
+ basis_id = _mod_revision.NULL_REVISION
+ self.inv_sha1 = self.repository.add_inventory_delta(
+ basis_id, self.basis_delta, self._new_revision_id,
+ self.parents)
+ else:
+ self.inv_sha1 = self.repository.add_inventory(
+ self._new_revision_id,
+ self.new_inventory,
+ self.parents
+ )
def _gen_revision_id(self):
"""Return new revision-id."""
@@ -1762,7 +1777,7 @@
# TODO: refactor this to use an existing revision object
# so we don't need to read it in twice.
if revision_id == _mod_revision.NULL_REVISION:
- return RevisionTree(self, Inventory(root_id=None),
+ return RevisionTree(self, Inventory(root_id=None),
_mod_revision.NULL_REVISION)
else:
inv = self.get_revision_inventory(revision_id)
@@ -2206,6 +2221,8 @@
# Does this format support CHK bytestring lookups. Set to True or False in
# derived classes.
supports_chks = None
+ # Should commit add an inventory, or an inventory delta to the repository.
+ _commit_inv_deltas = True
def __str__(self):
return "<%s>" % self.__class__.__name__
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2008-10-10 02:27:35 +0000
+++ b/bzrlib/tests/test_inv.py 2008-10-13 06:36:20 +0000
@@ -331,6 +331,48 @@
chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
self.assertFalse(chk_inv.has_id('fileid'))
+ def test_id2path(self):
+ inv = Inventory()
+ inv.revision_id = "revid"
+ inv.root.revision = "rootrev"
+ direntry = InventoryDirectory("dirid", "dir", inv.root.file_id)
+ fileentry = InventoryFile("fileid", "file", "dirid")
+ inv.add(direntry)
+ inv.add(fileentry)
+ inv["fileid"].revision = "filerev"
+ inv["fileid"].executable = True
+ inv["fileid"].text_sha1 = "ffff"
+ inv["fileid"].text_size = 1
+ inv["dirid"].revision = "filerev"
+ chk_bytes = self.get_chk_bytes()
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+ bytes = ''.join(chk_inv.to_lines())
+ new_inv = CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+ self.assertEqual('', new_inv.id2path(inv.root.file_id))
+ self.assertEqual('dir', new_inv.id2path('dirid'))
+ self.assertEqual('dir/file', new_inv.id2path('fileid'))
+
+ def test_path2id(self):
+ inv = Inventory()
+ inv.revision_id = "revid"
+ inv.root.revision = "rootrev"
+ direntry = InventoryDirectory("dirid", "dir", inv.root.file_id)
+ fileentry = InventoryFile("fileid", "file", "dirid")
+ inv.add(direntry)
+ inv.add(fileentry)
+ inv["fileid"].revision = "filerev"
+ inv["fileid"].executable = True
+ inv["fileid"].text_sha1 = "ffff"
+ inv["fileid"].text_size = 1
+ inv["dirid"].revision = "filerev"
+ chk_bytes = self.get_chk_bytes()
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+ bytes = ''.join(chk_inv.to_lines())
+ new_inv = CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+ self.assertEqual(inv.root.file_id, new_inv.path2id(''))
+ self.assertEqual('dirid', new_inv.path2id('dir'))
+ self.assertEqual('fileid', new_inv.path2id('dir/file'))
+
def test_create_by_apply_delta_empty_add_child(self):
inv = Inventory()
inv.revision_id = "revid"
More information about the bazaar-commits
mailing list