Rev 2785: nuke _read_tree_state and snapshot from inventory, moving responsibility into the commit builder. in http://people.ubuntu.com/~robertc/baz2.0/commit
Robert Collins
robertc at robertcollins.net
Wed Sep 5 06:51:54 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/commit
------------------------------------------------------------
revno: 2785
revision-id: robertc at robertcollins.net-20070905055134-pwbueao0qq6krf9u
parent: robertc at robertcollins.net-20070905035316-1gp1s0fx6gmvbnqu
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Wed 2007-09-05 15:51:34 +1000
message:
nuke _read_tree_state and snapshot from inventory, moving responsibility into the commit builder.
modified:
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/memorytree.py memorytree.py-20060906023413-4wlkalbdpsxi2r4y-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/repository_implementations/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-09-02 05:37:08 +0000
+++ b/bzrlib/commit.py 2007-09-05 05:51:34 +0000
@@ -227,7 +227,6 @@
self.rev_id = None
self.specific_files = specific_files
self.allow_pointless = allow_pointless
- self.recursive = recursive
self.revprops = revprops
self.message_callback = message_callback
self.timestamp = timestamp
@@ -296,6 +295,8 @@
entries_title="Directory")
self.builder = self.branch.get_commit_builder(self.parents,
self.config, timestamp, timezone, committer, revprops, rev_id)
+ # tell the builder about the chosen recursive behaviour
+ self.builder.recursive = recursive
try:
self._update_builder_with_changes()
@@ -665,6 +666,7 @@
work_inv = self.work_tree.inventory
assert work_inv.root is not None
entries = work_inv.iter_entries()
+ # XXX: Note that entries may have the wrong kind.
if not self.builder.record_root_entry:
entries.next()
for path, existing_ie in entries:
@@ -683,15 +685,18 @@
if is_inside_any(deleted_paths, path):
continue
if not specific_files or is_inside_any(specific_files, path):
+ # TODO: fix double-stat here.
if not self.work_tree.has_filename(path):
deleted_paths.add(path)
self.reporter.missing(path)
deleted_ids.append(file_id)
continue
+ # TODO: have the builder do the nested commit just-in-time IF and
+ # only if needed.
try:
kind = self.work_tree.kind(file_id)
# TODO: specific_files filtering before nested tree processing
- if kind == 'tree-reference' and self.recursive == 'down':
+ if kind == 'tree-reference' and self.builder.recursive == 'down':
self._commit_nested_tree(file_id, path)
except errors.NoSuchFile:
pass
@@ -700,7 +705,7 @@
# Note: I don't particularly want to have the existing_ie
# parameter but the test suite currently (28-Jun-07) breaks
# without it thanks to a unicode normalisation issue. :-(
- definitely_changed = kind != existing_ie.kind
+ definitely_changed = kind != existing_ie.kind
self._record_entry(path, file_id, specific_files, kind, name,
parent_id, definitely_changed, existing_ie)
@@ -722,7 +727,7 @@
self.work_tree.branch.repository
try:
sub_tree.commit(message=None, revprops=self.revprops,
- recursive=self.recursive,
+ recursive=self.builder.recursive,
message_callback=self.message_callback,
timestamp=self.timestamp, timezone=self.timezone,
committer=self.committer,
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2007-09-04 03:53:07 +0000
+++ b/bzrlib/inventory.py 2007-09-05 05:51:34 +0000
@@ -433,7 +433,7 @@
self.revision))
def snapshot(self, revision, path, previous_entries,
- work_tree, commit_builder):
+ work_tree, commit_builder, store_if_unchanged):
"""Make a snapshot of this entry which may or may not have changed.
This means that all its fields are populated, that it has its
@@ -442,7 +442,7 @@
:return: True if anything was recorded
"""
# cannot be unchanged unless there is only one parent file rev.
- self._read_tree_state(path, work_tree)
+ # self._read_tree_state(path, work_tree)
if len(previous_entries) == 1:
parent_ie = previous_entries.values()[0]
if self._unchanged(parent_ie):
@@ -582,11 +582,6 @@
"""See InventoryEntry._put_on_disk."""
os.mkdir(fullpath)
- def _snapshot_text(self, file_parents, work_tree, commit_builder):
- """See InventoryEntry._snapshot_text."""
- commit_builder.modified_directory(self.file_id, file_parents)
- return True
-
class InventoryFile(InventoryEntry):
"""A file in an inventory."""
@@ -716,35 +711,6 @@
def _forget_tree_state(self):
self.text_sha1 = None
- def snapshot(self, revision, path, previous_entries,
- work_tree, commit_builder):
- """See InventoryEntry.snapshot."""
- # Note: We use a custom implementation of this method for files
- # because it's a performance critical part of commit.
-
- # If this is the initial commit for this file, we know the sha is
- # coming later so skip calculating it now (in _read_tree_state())
- if len(previous_entries) == 0:
- self.executable = work_tree.is_executable(self.file_id, path=path)
- else:
- self._read_tree_state(path, work_tree)
-
- # If nothing is changed from the sole parent, there's nothing to do
- if len(previous_entries) == 1:
- parent_ie = previous_entries.values()[0]
- if self._unchanged(parent_ie):
- self.revision = parent_ie.revision
- return False
-
- # Add the file to the repository
- self.revision = revision
- def get_content_byte_lines():
- return work_tree.get_file(self.file_id, path).readlines()
- self.text_sha1, self.text_size = commit_builder.modified_file_text(
- self.file_id, previous_entries, get_content_byte_lines,
- self.text_sha1, self.text_size)
- return True
-
def _unchanged(self, previous_ie):
"""See InventoryEntry._unchanged."""
compatible = super(InventoryFile, self)._unchanged(previous_ie)
@@ -845,12 +811,6 @@
compatible = False
return compatible
- def _snapshot_text(self, file_parents, work_tree, commit_builder):
- """See InventoryEntry._snapshot_text."""
- commit_builder.modified_link(
- self.file_id, file_parents, self.symlink_target)
- return True
-
class TreeReference(InventoryEntry):
@@ -866,10 +826,6 @@
return TreeReference(self.file_id, self.name, self.parent_id,
self.revision, self.reference_revision)
- def _snapshot_text(self, file_parents, work_tree, commit_builder):
- commit_builder.modified_reference(self.file_id, file_parents)
- return True
-
def _read_tree_state(self, path, work_tree):
"""Populate fields in the inventory entry from the given tree.
"""
=== modified file 'bzrlib/memorytree.py'
--- a/bzrlib/memorytree.py 2007-08-23 00:12:35 +0000
+++ b/bzrlib/memorytree.py 2007-09-05 05:51:34 +0000
@@ -98,6 +98,26 @@
return None, False, None
return entry.kind, entry.executable, None
+ def path_content_summary(self, path):
+ """See Tree.path_content_summary."""
+ id = self.path2id(path)
+ if id is None:
+ return 'missing', None, None, None
+ kind = self.kind(id)
+ if kind == 'file':
+ bytes = self._file_transport.get_bytes(path)
+ size = len(bytes)
+ executable = self._inventory[id].executable
+ sha1 = None # no stat cache
+ return (kind, size, executable, sha1)
+ elif kind == 'directory':
+ # memory tree does not support nested trees yet.
+ return kind, None, None, None
+ elif kind == 'symlink':
+ raise NotImplementedError('symlink support')
+ else:
+ raise NotImplementedError('unknown kind')
+
def _file_size(self, entry, stat_value):
"""See Tree._file_size."""
if entry is None:
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-09-03 22:17:20 +0000
+++ b/bzrlib/repository.py 2007-09-05 05:51:34 +0000
@@ -2219,6 +2219,17 @@
"""
if self.new_inventory.root is None:
self._check_root(ie, parent_invs, tree)
+ content_summary = tree.path_content_summary(path)
+ kind = content_summary[0]
+ # XXX: repository specific check for nested tree support goes here - if
+ # the repo doesn't want nested trees we skip it ?
+ if (kind == 'tree-reference' and
+ not self.repository._format.supports_tree_reference):
+ # mismatch between commit builder logic and repository:
+ # this needs the entry creation pushed down into the builder.
+ raise NotImplementedError
+ # transitional assert only, will remove before release.
+ assert ie.kind == kind
self.new_inventory.add(ie)
# ie.revision is always None if the InventoryEntry is considered
@@ -2227,78 +2238,125 @@
if ie.revision is not None:
return
+ # XXX: Friction: parent_candidates should return a list not a dict
+ # so that we don't have to walk the inventories again.
parent_candiate_entries = ie.parent_candidates(parent_invs)
- heads = self.repository.get_graph().heads(parent_candiate_entries.keys())
- # XXX: Note that this is unordered - and this is tolerable because
- # the previous code was also unordered.
- previous_entries = dict((head, parent_candiate_entries[head]) for head
- in heads)
- # we are creating a new revision for ie in the history store and
- # inventory.
- ie.snapshot(self._new_revision_id, path, previous_entries, tree, self)
-
- def modified_directory(self, file_id, file_parents):
- """Record the presence of a symbolic link.
-
- :param file_id: The file_id of the link to record.
- :param file_parents: The per-file parent revision ids.
- """
- self._add_text_to_weave(file_id, [], file_parents.keys())
-
- def modified_reference(self, file_id, file_parents):
- """Record the modification of a reference.
-
- :param file_id: The file_id of the link to record.
- :param file_parents: The per-file parent revision ids.
- """
- self._add_text_to_weave(file_id, [], file_parents.keys())
-
- def modified_file_text(self, file_id, file_parents,
- get_content_byte_lines, text_sha1=None,
- text_size=None):
- """Record the text of file file_id
-
- :param file_id: The file_id of the file to record the text of.
- :param file_parents: The per-file parent revision ids.
- :param get_content_byte_lines: A callable which will return the byte
- lines for the file.
- :param text_sha1: Optional SHA1 of the file contents.
- :param text_size: Optional size of the file contents.
- """
- # mutter('storing text of file {%s} in revision {%s} into %r',
- # file_id, self._new_revision_id, self.repository.weave_store)
- # special case to avoid diffing on renames or
- # reparenting
- if (len(file_parents) == 1
- and text_sha1 == file_parents.values()[0].text_sha1
- and text_size == file_parents.values()[0].text_size):
- previous_ie = file_parents.values()[0]
- versionedfile = self.repository.weave_store.get_weave(file_id,
- self.repository.get_transaction())
- versionedfile.clone_text(self._new_revision_id,
- previous_ie.revision, file_parents.keys())
- return text_sha1, text_size
+ head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
+ heads = []
+ for inv in parent_invs:
+ if ie.file_id in inv:
+ old_rev = inv[ie.file_id].revision
+ if old_rev in head_set:
+ heads.append(inv[ie.file_id].revision)
+ head_set.remove(inv[ie.file_id].revision)
+
+ store = False
+ # now we check to see if we need to write a new record to the
+ # file-graph.
+ # We write a new entry unless there is one head to the ancestors, and
+ # the kind-derived content is unchanged.
+
+ # Cheapest check first: no ancestors, or more the one head in the
+ # ancestors, we write a new node.
+ if len(heads) != 1:
+ store = True
+ if not store:
+ # There is a single head, look it up for comparison
+ parent_entry = parent_candiate_entries[heads[0]]
+ # if the non-content specific data has changed, we'll be writing a
+ # node:
+ if (parent_entry.parent_id != ie.parent_id or
+ parent_entry.name != ie.name):
+ store = True
+ # now we need to do content specific checks:
+ if not store:
+ # if the kind changed the content obviously has
+ if kind != parent_entry.kind:
+ store = True
+ if kind == 'file':
+ if not store:
+ if (# if the file length changed we have to store:
+ parent_entry.text_size != content_summary[1] or
+ # if the exec bit has changed we have to store:
+ parent_entry.executable != content_summary[2]):
+ store = True
+ elif parent_entry.text_sha1 == content_summary[3]:
+ # all meta and content is unchanged (using a hash cache
+ # hit to check the sha)
+ ie.revision = parent_entry.revision
+ ie.text_size = parent_entry.text_size
+ ie.text_sha1 = parent_entry.text_sha1
+ ie.executable = parent_entry.executable
+ return
+ else:
+ # Either there is only a hash change(no hash cache entry,
+ # or same size content change), or there is no change on
+ # this file at all.
+ # There is a race condition when inserting content into the
+ # knit though that can result in different content being
+ # inserted so even though we may have had a hash cache hit
+ # here we still tell the store the hash we would *not*
+ # store a new text on, which means that it can avoid for us
+ # without a race condition and without double-shaing the
+ # lines.
+ nostore_sha = parent_entry.text_sha1
+ if store:
+ nostore_sha = None
+ try:
+ ie.executable = content_summary[2]
+ lines = tree.get_file(ie.file_id, path).readlines()
+ ie.text_sha1, ie.text_size = self._add_text_to_weave(
+ ie.file_id, lines, heads, nostore_sha)
+ ie.revision = self._new_revision_id
+ except errors.ExistingContent:
+ # we are not going to store a new file graph node as it turns
+ # out to be unchanged.
+ ie.revision = parent_entry.revision
+ ie.text_size = parent_entry.text_size
+ ie.text_sha1 = parent_entry.text_sha1
+ ie.executable = parent_entry.executable
+ return
+ elif kind == 'directory':
+ if not store:
+ # all data is meta here, so carry over:
+ ie.revision = parent_entry.revision
+ return
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
+ elif kind == 'symlink':
+ current_link_target = content_summary[3]
+ if not store:
+ # symmlink target is not generic metadata, check if it has
+ # changed.
+ if current_link_target != parent_entry.symlink_target:
+ store = True
+ if not store:
+ ie.revision = parent_entry.revision
+ ie.symlink_target = parent_entry.symlink_target
+ return
+ ie.symlink_target = current_link_target
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
+ elif kind == 'tree-reference':
+ if not store:
+ # all data is meta here, so carry over:
+ ie.revision = parent_entry.revision
+ return
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
else:
- new_lines = get_content_byte_lines()
- return self._add_text_to_weave(file_id, new_lines,
- file_parents.keys())
-
- def modified_link(self, file_id, file_parents, link_target):
- """Record the presence of a symbolic link.
-
- :param file_id: The file_id of the link to record.
- :param file_parents: The per-file parent revision ids.
- :param link_target: Target location of this link.
- """
- self._add_text_to_weave(file_id, [], file_parents.keys())
-
- def _add_text_to_weave(self, file_id, new_lines, parents):
+ raise NotImplementedError('unknown kind')
+ ie.revision = self._new_revision_id
+
+ def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
versionedfile = self.repository.weave_store.get_weave_or_empty(
file_id, self.repository.get_transaction())
- result = versionedfile.add_lines(
- self._new_revision_id, parents, new_lines)[0:2]
- versionedfile.clear_cache()
- return result
+ try:
+ return versionedfile.add_lines(
+ self._new_revision_id, parents, new_lines,
+ nostore_sha=nostore_sha)[0:2]
+ finally:
+ versionedfile.clear_cache()
class _CommitBuilder(CommitBuilder):
=== modified file 'bzrlib/tests/repository_implementations/test_commit_builder.py'
--- a/bzrlib/tests/repository_implementations/test_commit_builder.py 2007-09-04 03:53:07 +0000
+++ b/bzrlib/tests/repository_implementations/test_commit_builder.py 2007-09-05 05:51:34 +0000
@@ -349,6 +349,8 @@
rev4 = tree1.commit('')
tree3, = self._get_revtrees(tree1, [rev4])
self.assertEqual(rev4, tree3.inventory[name + 'id'].revision)
+ # TODO: change this to an assertFileGraph call to check the
+ # parent order of rev4: it should be rev2, rev3
self.assertFileAncestry([rev1, rev2, rev3, rev4], tree1, name,
[rev1, rev3, rev2, rev4])
More information about the bazaar-commits
mailing list