Rev 2785: nuke _read_tree_state and snapshot from inventory, moving responsibility into the commit builder. in http://people.ubuntu.com/~robertc/baz2.0/commit

Robert Collins robertc at robertcollins.net
Wed Sep 5 06:51:54 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/commit

------------------------------------------------------------
revno: 2785
revision-id: robertc at robertcollins.net-20070905055134-pwbueao0qq6krf9u
parent: robertc at robertcollins.net-20070905035316-1gp1s0fx6gmvbnqu
committer: Robert Collins <robertc at robertcollins.net>
branch nick: commit
timestamp: Wed 2007-09-05 15:51:34 +1000
message:
  nuke _read_tree_state and snapshot from inventory, moving responsibility into the commit builder.
modified:
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/memorytree.py           memorytree.py-20060906023413-4wlkalbdpsxi2r4y-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_implementations/test_commit_builder.py test_commit_builder.py-20060606110838-76e3ra5slucqus81-1
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-09-02 05:37:08 +0000
+++ b/bzrlib/commit.py	2007-09-05 05:51:34 +0000
@@ -227,7 +227,6 @@
         self.rev_id = None
         self.specific_files = specific_files
         self.allow_pointless = allow_pointless
-        self.recursive = recursive
         self.revprops = revprops
         self.message_callback = message_callback
         self.timestamp = timestamp
@@ -296,6 +295,8 @@
                     entries_title="Directory")
             self.builder = self.branch.get_commit_builder(self.parents,
                 self.config, timestamp, timezone, committer, revprops, rev_id)
+            # tell the builder about the chosen recursive behaviour
+            self.builder.recursive = recursive
             
             try:
                 self._update_builder_with_changes()
@@ -665,6 +666,7 @@
         work_inv = self.work_tree.inventory
         assert work_inv.root is not None
         entries = work_inv.iter_entries()
+        # XXX: Note that entries may have the wrong kind.
         if not self.builder.record_root_entry:
             entries.next()
         for path, existing_ie in entries:
@@ -683,15 +685,18 @@
             if is_inside_any(deleted_paths, path):
                 continue
             if not specific_files or is_inside_any(specific_files, path):
+                # TODO: fix double-stat here.
                 if not self.work_tree.has_filename(path):
                     deleted_paths.add(path)
                     self.reporter.missing(path)
                     deleted_ids.append(file_id)
                     continue
+            # TODO: have the builder do the nested commit just-in-time IF and
+            # only if needed.
             try:
                 kind = self.work_tree.kind(file_id)
                 # TODO: specific_files filtering before nested tree processing
-                if kind == 'tree-reference' and self.recursive == 'down':
+                if kind == 'tree-reference' and self.builder.recursive == 'down':
                     self._commit_nested_tree(file_id, path)
             except errors.NoSuchFile:
                 pass
@@ -700,7 +705,7 @@
             # Note: I don't particularly want to have the existing_ie
             # parameter but the test suite currently (28-Jun-07) breaks
             # without it thanks to a unicode normalisation issue. :-(
-            definitely_changed = kind != existing_ie.kind 
+            definitely_changed = kind != existing_ie.kind
             self._record_entry(path, file_id, specific_files, kind, name,
                 parent_id, definitely_changed, existing_ie)
 
@@ -722,7 +727,7 @@
                 self.work_tree.branch.repository
         try:
             sub_tree.commit(message=None, revprops=self.revprops,
-                recursive=self.recursive,
+                recursive=self.builder.recursive,
                 message_callback=self.message_callback,
                 timestamp=self.timestamp, timezone=self.timezone,
                 committer=self.committer,

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2007-09-04 03:53:07 +0000
+++ b/bzrlib/inventory.py	2007-09-05 05:51:34 +0000
@@ -433,7 +433,7 @@
                    self.revision))
 
     def snapshot(self, revision, path, previous_entries,
-                 work_tree, commit_builder):
+        work_tree, commit_builder, store_if_unchanged):
         """Make a snapshot of this entry which may or may not have changed.
         
         This means that all its fields are populated, that it has its
@@ -442,7 +442,7 @@
         :return: True if anything was recorded
         """
         # cannot be unchanged unless there is only one parent file rev.
-        self._read_tree_state(path, work_tree)
+        # self._read_tree_state(path, work_tree)
         if len(previous_entries) == 1:
             parent_ie = previous_entries.values()[0]
             if self._unchanged(parent_ie):
@@ -582,11 +582,6 @@
         """See InventoryEntry._put_on_disk."""
         os.mkdir(fullpath)
 
-    def _snapshot_text(self, file_parents, work_tree, commit_builder):
-        """See InventoryEntry._snapshot_text."""
-        commit_builder.modified_directory(self.file_id, file_parents)
-        return True
-
 
 class InventoryFile(InventoryEntry):
     """A file in an inventory."""
@@ -716,35 +711,6 @@
     def _forget_tree_state(self):
         self.text_sha1 = None
 
-    def snapshot(self, revision, path, previous_entries,
-                 work_tree, commit_builder):
-        """See InventoryEntry.snapshot."""
-        # Note: We use a custom implementation of this method for files
-        # because it's a performance critical part of commit.
-
-        # If this is the initial commit for this file, we know the sha is
-        # coming later so skip calculating it now (in _read_tree_state())
-        if len(previous_entries) == 0:
-            self.executable = work_tree.is_executable(self.file_id, path=path)
-        else:
-            self._read_tree_state(path, work_tree)
-
-        # If nothing is changed from the sole parent, there's nothing to do
-        if len(previous_entries) == 1:
-            parent_ie = previous_entries.values()[0]
-            if self._unchanged(parent_ie):
-                self.revision = parent_ie.revision
-                return False
-
-        # Add the file to the repository
-        self.revision = revision
-        def get_content_byte_lines():
-            return work_tree.get_file(self.file_id, path).readlines()
-        self.text_sha1, self.text_size = commit_builder.modified_file_text(
-            self.file_id, previous_entries, get_content_byte_lines,
-            self.text_sha1, self.text_size)
-        return True
-
     def _unchanged(self, previous_ie):
         """See InventoryEntry._unchanged."""
         compatible = super(InventoryFile, self)._unchanged(previous_ie)
@@ -845,12 +811,6 @@
             compatible = False
         return compatible
 
-    def _snapshot_text(self, file_parents, work_tree, commit_builder):
-        """See InventoryEntry._snapshot_text."""
-        commit_builder.modified_link(
-            self.file_id, file_parents, self.symlink_target)
-        return True
-
 
 class TreeReference(InventoryEntry):
     
@@ -866,10 +826,6 @@
         return TreeReference(self.file_id, self.name, self.parent_id,
                              self.revision, self.reference_revision)
 
-    def _snapshot_text(self, file_parents, work_tree, commit_builder):
-        commit_builder.modified_reference(self.file_id, file_parents)
-        return True
-
     def _read_tree_state(self, path, work_tree):
         """Populate fields in the inventory entry from the given tree.
         """

=== modified file 'bzrlib/memorytree.py'
--- a/bzrlib/memorytree.py	2007-08-23 00:12:35 +0000
+++ b/bzrlib/memorytree.py	2007-09-05 05:51:34 +0000
@@ -98,6 +98,26 @@
             return None, False, None
         return entry.kind, entry.executable, None
 
+    def path_content_summary(self, path):
+        """See Tree.path_content_summary."""
+        id = self.path2id(path)
+        if id is None:
+            return 'missing', None, None, None
+        kind = self.kind(id)
+        if kind == 'file':
+            bytes = self._file_transport.get_bytes(path)
+            size = len(bytes)
+            executable = self._inventory[id].executable
+            sha1 = None # no stat cache
+            return (kind, size, executable, sha1)
+        elif kind == 'directory':
+            # memory tree does not support nested trees yet.
+            return kind, None, None, None
+        elif kind == 'symlink':
+            raise NotImplementedError('symlink support')
+        else:
+            raise NotImplementedError('unknown kind')
+
     def _file_size(self, entry, stat_value):
         """See Tree._file_size."""
         if entry is None:

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-09-03 22:17:20 +0000
+++ b/bzrlib/repository.py	2007-09-05 05:51:34 +0000
@@ -2219,6 +2219,17 @@
         """
         if self.new_inventory.root is None:
             self._check_root(ie, parent_invs, tree)
+        content_summary = tree.path_content_summary(path)
+        kind = content_summary[0]
+        # XXX: repository specific check for nested tree support goes here - if
+        # the repo doesn't want nested trees we skip it ?
+        if (kind == 'tree-reference' and
+            not self.repository._format.supports_tree_reference):
+            # mismatch between commit builder logic and repository:
+            # this needs the entry creation pushed down into the builder.
+            raise NotImplementedError
+        # transitional assert only, will remove before release.
+        assert ie.kind == kind
         self.new_inventory.add(ie)
 
         # ie.revision is always None if the InventoryEntry is considered
@@ -2227,78 +2238,125 @@
         if ie.revision is not None:
             return
 
+        # XXX: Friction: parent_candidates should return a list not a dict
+        #      so that we don't have to walk the inventories again.
         parent_candiate_entries = ie.parent_candidates(parent_invs)
-        heads = self.repository.get_graph().heads(parent_candiate_entries.keys())
-        # XXX: Note that this is unordered - and this is tolerable because 
-        # the previous code was also unordered.
-        previous_entries = dict((head, parent_candiate_entries[head]) for head
-            in heads)
-        # we are creating a new revision for ie in the history store and
-        # inventory.
-        ie.snapshot(self._new_revision_id, path, previous_entries, tree, self)
-
-    def modified_directory(self, file_id, file_parents):
-        """Record the presence of a symbolic link.
-
-        :param file_id: The file_id of the link to record.
-        :param file_parents: The per-file parent revision ids.
-        """
-        self._add_text_to_weave(file_id, [], file_parents.keys())
-
-    def modified_reference(self, file_id, file_parents):
-        """Record the modification of a reference.
-
-        :param file_id: The file_id of the link to record.
-        :param file_parents: The per-file parent revision ids.
-        """
-        self._add_text_to_weave(file_id, [], file_parents.keys())
-    
-    def modified_file_text(self, file_id, file_parents,
-                           get_content_byte_lines, text_sha1=None,
-                           text_size=None):
-        """Record the text of file file_id
-
-        :param file_id: The file_id of the file to record the text of.
-        :param file_parents: The per-file parent revision ids.
-        :param get_content_byte_lines: A callable which will return the byte
-            lines for the file.
-        :param text_sha1: Optional SHA1 of the file contents.
-        :param text_size: Optional size of the file contents.
-        """
-        # mutter('storing text of file {%s} in revision {%s} into %r',
-        #        file_id, self._new_revision_id, self.repository.weave_store)
-        # special case to avoid diffing on renames or 
-        # reparenting
-        if (len(file_parents) == 1
-            and text_sha1 == file_parents.values()[0].text_sha1
-            and text_size == file_parents.values()[0].text_size):
-            previous_ie = file_parents.values()[0]
-            versionedfile = self.repository.weave_store.get_weave(file_id,
-                self.repository.get_transaction())
-            versionedfile.clone_text(self._new_revision_id,
-                previous_ie.revision, file_parents.keys())
-            return text_sha1, text_size
+        head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
+        heads = []
+        for inv in parent_invs:
+            if ie.file_id in inv:
+                old_rev = inv[ie.file_id].revision
+                if old_rev in head_set:
+                    heads.append(inv[ie.file_id].revision)
+                    head_set.remove(inv[ie.file_id].revision)
+
+        store = False
+        # now we check to see if we need to write a new record to the
+        # file-graph.
+        # We write a new entry unless there is one head to the ancestors, and
+        # the kind-derived content is unchanged.
+
+        # Cheapest check first: no ancestors, or more the one head in the
+        # ancestors, we write a new node.
+        if len(heads) != 1:
+            store = True
+        if not store:
+            # There is a single head, look it up for comparison
+            parent_entry = parent_candiate_entries[heads[0]]
+            # if the non-content specific data has changed, we'll be writing a
+            # node:
+            if (parent_entry.parent_id != ie.parent_id or
+                parent_entry.name != ie.name):
+                store = True
+        # now we need to do content specific checks:
+        if not store:
+            # if the kind changed the content obviously has
+            if kind != parent_entry.kind:
+                store = True
+        if kind == 'file':
+            if not store:
+                if (# if the file length changed we have to store:
+                    parent_entry.text_size != content_summary[1] or
+                    # if the exec bit has changed we have to store:
+                    parent_entry.executable != content_summary[2]):
+                    store = True
+                elif parent_entry.text_sha1 == content_summary[3]:
+                    # all meta and content is unchanged (using a hash cache
+                    # hit to check the sha)
+                    ie.revision = parent_entry.revision
+                    ie.text_size = parent_entry.text_size
+                    ie.text_sha1 = parent_entry.text_sha1
+                    ie.executable = parent_entry.executable
+                    return
+                else:
+                    # Either there is only a hash change(no hash cache entry,
+                    # or same size content change), or there is no change on
+                    # this file at all.
+                    # There is a race condition when inserting content into the
+                    # knit though that can result in different content being
+                    # inserted so even though we may have had a hash cache hit
+                    # here we still tell the store the hash we would *not*
+                    # store a new text on, which means that it can avoid for us
+                    # without a race condition and without double-shaing the
+                    # lines.
+                    nostore_sha = parent_entry.text_sha1
+            if store:
+                nostore_sha = None
+            try:
+                ie.executable = content_summary[2]
+                lines = tree.get_file(ie.file_id, path).readlines()
+                ie.text_sha1, ie.text_size = self._add_text_to_weave(
+                    ie.file_id, lines, heads, nostore_sha)
+                ie.revision = self._new_revision_id
+            except errors.ExistingContent:
+                # we are not going to store a new file graph node as it turns
+                # out to be unchanged.
+                ie.revision = parent_entry.revision
+                ie.text_size = parent_entry.text_size
+                ie.text_sha1 = parent_entry.text_sha1
+                ie.executable = parent_entry.executable
+                return
+        elif kind == 'directory':
+            if not store:
+                # all data is meta here, so carry over:
+                ie.revision = parent_entry.revision
+                return
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
+        elif kind == 'symlink':
+            current_link_target = content_summary[3]
+            if not store:
+                # symmlink target is not generic metadata, check if it has
+                # changed.
+                if current_link_target != parent_entry.symlink_target:
+                    store = True
+            if not store:
+                ie.revision = parent_entry.revision
+                ie.symlink_target = parent_entry.symlink_target
+                return
+            ie.symlink_target = current_link_target
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
+        elif kind == 'tree-reference':
+            if not store:
+                # all data is meta here, so carry over:
+                ie.revision = parent_entry.revision
+                return
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
         else:
-            new_lines = get_content_byte_lines()
-            return self._add_text_to_weave(file_id, new_lines,
-                file_parents.keys())
-
-    def modified_link(self, file_id, file_parents, link_target):
-        """Record the presence of a symbolic link.
-
-        :param file_id: The file_id of the link to record.
-        :param file_parents: The per-file parent revision ids.
-        :param link_target: Target location of this link.
-        """
-        self._add_text_to_weave(file_id, [], file_parents.keys())
-
-    def _add_text_to_weave(self, file_id, new_lines, parents):
+            raise NotImplementedError('unknown kind')
+        ie.revision = self._new_revision_id
+
+    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
         versionedfile = self.repository.weave_store.get_weave_or_empty(
             file_id, self.repository.get_transaction())
-        result = versionedfile.add_lines(
-            self._new_revision_id, parents, new_lines)[0:2]
-        versionedfile.clear_cache()
-        return result
+        try:
+            return versionedfile.add_lines(
+                self._new_revision_id, parents, new_lines,
+                nostore_sha=nostore_sha)[0:2]
+        finally:
+            versionedfile.clear_cache()
 
 
 class _CommitBuilder(CommitBuilder):

=== modified file 'bzrlib/tests/repository_implementations/test_commit_builder.py'
--- a/bzrlib/tests/repository_implementations/test_commit_builder.py	2007-09-04 03:53:07 +0000
+++ b/bzrlib/tests/repository_implementations/test_commit_builder.py	2007-09-05 05:51:34 +0000
@@ -349,6 +349,8 @@
         rev4 = tree1.commit('')
         tree3, = self._get_revtrees(tree1, [rev4])
         self.assertEqual(rev4, tree3.inventory[name + 'id'].revision)
+        # TODO: change this to an assertFileGraph call to check the
+        # parent order of rev4: it should be rev2, rev3
         self.assertFileAncestry([rev1, rev2, rev3, rev4], tree1, name,
             [rev1, rev3, rev2, rev4])
 



More information about the bazaar-commits mailing list