Rev 2552: Refactor of commit to walk the working inventory less in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Jun 26 07:17:24 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2552
revision-id: pqm at pqm.ubuntu.com-20070626061722-8m49gdqd8cb8zc0c
parent: pqm at pqm.ubuntu.com-20070626045533-qni2zzf3cv41c1t8
parent: ian.clatworthy at internode.on.net-20070626043739-2ksn3hbnynt6zggj
committer: Canonical.com Patch Queue Manager<pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2007-06-26 07:17:22 +0100
message:
  Refactor of commit to walk the working inventory less
modified:
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
    ------------------------------------------------------------
    revno: 2550.1.1
    merged: ian.clatworthy at internode.on.net-20070626043739-2ksn3hbnynt6zggj
    parent: pqm at pqm.ubuntu.com-20070625174647-pocypsjmp861qgv7
    parent: ian.clatworthy at internode.on.net-20070621001613-j04fg9qt7kh48llg
    committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
    branch nick: ianc-integration
    timestamp: Tue 2007-06-26 14:37:39 +1000
    message:
      Refactor of commit to walk the working inventory less
    ------------------------------------------------------------
    revno: 2541.1.1
    merged: ian.clatworthy at internode.on.net-20070621001613-j04fg9qt7kh48llg
    parent: pqm at pqm.ubuntu.com-20070620205507-uuolokc2e0eckrb7
    committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
    branch nick: bzr.fast-commit
    timestamp: Thu 2007-06-21 10:16:13 +1000
    message:
      Refactor commit.commit() to walk the working inventory once/less
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-06-20 03:32:32 +0000
+++ b/bzrlib/commit.py	2007-06-21 00:16:13 +0000
@@ -263,11 +263,11 @@
             if self.config is None:
                 self.config = self.branch.get_config()
 
-            self.work_inv = self.work_tree.inventory
-            self.basis_inv = self.basis_tree.inventory
+            # If provided, ensure the specified files are versioned
             if specific_files is not None:
-                # Ensure specified files are versioned
-                # (We don't actually need the ids here)
+                # Note: We don't actually need the IDs here. This routine
+                # is being called because it raises PathNotVerisonedError
+                # as a side effect of finding the IDs.
                 # XXX: Dont we have filter_unversioned to do this more
                 # cheaply?
                 tree.find_ids_across_trees(specific_files,
@@ -288,26 +288,28 @@
             self.pb.show_count = True
             self.pb.show_bar = False
 
+            # After a merge, a selected file commit is not supported.
+            # See 'bzr help merge' for an explanation as to why.
+            self.basis_inv = self.basis_tree.inventory
             self._gather_parents()
             if len(self.parents) > 1 and self.specific_files:
                 raise errors.CannotCommitSelectedFileMerge(self.specific_files)
             
-            # Build the new inventory
+            # Collect the changes
             self._emit_progress_set_stage("Collecting changes", show_entries=True)
             self.builder = self.branch.get_commit_builder(self.parents,
                 self.config, timestamp, timezone, committer, revprops, rev_id)
-            self._remove_deleted()
-            self._populate_new_inv()
-            self._report_deletes()
+            self._update_builder_with_changes()
             self._check_pointless()
 
-            # TODO: Now the new inventory is known, check for conflicts and
-            # prompt the user for a commit message.
+            # TODO: Now the new inventory is known, check for conflicts.
             # ADHB 2006-08-08: If this is done, populate_new_inv should not add
             # weave lines, because nothing should be recorded until it is known
             # that commit will succeed.
             self._emit_progress_set_stage("Saving data locally")
             self.builder.finish_inventory()
+
+            # Prompt the user for a commit message if none provided
             message = message_callback(self)
             assert isinstance(message, unicode), type(message)
             self.message = message
@@ -563,59 +565,57 @@
             else:
                 mutter('commit parent ghost revision {%s}', revision)
 
-    def _remove_deleted(self):
-        """Remove deleted files from the working inventories.
-
-        This is done prior to taking the working inventory as the
-        basis for the new committed inventory.
-
-        This returns true if any files
-        *that existed in the basis inventory* were deleted.
-        Files that were added and deleted
-        in the working copy don't matter.
-        """
-        specific = self.specific_files
-        deleted_ids = []
-        deleted_paths = set()
-        for path, ie in self.work_inv.iter_entries():
-            if is_inside_any(deleted_paths, path):
-                # The tree will delete the required ids recursively.
-                continue
-            if specific and not is_inside_any(specific, path):
-                continue
-            if not self.work_tree.has_filename(path):
-                deleted_paths.add(path)
-                self.reporter.missing(path)
-                deleted_ids.append(ie.file_id)
-        self.work_tree.unversion(deleted_ids)
-
-    def _populate_new_inv(self):
-        """Build revision inventory.
-
-        This creates a new empty inventory. Depending on
-        which files are selected for commit, and what is present in the
-        current tree, the new inventory is populated. inventory entries 
-        which are candidates for modification have their revision set to
-        None; inventory entries that are carried over untouched have their
-        revision set to their prior value.
-        """
+    def _update_builder_with_changes(self):
+        """Update the commit builder with the data about what has changed.
+        """
+        # Build the revision inventory.
+        #
+        # This starts by creating a new empty inventory. Depending on
+        # which files are selected for commit, and what is present in the
+        # current tree, the new inventory is populated. inventory entries 
+        # which are candidates for modification have their revision set to
+        # None; inventory entries that are carried over untouched have their
+        # revision set to their prior value.
+        #
         # ESEPARATIONOFCONCERNS: this function is diffing and using the diff
         # results to create a new inventory at the same time, which results
         # in bugs like #46635.  Any reason not to use/enhance Tree.changes_from?
         # ADHB 11-07-2006
-        mutter("Selecting files for commit with filter %s", self.specific_files)
-        assert self.work_inv.root is not None
-        entries = self.work_inv.iter_entries()
+
+        specific_files = self.specific_files
+        mutter("Selecting files for commit with filter %s", specific_files)
+        work_inv = self.work_tree.inventory
+        assert work_inv.root is not None
+        self.pb_entries_total = len(work_inv)
+
+        # Check and warn about old CommitBuilders
+        entries = work_inv.iter_entries()
         if not self.builder.record_root_entry:
             symbol_versioning.warn('CommitBuilders should support recording'
                 ' the root entry as of bzr 0.10.', DeprecationWarning, 
                 stacklevel=1)
             self.builder.new_inventory.add(self.basis_inv.root.copy())
             entries.next()
-        self.pb_entries_total = len(self.work_inv)
+
+        deleted_ids = []
+        deleted_paths = set()
         for path, new_ie in entries:
             self._emit_progress_next_entry()
             file_id = new_ie.file_id
+
+            # Skip files that have been deleted from the working tree.
+            # The deleted files/directories are also recorded so they
+            # can be explicitly unversioned later. Note that when a
+            # filter of specific files is given, we must only skip/record
+            # deleted files matching that filter.
+            if is_inside_any(deleted_paths, path):
+                continue
+            if not specific_files or is_inside_any(specific_files, path):
+                if not self.work_tree.has_filename(path):
+                    deleted_paths.add(path)
+                    self.reporter.missing(path)
+                    deleted_ids.append(file_id)
+                    continue
             try:
                 kind = self.work_tree.kind(file_id)
                 if kind == 'tree-reference' and self.recursive == 'down':
@@ -649,8 +649,8 @@
             except errors.NoSuchFile:
                 pass
             # mutter('check %s {%s}', path, file_id)
-            if (not self.specific_files or 
-                is_inside_or_parent_of_any(self.specific_files, path)):
+            if (not specific_files or 
+                is_inside_or_parent_of_any(specific_files, path)):
                     # mutter('%s selected for commit', path)
                     ie = new_ie.copy()
                     ie.revision = None
@@ -677,19 +677,32 @@
             else:
                 self.reporter.snapshot_change(change, path)
 
-        if not self.specific_files:
-            return
-
-        # ignore removals that don't match filespec
-        for path, new_ie in self.basis_inv.iter_entries():
-            if new_ie.file_id in self.work_inv:
-                continue
-            if is_inside_any(self.specific_files, path):
-                continue
-            ie = new_ie.copy()
-            ie.revision = None
-            self.builder.record_entry_contents(ie, self.parent_invs, path,
-                                               self.basis_tree)
+        # Unversion IDs that were found to be deleted
+        self.work_tree.unversion(deleted_ids)
+
+        # If specific files/directories were nominated, it is possible
+        # that some data from outside those needs to be preserved from
+        # the basis tree. For example, if a file x is moved from out of
+        # directory foo into directory bar and the user requests
+        # ``commit foo``, then information about bar/x must also be
+        # recorded.
+        if specific_files:
+            for path, new_ie in self.basis_inv.iter_entries():
+                if new_ie.file_id in work_inv:
+                    continue
+                if is_inside_any(specific_files, path):
+                    continue
+                ie = new_ie.copy()
+                ie.revision = None
+                self.builder.record_entry_contents(ie, self.parent_invs, path,
+                                                   self.basis_tree)
+
+        # Report what was deleted. We could skip this when no deletes are
+        # detected to gain a performance win, but it arguably serves as a
+        # 'safety check' by informing the user whenever anything disappears.
+        for path, ie in self.basis_inv.iter_entries():
+            if ie.file_id not in self.builder.new_inventory:
+                self.reporter.deleted(path)
 
     def _emit_progress_set_stage(self, name, show_entries=False):
         """Set the progress stage and emit an update to the progress bar."""
@@ -714,8 +727,3 @@
             text = "%s - Stage" % (self.pb_stage_name)
         self.pb.update(text, self.pb_stage_count, self.pb_stage_total)
 
-    def _report_deletes(self):
-        for path, ie in self.basis_inv.iter_entries():
-            if ie.file_id not in self.builder.new_inventory:
-                self.reporter.deleted(path)
-




More information about the bazaar-commits mailing list