Rev 2761: Do not create many transient knit objects, saving 4% on commit. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Wed Sep 12 07:33:50 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2761
revision-id: robertc at robertcollins.net-20070912063340-rebmp08maq9lmiyl
parent: robertc at robertcollins.net-20070912043809-98gc15f2rk87k0nf
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2007-09-12 16:33:40 +1000
message:
  Do not create many transient knit objects, saving 4% on commit.
modified:
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2007-09-12 04:38:09 +0000
+++ b/bzrlib/commit.py	2007-09-12 06:33:40 +0000
@@ -708,16 +708,15 @@
             # deleted files matching that filter.
             if is_inside_any(deleted_paths, path):
                 continue
+            # TODO: have the builder do the nested commit just-in-time IF and
+            # only if needed.
+            content_summary = self.work_tree.path_content_summary(path)
             if not specific_files or is_inside_any(specific_files, path):
-                # TODO: fix double-stat here.
-                if not self.work_tree.has_filename(path):
+                if content_summary[0] == 'missing':
                     deleted_paths.add(path)
                     self.reporter.missing(path)
                     deleted_ids.append(file_id)
                     continue
-            # TODO: have the builder do the nested commit just-in-time IF and
-            # only if needed.
-            content_summary = self.work_tree.path_content_summary(path)
             if content_summary[0] == 'tree-reference':
                 # enforce repository nested tree policy.
                 if (not self.work_tree.supports_tree_reference() or

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-09-10 07:11:05 +0000
+++ b/bzrlib/knit.py	2007-09-12 06:33:40 +0000
@@ -408,9 +408,8 @@
     """
 
     def __init__(self, relpath, transport, file_mode=None, access_mode=None,
-                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
-                 create=False, create_parent_dir=False, delay_create=False,
-                 dir_mode=None, index=None, access_method=None):
+        factory=None, delta=True, create=False, create_parent_dir=False,
+        delay_create=False, dir_mode=None, index=None, access_method=None):
         """Construct a knit at location specified by relpath.
         
         :param create: If not True, only open an existing knit.
@@ -421,10 +420,6 @@
             actually be created until the first data is stored.
         :param index: An index to use for the knit.
         """
-        if deprecated_passed(basis_knit):
-            warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
-                 " deprecated as of bzr 0.9.",
-                 DeprecationWarning, stacklevel=2)
         if access_mode is None:
             access_mode = 'w'
         super(KnitVersionedFile, self).__init__(access_mode)

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-09-02 21:35:54 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-09-12 06:33:40 +0000
@@ -255,6 +255,9 @@
 
 class KnitRepository3(KnitRepository):
 
+    # knit3 repositories need a RootCommitBuilder
+    _commit_builder_class = RootCommitBuilder
+
     def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                  control_store, text_store):
         KnitRepository.__init__(self, _format, a_bzrdir, control_files,
@@ -281,26 +284,6 @@
         assert inv.root.revision is not None
         return KnitRepository.serialise_inventory(self, inv)
 
-    def get_commit_builder(self, branch, parents, config, timestamp=None,
-                           timezone=None, committer=None, revprops=None,
-                           revision_id=None):
-        """Obtain a CommitBuilder for this repository.
-        
-        :param branch: Branch to commit to.
-        :param parents: Revision ids of the parents of the new revision.
-        :param config: Configuration to use.
-        :param timestamp: Optional timestamp recorded for commit.
-        :param timezone: Optional timezone for timestamp.
-        :param committer: Optional committer to set for commit.
-        :param revprops: Optional dictionary of revision properties.
-        :param revision_id: Optional revision id.
-        """
-        revision_id = osutils.safe_revision_id(revision_id)
-        result = RootCommitBuilder(self, parents, config, timestamp, timezone,
-                                 committer, revprops, revision_id)
-        self.start_write_group()
-        return result
-
 
 class RepositoryFormatKnit(MetaDirRepositoryFormat):
     """Bzr repository knit format (generalized). 

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-09-09 23:45:05 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-09-12 06:33:40 +0000
@@ -53,8 +53,10 @@
 from bzrlib.decorators import needs_read_lock, needs_write_lock
 from bzrlib.repofmt.knitrepo import KnitRepository, KnitRepository3
 from bzrlib.repository import (
+    CommitBuilder,
     MetaDirRepository,
     MetaDirRepositoryFormat,
+    RootCommitBuilder,
     )
 import bzrlib.revision as _mod_revision
 from bzrlib.store.revision.knit import KnitRevisionStore
@@ -62,6 +64,32 @@
 from bzrlib.trace import mutter, note, warning
 
 
+class PackCommitBuilder(CommitBuilder):
+    """A subclass of CommitBuilder to add texts with pack semantics.
+    
+    Specifically this uses one knit object rather than one knit object per
+    added text, reducing memory and object pressure.
+    """
+
+    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+        return self.repository._packs._add_text_to_weave(file_id,
+            self._new_revision_id, new_lines, parents, nostore_sha,
+            self.random_revid)
+
+
+class PackRootCommitBuilder(RootCommitBuilder):
+    """A subclass of RootCommitBuilder to add texts with pack semantics.
+    
+    Specifically this uses one knit object rather than one knit object per
+    added text, reducing memory and object pressure.
+    """
+
+    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+        return self.repository._packs._add_text_to_weave(file_id,
+            self._new_revision_id, new_lines, parents, nostore_sha,
+            self.random_revid)
+
+
 class Pack(object):
     """An in memory proxy for a .pack and its indices."""
 
@@ -111,6 +139,18 @@
             # sigatures 'knit' accessed : update it.
             self.repo._signature_all_indices.insert_index(0,
                 pack.signature_index)
+        
+    def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,
+        nostore_sha, random_revid):
+        file_id_index = GraphIndexPrefixAdapter(
+            self.repo._text_all_indices,
+            (file_id, ), 1,
+            add_nodes_callback=self.repo._text_write_index.add_nodes)
+        self.repo._text_knit._index._graph_index = file_id_index
+        self.repo._text_knit._index._add_callback = file_id_index.add_nodes
+        return self.repo._text_knit.add_lines_with_ghosts(
+            revision_id, parents, new_lines, nostore_sha=nostore_sha,
+            random_id=random_revid)[0:2]
 
     def all_pack_details(self):
         """Return a list of all the packs as transport,name tuples.
@@ -991,7 +1031,7 @@
         if for_write or self.repo.is_in_write_group():
             # allow writing: queue writes to a new index
             indices.insert(0, self.repo._text_write_index)
-        self._setup_knit_access(self.repo.is_in_write_group())
+        self._setup_knit(self.repo.is_in_write_group())
         self.repo._text_all_indices = CombinedGraphIndex(indices)
 
     def flush(self, new_name, new_pack):
@@ -1001,7 +1041,7 @@
         new_pack.text_index_length = self.transport.put_file(
             new_index_name, self.repo._text_write_index.finish())
         self.repo._text_write_index = None
-        self._setup_knit_access(False)
+        self._setup_knit(False)
         if self.repo._text_all_indices is not None:
             # text 'knits' have been used, replace the mutated memory index
             # with the new on-disk one. XXX: is this really a good idea?
@@ -1012,13 +1052,13 @@
             # - clearly we need a remove_index call too.
             del self.repo._text_all_indices._indices[1]
 
-    def get_weave_or_empty(self, file_id, transaction):
+    def get_weave_or_empty(self, file_id, transaction, force_write=False):
         """Get a 'Knit' backed by the .tix indices.
 
         The transaction parameter is ignored.
         """
         self._ensure_all_index()
-        if self.repo.is_in_write_group():
+        if force_write or self.repo.is_in_write_group():
             add_callback = self.repo._text_write_index.add_nodes
             self.repo._text_pack_map[self.repo._text_write_index] = self.repo._open_pack_tuple
         else:
@@ -1054,6 +1094,8 @@
         self.repo._text_write_index = None
         # no access object.
         self.repo._text_knit_access = None
+        # no write-knit
+        self.repo._text_knit = None
         # remove all constructed text data indices
         self.repo._text_all_indices = None
         # and the pack map
@@ -1069,15 +1111,21 @@
         # adjust them.
         # prepare to do writes.
         self._ensure_all_index(True)
-        self._setup_knit_access(True)
+        self._setup_knit(True)
     
-    def _setup_knit_access(self, for_write):
+    def _setup_knit(self, for_write):
         if for_write:
             writer = (self.repo._open_pack_writer, self.repo._text_write_index)
         else:
             writer = None
         self.repo._text_knit_access = _PackAccess(
             self.repo._text_pack_map, writer)
+        if for_write:
+            # a reused knit object for commit specifically.
+            self.repo._text_knit = self.get_weave_or_empty(
+                'all-texts', self.repo.get_transaction(), for_write)
+        else:
+            self.repo._text_knit = None
 
 
 class InventoryKnitThunk(object):
@@ -1191,6 +1239,8 @@
 class GraphKnitRepository1(KnitRepository):
     """Experimental graph-knit using repository."""
 
+    _commit_builder_class = PackCommitBuilder
+
     def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                  control_store, text_store):
         KnitRepository.__init__(self, _format, a_bzrdir, control_files,
@@ -1320,6 +1370,8 @@
 class GraphKnitRepository3(KnitRepository3):
     """Experimental graph-knit using subtrees repository."""
 
+    _commit_builder_class = PackRootCommitBuilder
+
     def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                  control_store, text_store):
         KnitRepository3.__init__(self, _format, a_bzrdir, control_files,

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-09-12 04:38:09 +0000
+++ b/bzrlib/repository.py	2007-09-12 06:33:40 +0000
@@ -61,6 +61,336 @@
 _deprecation_warning_done = False
 
 
+class CommitBuilder(object):
+    """Provides an interface to build up a commit.
+
+    This allows describing a tree to be committed without needing to 
+    know the internals of the format of the repository.
+    """
+    
+    # all clients should supply tree roots.
+    record_root_entry = True
+
+    def __init__(self, repository, parents, config, timestamp=None, 
+                 timezone=None, committer=None, revprops=None, 
+                 revision_id=None):
+        """Initiate a CommitBuilder.
+
+        :param repository: Repository to commit to.
+        :param parents: Revision ids of the parents of the new revision.
+        :param config: Configuration to use.
+        :param timestamp: Optional timestamp recorded for commit.
+        :param timezone: Optional timezone for timestamp.
+        :param committer: Optional committer to set for commit.
+        :param revprops: Optional dictionary of revision properties.
+        :param revision_id: Optional revision id.
+        """
+        self._config = config
+
+        if committer is None:
+            self._committer = self._config.username()
+        else:
+            assert isinstance(committer, basestring), type(committer)
+            self._committer = committer
+
+        self.new_inventory = Inventory(None)
+        self._new_revision_id = osutils.safe_revision_id(revision_id)
+        self.parents = parents
+        self.repository = repository
+
+        self._revprops = {}
+        if revprops is not None:
+            self._revprops.update(revprops)
+
+        if timestamp is None:
+            timestamp = time.time()
+        # Restrict resolution to 1ms
+        self._timestamp = round(timestamp, 3)
+
+        if timezone is None:
+            self._timezone = osutils.local_time_offset()
+        else:
+            self._timezone = int(timezone)
+
+        self._generate_revision_if_needed()
+
+    def commit(self, message):
+        """Make the actual commit.
+
+        :return: The revision id of the recorded revision.
+        """
+        rev = _mod_revision.Revision(
+                       timestamp=self._timestamp,
+                       timezone=self._timezone,
+                       committer=self._committer,
+                       message=message,
+                       inventory_sha1=self.inv_sha1,
+                       revision_id=self._new_revision_id,
+                       properties=self._revprops)
+        rev.parent_ids = self.parents
+        self.repository.add_revision(self._new_revision_id, rev,
+            self.new_inventory, self._config)
+        self.repository.commit_write_group()
+        return self._new_revision_id
+
+    def abort(self):
+        """Abort the commit that is being built.
+        """
+        self.repository.abort_write_group()
+
+    def revision_tree(self):
+        """Return the tree that was just committed.
+
+        After calling commit() this can be called to get a RevisionTree
+        representing the newly committed tree. This is preferred to
+        calling Repository.revision_tree() because that may require
+        deserializing the inventory, while we already have a copy in
+        memory.
+        """
+        return RevisionTree(self.repository, self.new_inventory,
+                            self._new_revision_id)
+
+    def finish_inventory(self):
+        """Tell the builder that the inventory is finished."""
+        if self.new_inventory.root is None:
+            symbol_versioning.warn('Root entry should be supplied to'
+                ' record_entry_contents, as of bzr 0.10.',
+                 DeprecationWarning, stacklevel=2)
+            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
+        self.new_inventory.revision_id = self._new_revision_id
+        self.inv_sha1 = self.repository.add_inventory(
+            self._new_revision_id,
+            self.new_inventory,
+            self.parents
+            )
+
+    def _gen_revision_id(self):
+        """Return new revision-id."""
+        return generate_ids.gen_revision_id(self._config.username(),
+                                            self._timestamp)
+
+    def _generate_revision_if_needed(self):
+        """Create a revision id if None was supplied.
+        
+        If the repository can not support user-specified revision ids
+        they should override this function and raise CannotSetRevisionId
+        if _new_revision_id is not None.
+
+        :raises: CannotSetRevisionId
+        """
+        if self._new_revision_id is None:
+            self._new_revision_id = self._gen_revision_id()
+            self.random_revid = True
+        else:
+            self.random_revid = False
+
+    def _check_root(self, ie, parent_invs, tree):
+        """Helper for record_entry_contents.
+
+        :param ie: An entry being added.
+        :param parent_invs: The inventories of the parent revisions of the
+            commit.
+        :param tree: The tree that is being committed.
+        """
+        if ie.parent_id is not None:
+            # if ie is not root, add a root automatically.
+            symbol_versioning.warn('Root entry should be supplied to'
+                ' record_entry_contents, as of bzr 0.10.',
+                 DeprecationWarning, stacklevel=2)
+            self.record_entry_contents(tree.inventory.root.copy(), parent_invs,
+                                       '', tree, tree.path_content_summary(''))
+        else:
+            # In this revision format, root entries have no knit or weave. When
+            # serializing out to disk and back in root.revision is always
+            # _new_revision_id
+            ie.revision = self._new_revision_id
+
+    def record_entry_contents(self, ie, parent_invs, path, tree,
+        content_summary):
+        """Record the content of ie from tree into the commit if needed.
+
+        Side effect: sets ie.revision when unchanged
+
+        :param ie: An inventory entry present in the commit.
+        :param parent_invs: The inventories of the parent revisions of the
+            commit.
+        :param path: The path the entry is at in the tree.
+        :param tree: The tree which contains this entry and should be used to 
+            obtain content.
+        :param content_summary: Summary data from the tree about the paths
+            content - stat, length, exec, sha/link target. This is only
+            accessed when the entry has a revision of None - that is when it is
+            a candidate to commit.
+        """
+        if self.new_inventory.root is None:
+            self._check_root(ie, parent_invs, tree)
+        if ie.revision is None:
+            kind = content_summary[0]
+        else:
+            # ie is carried over from a prior commit
+            kind = ie.kind
+        # XXX: repository specific check for nested tree support goes here - if
+        # the repo doesn't want nested trees we skip it ?
+        if (kind == 'tree-reference' and
+            not self.repository._format.supports_tree_reference):
+            # mismatch between commit builder logic and repository:
+            # this needs the entry creation pushed down into the builder.
+            raise NotImplementedError
+        # transitional assert only, will remove before release.
+        assert ie.kind == kind
+        self.new_inventory.add(ie)
+
+        # ie.revision is always None if the InventoryEntry is considered
+        # for committing. ie.snapshot will record the correct revision 
+        # which may be the sole parent if it is untouched.
+        if ie.revision is not None:
+            return
+
+        # XXX: Friction: parent_candidates should return a list not a dict
+        #      so that we don't have to walk the inventories again.
+        parent_candiate_entries = ie.parent_candidates(parent_invs)
+        head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
+        heads = []
+        for inv in parent_invs:
+            if ie.file_id in inv:
+                old_rev = inv[ie.file_id].revision
+                if old_rev in head_set:
+                    heads.append(inv[ie.file_id].revision)
+                    head_set.remove(inv[ie.file_id].revision)
+
+        store = False
+        # now we check to see if we need to write a new record to the
+        # file-graph.
+        # We write a new entry unless there is one head to the ancestors, and
+        # the kind-derived content is unchanged.
+
+        # Cheapest check first: no ancestors, or more than one head in the
+        # ancestors, we write a new node.
+        if len(heads) != 1:
+            store = True
+        if not store:
+            # There is a single head, look it up for comparison
+            parent_entry = parent_candiate_entries[heads[0]]
+            # if the non-content specific data has changed, we'll be writing a
+            # node:
+            if (parent_entry.parent_id != ie.parent_id or
+                parent_entry.name != ie.name):
+                store = True
+        # now we need to do content specific checks:
+        if not store:
+            # if the kind changed the content obviously has
+            if kind != parent_entry.kind:
+                store = True
+        if kind == 'file':
+            if not store:
+                if (# if the file length changed we have to store:
+                    parent_entry.text_size != content_summary[1] or
+                    # if the exec bit has changed we have to store:
+                    parent_entry.executable != content_summary[2]):
+                    store = True
+                elif parent_entry.text_sha1 == content_summary[3]:
+                    # all meta and content is unchanged (using a hash cache
+                    # hit to check the sha)
+                    ie.revision = parent_entry.revision
+                    ie.text_size = parent_entry.text_size
+                    ie.text_sha1 = parent_entry.text_sha1
+                    ie.executable = parent_entry.executable
+                    return
+                else:
+                    # Either there is only a hash change(no hash cache entry,
+                    # or same size content change), or there is no change on
+                    # this file at all.
+                    # There is a race condition when inserting content into the
+                    # knit though that can result in different content being
+                    # inserted so even though we may have had a hash cache hit
+                    # here we still tell the store the hash we would *not*
+                    # store a new text on, which means that it can avoid the
+                    # store for us without a race condition and without
+                    # double-shaing the lines.
+                    nostore_sha = parent_entry.text_sha1
+            if store:
+                nostore_sha = None
+            try:
+                ie.executable = content_summary[2]
+                lines = tree.get_file(ie.file_id, path).readlines()
+                ie.text_sha1, ie.text_size = self._add_text_to_weave(
+                    ie.file_id, lines, heads, nostore_sha)
+            except errors.ExistingContent:
+                # we are not going to store a new file graph node as it turns
+                # out to be unchanged.
+                ie.revision = parent_entry.revision
+                ie.text_size = parent_entry.text_size
+                ie.text_sha1 = parent_entry.text_sha1
+                ie.executable = parent_entry.executable
+                return
+        elif kind == 'directory':
+            if not store:
+                # all data is meta here, nothing specific to directory, so
+                # carry over:
+                ie.revision = parent_entry.revision
+                return
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
+        elif kind == 'symlink':
+            current_link_target = content_summary[3]
+            if not store:
+                # symlink target is not generic metadata, check if it has
+                # changed.
+                if current_link_target != parent_entry.symlink_target:
+                    store = True
+            if not store:
+                # unchanged, carry over.
+                ie.revision = parent_entry.revision
+                ie.symlink_target = parent_entry.symlink_target
+                return
+            ie.symlink_target = current_link_target
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
+        elif kind == 'tree-reference':
+            if not store:
+                if content_summary[3] != parent_entry.reference_revision:
+                    store = True
+            if not store:
+                # unchanged, carry over.
+                ie.reference_revision = parent_entry.reference_revision
+                ie.revision = parent_entry.revision
+                return
+            ie.reference_revision = content_summary[3]
+            lines = []
+            self._add_text_to_weave(ie.file_id, lines, heads, None)
+        else:
+            raise NotImplementedError('unknown kind')
+        ie.revision = self._new_revision_id
+
+    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+        versionedfile = self.repository.weave_store.get_weave_or_empty(
+            file_id, self.repository.get_transaction())
+        # Don't change this to add_lines - add_lines_with_ghosts is cheaper
+        # than add_lines, and allows committing when a parent is ghosted for
+        # some reason.
+        try:
+            return versionedfile.add_lines_with_ghosts(
+                self._new_revision_id, parents, new_lines,
+                nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
+        finally:
+            versionedfile.clear_cache()
+
+
+class RootCommitBuilder(CommitBuilder):
+    """This commitbuilder actually records the root id"""
+    
+    def _check_root(self, ie, parent_invs, tree):
+        """Helper for record_entry_contents.
+
+        :param ie: An entry being added.
+        :param parent_invs: The inventories of the parent revisions of the
+            commit.
+        :param tree: The tree that is being committed.
+        """
+        # ie must be root for this builder
+        assert ie.parent_id is None
+
+
 ######################################################################
 # Repositories
 
@@ -76,6 +406,12 @@
     remote) disk.
     """
 
+    # What class to use for a CommitBuilder. Often it's simpler to change this
+    # in a Repository class subclass rather than to override
+    # get_commit_builder.
+    _commit_builder_class = CommitBuilder
+    # The search regex used by xml based repositories to determine what things
+    # were changed in a single commit.
     _file_ids_altered_regex = lazy_regex.lazy_compile(
         r'file_id="(?P<file_id>[^"]+)"'
         r'.* revision="(?P<revision_id>[^"]+)"'
@@ -476,8 +812,8 @@
         :param revision_id: Optional revision id.
         """
         revision_id = osutils.safe_revision_id(revision_id)
-        result = CommitBuilder(self, parents, config, timestamp, timezone,
-                              committer, revprops, revision_id)
+        result = self.__class__._commit_builder_class(self, parents, config,
+            timestamp, timezone, committer, revprops, revision_id)
         self.start_write_group()
         return result
 
@@ -2254,336 +2590,6 @@
         self.pb.update(message, self.count, self.total)
 
 
-class CommitBuilder(object):
-    """Provides an interface to build up a commit.
-
-    This allows describing a tree to be committed without needing to 
-    know the internals of the format of the repository.
-    """
-    
-    # all clients should supply tree roots.
-    record_root_entry = True
-
-    def __init__(self, repository, parents, config, timestamp=None, 
-                 timezone=None, committer=None, revprops=None, 
-                 revision_id=None):
-        """Initiate a CommitBuilder.
-
-        :param repository: Repository to commit to.
-        :param parents: Revision ids of the parents of the new revision.
-        :param config: Configuration to use.
-        :param timestamp: Optional timestamp recorded for commit.
-        :param timezone: Optional timezone for timestamp.
-        :param committer: Optional committer to set for commit.
-        :param revprops: Optional dictionary of revision properties.
-        :param revision_id: Optional revision id.
-        """
-        self._config = config
-
-        if committer is None:
-            self._committer = self._config.username()
-        else:
-            assert isinstance(committer, basestring), type(committer)
-            self._committer = committer
-
-        self.new_inventory = Inventory(None)
-        self._new_revision_id = osutils.safe_revision_id(revision_id)
-        self.parents = parents
-        self.repository = repository
-
-        self._revprops = {}
-        if revprops is not None:
-            self._revprops.update(revprops)
-
-        if timestamp is None:
-            timestamp = time.time()
-        # Restrict resolution to 1ms
-        self._timestamp = round(timestamp, 3)
-
-        if timezone is None:
-            self._timezone = osutils.local_time_offset()
-        else:
-            self._timezone = int(timezone)
-
-        self._generate_revision_if_needed()
-
-    def commit(self, message):
-        """Make the actual commit.
-
-        :return: The revision id of the recorded revision.
-        """
-        rev = _mod_revision.Revision(
-                       timestamp=self._timestamp,
-                       timezone=self._timezone,
-                       committer=self._committer,
-                       message=message,
-                       inventory_sha1=self.inv_sha1,
-                       revision_id=self._new_revision_id,
-                       properties=self._revprops)
-        rev.parent_ids = self.parents
-        self.repository.add_revision(self._new_revision_id, rev,
-            self.new_inventory, self._config)
-        self.repository.commit_write_group()
-        return self._new_revision_id
-
-    def abort(self):
-        """Abort the commit that is being built.
-        """
-        self.repository.abort_write_group()
-
-    def revision_tree(self):
-        """Return the tree that was just committed.
-
-        After calling commit() this can be called to get a RevisionTree
-        representing the newly committed tree. This is preferred to
-        calling Repository.revision_tree() because that may require
-        deserializing the inventory, while we already have a copy in
-        memory.
-        """
-        return RevisionTree(self.repository, self.new_inventory,
-                            self._new_revision_id)
-
-    def finish_inventory(self):
-        """Tell the builder that the inventory is finished."""
-        if self.new_inventory.root is None:
-            symbol_versioning.warn('Root entry should be supplied to'
-                ' record_entry_contents, as of bzr 0.10.',
-                 DeprecationWarning, stacklevel=2)
-            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
-        self.new_inventory.revision_id = self._new_revision_id
-        self.inv_sha1 = self.repository.add_inventory(
-            self._new_revision_id,
-            self.new_inventory,
-            self.parents
-            )
-
-    def _gen_revision_id(self):
-        """Return new revision-id."""
-        return generate_ids.gen_revision_id(self._config.username(),
-                                            self._timestamp)
-
-    def _generate_revision_if_needed(self):
-        """Create a revision id if None was supplied.
-        
-        If the repository can not support user-specified revision ids
-        they should override this function and raise CannotSetRevisionId
-        if _new_revision_id is not None.
-
-        :raises: CannotSetRevisionId
-        """
-        if self._new_revision_id is None:
-            self._new_revision_id = self._gen_revision_id()
-            self.random_revid = True
-        else:
-            self.random_revid = False
-
-    def _check_root(self, ie, parent_invs, tree):
-        """Helper for record_entry_contents.
-
-        :param ie: An entry being added.
-        :param parent_invs: The inventories of the parent revisions of the
-            commit.
-        :param tree: The tree that is being committed.
-        """
-        if ie.parent_id is not None:
-            # if ie is not root, add a root automatically.
-            symbol_versioning.warn('Root entry should be supplied to'
-                ' record_entry_contents, as of bzr 0.10.',
-                 DeprecationWarning, stacklevel=2)
-            self.record_entry_contents(tree.inventory.root.copy(), parent_invs,
-                                       '', tree, tree.path_content_summary(''))
-        else:
-            # In this revision format, root entries have no knit or weave When
-            # serializing out to disk and back in root.revision is always
-            # _new_revision_id
-            ie.revision = self._new_revision_id
-
-    def record_entry_contents(self, ie, parent_invs, path, tree,
-        content_summary):
-        """Record the content of ie from tree into the commit if needed.
-
-        Side effect: sets ie.revision when unchanged
-
-        :param ie: An inventory entry present in the commit.
-        :param parent_invs: The inventories of the parent revisions of the
-            commit.
-        :param path: The path the entry is at in the tree.
-        :param tree: The tree which contains this entry and should be used to 
-            obtain content.
-        :param content_summary: Summary data from the tree about the paths
-            content - stat, length, exec, sha/link target. This is only
-            accessed when the entry has a revision of None - that is when it is
-            a candidate to commit.
-        """
-        if self.new_inventory.root is None:
-            self._check_root(ie, parent_invs, tree)
-        if ie.revision is None:
-            kind = content_summary[0]
-        else:
-            # ie is carried over from a prior commit
-            kind = ie.kind
-        # XXX: repository specific check for nested tree support goes here - if
-        # the repo doesn't want nested trees we skip it ?
-        if (kind == 'tree-reference' and
-            not self.repository._format.supports_tree_reference):
-            # mismatch between commit builder logic and repository:
-            # this needs the entry creation pushed down into the builder.
-            raise NotImplementedError
-        # transitional assert only, will remove before release.
-        assert ie.kind == kind
-        self.new_inventory.add(ie)
-
-        # ie.revision is always None if the InventoryEntry is considered
-        # for committing. ie.snapshot will record the correct revision 
-        # which may be the sole parent if it is untouched.
-        if ie.revision is not None:
-            return
-
-        # XXX: Friction: parent_candidates should return a list not a dict
-        #      so that we don't have to walk the inventories again.
-        parent_candiate_entries = ie.parent_candidates(parent_invs)
-        head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
-        heads = []
-        for inv in parent_invs:
-            if ie.file_id in inv:
-                old_rev = inv[ie.file_id].revision
-                if old_rev in head_set:
-                    heads.append(inv[ie.file_id].revision)
-                    head_set.remove(inv[ie.file_id].revision)
-
-        store = False
-        # now we check to see if we need to write a new record to the
-        # file-graph.
-        # We write a new entry unless there is one head to the ancestors, and
-        # the kind-derived content is unchanged.
-
-        # Cheapest check first: no ancestors, or more the one head in the
-        # ancestors, we write a new node.
-        if len(heads) != 1:
-            store = True
-        if not store:
-            # There is a single head, look it up for comparison
-            parent_entry = parent_candiate_entries[heads[0]]
-            # if the non-content specific data has changed, we'll be writing a
-            # node:
-            if (parent_entry.parent_id != ie.parent_id or
-                parent_entry.name != ie.name):
-                store = True
-        # now we need to do content specific checks:
-        if not store:
-            # if the kind changed the content obviously has
-            if kind != parent_entry.kind:
-                store = True
-        if kind == 'file':
-            if not store:
-                if (# if the file length changed we have to store:
-                    parent_entry.text_size != content_summary[1] or
-                    # if the exec bit has changed we have to store:
-                    parent_entry.executable != content_summary[2]):
-                    store = True
-                elif parent_entry.text_sha1 == content_summary[3]:
-                    # all meta and content is unchanged (using a hash cache
-                    # hit to check the sha)
-                    ie.revision = parent_entry.revision
-                    ie.text_size = parent_entry.text_size
-                    ie.text_sha1 = parent_entry.text_sha1
-                    ie.executable = parent_entry.executable
-                    return
-                else:
-                    # Either there is only a hash change(no hash cache entry,
-                    # or same size content change), or there is no change on
-                    # this file at all.
-                    # There is a race condition when inserting content into the
-                    # knit though that can result in different content being
-                    # inserted so even though we may have had a hash cache hit
-                    # here we still tell the store the hash we would *not*
-                    # store a new text on, which means that it can avoid for us
-                    # without a race condition and without double-shaing the
-                    # lines.
-                    nostore_sha = parent_entry.text_sha1
-            if store:
-                nostore_sha = None
-            try:
-                ie.executable = content_summary[2]
-                lines = tree.get_file(ie.file_id, path).readlines()
-                ie.text_sha1, ie.text_size = self._add_text_to_weave(
-                    ie.file_id, lines, heads, nostore_sha)
-            except errors.ExistingContent:
-                # we are not going to store a new file graph node as it turns
-                # out to be unchanged.
-                ie.revision = parent_entry.revision
-                ie.text_size = parent_entry.text_size
-                ie.text_sha1 = parent_entry.text_sha1
-                ie.executable = parent_entry.executable
-                return
-        elif kind == 'directory':
-            if not store:
-                # all data is meta here, nothing specific to directory, so
-                # carry over:
-                ie.revision = parent_entry.revision
-                return
-            lines = []
-            self._add_text_to_weave(ie.file_id, lines, heads, None)
-        elif kind == 'symlink':
-            current_link_target = content_summary[3]
-            if not store:
-                # symmlink target is not generic metadata, check if it has
-                # changed.
-                if current_link_target != parent_entry.symlink_target:
-                    store = True
-            if not store:
-                # unchanged, carry over.
-                ie.revision = parent_entry.revision
-                ie.symlink_target = parent_entry.symlink_target
-                return
-            ie.symlink_target = current_link_target
-            lines = []
-            self._add_text_to_weave(ie.file_id, lines, heads, None)
-        elif kind == 'tree-reference':
-            if not store:
-                if content_summary[3] != parent_entry.reference_revision:
-                    store = True
-            if not store:
-                # unchanged, carry over.
-                ie.reference_revision = parent_entry.reference_revision
-                ie.revision = parent_entry.revision
-                return
-            ie.reference_revision = content_summary[3]
-            lines = []
-            self._add_text_to_weave(ie.file_id, lines, heads, None)
-        else:
-            raise NotImplementedError('unknown kind')
-        ie.revision = self._new_revision_id
-
-    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
-        versionedfile = self.repository.weave_store.get_weave_or_empty(
-            file_id, self.repository.get_transaction())
-        # Don't change this to add_lines - add_lines_with_ghosts is cheaper
-        # than add_lines, and allows committing when a parent is ghosted for
-        # some reason.
-        try:
-            return versionedfile.add_lines_with_ghosts(
-                self._new_revision_id, parents, new_lines,
-                nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
-        finally:
-            versionedfile.clear_cache()
-
-
-class RootCommitBuilder(CommitBuilder):
-    """This commitbuilder actually records the root id"""
-    
-    def _check_root(self, ie, parent_invs, tree):
-        """Helper for record_entry_contents.
-
-        :param ie: An entry being added.
-        :param parent_invs: The inventories of the parent revisions of the
-            commit.
-        :param tree: The tree that is being committed.
-        """
-        # ie must be root for this builder
-        assert ie.parent_id is None
-
-
 _unescape_map = {
     'apos':"'",
     'quot':'"',



More information about the bazaar-commits mailing list