Rev 2761: Do not create many transient knit objects, saving 4% on commit. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Wed Sep 12 07:33:50 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2761
revision-id: robertc at robertcollins.net-20070912063340-rebmp08maq9lmiyl
parent: robertc at robertcollins.net-20070912043809-98gc15f2rk87k0nf
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2007-09-12 16:33:40 +1000
message:
Do not create many transient knit objects, saving 4% on commit.
modified:
bzrlib/commit.py commit.py-20050511101309-79ec1a0168e0e825
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py 2007-09-12 04:38:09 +0000
+++ b/bzrlib/commit.py 2007-09-12 06:33:40 +0000
@@ -708,16 +708,15 @@
# deleted files matching that filter.
if is_inside_any(deleted_paths, path):
continue
+ # TODO: have the builder do the nested commit just-in-time IF and
+ # only if needed.
+ content_summary = self.work_tree.path_content_summary(path)
if not specific_files or is_inside_any(specific_files, path):
- # TODO: fix double-stat here.
- if not self.work_tree.has_filename(path):
+ if content_summary[0] == 'missing':
deleted_paths.add(path)
self.reporter.missing(path)
deleted_ids.append(file_id)
continue
- # TODO: have the builder do the nested commit just-in-time IF and
- # only if needed.
- content_summary = self.work_tree.path_content_summary(path)
if content_summary[0] == 'tree-reference':
# enforce repository nested tree policy.
if (not self.work_tree.supports_tree_reference() or
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-09-10 07:11:05 +0000
+++ b/bzrlib/knit.py 2007-09-12 06:33:40 +0000
@@ -408,9 +408,8 @@
"""
def __init__(self, relpath, transport, file_mode=None, access_mode=None,
- factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
- create=False, create_parent_dir=False, delay_create=False,
- dir_mode=None, index=None, access_method=None):
+ factory=None, delta=True, create=False, create_parent_dir=False,
+ delay_create=False, dir_mode=None, index=None, access_method=None):
"""Construct a knit at location specified by relpath.
:param create: If not True, only open an existing knit.
@@ -421,10 +420,6 @@
actually be created until the first data is stored.
:param index: An index to use for the knit.
"""
- if deprecated_passed(basis_knit):
- warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
- " deprecated as of bzr 0.9.",
- DeprecationWarning, stacklevel=2)
if access_mode is None:
access_mode = 'w'
super(KnitVersionedFile, self).__init__(access_mode)
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-09-02 21:35:54 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-09-12 06:33:40 +0000
@@ -255,6 +255,9 @@
class KnitRepository3(KnitRepository):
+ # knit3 repositories need a RootCommitBuilder
+ _commit_builder_class = RootCommitBuilder
+
def __init__(self, _format, a_bzrdir, control_files, _revision_store,
control_store, text_store):
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
@@ -281,26 +284,6 @@
assert inv.root.revision is not None
return KnitRepository.serialise_inventory(self, inv)
- def get_commit_builder(self, branch, parents, config, timestamp=None,
- timezone=None, committer=None, revprops=None,
- revision_id=None):
- """Obtain a CommitBuilder for this repository.
-
- :param branch: Branch to commit to.
- :param parents: Revision ids of the parents of the new revision.
- :param config: Configuration to use.
- :param timestamp: Optional timestamp recorded for commit.
- :param timezone: Optional timezone for timestamp.
- :param committer: Optional committer to set for commit.
- :param revprops: Optional dictionary of revision properties.
- :param revision_id: Optional revision id.
- """
- revision_id = osutils.safe_revision_id(revision_id)
- result = RootCommitBuilder(self, parents, config, timestamp, timezone,
- committer, revprops, revision_id)
- self.start_write_group()
- return result
-
class RepositoryFormatKnit(MetaDirRepositoryFormat):
"""Bzr repository knit format (generalized).
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-09-09 23:45:05 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-09-12 06:33:40 +0000
@@ -53,8 +53,10 @@
from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.repofmt.knitrepo import KnitRepository, KnitRepository3
from bzrlib.repository import (
+ CommitBuilder,
MetaDirRepository,
MetaDirRepositoryFormat,
+ RootCommitBuilder,
)
import bzrlib.revision as _mod_revision
from bzrlib.store.revision.knit import KnitRevisionStore
@@ -62,6 +64,32 @@
from bzrlib.trace import mutter, note, warning
+class PackCommitBuilder(CommitBuilder):
+ """A subclass of CommitBuilder to add texts with pack semantics.
+
+ Specifically this uses one knit object rather than one knit object per
+ added text, reducing memory and object pressure.
+ """
+
+ def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+ return self.repository._packs._add_text_to_weave(file_id,
+ self._new_revision_id, new_lines, parents, nostore_sha,
+ self.random_revid)
+
+
+class PackRootCommitBuilder(RootCommitBuilder):
+ """A subclass of RootCommitBuilder to add texts with pack semantics.
+
+ Specifically this uses one knit object rather than one knit object per
+ added text, reducing memory and object pressure.
+ """
+
+ def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+ return self.repository._packs._add_text_to_weave(file_id,
+ self._new_revision_id, new_lines, parents, nostore_sha,
+ self.random_revid)
+
+
class Pack(object):
"""An in memory proxy for a .pack and its indices."""
@@ -111,6 +139,18 @@
# sigatures 'knit' accessed : update it.
self.repo._signature_all_indices.insert_index(0,
pack.signature_index)
+
+ def _add_text_to_weave(self, file_id, revision_id, new_lines, parents,
+ nostore_sha, random_revid):
+ file_id_index = GraphIndexPrefixAdapter(
+ self.repo._text_all_indices,
+ (file_id, ), 1,
+ add_nodes_callback=self.repo._text_write_index.add_nodes)
+ self.repo._text_knit._index._graph_index = file_id_index
+ self.repo._text_knit._index._add_callback = file_id_index.add_nodes
+ return self.repo._text_knit.add_lines_with_ghosts(
+ revision_id, parents, new_lines, nostore_sha=nostore_sha,
+ random_id=random_revid)[0:2]
def all_pack_details(self):
"""Return a list of all the packs as transport,name tuples.
@@ -991,7 +1031,7 @@
if for_write or self.repo.is_in_write_group():
# allow writing: queue writes to a new index
indices.insert(0, self.repo._text_write_index)
- self._setup_knit_access(self.repo.is_in_write_group())
+ self._setup_knit(self.repo.is_in_write_group())
self.repo._text_all_indices = CombinedGraphIndex(indices)
def flush(self, new_name, new_pack):
@@ -1001,7 +1041,7 @@
new_pack.text_index_length = self.transport.put_file(
new_index_name, self.repo._text_write_index.finish())
self.repo._text_write_index = None
- self._setup_knit_access(False)
+ self._setup_knit(False)
if self.repo._text_all_indices is not None:
# text 'knits' have been used, replace the mutated memory index
# with the new on-disk one. XXX: is this really a good idea?
@@ -1012,13 +1052,13 @@
# - clearly we need a remove_index call too.
del self.repo._text_all_indices._indices[1]
- def get_weave_or_empty(self, file_id, transaction):
+ def get_weave_or_empty(self, file_id, transaction, force_write=False):
"""Get a 'Knit' backed by the .tix indices.
The transaction parameter is ignored.
"""
self._ensure_all_index()
- if self.repo.is_in_write_group():
+ if force_write or self.repo.is_in_write_group():
add_callback = self.repo._text_write_index.add_nodes
self.repo._text_pack_map[self.repo._text_write_index] = self.repo._open_pack_tuple
else:
@@ -1054,6 +1094,8 @@
self.repo._text_write_index = None
# no access object.
self.repo._text_knit_access = None
+ # no write-knit
+ self.repo._text_knit = None
# remove all constructed text data indices
self.repo._text_all_indices = None
# and the pack map
@@ -1069,15 +1111,21 @@
# adjust them.
# prepare to do writes.
self._ensure_all_index(True)
- self._setup_knit_access(True)
+ self._setup_knit(True)
- def _setup_knit_access(self, for_write):
+ def _setup_knit(self, for_write):
if for_write:
writer = (self.repo._open_pack_writer, self.repo._text_write_index)
else:
writer = None
self.repo._text_knit_access = _PackAccess(
self.repo._text_pack_map, writer)
+ if for_write:
+ # a reused knit object for commit specifically.
+ self.repo._text_knit = self.get_weave_or_empty(
+ 'all-texts', self.repo.get_transaction(), for_write)
+ else:
+ self.repo._text_knit = None
class InventoryKnitThunk(object):
@@ -1191,6 +1239,8 @@
class GraphKnitRepository1(KnitRepository):
"""Experimental graph-knit using repository."""
+ _commit_builder_class = PackCommitBuilder
+
def __init__(self, _format, a_bzrdir, control_files, _revision_store,
control_store, text_store):
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
@@ -1320,6 +1370,8 @@
class GraphKnitRepository3(KnitRepository3):
"""Experimental graph-knit using subtrees repository."""
+ _commit_builder_class = PackRootCommitBuilder
+
def __init__(self, _format, a_bzrdir, control_files, _revision_store,
control_store, text_store):
KnitRepository3.__init__(self, _format, a_bzrdir, control_files,
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-09-12 04:38:09 +0000
+++ b/bzrlib/repository.py 2007-09-12 06:33:40 +0000
@@ -61,6 +61,336 @@
_deprecation_warning_done = False
+class CommitBuilder(object):
+ """Provides an interface to build up a commit.
+
+ This allows describing a tree to be committed without needing to
+ know the internals of the format of the repository.
+ """
+
+ # all clients should supply tree roots.
+ record_root_entry = True
+
+ def __init__(self, repository, parents, config, timestamp=None,
+ timezone=None, committer=None, revprops=None,
+ revision_id=None):
+ """Initiate a CommitBuilder.
+
+ :param repository: Repository to commit to.
+ :param parents: Revision ids of the parents of the new revision.
+ :param config: Configuration to use.
+ :param timestamp: Optional timestamp recorded for commit.
+ :param timezone: Optional timezone for timestamp.
+ :param committer: Optional committer to set for commit.
+ :param revprops: Optional dictionary of revision properties.
+ :param revision_id: Optional revision id.
+ """
+ self._config = config
+
+ if committer is None:
+ self._committer = self._config.username()
+ else:
+ assert isinstance(committer, basestring), type(committer)
+ self._committer = committer
+
+ self.new_inventory = Inventory(None)
+ self._new_revision_id = osutils.safe_revision_id(revision_id)
+ self.parents = parents
+ self.repository = repository
+
+ self._revprops = {}
+ if revprops is not None:
+ self._revprops.update(revprops)
+
+ if timestamp is None:
+ timestamp = time.time()
+ # Restrict resolution to 1ms
+ self._timestamp = round(timestamp, 3)
+
+ if timezone is None:
+ self._timezone = osutils.local_time_offset()
+ else:
+ self._timezone = int(timezone)
+
+ self._generate_revision_if_needed()
+
+ def commit(self, message):
+ """Make the actual commit.
+
+ :return: The revision id of the recorded revision.
+ """
+ rev = _mod_revision.Revision(
+ timestamp=self._timestamp,
+ timezone=self._timezone,
+ committer=self._committer,
+ message=message,
+ inventory_sha1=self.inv_sha1,
+ revision_id=self._new_revision_id,
+ properties=self._revprops)
+ rev.parent_ids = self.parents
+ self.repository.add_revision(self._new_revision_id, rev,
+ self.new_inventory, self._config)
+ self.repository.commit_write_group()
+ return self._new_revision_id
+
+ def abort(self):
+ """Abort the commit that is being built.
+ """
+ self.repository.abort_write_group()
+
+ def revision_tree(self):
+ """Return the tree that was just committed.
+
+ After calling commit() this can be called to get a RevisionTree
+ representing the newly committed tree. This is preferred to
+ calling Repository.revision_tree() because that may require
+ deserializing the inventory, while we already have a copy in
+ memory.
+ """
+ return RevisionTree(self.repository, self.new_inventory,
+ self._new_revision_id)
+
+ def finish_inventory(self):
+ """Tell the builder that the inventory is finished."""
+ if self.new_inventory.root is None:
+ symbol_versioning.warn('Root entry should be supplied to'
+ ' record_entry_contents, as of bzr 0.10.',
+ DeprecationWarning, stacklevel=2)
+ self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
+ self.new_inventory.revision_id = self._new_revision_id
+ self.inv_sha1 = self.repository.add_inventory(
+ self._new_revision_id,
+ self.new_inventory,
+ self.parents
+ )
+
+ def _gen_revision_id(self):
+ """Return new revision-id."""
+ return generate_ids.gen_revision_id(self._config.username(),
+ self._timestamp)
+
+ def _generate_revision_if_needed(self):
+ """Create a revision id if None was supplied.
+
+ If the repository can not support user-specified revision ids
+ they should override this function and raise CannotSetRevisionId
+ if _new_revision_id is not None.
+
+ :raises: CannotSetRevisionId
+ """
+ if self._new_revision_id is None:
+ self._new_revision_id = self._gen_revision_id()
+ self.random_revid = True
+ else:
+ self.random_revid = False
+
+ def _check_root(self, ie, parent_invs, tree):
+ """Helper for record_entry_contents.
+
+ :param ie: An entry being added.
+ :param parent_invs: The inventories of the parent revisions of the
+ commit.
+ :param tree: The tree that is being committed.
+ """
+ if ie.parent_id is not None:
+ # if ie is not root, add a root automatically.
+ symbol_versioning.warn('Root entry should be supplied to'
+ ' record_entry_contents, as of bzr 0.10.',
+ DeprecationWarning, stacklevel=2)
+ self.record_entry_contents(tree.inventory.root.copy(), parent_invs,
+ '', tree, tree.path_content_summary(''))
+ else:
+ # In this revision format, root entries have no knit or weave. When
+ # serializing out to disk and back in root.revision is always
+ # _new_revision_id
+ ie.revision = self._new_revision_id
+
+ def record_entry_contents(self, ie, parent_invs, path, tree,
+ content_summary):
+ """Record the content of ie from tree into the commit if needed.
+
+ Side effect: sets ie.revision when unchanged
+
+ :param ie: An inventory entry present in the commit.
+ :param parent_invs: The inventories of the parent revisions of the
+ commit.
+ :param path: The path the entry is at in the tree.
+ :param tree: The tree which contains this entry and should be used to
+ obtain content.
+ :param content_summary: Summary data from the tree about the path's
+ content - kind, length, exec, sha/link target. This is only
+ accessed when the entry has a revision of None - that is when it is
+ a candidate to commit.
+ """
+ if self.new_inventory.root is None:
+ self._check_root(ie, parent_invs, tree)
+ if ie.revision is None:
+ kind = content_summary[0]
+ else:
+ # ie is carried over from a prior commit
+ kind = ie.kind
+ # XXX: repository specific check for nested tree support goes here - if
+ # the repo doesn't want nested trees we skip it ?
+ if (kind == 'tree-reference' and
+ not self.repository._format.supports_tree_reference):
+ # mismatch between commit builder logic and repository:
+ # this needs the entry creation pushed down into the builder.
+ raise NotImplementedError
+ # transitional assert only, will remove before release.
+ assert ie.kind == kind
+ self.new_inventory.add(ie)
+
+ # ie.revision is always None if the InventoryEntry is considered
+ # for committing. ie.snapshot will record the correct revision
+ # which may be the sole parent if it is untouched.
+ if ie.revision is not None:
+ return
+
+ # XXX: Friction: parent_candidates should return a list not a dict
+ # so that we don't have to walk the inventories again.
+ parent_candiate_entries = ie.parent_candidates(parent_invs)
+ head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
+ heads = []
+ for inv in parent_invs:
+ if ie.file_id in inv:
+ old_rev = inv[ie.file_id].revision
+ if old_rev in head_set:
+ heads.append(inv[ie.file_id].revision)
+ head_set.remove(inv[ie.file_id].revision)
+
+ store = False
+ # now we check to see if we need to write a new record to the
+ # file-graph.
+ # We write a new entry unless there is one head to the ancestors, and
+ # the kind-derived content is unchanged.
+
+ # Cheapest check first: no ancestors, or more than one head in the
+ # ancestors, we write a new node.
+ if len(heads) != 1:
+ store = True
+ if not store:
+ # There is a single head, look it up for comparison
+ parent_entry = parent_candiate_entries[heads[0]]
+ # if the non-content specific data has changed, we'll be writing a
+ # node:
+ if (parent_entry.parent_id != ie.parent_id or
+ parent_entry.name != ie.name):
+ store = True
+ # now we need to do content specific checks:
+ if not store:
+ # if the kind changed the content obviously has
+ if kind != parent_entry.kind:
+ store = True
+ if kind == 'file':
+ if not store:
+ if (# if the file length changed we have to store:
+ parent_entry.text_size != content_summary[1] or
+ # if the exec bit has changed we have to store:
+ parent_entry.executable != content_summary[2]):
+ store = True
+ elif parent_entry.text_sha1 == content_summary[3]:
+ # all meta and content is unchanged (using a hash cache
+ # hit to check the sha)
+ ie.revision = parent_entry.revision
+ ie.text_size = parent_entry.text_size
+ ie.text_sha1 = parent_entry.text_sha1
+ ie.executable = parent_entry.executable
+ return
+ else:
+ # Either there is only a hash change(no hash cache entry,
+ # or same size content change), or there is no change on
+ # this file at all.
+ # There is a race condition when inserting content into the
+ # knit though that can result in different content being
+ # inserted so even though we may have had a hash cache hit
+ # here we still tell the store the hash we would *not*
+ # store a new text on, which means that it can avoid storing for us
+ # without a race condition and without double-sha'ing the
+ # lines.
+ nostore_sha = parent_entry.text_sha1
+ if store:
+ nostore_sha = None
+ try:
+ ie.executable = content_summary[2]
+ lines = tree.get_file(ie.file_id, path).readlines()
+ ie.text_sha1, ie.text_size = self._add_text_to_weave(
+ ie.file_id, lines, heads, nostore_sha)
+ except errors.ExistingContent:
+ # we are not going to store a new file graph node as it turns
+ # out to be unchanged.
+ ie.revision = parent_entry.revision
+ ie.text_size = parent_entry.text_size
+ ie.text_sha1 = parent_entry.text_sha1
+ ie.executable = parent_entry.executable
+ return
+ elif kind == 'directory':
+ if not store:
+ # all data is meta here, nothing specific to directory, so
+ # carry over:
+ ie.revision = parent_entry.revision
+ return
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
+ elif kind == 'symlink':
+ current_link_target = content_summary[3]
+ if not store:
+ # symlink target is not generic metadata, check if it has
+ # changed.
+ if current_link_target != parent_entry.symlink_target:
+ store = True
+ if not store:
+ # unchanged, carry over.
+ ie.revision = parent_entry.revision
+ ie.symlink_target = parent_entry.symlink_target
+ return
+ ie.symlink_target = current_link_target
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
+ elif kind == 'tree-reference':
+ if not store:
+ if content_summary[3] != parent_entry.reference_revision:
+ store = True
+ if not store:
+ # unchanged, carry over.
+ ie.reference_revision = parent_entry.reference_revision
+ ie.revision = parent_entry.revision
+ return
+ ie.reference_revision = content_summary[3]
+ lines = []
+ self._add_text_to_weave(ie.file_id, lines, heads, None)
+ else:
+ raise NotImplementedError('unknown kind')
+ ie.revision = self._new_revision_id
+
+ def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
+ versionedfile = self.repository.weave_store.get_weave_or_empty(
+ file_id, self.repository.get_transaction())
+ # Don't change this to add_lines - add_lines_with_ghosts is cheaper
+ # than add_lines, and allows committing when a parent is ghosted for
+ # some reason.
+ try:
+ return versionedfile.add_lines_with_ghosts(
+ self._new_revision_id, parents, new_lines,
+ nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
+ finally:
+ versionedfile.clear_cache()
+
+
+class RootCommitBuilder(CommitBuilder):
+ """This commitbuilder actually records the root id"""
+
+ def _check_root(self, ie, parent_invs, tree):
+ """Helper for record_entry_contents.
+
+ :param ie: An entry being added.
+ :param parent_invs: The inventories of the parent revisions of the
+ commit.
+ :param tree: The tree that is being committed.
+ """
+ # ie must be root for this builder
+ assert ie.parent_id is None
+
+
######################################################################
# Repositories
@@ -76,6 +406,12 @@
remote) disk.
"""
+ # What class to use for a CommitBuilder. Often it's simpler to change this
+ # in a Repository class subclass rather than to override
+ # get_commit_builder.
+ _commit_builder_class = CommitBuilder
+ # The search regex used by xml based repositories to determine what things
+ # were changed in a single commit.
_file_ids_altered_regex = lazy_regex.lazy_compile(
r'file_id="(?P<file_id>[^"]+)"'
r'.* revision="(?P<revision_id>[^"]+)"'
@@ -476,8 +812,8 @@
:param revision_id: Optional revision id.
"""
revision_id = osutils.safe_revision_id(revision_id)
- result = CommitBuilder(self, parents, config, timestamp, timezone,
- committer, revprops, revision_id)
+ result = self.__class__._commit_builder_class(self, parents, config,
+ timestamp, timezone, committer, revprops, revision_id)
self.start_write_group()
return result
@@ -2254,336 +2590,6 @@
self.pb.update(message, self.count, self.total)
-class CommitBuilder(object):
- """Provides an interface to build up a commit.
-
- This allows describing a tree to be committed without needing to
- know the internals of the format of the repository.
- """
-
- # all clients should supply tree roots.
- record_root_entry = True
-
- def __init__(self, repository, parents, config, timestamp=None,
- timezone=None, committer=None, revprops=None,
- revision_id=None):
- """Initiate a CommitBuilder.
-
- :param repository: Repository to commit to.
- :param parents: Revision ids of the parents of the new revision.
- :param config: Configuration to use.
- :param timestamp: Optional timestamp recorded for commit.
- :param timezone: Optional timezone for timestamp.
- :param committer: Optional committer to set for commit.
- :param revprops: Optional dictionary of revision properties.
- :param revision_id: Optional revision id.
- """
- self._config = config
-
- if committer is None:
- self._committer = self._config.username()
- else:
- assert isinstance(committer, basestring), type(committer)
- self._committer = committer
-
- self.new_inventory = Inventory(None)
- self._new_revision_id = osutils.safe_revision_id(revision_id)
- self.parents = parents
- self.repository = repository
-
- self._revprops = {}
- if revprops is not None:
- self._revprops.update(revprops)
-
- if timestamp is None:
- timestamp = time.time()
- # Restrict resolution to 1ms
- self._timestamp = round(timestamp, 3)
-
- if timezone is None:
- self._timezone = osutils.local_time_offset()
- else:
- self._timezone = int(timezone)
-
- self._generate_revision_if_needed()
-
- def commit(self, message):
- """Make the actual commit.
-
- :return: The revision id of the recorded revision.
- """
- rev = _mod_revision.Revision(
- timestamp=self._timestamp,
- timezone=self._timezone,
- committer=self._committer,
- message=message,
- inventory_sha1=self.inv_sha1,
- revision_id=self._new_revision_id,
- properties=self._revprops)
- rev.parent_ids = self.parents
- self.repository.add_revision(self._new_revision_id, rev,
- self.new_inventory, self._config)
- self.repository.commit_write_group()
- return self._new_revision_id
-
- def abort(self):
- """Abort the commit that is being built.
- """
- self.repository.abort_write_group()
-
- def revision_tree(self):
- """Return the tree that was just committed.
-
- After calling commit() this can be called to get a RevisionTree
- representing the newly committed tree. This is preferred to
- calling Repository.revision_tree() because that may require
- deserializing the inventory, while we already have a copy in
- memory.
- """
- return RevisionTree(self.repository, self.new_inventory,
- self._new_revision_id)
-
- def finish_inventory(self):
- """Tell the builder that the inventory is finished."""
- if self.new_inventory.root is None:
- symbol_versioning.warn('Root entry should be supplied to'
- ' record_entry_contents, as of bzr 0.10.',
- DeprecationWarning, stacklevel=2)
- self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
- self.new_inventory.revision_id = self._new_revision_id
- self.inv_sha1 = self.repository.add_inventory(
- self._new_revision_id,
- self.new_inventory,
- self.parents
- )
-
- def _gen_revision_id(self):
- """Return new revision-id."""
- return generate_ids.gen_revision_id(self._config.username(),
- self._timestamp)
-
- def _generate_revision_if_needed(self):
- """Create a revision id if None was supplied.
-
- If the repository can not support user-specified revision ids
- they should override this function and raise CannotSetRevisionId
- if _new_revision_id is not None.
-
- :raises: CannotSetRevisionId
- """
- if self._new_revision_id is None:
- self._new_revision_id = self._gen_revision_id()
- self.random_revid = True
- else:
- self.random_revid = False
-
- def _check_root(self, ie, parent_invs, tree):
- """Helper for record_entry_contents.
-
- :param ie: An entry being added.
- :param parent_invs: The inventories of the parent revisions of the
- commit.
- :param tree: The tree that is being committed.
- """
- if ie.parent_id is not None:
- # if ie is not root, add a root automatically.
- symbol_versioning.warn('Root entry should be supplied to'
- ' record_entry_contents, as of bzr 0.10.',
- DeprecationWarning, stacklevel=2)
- self.record_entry_contents(tree.inventory.root.copy(), parent_invs,
- '', tree, tree.path_content_summary(''))
- else:
- # In this revision format, root entries have no knit or weave When
- # serializing out to disk and back in root.revision is always
- # _new_revision_id
- ie.revision = self._new_revision_id
-
- def record_entry_contents(self, ie, parent_invs, path, tree,
- content_summary):
- """Record the content of ie from tree into the commit if needed.
-
- Side effect: sets ie.revision when unchanged
-
- :param ie: An inventory entry present in the commit.
- :param parent_invs: The inventories of the parent revisions of the
- commit.
- :param path: The path the entry is at in the tree.
- :param tree: The tree which contains this entry and should be used to
- obtain content.
- :param content_summary: Summary data from the tree about the paths
- content - stat, length, exec, sha/link target. This is only
- accessed when the entry has a revision of None - that is when it is
- a candidate to commit.
- """
- if self.new_inventory.root is None:
- self._check_root(ie, parent_invs, tree)
- if ie.revision is None:
- kind = content_summary[0]
- else:
- # ie is carried over from a prior commit
- kind = ie.kind
- # XXX: repository specific check for nested tree support goes here - if
- # the repo doesn't want nested trees we skip it ?
- if (kind == 'tree-reference' and
- not self.repository._format.supports_tree_reference):
- # mismatch between commit builder logic and repository:
- # this needs the entry creation pushed down into the builder.
- raise NotImplementedError
- # transitional assert only, will remove before release.
- assert ie.kind == kind
- self.new_inventory.add(ie)
-
- # ie.revision is always None if the InventoryEntry is considered
- # for committing. ie.snapshot will record the correct revision
- # which may be the sole parent if it is untouched.
- if ie.revision is not None:
- return
-
- # XXX: Friction: parent_candidates should return a list not a dict
- # so that we don't have to walk the inventories again.
- parent_candiate_entries = ie.parent_candidates(parent_invs)
- head_set = self.repository.get_graph().heads(parent_candiate_entries.keys())
- heads = []
- for inv in parent_invs:
- if ie.file_id in inv:
- old_rev = inv[ie.file_id].revision
- if old_rev in head_set:
- heads.append(inv[ie.file_id].revision)
- head_set.remove(inv[ie.file_id].revision)
-
- store = False
- # now we check to see if we need to write a new record to the
- # file-graph.
- # We write a new entry unless there is one head to the ancestors, and
- # the kind-derived content is unchanged.
-
- # Cheapest check first: no ancestors, or more the one head in the
- # ancestors, we write a new node.
- if len(heads) != 1:
- store = True
- if not store:
- # There is a single head, look it up for comparison
- parent_entry = parent_candiate_entries[heads[0]]
- # if the non-content specific data has changed, we'll be writing a
- # node:
- if (parent_entry.parent_id != ie.parent_id or
- parent_entry.name != ie.name):
- store = True
- # now we need to do content specific checks:
- if not store:
- # if the kind changed the content obviously has
- if kind != parent_entry.kind:
- store = True
- if kind == 'file':
- if not store:
- if (# if the file length changed we have to store:
- parent_entry.text_size != content_summary[1] or
- # if the exec bit has changed we have to store:
- parent_entry.executable != content_summary[2]):
- store = True
- elif parent_entry.text_sha1 == content_summary[3]:
- # all meta and content is unchanged (using a hash cache
- # hit to check the sha)
- ie.revision = parent_entry.revision
- ie.text_size = parent_entry.text_size
- ie.text_sha1 = parent_entry.text_sha1
- ie.executable = parent_entry.executable
- return
- else:
- # Either there is only a hash change(no hash cache entry,
- # or same size content change), or there is no change on
- # this file at all.
- # There is a race condition when inserting content into the
- # knit though that can result in different content being
- # inserted so even though we may have had a hash cache hit
- # here we still tell the store the hash we would *not*
- # store a new text on, which means that it can avoid for us
- # without a race condition and without double-shaing the
- # lines.
- nostore_sha = parent_entry.text_sha1
- if store:
- nostore_sha = None
- try:
- ie.executable = content_summary[2]
- lines = tree.get_file(ie.file_id, path).readlines()
- ie.text_sha1, ie.text_size = self._add_text_to_weave(
- ie.file_id, lines, heads, nostore_sha)
- except errors.ExistingContent:
- # we are not going to store a new file graph node as it turns
- # out to be unchanged.
- ie.revision = parent_entry.revision
- ie.text_size = parent_entry.text_size
- ie.text_sha1 = parent_entry.text_sha1
- ie.executable = parent_entry.executable
- return
- elif kind == 'directory':
- if not store:
- # all data is meta here, nothing specific to directory, so
- # carry over:
- ie.revision = parent_entry.revision
- return
- lines = []
- self._add_text_to_weave(ie.file_id, lines, heads, None)
- elif kind == 'symlink':
- current_link_target = content_summary[3]
- if not store:
- # symmlink target is not generic metadata, check if it has
- # changed.
- if current_link_target != parent_entry.symlink_target:
- store = True
- if not store:
- # unchanged, carry over.
- ie.revision = parent_entry.revision
- ie.symlink_target = parent_entry.symlink_target
- return
- ie.symlink_target = current_link_target
- lines = []
- self._add_text_to_weave(ie.file_id, lines, heads, None)
- elif kind == 'tree-reference':
- if not store:
- if content_summary[3] != parent_entry.reference_revision:
- store = True
- if not store:
- # unchanged, carry over.
- ie.reference_revision = parent_entry.reference_revision
- ie.revision = parent_entry.revision
- return
- ie.reference_revision = content_summary[3]
- lines = []
- self._add_text_to_weave(ie.file_id, lines, heads, None)
- else:
- raise NotImplementedError('unknown kind')
- ie.revision = self._new_revision_id
-
- def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
- versionedfile = self.repository.weave_store.get_weave_or_empty(
- file_id, self.repository.get_transaction())
- # Don't change this to add_lines - add_lines_with_ghosts is cheaper
- # than add_lines, and allows committing when a parent is ghosted for
- # some reason.
- try:
- return versionedfile.add_lines_with_ghosts(
- self._new_revision_id, parents, new_lines,
- nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
- finally:
- versionedfile.clear_cache()
-
-
-class RootCommitBuilder(CommitBuilder):
- """This commitbuilder actually records the root id"""
-
- def _check_root(self, ie, parent_invs, tree):
- """Helper for record_entry_contents.
-
- :param ie: An entry being added.
- :param parent_invs: The inventories of the parent revisions of the
- commit.
- :param tree: The tree that is being committed.
- """
- # ie must be root for this builder
- assert ie.parent_id is None
-
-
_unescape_map = {
'apos':"'",
'quot':'"',
More information about the bazaar-commits
mailing list