Rev 2774: (Ian Clatworthy) Quicker initial commit - skip SHAing twice & skip path lookup as we know it in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Fri Aug 31 08:39:29 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2774
revision-id: pqm at pqm.ubuntu.com-20070831073911-5saibq43v0uvsh7r
parent: pqm at pqm.ubuntu.com-20070831062501-zi3hbjphrmz4jv4x
parent: ian.clatworthy at internode.on.net-20070831055421-iwcem9phv1needkk
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2007-08-31 08:39:11 +0100
message:
(Ian Clatworthy) Quicker initial commit - skip SHAing twice & skip path lookup as we know it
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/memorytree.py memorytree.py-20060906023413-4wlkalbdpsxi2r4y-1
bzrlib/revisiontree.py revisiontree.py-20060724012533-bg8xyryhxd0o0i0h-1
bzrlib/tests/tree_implementations/test_tree.py test_tree.py-20061215160206-usu7lwcj8aq2n3br-1
bzrlib/tree.py tree.py-20050309040759-9d5f2496be663e77
bzrlib/workingtree.py workingtree.py-20050511021032-29b6ec0a681e02e3
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
------------------------------------------------------------
revno: 2772.2.1
merged: ian.clatworthy at internode.on.net-20070831055421-iwcem9phv1needkk
parent: pqm at pqm.ubuntu.com-20070831020510-emrlta5dk6ta95zp
parent: ian.clatworthy at internode.on.net-20070830020221-92iai8t8tx4frqwk
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: ianc-integration
timestamp: Fri 2007-08-31 15:54:21 +1000
message:
(Ian Clatworthy) Quicker initial commit - skip SHAing twice & skip path lookup as we know it
------------------------------------------------------------
revno: 2743.3.6
merged: ian.clatworthy at internode.on.net-20070830020221-92iai8t8tx4frqwk
parent: ian.clatworthy at internode.on.net-20070830020037-lvqrgnm7lbmmcl8y
parent: pqm at pqm.ubuntu.com-20070829235511-o6ftd800xa245p9h
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-30 12:02:21 +1000
message:
Merge bzr.dev
------------------------------------------------------------
revno: 2743.3.5
merged: ian.clatworthy at internode.on.net-20070830020037-lvqrgnm7lbmmcl8y
parent: ian.clatworthy at internode.on.net-20070823005545-qxt0ga954hm80337
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-30 12:00:37 +1000
message:
Incorporate feedback from abentley
------------------------------------------------------------
revno: 2743.3.4
merged: ian.clatworthy at internode.on.net-20070823005545-qxt0ga954hm80337
parent: ian.clatworthy at internode.on.net-20070823001235-bjza31s3c5ffdxlj
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-23 10:55:45 +1000
message:
Add NEWS entry
------------------------------------------------------------
revno: 2743.3.3
merged: ian.clatworthy at internode.on.net-20070823001235-bjza31s3c5ffdxlj
parent: ian.clatworthy at internode.on.net-20070823001136-ufjqjo803qe7kv0f
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-23 10:12:35 +1000
message:
Skip path lookup for tree.get_file() when we already know the path
------------------------------------------------------------
revno: 2743.3.2
merged: ian.clatworthy at internode.on.net-20070823001136-ufjqjo803qe7kv0f
parent: ian.clatworthy at internode.on.net-20070823001040-gw56j9fcbkjcys3j
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-23 10:11:36 +1000
message:
Custom snapshot method for InventoryFile so don't SHA twice
------------------------------------------------------------
revno: 2743.3.1
merged: ian.clatworthy at internode.on.net-20070823001040-gw56j9fcbkjcys3j
parent: pqm at pqm.ubuntu.com-20070822052832-nxby1d1plok4syek
committer: Ian Clatworthy <ian.clatworthy at internode.on.net>
branch nick: bzr.quicker-initial-commit
timestamp: Thu 2007-08-23 10:10:40 +1000
message:
Simplify InventoryEntry.snapshot and clean-up the return value
=== modified file 'NEWS'
--- a/NEWS 2007-08-31 05:32:00 +0000
+++ b/NEWS 2007-08-31 07:39:11 +0000
@@ -100,6 +100,10 @@
only a small increase in space used (and in some cases a reduction in
space). (Robert Collins)
+ * Initial commit no longer SHAs files twice and now reuses the path
+ rather than looking it up again, making it faster.
+ (Ian Clatworthy)
+
API BREAKS:
* ``Branch.append_revision`` is removed altogether; please use
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2007-07-26 21:18:35 +0000
+++ b/bzrlib/inventory.py 2007-08-31 05:54:21 +0000
@@ -422,40 +422,23 @@
This means that all its fields are populated, that it has its
text stored in the text store or weave.
+
+ :return: True if anything was recorded
"""
- # mutter('new parents of %s are %r', path, previous_entries)
+ # cannot be unchanged unless there is only one parent file rev.
self._read_tree_state(path, work_tree)
- # TODO: Where should we determine whether to reuse a
- # previous revision id or create a new revision? 20060606
if len(previous_entries) == 1:
- # cannot be unchanged unless there is only one parent file rev.
parent_ie = previous_entries.values()[0]
if self._unchanged(parent_ie):
- # mutter("found unchanged entry")
self.revision = parent_ie.revision
- return "unchanged"
- return self._snapshot_into_revision(revision, previous_entries,
- work_tree, commit_builder)
-
- def _snapshot_into_revision(self, revision, previous_entries, work_tree,
- commit_builder):
- """Record this revision unconditionally into a store.
-
- The entry's last-changed revision property (`revision`) is updated to
- that of the new revision.
-
- :param revision: id of the new revision that is being recorded.
-
- :returns: String description of the commit (e.g. "merged", "modified"), etc.
- """
- # mutter('new revision {%s} for {%s}', revision, self.file_id)
+ return False
self.revision = revision
- self._snapshot_text(previous_entries, work_tree, commit_builder)
+ return self._snapshot_text(previous_entries, work_tree, commit_builder)
def _snapshot_text(self, file_parents, work_tree, commit_builder):
"""Record the 'text' of this entry, whatever form that takes.
-
- This default implementation simply adds an empty text.
+
+ :return: True if anything was recorded
"""
raise NotImplementedError(self._snapshot_text)
@@ -586,6 +569,7 @@
def _snapshot_text(self, file_parents, work_tree, commit_builder):
"""See InventoryEntry._snapshot_text."""
commit_builder.modified_directory(self.file_id, file_parents)
+ return True
class InventoryFile(InventoryEntry):
@@ -716,12 +700,34 @@
def _forget_tree_state(self):
self.text_sha1 = None
- def _snapshot_text(self, file_parents, work_tree, commit_builder):
- """See InventoryEntry._snapshot_text."""
+ def snapshot(self, revision, path, previous_entries,
+ work_tree, commit_builder):
+ """See InventoryEntry.snapshot."""
+ # Note: We use a custom implementation of this method for files
+ # because it's a performance critical part of commit.
+
+ # If this is the initial commit for this file, we know the sha is
+ # coming later so skip calculating it now (in _read_tree_state())
+ if len(previous_entries) == 0:
+ self.executable = work_tree.is_executable(self.file_id, path=path)
+ else:
+ self._read_tree_state(path, work_tree)
+
+ # If nothing is changed from the sole parent, there's nothing to do
+ if len(previous_entries) == 1:
+ parent_ie = previous_entries.values()[0]
+ if self._unchanged(parent_ie):
+ self.revision = parent_ie.revision
+ return False
+
+ # Add the file to the repository
+ self.revision = revision
def get_content_byte_lines():
- return work_tree.get_file(self.file_id).readlines()
+ return work_tree.get_file(self.file_id, path).readlines()
self.text_sha1, self.text_size = commit_builder.modified_file_text(
- self.file_id, file_parents, get_content_byte_lines, self.text_sha1, self.text_size)
+ self.file_id, previous_entries, get_content_byte_lines,
+ self.text_sha1, self.text_size)
+ return True
def _unchanged(self, previous_ie):
"""See InventoryEntry._unchanged."""
@@ -827,6 +833,7 @@
"""See InventoryEntry._snapshot_text."""
commit_builder.modified_link(
self.file_id, file_parents, self.symlink_target)
+ return True
class TreeReference(InventoryEntry):
@@ -845,6 +852,7 @@
def _snapshot_text(self, file_parents, work_tree, commit_builder):
commit_builder.modified_reference(self.file_id, file_parents)
+ return True
def _read_tree_state(self, path, work_tree):
"""Populate fields in the inventory entry from the given tree.
=== modified file 'bzrlib/memorytree.py'
--- a/bzrlib/memorytree.py 2007-07-13 06:22:45 +0000
+++ b/bzrlib/memorytree.py 2007-08-23 00:12:35 +0000
@@ -79,9 +79,11 @@
missing files, so is a no-op.
"""
- def get_file(self, file_id):
+ def get_file(self, file_id, path=None):
"""See Tree.get_file."""
- return self._file_transport.get(self.id2path(file_id))
+ if path is None:
+ path = self.id2path(file_id)
+ return self._file_transport.get(path)
def get_file_sha1(self, file_id, path=None, stat_value=None):
"""See Tree.get_file_sha1()."""
=== modified file 'bzrlib/revisiontree.py'
--- a/bzrlib/revisiontree.py 2007-08-16 04:41:00 +0000
+++ b/bzrlib/revisiontree.py 2007-08-30 02:00:37 +0000
@@ -81,7 +81,7 @@
file_id = osutils.safe_file_id(file_id)
return ''.join(self.get_file_lines(file_id))
- def get_file(self, file_id):
+ def get_file(self, file_id, path=None):
file_id = osutils.safe_file_id(file_id)
return StringIO(self.get_file_text(file_id))
=== modified file 'bzrlib/tests/tree_implementations/test_tree.py'
--- a/bzrlib/tests/tree_implementations/test_tree.py 2007-08-28 14:40:42 +0000
+++ b/bzrlib/tests/tree_implementations/test_tree.py 2007-08-30 02:02:21 +0000
@@ -122,6 +122,23 @@
tree.unlock()
+class TestFileContent(TestCaseWithTree):
+
+ def test_get_file(self):
+ work_tree = self.make_branch_and_tree('wt')
+ tree = self.get_tree_no_parents_abc_content_2(work_tree)
+ tree.lock_read()
+ try:
+ # Test lookup without path works
+ lines = tree.get_file('a-id').readlines()
+ self.assertEqual(['foobar\n'], lines)
+ # Test lookup with path works
+ lines = tree.get_file('a-id', path='a').readlines()
+ self.assertEqual(['foobar\n'], lines)
+ finally:
+ tree.unlock()
+
+
class TestExtractFilesBytes(TestCaseWithTree):
def test_iter_files_bytes(self):
=== modified file 'bzrlib/tree.py'
--- a/bzrlib/tree.py 2007-08-27 14:55:48 +0000
+++ b/bzrlib/tree.py 2007-08-31 05:54:21 +0000
@@ -210,8 +210,12 @@
def _get_inventory(self):
return self._inventory
- def get_file(self, file_id):
- """Return a file object for the file file_id in the tree."""
+ def get_file(self, file_id, path=None):
+ """Return a file object for the file file_id in the tree.
+
+ If both file_id and path are defined, it is implementation defined as
+ to which one is used.
+ """
raise NotImplementedError(self.get_file)
def get_file_mtime(self, file_id, path=None):
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2007-08-20 13:07:12 +0000
+++ b/bzrlib/workingtree.py 2007-08-23 00:12:35 +0000
@@ -451,9 +451,11 @@
def has_filename(self, filename):
return osutils.lexists(self.abspath(filename))
- def get_file(self, file_id):
- file_id = osutils.safe_file_id(file_id)
- return self.get_file_byname(self.id2path(file_id))
+ def get_file(self, file_id, path=None):
+ if path is None:
+ file_id = osutils.safe_file_id(file_id)
+ path = self.id2path(file_id)
+ return self.get_file_byname(path)
def get_file_text(self, file_id):
file_id = osutils.safe_file_id(file_id)
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-08-24 02:15:47 +0000
+++ b/bzrlib/workingtree_4.py 2007-08-30 02:02:21 +0000
@@ -1510,7 +1510,7 @@
return self._repository.weave_store.get_weave(file_id,
self._repository.get_transaction())
- def get_file(self, file_id):
+ def get_file(self, file_id, path=None):
return StringIO(self.get_file_text(file_id))
def get_file_lines(self, file_id):
More information about the bazaar-commits
mailing list