[MERGE REVIEW] Performance tweak for TreeTransform
John Arbash Meinel
john at arbash-meinel.com
Wed May 24 23:17:38 BST 2006
Aaron Bentley wrote:
> Hi all,
>
> I've done some profiling of TreeTransform (through the bzrtools import
> command) and found that TreeTransform.canonical_path was taking a lot of
> time. This patch speeds up canonical_path by a factor of 10, which
> increases overall import speed by 1/4 - 1/3.
>
> Aaron
The code looks pretty good. Do you have a branch of this somewhere?
I assume that the cache only lasts as long as the Transform object, right?
This doesn't seem to effect the time to build a tree, (at least by my
measured times for bzr branch). So what does this effect? Is it the
commit time, or the add time? I'm not sure what you mean by "import speed".
Also, this seems like a really good place to write one of our new
benchmark tests. Though since I really don't know how to do it, I
certainly wouldn't block merging it.
John
=:->
------------------------------------------------------------------------
=== modified file 'bzrlib/tests/test_transform.py'
--- bzrlib/tests/test_transform.py
+++ bzrlib/tests/test_transform.py
@@ -57,6 +57,8 @@
transform, root = self.get_transform()
self.assertIs(transform.get_tree_parent(root), ROOT_PARENT)
imaginary_id = transform.trans_id_tree_path('imaginary')
+ imaginary_id2 = transform.trans_id_tree_path('imaginary/')
+ self.assertEqual(imaginary_id, imaginary_id2)
self.assertEqual(transform.get_tree_parent(imaginary_id), root)
self.assertEqual(transform.final_kind(root), 'directory')
self.assertEqual(transform.final_file_id(root),
self.wt.get_root_id())
=== modified file 'bzrlib/transform.py'
--- bzrlib/transform.py
+++ bzrlib/transform.py
@@ -101,6 +101,10 @@
self._removed_id = set()
self._tree_path_ids = {}
self._tree_id_paths = {}
+ self._realpaths = {}
+ # Cache of realpath results, to speed up canonical_path
+ self._relpaths = {}
+ # Cache of relpath results, to speed up canonical_path
self._new_root = self.trans_id_tree_file_id(tree.get_root_id())
self.__done = False
self._pb = pb
@@ -211,9 +215,21 @@
def canonical_path(self, path):
"""Get the canonical tree-relative path"""
# don't follow final symlinks
- dirname, basename = os.path.split(self._tree.abspath(path))
- dirname = os.path.realpath(dirname)
- return self._tree.relpath(pathjoin(dirname, basename))
+ abs = self._tree.abspath(path)
+ if abs in self._relpaths:
+ return self._relpaths[abs]
+ dirname, basename = os.path.split(abs)
+ if dirname not in self._realpaths:
+ self._realpaths[dirname] = os.path.realpath(dirname)
+ dirname = self._realpaths[dirname]
+ abs = pathjoin(dirname, basename)
+ if dirname in self._relpaths:
+ relpath = pathjoin(self._relpaths[dirname], basename)
+ relpath = relpath.rstrip('/\\')
+ else:
+ relpath = self._tree.relpath(abs)
+ self._relpaths[abs] = relpath
+ return relpath
def trans_id_tree_path(self, path):
"""Determine (and maybe set) the transaction ID for a tree path."""
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 254 bytes
Desc: OpenPGP digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060524/bc34cb81/attachment.pgp
More information about the bazaar
mailing list