Rev 2328: 50% speedup in the dirstate->inventory conversion logic by caching the parent ids as we walk the tree. Some further work would be to maintain a stack of parents as we know we visit depth first. in sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/
Robert Collins
robertc at robertcollins.net
Fri Feb 16 02:24:11 GMT 2007
At sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/
------------------------------------------------------------
revno: 2328
revision-id: robertc at robertcollins.net-20070216022139-bia82eszdktack3k
parent: john at arbash-meinel.com-20070216010712-31haaz780hytg7e5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate
timestamp: Fri 2007-02-16 13:21:39 +1100
message:
50% speedup in the dirstate->inventory conversion logic by caching the parent ids as we walk the tree. Some further work would be to maintain a stack of parents as we know we visit depth first.
modified:
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-02-16 00:55:29 +0000
+++ b/bzrlib/workingtree_4.py 2007-02-16 02:21:39 +0000
@@ -211,12 +211,15 @@
rows = self._dirstate._iter_rows()
root_row = rows.next()
inv = Inventory(root_id=root_row[0][3].decode('utf8'))
+ # we could do this straight out of the dirstate; it might be fast
+ # and should be profiled - RBC 20070216
+ parent_ids = {'' : inv.root.file_id}
for line in rows:
dirname, name, kind, fileid_utf8, size, stat, link_or_sha1 = line[0]
if dirname == '/':
# not in this revision tree.
continue
- parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
+ parent_id = parent_ids[dirname]
file_id = fileid_utf8.decode('utf8')
entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
if kind == 'file':
@@ -224,6 +227,8 @@
#entry.text_size = size
#entry.text_sha1 = sha1
pass
+ elif kind == 'directory':
+ parent_ids[(dirname + '/' + name).strip('/')] = file_id
inv.add(entry)
self._inventory = inv
@@ -231,11 +236,12 @@
#if not path:
# path = self.inventory.id2path(file_id)
# # now lookup row by path
- row, parents = self._get_row(file_id=file_id)
+ row, parents = self._get_row(file_id=file_id, path=path)
assert row is not None, 'what error should this raise'
# TODO:
# if row stat is valid, use cached sha1, else, get a new sha1.
- path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
+ if path is None:
+ path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
return self._hashcache.get_sha1(path, stat_value)
def _get_inventory(self):
@@ -808,12 +814,15 @@
root_row = rows.next()
inv = Inventory(root_id=root_row[0][3].decode('utf8'),
revision_id=self._revision_id)
+ # we could do this straight out of the dirstate; it might be fast
+ # and should be profiled - RBC 20070216
+ parent_ids = {'' : inv.root.file_id}
for line in rows:
revid, kind, dirname, name, size, executable, sha1 = line[1][parent_index]
if not revid:
# not in this revision tree.
continue
- parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
+ parent_id = parent_ids[dirname]
file_id = line[0][3].decode('utf8')
entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
entry.revision = revid.decode('utf8')
@@ -821,6 +830,8 @@
entry.executable = executable
entry.text_size = size
entry.text_sha1 = sha1
+ elif kind == 'directory':
+ parent_ids[(dirname + '/' + name).strip('/')] = file_id
inv.add(entry)
self._inventory = inv
More information about the bazaar-commits mailing list