Rev 2328: 50% speedup in the dirstate->inventory conversion logic by caching the parent ids as we walk the tree. Possible further work: maintain a stack of parents, since we know the tree is visited depth-first. in sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

Robert Collins robertc at robertcollins.net
Fri Feb 16 02:24:11 GMT 2007


At sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

------------------------------------------------------------
revno: 2328
revision-id: robertc at robertcollins.net-20070216022139-bia82eszdktack3k
parent: john at arbash-meinel.com-20070216010712-31haaz780hytg7e5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate
timestamp: Fri 2007-02-16 13:21:39 +1100
message:
  50% speedup in the dirstate->inventory conversion logic by caching the parent ids as we walk the tree. Possible further work: maintain a stack of parents, since we know the tree is visited depth-first.
modified:
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-16 00:55:29 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-16 02:21:39 +0000
@@ -211,12 +211,15 @@
         rows = self._dirstate._iter_rows()
         root_row = rows.next()
         inv = Inventory(root_id=root_row[0][3].decode('utf8'))
+        # we could do this straight out of the dirstate; it might be fast
+        # and should be profiled - RBC 20070216
+        parent_ids = {'' : inv.root.file_id}
         for line in rows:
             dirname, name, kind, fileid_utf8, size, stat, link_or_sha1 = line[0]
             if dirname == '/':
                 # not in this revision tree.
                 continue
-            parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
+            parent_id = parent_ids[dirname]
             file_id = fileid_utf8.decode('utf8')
             entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
             if kind == 'file':
@@ -224,6 +227,8 @@
                 #entry.text_size = size
                 #entry.text_sha1 = sha1
                 pass
+            elif kind == 'directory':
+                parent_ids[(dirname + '/' + name).strip('/')] = file_id
             inv.add(entry)
         self._inventory = inv
 
@@ -231,11 +236,12 @@
         #if not path:
         #    path = self.inventory.id2path(file_id)
         #    # now lookup row by path
-        row, parents = self._get_row(file_id=file_id)
+        row, parents = self._get_row(file_id=file_id, path=path)
         assert row is not None, 'what error should this raise'
         # TODO:
         # if row stat is valid, use cached sha1, else, get a new sha1.
-        path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
+        if path is None:
+            path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
         return self._hashcache.get_sha1(path, stat_value)
 
     def _get_inventory(self):
@@ -808,12 +814,15 @@
         root_row = rows.next()
         inv = Inventory(root_id=root_row[0][3].decode('utf8'),
             revision_id=self._revision_id)
+        # we could do this straight out of the dirstate; it might be fast
+        # and should be profiled - RBC 20070216
+        parent_ids = {'' : inv.root.file_id}
         for line in rows:
             revid, kind, dirname, name, size, executable, sha1 = line[1][parent_index]
             if not revid:
                 # not in this revision tree.
                 continue
-            parent_id = inv[inv.path2id(dirname.decode('utf8'))].file_id
+            parent_id = parent_ids[dirname]
             file_id = line[0][3].decode('utf8')
             entry = make_entry(kind, name.decode('utf8'), parent_id, file_id)
             entry.revision = revid.decode('utf8')
@@ -821,6 +830,8 @@
                 entry.executable = executable
                 entry.text_size = size
                 entry.text_sha1 = sha1
+            elif kind == 'directory':
+                parent_ids[(dirname + '/' + name).strip('/')] = file_id
             inv.add(entry)
         self._inventory = inv
 



More information about the bazaar-commits mailing list