Rev 3889: Do Inventory.add() optimizations, and determine 'best' results. in http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache

John Arbash Meinel john at arbash-meinel.com
Wed Dec 10 18:29:58 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.11/xml_cache

------------------------------------------------------------
revno: 3889
revision-id: john at arbash-meinel.com-20081210182935-dejc81qksqka717d
parent: john at arbash-meinel.com-20081210174726-7e0jy7j5kmq20alx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: xml_cache
timestamp: Wed 2008-12-10 12:29:35 -0600
message:
  Do Inventory.add() optimizations, and determine 'best' results.
-------------- next part --------------
=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py	2008-04-24 07:22:53 +0000
+++ b/bzrlib/xml5.py	2008-12-10 18:29:35 +0000
@@ -16,6 +16,7 @@
 
 from bzrlib import (
     cache_utf8,
+    errors,
     inventory,
     xml6,
     xml8,
@@ -44,11 +45,33 @@
         if data_revision_id is not None:
             revision_id = cache_utf8.encode(data_revision_id)
         inv = inventory.Inventory(root_id, revision_id=revision_id)
+        # Optimizations tested
+        #   baseline w/entry cache  2.85s
+        #   using inv._byid         2.55s
+        #   avoiding attributes     2.46s
+        #   adding assertions       2.50s
+        #   last_parent cache       2.52s (worse, removed)
+        unpack_entry = self._unpack_entry
+        byid = inv._byid
         for e in elt:
-            ie = self._unpack_entry(e)
-            if ie.parent_id is None:
-                ie.parent_id = root_id
-            inv.add(ie)
+            ie = unpack_entry(e)
+            parent_id = ie.parent_id
+            if parent_id is None:
+                ie.parent_id = parent_id = root_id
+            try:
+                parent = byid[parent_id]
+            except KeyError:
+                raise errors.BzrError("parent_id {%s} not in inventory"
+                                      % (parent_id,))
+            if ie.file_id in byid:
+                raise errors.DuplicateFileId(ie.file_id,
+                                             byid[ie.file_id])
+            if ie.name in parent.children:
+                raise errors.BzrError("%s is already versioned"
+                    % (osutils.pathjoin(inv.id2path(parent_id),
+                       ie.name).encode('utf-8'),))
+            parent.children[ie.name] = ie
+            byid[ie.file_id] = ie
         if revision_id is not None:
             inv.root.revision = revision_id
         return inv

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2008-12-10 17:47:26 +0000
+++ b/bzrlib/xml8.py	2008-12-10 18:29:35 +0000
@@ -368,16 +368,25 @@
         # Some timings for "repo.revision_trees(last_100_bzr_revs)"
         #   unmodified  4.1s
         #   using lru   3.5s
-        #   using fifo  2.9s
+        #   using fifo  2.83s
         #   lru._cache  2.8s
+        #   dict        2.75s
         # Note that a cache of 10k nodes is more than sufficient to hold all of
         # the inventory for the last 100 revs.
+        #   With inventory.add() optimizations, and not copying file entries,
+        #   performance gets to 2.00s
         key = (file_id, revision)
         try:
            # We copy it, because some operations may mutate it
-            return _entry_cache[key].copy()
+            cached_ie = _entry_cache[key]
         except KeyError:
             pass
+        else:
+            # Only copying directory entries drops us 2.85s => 2.35s
+            if cached_ie.kind == 'directory':
+                return cached_ie.copy()
+            return cached_ie
+            # return cached_ie.copy()
 
         kind = elt.tag
         if not InventoryEntry.versionable_kind(kind):
@@ -411,7 +420,8 @@
         else:
             raise errors.UnsupportedInventoryKind(kind)
         ie.revision = revision
-        _entry_cache[key] = ie
+        if revision is not None:
+            _entry_cache[key] = ie
 
         return ie
 



More information about the bazaar-commits mailing list