Rev 4887: (jam) Faster conversions by enabling re-use of cached InventoryEntries in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Thu Dec 10 17:35:41 GMT 2009

At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4887 [merge]
revision-id: pqm at pqm.ubuntu.com-20091210173537-7gui2z64ki7nioit
parent: pqm at pqm.ubuntu.com-20091210164716-e18k2to740e9eq7s
parent: john at arbash-meinel.com-20091203053103-00z8bryoyfhm9x52
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2009-12-10 17:35:37 +0000
message:
  (jam) Faster conversions by enabling re-use of cached
  	InventoryEntries.
modified:
  bzrlib/chk_serializer.py       chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/serializer.py           serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
  bzrlib/tests/test_xml.py       test_xml.py-20050905091053-80b45588931a9b35
  bzrlib/xml4.py                 xml4.py-20050916091259-db5ab55e7e6ca324
  bzrlib/xml5.py                 xml5.py-20080328030717-t9guwinq8hom0ar3-1
  bzrlib/xml7.py                 xml7.py-20061029182747-d5tiiny21bvrd2jj-1
  bzrlib/xml8.py                 xml5.py-20050907032657-aac8f960815b66b1
  bzrlib/xml_serializer.py       xml.py-20050309040759-57d51586fdec365d
=== modified file 'bzrlib/chk_serializer.py'

--- a/bzrlib/chk_serializer.py	2009-07-22 20:22:21 +0000
+++ b/bzrlib/chk_serializer.py	2009-12-03 04:55:02 +0000
@@ -139,7 +139,7 @@
     revision_format_num = None
     support_altered_by_hack = False
 
-    def _unpack_entry(self, elt):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         kind = elt.tag
         if not kind in self.supported_kinds:
             raise AssertionError('unsupported entry kind %s' % kind)
@@ -152,7 +152,8 @@
             return inventory.TreeReference(file_id, name, parent_id, revision,
                                            reference_revision)
         else:
-            return xml7.Serializer_v7._unpack_entry(self, elt)
+            return xml7.Serializer_v7._unpack_entry(self, elt,
+                entry_cache=entry_cache, return_from_cache=return_from_cache)
 
     def __init__(self, node_size, search_key_name):
         self.maximum_size = node_size

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-12-02 18:05:08 +0000
+++ b/bzrlib/repository.py	2009-12-03 05:31:03 +0000
@@ -1314,6 +1314,9 @@
         self._fallback_repositories = []
         # An InventoryEntry cache, used during deserialization
         self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
+        # Is it safe to return inventory entries directly from the entry cache,
+        # rather than copying them?
+        self._safe_to_return_from_cache = False
 
     def __repr__(self):
         if self._fallback_repositories:
@@ -2431,7 +2434,8 @@
         :param xml: A serialised inventory.
         """
         result = self._serializer.read_inventory_from_string(xml, revision_id,
-                    entry_cache=self._inventory_entry_cache)
+                    entry_cache=self._inventory_entry_cache,
+                    return_from_cache=self._safe_to_return_from_cache)
         if result.revision_id != revision_id:
             raise AssertionError('revision id mismatch %s != %s' % (
                 result.revision_id, revision_id))
@@ -3840,6 +3844,7 @@
         pending_revisions = []
         parent_map = self.source.get_parent_map(revision_ids)
         self._fetch_parent_invs_for_stacking(parent_map, cache)
+        self.source._safe_to_return_from_cache = True
         for tree in self.source.revision_trees(revision_ids):
             # Find a inventory delta for this revision.
             # Find text entries that need to be copied, too.
@@ -3893,6 +3898,7 @@
             pending_revisions.append(revision)
             cache[current_revision_id] = tree
             basis_id = current_revision_id
+        self.source._safe_to_return_from_cache = False
         # Copy file texts
         from_texts = self.source.texts
         to_texts = self.target.texts
@@ -3977,6 +3983,7 @@
                 basis_id = self._fetch_batch(batch, basis_id, cache,
                                              a_graph=a_graph)
             except:
+                self.source._safe_to_return_from_cache = False
                 self.target.abort_write_group()
                 raise
             else:

=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py	2009-07-29 17:44:34 +0000
+++ b/bzrlib/serializer.py	2009-12-03 04:55:02 +0000
@@ -50,7 +50,7 @@
         raise NotImplementedError(self.write_inventory_to_string)
 
     def read_inventory_from_string(self, string, revision_id=None,
-                                   entry_cache=None):
+                                   entry_cache=None, return_from_cache=False):
         """Read string into an inventory object.
 
         :param string: The serialized inventory to read.
@@ -64,6 +64,10 @@
         :param entry_cache: An optional cache of InventoryEntry objects. If
             supplied we will look up entries via (file_id, revision_id) which
             should map to a valid InventoryEntry (File/Directory/etc) object.
+        :param return_from_cache: Return entries directly from the cache,
+            rather than copying them first. This is only safe if the caller
+            promises not to mutate the returned inventory entries, but it can
+            make some operations significantly faster.
         """
         raise NotImplementedError(self.read_inventory_from_string)
 

=== modified file 'bzrlib/tests/test_xml.py'
--- a/bzrlib/tests/test_xml.py	2009-07-15 06:39:07 +0000
+++ b/bzrlib/tests/test_xml.py	2009-12-03 04:55:02 +0000
@@ -18,6 +18,7 @@
 
 from bzrlib import (
     errors,
+    fifo_cache,
     inventory,
     xml6,
     xml7,
@@ -290,6 +291,38 @@
                 _inventory_v5a, revision_id='test-rev-id')
         self.assertEqual('test-rev-id', inv.root.revision)
 
+    def test_unpack_inventory_5a_cache_and_copy(self):
+        # A passed-in entry_cache should get populated with the objects,
+        # but the returned objects should be copies if return_from_cache is
+        # False
+        entry_cache = fifo_cache.FIFOCache()
+        inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+            _inventory_v5a, revision_id='test-rev-id',
+            entry_cache=entry_cache, return_from_cache=False)
+        for entry in inv.iter_just_entries():
+            key = (entry.file_id, entry.revision)
+            if entry.file_id is inv.root.file_id:
+                # The root id is inferred for xml v5
+                self.assertFalse(key in entry_cache)
+            else:
+                self.assertIsNot(entry, entry_cache[key])
+
+    def test_unpack_inventory_5a_cache_no_copy(self):
+        # A passed-in entry_cache should get populated with the objects.
+        # The returned objects should be the cached objects themselves if
+        # return_from_cache is True
+        entry_cache = fifo_cache.FIFOCache()
+        inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+            _inventory_v5a, revision_id='test-rev-id',
+            entry_cache=entry_cache, return_from_cache=True)
+        for entry in inv.iter_just_entries():
+            key = (entry.file_id, entry.revision)
+            if entry.file_id is inv.root.file_id:
+                # The root id is inferred for xml v5
+                self.assertFalse(key in entry_cache)
+            else:
+                self.assertIs(entry, entry_cache[key])
+
     def test_unpack_inventory_5b(self):
         inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
                 _inventory_v5b, revision_id='test-rev-id')

=== modified file 'bzrlib/xml4.py'
--- a/bzrlib/xml4.py	2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml4.py	2009-12-03 04:55:02 +0000
@@ -63,7 +63,8 @@
         return e
 
 
-    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element
 
         :param revision_id: Ignored parameter used by xml5.
@@ -71,14 +72,15 @@
         root_id = elt.get('file_id') or ROOT_ID
         inv = Inventory(root_id)
         for e in elt:
-            ie = self._unpack_entry(e, entry_cache=entry_cache)
+            ie = self._unpack_entry(e, entry_cache=entry_cache,
+                                    return_from_cache=return_from_cache)
             if ie.parent_id == ROOT_ID:
                 ie.parent_id = root_id
             inv.add(ie)
         return inv
 
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         ## original format inventories don't have a parent_id for
         ## nodes in the root directory, but it's cleaner to use one
         ## internally.

=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py	2009-07-15 06:39:26 +0000
+++ b/bzrlib/xml5.py	2009-12-03 04:55:02 +0000
@@ -30,7 +30,8 @@
     format_num = '5'
     root_id = inventory.ROOT_ID
 
-    def _unpack_inventory(self, elt, revision_id, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element
         """
         root_id = elt.get('file_id') or inventory.ROOT_ID
@@ -54,7 +55,8 @@
         unpack_entry = self._unpack_entry
         byid = inv._byid
         for e in elt:
-            ie = unpack_entry(e, entry_cache=entry_cache)
+            ie = unpack_entry(e, entry_cache=entry_cache,
+                              return_from_cache=return_from_cache)
             parent_id = ie.parent_id
             if parent_id is None:
                 ie.parent_id = parent_id = root_id

=== modified file 'bzrlib/xml7.py'
--- a/bzrlib/xml7.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/xml7.py	2009-12-03 04:55:02 +0000
@@ -28,7 +28,7 @@
     supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
     format_num = '7'
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         kind = elt.tag
         if not kind in self.supported_kinds:
             raise AssertionError('unsupported entry kind %s' % kind)
@@ -41,6 +41,7 @@
             return inventory.TreeReference(file_id, name, parent_id, revision,
                                            reference_revision)
         else:
-            return xml6.Serializer_v6._unpack_entry(self, elt)
+            return xml6.Serializer_v6._unpack_entry(self, elt,
+                entry_cache=entry_cache, return_from_cache=return_from_cache)
 
 serializer_v7 = Serializer_v7()

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2009-07-07 04:32:13 +0000
+++ b/bzrlib/xml8.py	2009-12-03 04:55:02 +0000
@@ -371,7 +371,8 @@
             prop_elt.tail = '\n'
         top_elt.tail = '\n'
 
-    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element"""
         if elt.tag != 'inventory':
             raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
@@ -384,12 +385,13 @@
             revision_id = cache_utf8.encode(revision_id)
         inv = inventory.Inventory(root_id=None, revision_id=revision_id)
         for e in elt:
-            ie = self._unpack_entry(e, entry_cache=entry_cache)
+            ie = self._unpack_entry(e, entry_cache=entry_cache,
+                                    return_from_cache=return_from_cache)
             inv.add(ie)
         self._check_cache_size(len(inv), entry_cache)
         return inv
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         elt_get = elt.get
         file_id = elt_get('file_id')
         revision = elt_get('revision')
@@ -433,9 +435,10 @@
                 pass
             else:
                 # Only copying directory entries drops us 2.85s => 2.35s
-                # if cached_ie.kind == 'directory':
-                #     return cached_ie.copy()
-                # return cached_ie
+                if return_from_cache:
+                    if cached_ie.kind == 'directory':
+                        return cached_ie.copy()
+                    return cached_ie
                 return cached_ie.copy()
 
         kind = elt.tag

=== modified file 'bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py	2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml_serializer.py	2009-12-03 04:55:02 +0000
@@ -55,7 +55,7 @@
     squashes_xml_invalid_characters = True
 
     def read_inventory_from_string(self, xml_string, revision_id=None,
-                                   entry_cache=None):
+                                   entry_cache=None, return_from_cache=False):
         """Read xml_string into an inventory object.
 
         :param xml_string: The xml to read.
@@ -69,10 +69,15 @@
         :param entry_cache: An optional cache of InventoryEntry objects. If
             supplied we will look up entries via (file_id, revision_id) which
             should map to a valid InventoryEntry (File/Directory/etc) object.
+        :param return_from_cache: Return entries directly from the cache,
+            rather than copying them first. This is only safe if the caller
+            promises not to mutate the returned inventory entries, but it can
+            make some operations significantly faster.
         """
         try:
             return self._unpack_inventory(fromstring(xml_string), revision_id,
-                                          entry_cache=entry_cache)
+                                          entry_cache=entry_cache,
+                                          return_from_cache=return_from_cache)
         except ParseError, e:
             raise errors.UnexpectedInventoryFormat(e)