Rev 4851: Change from being a per-serializer attribute to being a per-repo attribute. in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-xml8

John Arbash Meinel john at arbash-meinel.com
Thu Dec 3 04:55:12 GMT 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0b4-xml8

------------------------------------------------------------
revno: 4851
revision-id: john at arbash-meinel.com-20091203045502-uvhmg6b1yjbzzt8q
parent: john at arbash-meinel.com-20091201212708-bjlq7ydk7xtmzuhd
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0b4-xml8
timestamp: Wed 2009-12-02 22:55:02 -0600
message:
  Change from being a per-serializer attribute to being a per-repo attribute.
  This means we have some churn on *all* of the serializer apis, but it means
  we *don't* have churn on all of the repository apis.
  
  It makes it more thread-safe, since serializers are global instances.
  Repositories aren't currently thread-safe anyway. (get_record_stream() specifically
  is known not to be thread-safe on 2a format repos.)
-------------- next part --------------
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py	2009-07-22 20:22:21 +0000
+++ b/bzrlib/chk_serializer.py	2009-12-03 04:55:02 +0000
@@ -139,7 +139,7 @@
     revision_format_num = None
     support_altered_by_hack = False
 
-    def _unpack_entry(self, elt):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         kind = elt.tag
         if not kind in self.supported_kinds:
             raise AssertionError('unsupported entry kind %s' % kind)
@@ -152,7 +152,8 @@
             return inventory.TreeReference(file_id, name, parent_id, revision,
                                            reference_revision)
         else:
-            return xml7.Serializer_v7._unpack_entry(self, elt)
+            return xml7.Serializer_v7._unpack_entry(self, elt,
+                entry_cache=entry_cache, return_from_cache=return_from_cache)
 
     def __init__(self, node_size, search_key_name):
         self.maximum_size = node_size

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-12-01 21:27:08 +0000
+++ b/bzrlib/repository.py	2009-12-03 04:55:02 +0000
@@ -1314,6 +1314,9 @@
         self._fallback_repositories = []
         # An InventoryEntry cache, used during deserialization
         self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
+        # Is it safe to return inventory entries directly from the entry cache,
+        # rather copying them?
+        self._safe_to_return_from_cache = False
 
     def __repr__(self):
         if self._fallback_repositories:
@@ -2431,7 +2434,8 @@
         :param xml: A serialised inventory.
         """
         result = self._serializer.read_inventory_from_string(xml, revision_id,
-                    entry_cache=self._inventory_entry_cache)
+                    entry_cache=self._inventory_entry_cache,
+                    return_from_cache=self._safe_to_return_from_cache)
         if result.revision_id != revision_id:
             raise AssertionError('revision id mismatch %s != %s' % (
                 result.revision_id, revision_id))
@@ -3840,7 +3844,7 @@
         pending_revisions = []
         parent_map = self.source.get_parent_map(revision_ids)
         self._fetch_parent_invs_for_stacking(parent_map, cache)
-        self.source._format._serializer.safe_to_use_cache_items = True
+        self.source._safe_to_return_from_cache = True
         for tree in self.source.revision_trees(revision_ids):
             # Find a inventory delta for this revision.
             # Find text entries that need to be copied, too.
@@ -3894,7 +3898,7 @@
             pending_revisions.append(revision)
             cache[current_revision_id] = tree
             basis_id = current_revision_id
-        self.source._format._serializer.safe_to_use_cache_items = False
+        self.source._safe_to_return_from_cache = False
         # Copy file texts
         from_texts = self.source.texts
         to_texts = self.target.texts
@@ -3979,7 +3983,7 @@
                 basis_id = self._fetch_batch(batch, basis_id, cache,
                                              a_graph=a_graph)
             except:
-                self.source._format._serializer.safe_to_use_cache_items = False
+                self.source._safe_to_return_from_cache = False
                 self.target.abort_write_group()
                 raise
             else:

=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py	2009-12-01 21:27:08 +0000
+++ b/bzrlib/serializer.py	2009-12-03 04:55:02 +0000
@@ -26,11 +26,6 @@
 
     squashes_xml_invalid_characters = False
 
-    # Setting this true will return InventoryEntry items directly from the
-    # cache, rather than copying them. It can be much faster for some
-    # operations, but the callers must not mutate the returned objects
-    safe_to_use_cache_items = False
-
     def write_inventory(self, inv, f):
         """Write inventory to a file.
 
@@ -55,7 +50,7 @@
         raise NotImplementedError(self.write_inventory_to_string)
 
     def read_inventory_from_string(self, string, revision_id=None,
-                                   entry_cache=None):
+                                   entry_cache=None, return_from_cache=False):
         """Read string into an inventory object.
 
         :param string: The serialized inventory to read.
@@ -69,6 +64,10 @@
         :param entry_cache: An optional cache of InventoryEntry objects. If
             supplied we will look up entries via (file_id, revision_id) which
             should map to a valid InventoryEntry (File/Directory/etc) object.
+        :param return_from_cache: Return entries directly from the cache,
+            rather than copying them first. This is only safe if the caller
+            promises not to mutate the returned inventory entries, but it can
+            make some operations significantly faster.
         """
         raise NotImplementedError(self.read_inventory_from_string)
 

=== modified file 'bzrlib/tests/test_xml.py'
--- a/bzrlib/tests/test_xml.py	2009-07-15 06:39:07 +0000
+++ b/bzrlib/tests/test_xml.py	2009-12-03 04:55:02 +0000
@@ -18,6 +18,7 @@
 
 from bzrlib import (
     errors,
+    fifo_cache,
     inventory,
     xml6,
     xml7,
@@ -290,6 +291,38 @@
                 _inventory_v5a, revision_id='test-rev-id')
         self.assertEqual('test-rev-id', inv.root.revision)
 
+    def test_unpack_inventory_5a_cache_and_copy(self):
+        # Passing an entry_cache should get populated with the objects
+        # But the returned objects should be copies if return_from_cache is
+        # False
+        entry_cache = fifo_cache.FIFOCache()
+        inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+            _inventory_v5a, revision_id='test-rev-id',
+            entry_cache=entry_cache, return_from_cache=False)
+        for entry in inv.iter_just_entries():
+            key = (entry.file_id, entry.revision)
+            if entry.file_id is inv.root.file_id:
+                # The root id is inferred for xml v5
+                self.assertFalse(key in entry_cache)
+            else:
+                self.assertIsNot(entry, entry_cache[key])
+
+    def test_unpack_inventory_5a_cache_no_copy(self):
+        # Passing an entry_cache should get populated with the objects
+        # The returned objects should be exact if return_from_cache is
+        # True
+        entry_cache = fifo_cache.FIFOCache()
+        inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+            _inventory_v5a, revision_id='test-rev-id',
+            entry_cache=entry_cache, return_from_cache=True)
+        for entry in inv.iter_just_entries():
+            key = (entry.file_id, entry.revision)
+            if entry.file_id is inv.root.file_id:
+                # The root id is inferred for xml v5
+                self.assertFalse(key in entry_cache)
+            else:
+                self.assertIs(entry, entry_cache[key])
+
     def test_unpack_inventory_5b(self):
         inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
                 _inventory_v5b, revision_id='test-rev-id')

=== modified file 'bzrlib/xml4.py'
--- a/bzrlib/xml4.py	2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml4.py	2009-12-03 04:55:02 +0000
@@ -63,7 +63,8 @@
         return e
 
 
-    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element
 
         :param revision_id: Ignored parameter used by xml5.
@@ -71,14 +72,15 @@
         root_id = elt.get('file_id') or ROOT_ID
         inv = Inventory(root_id)
         for e in elt:
-            ie = self._unpack_entry(e, entry_cache=entry_cache)
+            ie = self._unpack_entry(e, entry_cache=entry_cache,
+                                    return_from_cache=return_from_cache)
             if ie.parent_id == ROOT_ID:
                 ie.parent_id = root_id
             inv.add(ie)
         return inv
 
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         ## original format inventories don't have a parent_id for
         ## nodes in the root directory, but it's cleaner to use one
         ## internally.

=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py	2009-07-15 06:39:26 +0000
+++ b/bzrlib/xml5.py	2009-12-03 04:55:02 +0000
@@ -30,7 +30,8 @@
     format_num = '5'
     root_id = inventory.ROOT_ID
 
-    def _unpack_inventory(self, elt, revision_id, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element
         """
         root_id = elt.get('file_id') or inventory.ROOT_ID
@@ -54,7 +55,8 @@
         unpack_entry = self._unpack_entry
         byid = inv._byid
         for e in elt:
-            ie = unpack_entry(e, entry_cache=entry_cache)
+            ie = unpack_entry(e, entry_cache=entry_cache,
+                              return_from_cache=return_from_cache)
             parent_id = ie.parent_id
             if parent_id is None:
                 ie.parent_id = parent_id = root_id

=== modified file 'bzrlib/xml7.py'
--- a/bzrlib/xml7.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/xml7.py	2009-12-03 04:55:02 +0000
@@ -28,7 +28,7 @@
     supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
     format_num = '7'
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         kind = elt.tag
         if not kind in self.supported_kinds:
             raise AssertionError('unsupported entry kind %s' % kind)
@@ -41,6 +41,7 @@
             return inventory.TreeReference(file_id, name, parent_id, revision,
                                            reference_revision)
         else:
-            return xml6.Serializer_v6._unpack_entry(self, elt)
+            return xml6.Serializer_v6._unpack_entry(self, elt,
+                entry_cache=entry_cache, return_from_cache=return_from_cache)
 
 serializer_v7 = Serializer_v7()

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2009-12-01 21:27:08 +0000
+++ b/bzrlib/xml8.py	2009-12-03 04:55:02 +0000
@@ -371,7 +371,8 @@
             prop_elt.tail = '\n'
         top_elt.tail = '\n'
 
-    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+                          return_from_cache=False):
         """Construct from XML Element"""
         if elt.tag != 'inventory':
             raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
@@ -384,12 +385,13 @@
             revision_id = cache_utf8.encode(revision_id)
         inv = inventory.Inventory(root_id=None, revision_id=revision_id)
         for e in elt:
-            ie = self._unpack_entry(e, entry_cache=entry_cache)
+            ie = self._unpack_entry(e, entry_cache=entry_cache,
+                                    return_from_cache=return_from_cache)
             inv.add(ie)
         self._check_cache_size(len(inv), entry_cache)
         return inv
 
-    def _unpack_entry(self, elt, entry_cache=None):
+    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
         elt_get = elt.get
         file_id = elt_get('file_id')
         revision = elt_get('revision')
@@ -433,7 +435,7 @@
                 pass
             else:
                 # Only copying directory entries drops us 2.85s => 2.35s
-                if self.safe_to_use_cache_items:
+                if return_from_cache:
                     if cached_ie.kind == 'directory':
                         return cached_ie.copy()
                     return cached_ie

=== modified file 'bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py	2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml_serializer.py	2009-12-03 04:55:02 +0000
@@ -55,7 +55,7 @@
     squashes_xml_invalid_characters = True
 
     def read_inventory_from_string(self, xml_string, revision_id=None,
-                                   entry_cache=None):
+                                   entry_cache=None, return_from_cache=False):
         """Read xml_string into an inventory object.
 
         :param xml_string: The xml to read.
@@ -69,10 +69,15 @@
         :param entry_cache: An optional cache of InventoryEntry objects. If
             supplied we will look up entries via (file_id, revision_id) which
             should map to a valid InventoryEntry (File/Directory/etc) object.
+        :param return_from_cache: Return entries directly from the cache,
+            rather than copying them first. This is only safe if the caller
+            promises not to mutate the returned inventory entries, but it can
+            make some operations significantly faster.
         """
         try:
             return self._unpack_inventory(fromstring(xml_string), revision_id,
-                                          entry_cache=entry_cache)
+                                          entry_cache=entry_cache,
+                                          return_from_cache=return_from_cache)
         except ParseError, e:
             raise errors.UnexpectedInventoryFormat(e)
 



More information about the bazaar-commits mailing list