Rev 4887: (jam) Faster conversions by enabling re-use of cached InventoryEntries in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Dec 10 17:35:41 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4887 [merge]
revision-id: pqm at pqm.ubuntu.com-20091210173537-7gui2z64ki7nioit
parent: pqm at pqm.ubuntu.com-20091210164716-e18k2to740e9eq7s
parent: john at arbash-meinel.com-20091203053103-00z8bryoyfhm9x52
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2009-12-10 17:35:37 +0000
message:
(jam) Faster conversions by enabling re-use of cached
InventoryEntries.
modified:
bzrlib/chk_serializer.py chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/serializer.py serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
bzrlib/tests/test_xml.py test_xml.py-20050905091053-80b45588931a9b35
bzrlib/xml4.py xml4.py-20050916091259-db5ab55e7e6ca324
bzrlib/xml5.py xml5.py-20080328030717-t9guwinq8hom0ar3-1
bzrlib/xml7.py xml7.py-20061029182747-d5tiiny21bvrd2jj-1
bzrlib/xml8.py xml5.py-20050907032657-aac8f960815b66b1
bzrlib/xml_serializer.py xml.py-20050309040759-57d51586fdec365d
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py 2009-07-22 20:22:21 +0000
+++ b/bzrlib/chk_serializer.py 2009-12-03 04:55:02 +0000
@@ -139,7 +139,7 @@
revision_format_num = None
support_altered_by_hack = False
- def _unpack_entry(self, elt):
+ def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
kind = elt.tag
if not kind in self.supported_kinds:
raise AssertionError('unsupported entry kind %s' % kind)
@@ -152,7 +152,8 @@
return inventory.TreeReference(file_id, name, parent_id, revision,
reference_revision)
else:
- return xml7.Serializer_v7._unpack_entry(self, elt)
+ return xml7.Serializer_v7._unpack_entry(self, elt,
+ entry_cache=entry_cache, return_from_cache=return_from_cache)
def __init__(self, node_size, search_key_name):
self.maximum_size = node_size
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-12-02 18:05:08 +0000
+++ b/bzrlib/repository.py 2009-12-03 05:31:03 +0000
@@ -1314,6 +1314,9 @@
self._fallback_repositories = []
# An InventoryEntry cache, used during deserialization
self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
+ # Is it safe to return inventory entries directly from the entry cache,
+ # rather than copying them?
+ self._safe_to_return_from_cache = False
def __repr__(self):
if self._fallback_repositories:
@@ -2431,7 +2434,8 @@
:param xml: A serialised inventory.
"""
result = self._serializer.read_inventory_from_string(xml, revision_id,
- entry_cache=self._inventory_entry_cache)
+ entry_cache=self._inventory_entry_cache,
+ return_from_cache=self._safe_to_return_from_cache)
if result.revision_id != revision_id:
raise AssertionError('revision id mismatch %s != %s' % (
result.revision_id, revision_id))
@@ -3840,6 +3844,7 @@
pending_revisions = []
parent_map = self.source.get_parent_map(revision_ids)
self._fetch_parent_invs_for_stacking(parent_map, cache)
+ self.source._safe_to_return_from_cache = True
for tree in self.source.revision_trees(revision_ids):
# Find a inventory delta for this revision.
# Find text entries that need to be copied, too.
@@ -3893,6 +3898,7 @@
pending_revisions.append(revision)
cache[current_revision_id] = tree
basis_id = current_revision_id
+ self.source._safe_to_return_from_cache = False
# Copy file texts
from_texts = self.source.texts
to_texts = self.target.texts
@@ -3977,6 +3983,7 @@
basis_id = self._fetch_batch(batch, basis_id, cache,
a_graph=a_graph)
except:
+ self.source._safe_to_return_from_cache = False
self.target.abort_write_group()
raise
else:
=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py 2009-07-29 17:44:34 +0000
+++ b/bzrlib/serializer.py 2009-12-03 04:55:02 +0000
@@ -50,7 +50,7 @@
raise NotImplementedError(self.write_inventory_to_string)
def read_inventory_from_string(self, string, revision_id=None,
- entry_cache=None):
+ entry_cache=None, return_from_cache=False):
"""Read string into an inventory object.
:param string: The serialized inventory to read.
@@ -64,6 +64,10 @@
:param entry_cache: An optional cache of InventoryEntry objects. If
supplied we will look up entries via (file_id, revision_id) which
should map to a valid InventoryEntry (File/Directory/etc) object.
+ :param return_from_cache: Return entries directly from the cache,
+ rather than copying them first. This is only safe if the caller
+ promises not to mutate the returned inventory entries, but it can
+ make some operations significantly faster.
"""
raise NotImplementedError(self.read_inventory_from_string)
=== modified file 'bzrlib/tests/test_xml.py'
--- a/bzrlib/tests/test_xml.py 2009-07-15 06:39:07 +0000
+++ b/bzrlib/tests/test_xml.py 2009-12-03 04:55:02 +0000
@@ -18,6 +18,7 @@
from bzrlib import (
errors,
+ fifo_cache,
inventory,
xml6,
xml7,
@@ -290,6 +291,38 @@
_inventory_v5a, revision_id='test-rev-id')
self.assertEqual('test-rev-id', inv.root.revision)
+ def test_unpack_inventory_5a_cache_and_copy(self):
+ # An entry_cache that is passed in should get populated with the objects,
+ # but the returned objects should be copies if return_from_cache is
+ # False
+ entry_cache = fifo_cache.FIFOCache()
+ inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+ _inventory_v5a, revision_id='test-rev-id',
+ entry_cache=entry_cache, return_from_cache=False)
+ for entry in inv.iter_just_entries():
+ key = (entry.file_id, entry.revision)
+ if entry.file_id is inv.root.file_id:
+ # The root id is inferred for xml v5
+ self.assertFalse(key in entry_cache)
+ else:
+ self.assertIsNot(entry, entry_cache[key])
+
+ def test_unpack_inventory_5a_cache_no_copy(self):
+ # An entry_cache that is passed in should get populated with the objects.
+ # The returned objects should be the very same cached objects (not
+ # copies) if return_from_cache is True
+ entry_cache = fifo_cache.FIFOCache()
+ inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
+ _inventory_v5a, revision_id='test-rev-id',
+ entry_cache=entry_cache, return_from_cache=True)
+ for entry in inv.iter_just_entries():
+ key = (entry.file_id, entry.revision)
+ if entry.file_id is inv.root.file_id:
+ # The root id is inferred for xml v5
+ self.assertFalse(key in entry_cache)
+ else:
+ self.assertIs(entry, entry_cache[key])
+
def test_unpack_inventory_5b(self):
inv = bzrlib.xml5.serializer_v5.read_inventory_from_string(
_inventory_v5b, revision_id='test-rev-id')
=== modified file 'bzrlib/xml4.py'
--- a/bzrlib/xml4.py 2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml4.py 2009-12-03 04:55:02 +0000
@@ -63,7 +63,8 @@
return e
- def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+ def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+ return_from_cache=False):
"""Construct from XML Element
:param revision_id: Ignored parameter used by xml5.
@@ -71,14 +72,15 @@
root_id = elt.get('file_id') or ROOT_ID
inv = Inventory(root_id)
for e in elt:
- ie = self._unpack_entry(e, entry_cache=entry_cache)
+ ie = self._unpack_entry(e, entry_cache=entry_cache,
+ return_from_cache=return_from_cache)
if ie.parent_id == ROOT_ID:
ie.parent_id = root_id
inv.add(ie)
return inv
- def _unpack_entry(self, elt, entry_cache=None):
+ def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
## original format inventories don't have a parent_id for
## nodes in the root directory, but it's cleaner to use one
## internally.
=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py 2009-07-15 06:39:26 +0000
+++ b/bzrlib/xml5.py 2009-12-03 04:55:02 +0000
@@ -30,7 +30,8 @@
format_num = '5'
root_id = inventory.ROOT_ID
- def _unpack_inventory(self, elt, revision_id, entry_cache=None):
+ def _unpack_inventory(self, elt, revision_id, entry_cache=None,
+ return_from_cache=False):
"""Construct from XML Element
"""
root_id = elt.get('file_id') or inventory.ROOT_ID
@@ -54,7 +55,8 @@
unpack_entry = self._unpack_entry
byid = inv._byid
for e in elt:
- ie = unpack_entry(e, entry_cache=entry_cache)
+ ie = unpack_entry(e, entry_cache=entry_cache,
+ return_from_cache=return_from_cache)
parent_id = ie.parent_id
if parent_id is None:
ie.parent_id = parent_id = root_id
=== modified file 'bzrlib/xml7.py'
--- a/bzrlib/xml7.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/xml7.py 2009-12-03 04:55:02 +0000
@@ -28,7 +28,7 @@
supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
format_num = '7'
- def _unpack_entry(self, elt, entry_cache=None):
+ def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
kind = elt.tag
if not kind in self.supported_kinds:
raise AssertionError('unsupported entry kind %s' % kind)
@@ -41,6 +41,7 @@
return inventory.TreeReference(file_id, name, parent_id, revision,
reference_revision)
else:
- return xml6.Serializer_v6._unpack_entry(self, elt)
+ return xml6.Serializer_v6._unpack_entry(self, elt,
+ entry_cache=entry_cache, return_from_cache=return_from_cache)
serializer_v7 = Serializer_v7()
=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py 2009-07-07 04:32:13 +0000
+++ b/bzrlib/xml8.py 2009-12-03 04:55:02 +0000
@@ -371,7 +371,8 @@
prop_elt.tail = '\n'
top_elt.tail = '\n'
- def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
+ def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
+ return_from_cache=False):
"""Construct from XML Element"""
if elt.tag != 'inventory':
raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
@@ -384,12 +385,13 @@
revision_id = cache_utf8.encode(revision_id)
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
for e in elt:
- ie = self._unpack_entry(e, entry_cache=entry_cache)
+ ie = self._unpack_entry(e, entry_cache=entry_cache,
+ return_from_cache=return_from_cache)
inv.add(ie)
self._check_cache_size(len(inv), entry_cache)
return inv
- def _unpack_entry(self, elt, entry_cache=None):
+ def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
elt_get = elt.get
file_id = elt_get('file_id')
revision = elt_get('revision')
@@ -433,9 +435,10 @@
pass
else:
# Only copying directory entries drops us 2.85s => 2.35s
- # if cached_ie.kind == 'directory':
- # return cached_ie.copy()
- # return cached_ie
+ if return_from_cache:
+ if cached_ie.kind == 'directory':
+ return cached_ie.copy()
+ return cached_ie
return cached_ie.copy()
kind = elt.tag
=== modified file 'bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py 2009-06-09 00:59:51 +0000
+++ b/bzrlib/xml_serializer.py 2009-12-03 04:55:02 +0000
@@ -55,7 +55,7 @@
squashes_xml_invalid_characters = True
def read_inventory_from_string(self, xml_string, revision_id=None,
- entry_cache=None):
+ entry_cache=None, return_from_cache=False):
"""Read xml_string into an inventory object.
:param xml_string: The xml to read.
@@ -69,10 +69,15 @@
:param entry_cache: An optional cache of InventoryEntry objects. If
supplied we will look up entries via (file_id, revision_id) which
should map to a valid InventoryEntry (File/Directory/etc) object.
+ :param return_from_cache: Return entries directly from the cache,
+ rather than copying them first. This is only safe if the caller
+ promises not to mutate the returned inventory entries, but it can
+ make some operations significantly faster.
"""
try:
return self._unpack_inventory(fromstring(xml_string), revision_id,
- entry_cache=entry_cache)
+ entry_cache=entry_cache,
+ return_from_cache=return_from_cache)
except ParseError, e:
raise errors.UnexpectedInventoryFormat(e)
More information about the bazaar-commits
mailing list