Rev 3911: A bit more refactoring trying to refine iter_interesting_nodes. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/branch_performance
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 26 20:58:31 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/branch_performance
------------------------------------------------------------
revno: 3911
revision-id: john at arbash-meinel.com-20090326205822-utcawvt4zylu11fw
parent: john at arbash-meinel.com-20090326164343-f704ghm7xkhxv2ad
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: branch_performance
timestamp: Thu 2009-03-26 15:58:22 -0500
message:
A bit more refactoring trying to refine iter_interesting_nodes.
Still just as broken.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-03-26 16:43:43 +0000
+++ b/bzrlib/chk_map.py 2009-03-26 20:58:22 +0000
@@ -1281,35 +1281,91 @@
return node
-def _read_records(uninteresting_keys, interesting_keys, store):
+
+
+class _IterInteresting(object):
+ """Given a set of root keys, find the interesting external refs."""
+
+ def __init__(self, store, search_key_func):
+ self.store = store
+ self.search_key_func = search_key_func
+ self._interesting_pending = []
+ self._uninteresting_pending = []
+
+ def _read_records(self, uninteresting_keys, interesting_keys):
"""Read the bytes for the various keys. Also check the _page_cache."""
- to_read = []
- for key in uninteresting_keys:
- try:
- bytes = _page_cache[key]
- uninteresting_bytes[key] = bytes
- except KeyError:
- to_read.append(key)
- uninteresting_bytes[key] = None
- for key in interesting_keys:
- try:
- bytes = _page_cache[key]
- interesting_bytes[key] = bytes
- except KeyError:
- to_read.append(key)
- interesting_bytes[key] = None
- stream = store.get_record_stream(to_read, 'unordered', True)
- for record in stream:
- bytes = record.get_bytes_as('fulltext')
- if record.key in uninteresting_bytes:
- uninteresting_bytes[record.key] = bytes
- else:
- assert record.key in interesting_bytes
- interesting_bytes[record.key] = bytes
- # Do we want to do this? or does it just punish the page cache?
- _page_cache[key] = bytes
- return uninteresting_bytes, interesting_bytes
-
+ to_read = []
+ for key in uninteresting_keys:
+ try:
+ bytes = _page_cache[key]
+ uninteresting_bytes[key] = bytes
+ except KeyError:
+ to_read.append(key)
+ uninteresting_bytes[key] = None
+ for key in interesting_keys:
+ try:
+ bytes = _page_cache[key]
+ interesting_bytes[key] = bytes
+ except KeyError:
+ to_read.append(key)
+ interesting_bytes[key] = None
+ stream = self.store.get_record_stream(to_read, 'unordered', True)
+ for record in stream:
+ bytes = record.get_bytes_as('fulltext')
+ if record.key in uninteresting_bytes:
+ uninteresting_bytes[record.key] = bytes
+ else:
+ assert record.key in interesting_bytes
+ interesting_bytes[record.key] = bytes
+ # Do we want to do this? or does it just punish the page cache?
+ _page_cache[key] = bytes
+ return uninteresting_bytes, interesting_bytes
+
+ def process(self, interesting_root_keys, uninteresting_root_keys):
+ # There should be no overlap
+ assert not interesting_root_keys.intersection(self.uninteresting_root_keys)
+
+
+ def _process_common_prefix_nodes(self, uninteresting_bytes,
+ interesting_bytes):
+ """Handle the case when all entries are using the same prefix.
+
+ All of the entries in uninteresting_bytes and interesting_bytes have
+ been referenced by the exact same prefix, so we know their outgoing
+ pointers overlap exactly.
+ """
+ known_uninteresting_prefix_keys = set()
+ known_uninteresting_items = set()
+ maybe_interesting_prefix_keys = set()
+ maybe_interesting_items = set()
+ for key, bytes in uninteresting_bytes.iteritems():
+ # We don't care about search_key_func for this code, because we
+ # only care about external references.
+ node = _deserialise(bytes, key, search_key_func=search_key_func)
+ if type(node) is InternalNode:
+ known_uninteresting_prefix_keys.update(node._items.items())
+ else:
+ # LeafNode
+ known_uninteresting_items.update(node._items.items())
+ for key, bytes in interesting_bytes.iteritems():
+ node = _deserialise(bytes, key, search_key_func=search_key_func)
+ if type(node) is InternalNode:
+ maybe_interesting_prefix_keys.update(node._items.items())
+ else:
+ # LeafNode
+ maybe_interesting_items.update(node._items.iteritems())
+ for prefix, child in (maybe_interesting_prefix_keys
+ - known_uninteresting_prefix_keys):
+ heapq.heappush(interesting_pending, (prefix, None, child))
+ for prefix, child in (known_uninteresting_prefix_keys
+ - maybe_interesting_prefix_keys):
+ heapq.heappush(uninteresting_pending, (prefix, None, child))
+ for key, value in (maybe_interesting_items - known_uninteresting_items):
+ prefix = search_key_func(key)
+ heapq.heappush(interesting_pending, (prefix, key, value))
+ for key, value in (known_uninteresting_items - maybe_interesting_items):
+ prefix = search_key_func(key)
+ heapq.heappush(uninteresting_pending, (prefix, key, value))
def iter_interesting_nodes(store, interesting_root_keys,
uninteresting_root_keys,
@@ -1343,38 +1399,7 @@
# at the same prefix, so we can do heavy filtering
uninteresting_pending = []
interesting_pending = []
- known_uninteresting_prefix_keys = set()
- known_uninteresting_items = set()
- maybe_interesting_prefix_keys = set()
- maybe_interesting_items = set()
- for key, bytes in uninteresting_bytes.iteritems():
- # We don't care about search_key_func for this code, because we
- # only care about external references.
- node = _deserialise(bytes, key, search_key_func=search_key_func)
- if type(node) is InternalNode:
- known_uninteresting_prefix_keys.update(node._items.items())
- else:
- # LeafNode
- known_uninteresting_items.update(node._items.items())
- for key, bytes in interesting_bytes.iteritems():
- node = _deserialise(bytes, key, search_key_func=search_key_func)
- if type(node) is InternalNode:
- maybe_interesting_prefix_keys.update(node._items.items())
- else:
- # LeafNode
- maybe_interesting_items.update(node._items.iteritems())
- for prefix, child in (maybe_interesting_prefix_keys
- - known_uninteresting_prefix_keys):
- heapq.heappush(interesting_pending, (prefix, None, child))
- for prefix, child in (known_uninteresting_prefix_keys
- - maybe_interesting_prefix_keys):
- heapq.heappush(uninteresting_pending, (prefix, None, child))
- for key, value in (maybe_interesting_items - known_uninteresting_items):
- prefix = search_key_func(key)
- heapq.heappush(interesting_pending, (prefix, key, value))
- for key, value in (known_uninteresting_items - maybe_interesting_items):
- prefix = search_key_func(key)
- heapq.heappush(uninteresting_pending, (prefix, key, value))
+
More information about the bazaar-commits mailing list