Rev 2626: Temporary performance hack for GraphIndex : load the entire index once and only once into ram. in http://people.ubuntu.com/~robertc/baz2.0/index
Robert Collins
robertc at robertcollins.net
Wed Jul 18 07:06:52 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/index
------------------------------------------------------------
revno: 2626
revision-id: robertc at robertcollins.net-20070718060649-k1c0mh6bmra497n2
parent: robertc at robertcollins.net-20070718044624-kb7pmne2pd96ekum
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Wed 2007-07-18 16:06:49 +1000
message:
Temporary performance hack for GraphIndex : load the entire index once and only once into ram.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-18 04:46:24 +0000
+++ b/bzrlib/index.py 2007-07-18 06:06:49 +0000
@@ -196,20 +196,21 @@
"""
self._transport = transport
self._name = name
-
- def iter_all_entries(self):
- """Iterate over all keys within the index.
-
- :return: An iterable of (key, value) or (key, value, reference_lists).
- The former tuple is used when there are no reference lists in the
- index, making the API compatible with simple key:value index types.
- There is no defined order for the result iteration - it will be in
- the most efficient order for the index.
+ self._nodes = None
+ self._keys_by_offset = None
+
+ def _buffer_all(self):
+ """Buffer all the index data.
+
+ Mutates self._nodes and self._keys_by_offset.
"""
stream = self._transport.get(self._name)
self._read_prefix(stream)
line_count = 0
- self.keys_by_offset = {}
+ # raw data keyed by offset
+ self._keys_by_offset = {}
+ # ready-to-return key:value or key:value, node_ref_lists
+ self._nodes = {}
trailers = 0
pos = stream.tell()
for line in stream.readlines():
@@ -224,23 +225,41 @@
int(ref) for ref in ref_string.split('\r') if ref
]))
ref_lists = tuple(ref_lists)
- self.keys_by_offset[pos] = (key, absent, ref_lists, value)
+ self._keys_by_offset[pos] = (key, absent, ref_lists, value)
pos += len(line)
- for key, absent, references, value in self.keys_by_offset.itervalues():
+ for key, absent, references, value in self._keys_by_offset.itervalues():
if absent:
continue
# resolve references:
if self.node_ref_lists:
node_refs = []
for ref_list in references:
- node_refs.append(tuple([self.keys_by_offset[ref][0] for ref in ref_list]))
- yield (key, value, tuple(node_refs))
+ node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
+ self._nodes[key] = (value, tuple(node_refs))
else:
- yield (key, value)
+ self._nodes[key] = value
if trailers != 1:
# there must be one line - the empty trailer line.
raise errors.BadIndexData(self)
+ def iter_all_entries(self):
+ """Iterate over all keys within the index.
+
+ :return: An iterable of (key, value) or (key, value, reference_lists).
+ The former tuple is used when there are no reference lists in the
+ index, making the API compatible with simple key:value index types.
+ There is no defined order for the result iteration - it will be in
+ the most efficient order for the index.
+ """
+ if self._nodes is None:
+ self._buffer_all()
+ if self.node_ref_lists:
+ for key, (value, node_ref_lists) in self._nodes.iteritems():
+ yield key, value, node_ref_lists
+ else:
+ for key, value in self._nodes.iteritems():
+ yield key, value
+
def _read_prefix(self, stream):
signature = stream.read(len(self._signature()))
if not signature == self._signature():
More information about the bazaar-commits
mailing list