Rev 2667: Merge index improvements. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Wed Jul 18 07:16:24 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2667
revision-id: robertc at robertcollins.net-20070718061621-xz2w9x9zj5cwwnnk
parent: robertc at robertcollins.net-20070718060739-2l2y7bjd1rf121fo
parent: robertc at robertcollins.net-20070718061559-36wb5mc52pp26qqq
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2007-07-18 16:16:21 +1000
message:
Merge index improvements.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
------------------------------------------------------------
revno: 2592.1.25.2.10
revision-id: robertc at robertcollins.net-20070718061559-36wb5mc52pp26qqq
parent: robertc at robertcollins.net-20070718060649-k1c0mh6bmra497n2
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Wed 2007-07-18 16:15:59 +1000
message:
Make GraphIndex.iter_entries do hash lookups rather than table scans.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
------------------------------------------------------------
revno: 2592.1.25.2.9
revision-id: robertc at robertcollins.net-20070718060649-k1c0mh6bmra497n2
parent: robertc at robertcollins.net-20070718044624-kb7pmne2pd96ekum
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Wed 2007-07-18 16:06:49 +1000
message:
Temporary performance hack for GraphIndex : load the entire index once and only once into ram.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
------------------------------------------------------------
revno: 2592.1.25.2.8
revision-id: robertc at robertcollins.net-20070718044624-kb7pmne2pd96ekum
parent: pqm at pqm.ubuntu.com-20070717110203-zzmtp28nunhsoz12
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Wed 2007-07-18 14:46:24 +1000
message:
InMemoryGraphIndex.add_nodes was inconsistent with other metods for non-node-reference indices.
modified:
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2007-07-18 04:31:29 +0000
+++ b/bzrlib/index.py 2007-07-18 06:16:21 +0000
@@ -196,20 +196,21 @@
"""
self._transport = transport
self._name = name
-
- def iter_all_entries(self):
- """Iterate over all keys within the index.
-
- :return: An iterable of (key, value) or (key, value, reference_lists).
- The former tuple is used when there are no reference lists in the
- index, making the API compatible with simple key:value index types.
- There is no defined order for the result iteration - it will be in
- the most efficient order for the index.
+ self._nodes = None
+ self._keys_by_offset = None
+
+ def _buffer_all(self):
+ """Buffer all the index data.
+
+ Mutates self._nodes and self.keys_by_offset.
"""
stream = self._transport.get(self._name)
self._read_prefix(stream)
line_count = 0
- self.keys_by_offset = {}
+ # raw data keyed by offset
+ self._keys_by_offset = {}
+ # ready-to-return key:value or key:value, node_ref_lists
+ self._nodes = {}
trailers = 0
pos = stream.tell()
for line in stream.readlines():
@@ -224,23 +225,41 @@
int(ref) for ref in ref_string.split('\r') if ref
]))
ref_lists = tuple(ref_lists)
- self.keys_by_offset[pos] = (key, absent, ref_lists, value)
+ self._keys_by_offset[pos] = (key, absent, ref_lists, value)
pos += len(line)
- for key, absent, references, value in self.keys_by_offset.itervalues():
+ for key, absent, references, value in self._keys_by_offset.itervalues():
if absent:
continue
# resolve references:
if self.node_ref_lists:
node_refs = []
for ref_list in references:
- node_refs.append(tuple([self.keys_by_offset[ref][0] for ref in ref_list]))
- yield (key, value, tuple(node_refs))
+ node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
+ self._nodes[key] = (value, tuple(node_refs))
else:
- yield (key, value)
+ self._nodes[key] = value
if trailers != 1:
# there must be one line - the empty trailer line.
raise errors.BadIndexData(self)
+ def iter_all_entries(self):
+ """Iterate over all keys within the index.
+
+ :return: An iterable of (key, value) or (key, value, reference_lists).
+ The former tuple is used when there are no reference lists in the
+ index, making the API compatible with simple key:value index types.
+ There is no defined order for the result iteration - it will be in
+ the most efficient order for the index.
+ """
+ if self._nodes is None:
+ self._buffer_all()
+ if self.node_ref_lists:
+ for key, (value, node_ref_lists) in self._nodes.iteritems():
+ yield key, value, node_ref_lists
+ else:
+ for key, value in self._nodes.iteritems():
+ yield key, value
+
def _read_prefix(self, stream):
signature = stream.read(len(self._signature()))
if not signature == self._signature():
@@ -264,12 +283,16 @@
keys = set(keys)
if not keys:
return
- for node in self.iter_all_entries():
- if not keys:
- return
- if node[0] in keys:
- yield node
- keys.remove(node[0])
+ if self._nodes is None:
+ self._buffer_all()
+ keys = keys.intersection(self._nodes)
+ if self.node_ref_lists:
+ for key in keys:
+ value, node_refs = self._nodes[key]
+ yield key, value, node_refs
+ else:
+ for key in keys:
+ yield key, self._nodes[key]
def _signature(self):
"""The file signature for this index type."""
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2007-07-15 15:40:37 +0000
+++ b/bzrlib/tests/test_index.py 2007-07-18 04:46:24 +0000
@@ -471,6 +471,16 @@
result.add_nodes(nodes)
return result
+ def test_add_nodes_no_refs(self):
+ index = self.make_index(0)
+ index.add_nodes([('name', 'data')])
+ index.add_nodes([('name2', ''), ('name3', '')])
+ self.assertEqual(set([
+ ('name', 'data'),
+ ('name2', ''),
+ ('name3', ''),
+ ]), set(index.iter_all_entries()))
+
def test_add_nodes(self):
index = self.make_index(1)
index.add_nodes([('name', 'data', ([],))])
@@ -486,7 +496,7 @@
self.assertEqual([], list(index.iter_all_entries()))
def test_iter_all_entries_simple(self):
- index = self.make_index(nodes=[('name', 'data', ())])
+ index = self.make_index(nodes=[('name', 'data')])
self.assertEqual([('name', 'data')],
list(index.iter_all_entries()))
@@ -528,7 +538,7 @@
index.validate()
def test_validate_no_refs_content(self):
- index = self.make_index(nodes=[('key', 'value', ())])
+ index = self.make_index(nodes=[('key', 'value')])
index.validate()
More information about the bazaar-commits
mailing list