Rev 2626: Temporary performance hack for GraphIndex : load the entire index once and only once into ram. in http://people.ubuntu.com/~robertc/baz2.0/index

Robert Collins robertc at robertcollins.net
Wed Jul 18 07:06:52 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/index

------------------------------------------------------------
revno: 2626
revision-id: robertc at robertcollins.net-20070718060649-k1c0mh6bmra497n2
parent: robertc at robertcollins.net-20070718044624-kb7pmne2pd96ekum
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Wed 2007-07-18 16:06:49 +1000
message:
  Temporary performance hack for GraphIndex : load the entire index once and only once into ram.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-07-18 04:46:24 +0000
+++ b/bzrlib/index.py	2007-07-18 06:06:49 +0000
@@ -196,20 +196,21 @@
         """
         self._transport = transport
         self._name = name
-
-    def iter_all_entries(self):
-        """Iterate over all keys within the index.
-
-        :return: An iterable of (key, value) or (key, value, reference_lists).
-            The former tuple is used when there are no reference lists in the
-            index, making the API compatible with simple key:value index types.
-            There is no defined order for the result iteration - it will be in
-            the most efficient order for the index.
+        self._nodes = None
+        self._keys_by_offset = None
+
+    def _buffer_all(self):
+        """Buffer all the index data.
+
+        Mutates self._nodes and self._keys_by_offset.
         """
         stream = self._transport.get(self._name)
         self._read_prefix(stream)
         line_count = 0
-        self.keys_by_offset = {}
+        # raw data keyed by offset
+        self._keys_by_offset = {}
+        # ready-to-return nodes: key -> value, or key -> (value, node_ref_lists)
+        self._nodes = {}
         trailers = 0
         pos = stream.tell()
         for line in stream.readlines():
@@ -224,23 +225,41 @@
                     int(ref) for ref in ref_string.split('\r') if ref
                     ]))
             ref_lists = tuple(ref_lists)
-            self.keys_by_offset[pos] = (key, absent, ref_lists, value)
+            self._keys_by_offset[pos] = (key, absent, ref_lists, value)
             pos += len(line)
-        for key, absent, references, value in self.keys_by_offset.itervalues():
+        for key, absent, references, value in self._keys_by_offset.itervalues():
             if absent:
                 continue
             # resolve references:
             if self.node_ref_lists:
                 node_refs = []
                 for ref_list in references:
-                    node_refs.append(tuple([self.keys_by_offset[ref][0] for ref in ref_list]))
-                yield (key, value, tuple(node_refs))
+                    node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
+                self._nodes[key] = (value, tuple(node_refs))
             else:
-                yield (key, value)
+                self._nodes[key] = value
         if trailers != 1:
             # there must be one line - the empty trailer line.
             raise errors.BadIndexData(self)
 
+    def iter_all_entries(self):
+        """Iterate over all keys within the index.
+
+        :return: An iterable of (key, value) or (key, value, reference_lists).
+            The former tuple is used when there are no reference lists in the
+            index, making the API compatible with simple key:value index types.
+            There is no defined order for the result iteration - it will be in
+            the most efficient order for the index.
+        """
+        if self._nodes is None:
+            self._buffer_all()
+        if self.node_ref_lists:
+            for key, (value, node_ref_lists) in self._nodes.iteritems():
+                yield key, value, node_ref_lists
+        else:
+            for key, value in self._nodes.iteritems():
+                yield key, value
+
     def _read_prefix(self, stream):
         signature = stream.read(len(self._signature()))
         if not signature == self._signature():



More information about the bazaar-commits mailing list