Rev 3647: Do a bit more work to get all the tests to pass, in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup

John Arbash Meinel john at arbash-meinel.com
Mon Aug 25 17:24:13 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/index_builder_cleanup

------------------------------------------------------------
revno: 3647
revision-id: john at arbash-meinel.com-20080825162409-0766y19zjs45m87i
parent: john at arbash-meinel.com-20080825034342-owq0858uk1wp2q0l
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index_builder_cleanup
timestamp: Mon 2008-08-25 11:24:09 -0500
message:
  Do a bit more work to get all the tests to pass.
modified:
  bzrlib/btree_index.py          index.py-20080624222253-p0x5f92uyh5hw734-7
  bzrlib/chunk_writer.py         chunk_writer.py-20080630234519-6ggn4id17nipovny-1
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
-------------- next part --------------
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2008-08-25 03:43:42 +0000
+++ b/bzrlib/btree_index.py	2008-08-25 16:24:09 +0000
@@ -25,6 +25,7 @@
 import sha
 import struct
 import tempfile
+import time
 import zlib
 
 from bzrlib import (
@@ -59,6 +60,7 @@
 miss_attempts = 0  # Missed this entry while looking up
 bisect_shortcut = [0, 0]
 dupes = [0]
+_add_node_time = [0.0]
 
 
 class _BuilderRow(object):
@@ -85,8 +87,10 @@
             del byte_lines[-1]
             skipped_bytes = padding
         self.spool.writelines(byte_lines)
-        if (self.spool.tell() + skipped_bytes) % _PAGE_SIZE != 0:
-            raise AssertionError("incorrect node length")
+        remainder = (self.spool.tell() + skipped_bytes) % _PAGE_SIZE
+        if remainder != 0:
+            raise AssertionError("incorrect node length: %d, %d"
+                                 % (self.spool.tell(), remainder))
         self.nodes += 1
         self.writer = None
 
@@ -391,7 +395,10 @@
             copied_len = osutils.pumpfile(row.spool, result)
             if copied_len != (row.nodes - 1) * _PAGE_SIZE:
                 if type(row) != _LeafBuilderRow:
-                    raise AssertionError("Not enough data copied")
+                    raise AssertionError("Incorrect amount of data copied"
+                        " expected: %d, got: %d"
+                        % ((row.nodes - 1) * _PAGE_SIZE,
+                           copied_len))
         result.flush()
         size = result.tell()
         result.seek(0)

=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py	2008-08-22 02:09:36 +0000
+++ b/bzrlib/chunk_writer.py	2008-08-25 16:24:09 +0000
@@ -93,6 +93,16 @@
         This returns the final compressed chunk, and either None, or the
         bytes that did not fit in the chunk.
         """
+        # self.bytes_list = self.bytes_in
+        # bytes_out_len = sum(map(len, self.bytes_list))
+        # if bytes_out_len > self.chunk_size:
+        #     raise Assertion("too much data: %d" % bytes_out_len)
+        # self.bytes_in = None
+        # nulls_needed = self.chunk_size - self.seen_bytes
+        # if nulls_needed:
+        #     self.bytes_list.append("\x00" * nulls_needed)
+        # return self.bytes_list, self.unused_bytes, nulls_needed
+
         self.bytes_in = None # Free the data cached so far, we don't need it
         out = self.compressor.flush(Z_FINISH)
         self.bytes_list.append(out)
@@ -101,7 +111,7 @@
             raise AssertionError('Somehow we ended up with too much'
                                  ' compressed data, %d > %d'
                                  % (self.bytes_out_len, self.chunk_size))
-        nulls_needed = self.chunk_size - self.bytes_out_len % self.chunk_size
+        nulls_needed = self.chunk_size - self.bytes_out_len
         if nulls_needed:
             self.bytes_list.append("\x00" * nulls_needed)
         return self.bytes_list, self.unused_bytes, nulls_needed

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2008-08-25 03:41:39 +0000
+++ b/bzrlib/index.py	2008-08-25 16:24:09 +0000
@@ -95,6 +95,23 @@
             if not element or _whitespace_re.search(element) is not None:
                 raise errors.BadIndexKey(element)
 
+    def _update_nodes_by_key(self, key, value, node_refs):
+        """Update the _nodes_by_key dict with a new key.
+
+        For a key of (foo, bar, baz) create
+        _nodes_by_key[foo][bar][baz] = key_value
+        """
+        if self._nodes_by_key is None:
+            return
+        key_dict = self._nodes_by_key
+        if self.reference_lists:
+            key_value = key, value, node_refs
+        else:
+            key_value = key, value
+        for subkey in key[:-1]:
+            key_dict = key_dict.setdefault(subkey, {})
+        key_dict[key[-1]] = key_value
+
     def add_node(self, key, value, references=()):
         """Add a node to the index.
 
@@ -120,21 +137,11 @@
             node_refs.append(tuple(reference_list))
         if key in self._nodes and self._nodes[key][0] == '':
             raise errors.BadIndexDuplicateKey(key, self)
-        self._nodes[key] = ('', tuple(node_refs), value)
+        node_refs = tuple(node_refs)
+        self._nodes[key] = ('', node_refs, value)
         self._keys.add(key)
         if self._key_length > 1 and self._nodes_by_key is not None:
-            key_dict = self._nodes_by_key
-            if self.reference_lists:
-                key_value = key, value, tuple(node_refs)
-            else:
-                key_value = key, value
-            # possibly should do this on-demand, but it seems likely it is 
-            # always wanted
-            # For a key of (foo, bar, baz) create
-            # _nodes_by_key[foo][bar][baz] = key_value
-            for subkey in key[:-1]:
-                key_dict = key_dict.setdefault(subkey, {})
-            key_dict[key[-1]] = key_value
+            self._update_nodes_by_key(key, value, node_refs)
 
     def finish(self):
         lines = [_SIGNATURE]
@@ -320,14 +327,14 @@
                 node_value = value
             self._nodes[key] = node_value
             if self._key_length > 1:
-                subkey = list(reversed(key[:-1]))
+                # TODO: We may want to do this lazily, but if we are calling
+                #       _buffer_all, we are likely to be doing
+                #       iter_entries_prefix
                 key_dict = self._nodes_by_key
                 if self.node_ref_lists:
                     key_value = key, node_value[0], node_value[1]
                 else:
                     key_value = key, node_value
-                # possibly should do this on-demand, but it seems likely it is 
-                # always wanted
                 # For a key of (foo, bar, baz) create
                 # _nodes_by_key[foo][bar][baz] = key_value
                 for subkey in key[:-1]:
@@ -1147,6 +1154,14 @@
     available - for example via a CombinedGraphIndex.
     """
 
+    def __init__(self, reference_lists=0, key_elements=1):
+        super(InMemoryGraphIndex, self).__init__(
+            reference_lists=reference_lists,
+            key_elements=key_elements)
+        # The tests using InMemoryGraphIndex expect _nodes_by_key to be filled
+        # out
+        self._nodes_by_key = {}
+
     def add_nodes(self, nodes):
         """Add nodes to the index.
 

=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py	2008-08-25 03:41:39 +0000
+++ b/bzrlib/tests/test_btree_index.py	2008-08-25 16:24:09 +0000
@@ -487,7 +487,6 @@
         self.assertEqual(sorted(set([(builder,) + node for node in nodes[11:13]])),
             sorted(set(builder.iter_entries([nodes[12][0], nodes[11][0]]))))
         self.assertEqual(13, builder.key_count())
-        import pdb; pdb.set_trace()
         self.assertEqual(sorted(set([(builder,) + node for node in nodes[11:13]])),
             sorted(set(builder.iter_entries_prefix([nodes[12][0], nodes[11][0]]))))
         builder.add_node(*nodes[13])



More information about the bazaar-commits mailing list