Rev 5077: Actually implement offset support for GraphIndex. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset

Fri Mar 5 17:57:28 GMT 2010

At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset

------------------------------------------------------------
revno: 5077
revision-id: john at arbash-meinel.com-20100305175655-06ypovp1vq6ngclt
parent: john at arbash-meinel.com-20100305173021-56x1q530woa0oq48
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-btree-offset
timestamp: Fri 2010-03-05 11:56:55 -0600
message:
  Actually implement offset support for GraphIndex.
  
  I don't think it is strictly needed, but bzr-search would want it
  for old-format data. Plus it felt a bit weird to arbitrarily
  abort with NotImplementedError whenever a non-zero offset was given.
-------------- next part --------------
=== modified file 'bzrlib/index.py'

--- a/bzrlib/index.py	2010-03-05 17:30:21 +0000
+++ b/bzrlib/index.py	2010-03-05 17:56:55 +0000
@@ -419,8 +419,6 @@
         # The number of bytes we've read so far in trying to process this file
         self._bytes_read = 0
         self._base_offset = offset
-        if offset != 0:
-            raise NotImplementedError('GraphIndex(offset) must be 0')
 
     def __eq__(self, other):
         """Equal when self and other were created with the same parameters."""
@@ -449,6 +447,10 @@
             mutter('Reading entire index %s', self._transport.abspath(self._name))
         if stream is None:
             stream = self._transport.get(self._name)
+            if self._base_offset != 0:
+                # This is wasteful, but it is better than dealing with
+                # adjusting all the offsets, etc.
+                stream = StringIO(stream.read()[self._base_offset:])
         self._read_prefix(stream)
         self._expected_elements = 3 + self._key_length
         line_count = 0
@@ -1195,11 +1197,22 @@
             self._buffer_all()
             return
 
+        base_offset = self._base_offset
+        if base_offset != 0:
+            # Rewrite the ranges for the offset
+            readv_ranges = [(start+base_offset, size)
+                            for start, size in readv_ranges]
         readv_data = self._transport.readv(self._name, readv_ranges, True,
-            self._size)
+            self._size + self._base_offset)
         # parse
         for offset, data in readv_data:
+            offset -= base_offset
             self._bytes_read += len(data)
+            if offset < 0:
+                # transport.readv() expanded to extra data which isn't part of
+                # this index
+                data = data[-offset:]
+                offset = 0
             if offset == 0 and len(data) == self._size:
                 # We read the whole range, most likely because the
                 # Transport upcast our readv ranges into one long request

=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py	2010-03-05 17:30:21 +0000
+++ b/bzrlib/tests/test_index.py	2010-03-05 17:56:55 +0000
@@ -388,6 +388,17 @@
         size = trans.put_file('index', stream)
         return GraphIndex(trans, 'index', size)
 
+    def make_index_with_offset(self, ref_lists=0, key_elements=1, nodes=[],
+                               offset=0):
+        builder = GraphIndexBuilder(ref_lists, key_elements=key_elements)
+        for key, value, references in nodes:
+            builder.add_node(key, value, references)
+        content = builder.finish().read()
+        size = len(content)
+        trans = self.get_transport()
+        trans.put_bytes('index', (' '*offset) + content)
+        return GraphIndex(trans, 'index', size, offset=offset)
+
     def test_clear_cache(self):
         index = self.make_index()
         # For now, we just want to make sure the api is available. As this is
@@ -399,10 +410,25 @@
         trans.put_bytes('name', "not an index\n")
         index = GraphIndex(trans, 'name', 13)
 
-    def test_open_bad_offset(self):
-        trans = self.get_transport()
-        self.assertRaises(NotImplementedError, 
-            GraphIndex, trans, 'name', 13, offset=10)
+    def test_with_offset(self):
+        nodes = self.make_nodes(200)
+        index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+        self.assertEqual(200, index.key_count())
+
+    def test_buffer_all_with_offset(self):
+        nodes = self.make_nodes(200)
+        index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+        index._buffer_all()
+        self.assertEqual(200, index.key_count())
+
+    def test_side_effect_buffering_with_offset(self):
+        nodes = self.make_nodes(20)
+        index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+        index._transport.recommended_page_size = lambda:64*1024
+        subset_nodes = [nodes[0][0], nodes[10][0], nodes[19][0]]
+        entries = [n[1] for n in index.iter_entries(subset_nodes)]
+        self.assertEqual(sorted(subset_nodes), sorted(entries))
+        self.assertEqual(20, index.key_count())
 
     def test_open_sets_parsed_map_empty(self):
         index = self.make_index()