Rev 5077: Actually implement offset support for GraphIndex. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 5 17:57:28 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-btree-offset
------------------------------------------------------------
revno: 5077
revision-id: john at arbash-meinel.com-20100305175655-06ypovp1vq6ngclt
parent: john at arbash-meinel.com-20100305173021-56x1q530woa0oq48
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-btree-offset
timestamp: Fri 2010-03-05 11:56:55 -0600
message:
Actually implement offset support for GraphIndex.
I don't think it is strictly needed, but bzr-search would want it
for old-format data. Plus it felt a bit weird to arbitrarily
abort with NotImplementedError whenever a non-zero offset was passed.
-------------- next part --------------
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2010-03-05 17:30:21 +0000
+++ b/bzrlib/index.py 2010-03-05 17:56:55 +0000
@@ -419,8 +419,6 @@
# The number of bytes we've read so far in trying to process this file
self._bytes_read = 0
self._base_offset = offset
- if offset != 0:
- raise NotImplementedError('GraphIndex(offset) must be 0')
def __eq__(self, other):
"""Equal when self and other were created with the same parameters."""
@@ -449,6 +447,10 @@
mutter('Reading entire index %s', self._transport.abspath(self._name))
if stream is None:
stream = self._transport.get(self._name)
+ if self._base_offset != 0:
+ # This is wasteful (we read and then discard everything before the
+ # offset), but it is better than adjusting all the offsets, etc.
+ stream = StringIO(stream.read()[self._base_offset:])
self._read_prefix(stream)
self._expected_elements = 3 + self._key_length
line_count = 0
@@ -1195,11 +1197,22 @@
self._buffer_all()
return
+ base_offset = self._base_offset
+ if base_offset != 0:
+ # Rewrite the ranges for the offset
+ readv_ranges = [(start+base_offset, size)
+ for start, size in readv_ranges]
readv_data = self._transport.readv(self._name, readv_ranges, True,
- self._size)
+ self._size + self._base_offset)
# parse
for offset, data in readv_data:
+ offset -= base_offset
self._bytes_read += len(data)
+ if offset < 0:
+ # transport.readv() expanded to extra data which isn't part of
+ # this index
+ data = data[-offset:]
+ offset = 0
if offset == 0 and len(data) == self._size:
# We read the whole range, most likely because the
# Transport upcast our readv ranges into one long request
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2010-03-05 17:30:21 +0000
+++ b/bzrlib/tests/test_index.py 2010-03-05 17:56:55 +0000
@@ -388,6 +388,17 @@
size = trans.put_file('index', stream)
return GraphIndex(trans, 'index', size)
+ def make_index_with_offset(self, ref_lists=0, key_elements=1, nodes=[],
+ offset=0):
+ builder = GraphIndexBuilder(ref_lists, key_elements=key_elements)
+ for key, value, references in nodes:
+ builder.add_node(key, value, references)
+ content = builder.finish().read()
+ size = len(content)
+ trans = self.get_transport()
+ trans.put_bytes('index', (' '*offset) + content)
+ return GraphIndex(trans, 'index', size, offset=offset)
+
def test_clear_cache(self):
index = self.make_index()
# For now, we just want to make sure the api is available. As this is
@@ -399,10 +410,25 @@
trans.put_bytes('name', "not an index\n")
index = GraphIndex(trans, 'name', 13)
- def test_open_bad_offset(self):
- trans = self.get_transport()
- self.assertRaises(NotImplementedError,
- GraphIndex, trans, 'name', 13, offset=10)
+ def test_with_offset(self):
+ nodes = self.make_nodes(200)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ self.assertEqual(200, index.key_count())
+
+ def test_buffer_all_with_offset(self):
+ nodes = self.make_nodes(200)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ index._buffer_all()
+ self.assertEqual(200, index.key_count())
+
+ def test_side_effect_buffering_with_offset(self):
+ nodes = self.make_nodes(20)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ index._transport.recommended_page_size = lambda:64*1024
+ subset_nodes = [nodes[0][0], nodes[10][0], nodes[19][0]]
+ entries = [n[1] for n in index.iter_entries(subset_nodes)]
+ self.assertEqual(sorted(subset_nodes), sorted(entries))
+ self.assertEqual(20, index.key_count())
def test_open_sets_parsed_map_empty(self):
index = self.make_index()
More information about the bazaar-commits
mailing list