Rev 5116: (jam) BTreeGraphIndex can now take an offset for the start-of-data. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Mar 25 19:04:18 GMT 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5116 [merge]
revision-id: pqm at pqm.ubuntu.com-20100325190414-fftnvvyh1clu2pkr
parent: pqm at pqm.ubuntu.com-20100325121226-uz21gdeklliihj3a
parent: john at arbash-meinel.com-20100325170953-s6qzmwap7mg0frcx
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2010-03-25 19:04:14 +0000
message:
(jam) BTreeGraphIndex can now take an offset for the start-of-data.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
bzrlib/index.py index.py-20070712131115-lolkarso50vjr64s-1
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
bzrlib/tests/test_index.py test_index.py-20070712131115-lolkarso50vjr64s-2
=== modified file 'NEWS'
--- a/NEWS 2010-03-25 09:59:45 +0000
+++ b/NEWS 2010-03-25 12:33:15 +0000
@@ -13,6 +13,9 @@
Compatibility Breaks
********************
+* BTreeGraphIndex can now take an offset to indicate that the data starts
+ somewhere other than the beginning of the file. (John Arbash Meinel)
+
* Deleted very old hidden commands ``versionedfile-list``,
``weave-plan-merge``, ``weave-merge-text``.
(Martin Pool)
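For context, a minimal sketch of how the new parameter might be used from
calling code (the transport URL, file name, and byte layout below are
hypothetical; only the BTreeGraphIndex signature comes from this commit):

    from bzrlib import btree_index
    from bzrlib.transport import get_transport

    transport = get_transport('file:///some/dir')  # hypothetical location
    data_start = 1234   # bytes of non-index content before the index data
    index_size = 4096   # length of the index data itself (excludes the offset)
    index = btree_index.BTreeGraphIndex(transport, 'a-file', index_size,
                                        offset=data_start)
    key_count = index.key_count()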
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2010-03-13 23:13:48 +0000
+++ b/bzrlib/btree_index.py 2010-03-25 12:32:24 +0000
@@ -647,7 +647,8 @@
memory except when very large walks are done.
"""
- def __init__(self, transport, name, size, unlimited_cache=False):
+ def __init__(self, transport, name, size, unlimited_cache=False,
+ offset=0):
"""Create a B+Tree index object on the index name.
:param transport: The transport to read data for the index from.
@@ -660,6 +661,8 @@
:param unlimited_cache: If set to True, then instead of using an
LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always
cache all leaf nodes.
+ :param offset: The byte offset within the file at which the btree
+ index data begins, for indexes that do not start at byte 0.
"""
self._transport = transport
self._name = name
@@ -667,6 +670,7 @@
self._file = None
self._recommended_pages = self._compute_recommended_pages()
self._root_node = None
+ self._base_offset = offset
# Default max size is 100,000 leaf values
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
if unlimited_cache:
@@ -1494,8 +1498,9 @@
# list of (offset, length) regions of the file that should, eventually,
# be read into data_ranges, either from 'bytes' or from the transport
ranges = []
+ base_offset = self._base_offset
for index in nodes:
- offset = index * _PAGE_SIZE
+ offset = (index * _PAGE_SIZE)
size = _PAGE_SIZE
if index == 0:
# Root node - special case
@@ -1505,9 +1510,11 @@
# The only case where we don't know the size is for very
# small indexes. So we read the whole thing
bytes = self._transport.get_bytes(self._name)
- self._size = len(bytes)
+ num_bytes = len(bytes)
+ self._size = num_bytes - base_offset
# the whole thing should be parsed out of 'bytes'
- ranges.append((0, len(bytes)))
+ ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
+ for start in xrange(base_offset, num_bytes, _PAGE_SIZE)]
break
else:
if offset > self._size:
@@ -1515,13 +1522,13 @@
' of the file %s > %s'
% (offset, self._size))
size = min(size, self._size - offset)
- ranges.append((offset, size))
+ ranges.append((base_offset + offset, size))
if not ranges:
return
elif bytes is not None:
# already have the whole file
- data_ranges = [(start, bytes[start:start+_PAGE_SIZE])
- for start in xrange(0, len(bytes), _PAGE_SIZE)]
+ data_ranges = [(start, bytes[start:start+size])
+ for start, size in ranges]
elif self._file is None:
data_ranges = self._transport.readv(self._name, ranges)
else:
@@ -1530,6 +1537,7 @@
self._file.seek(offset)
data_ranges.append((offset, self._file.read(size)))
for offset, data in data_ranges:
+ offset -= base_offset
if offset == 0:
# extract the header
offset, data = self._parse_header_from_bytes(data)
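The _read_nodes hunks above keep page indexes logical (relative to the start
of the index data): physical read ranges add base_offset before going to the
transport, and the returned offsets subtract it again before parsing. A
standalone sketch of that translation, with helper names of my own rather
than bzrlib API (the root-node special case is omitted for brevity):

    _PAGE_SIZE = 4096  # same value btree_index uses

    def to_physical_ranges(page_indexes, base_offset, index_size):
        # Map logical page numbers to (physical_offset, length) pairs
        # suitable for transport.readv().
        ranges = []
        for index in page_indexes:
            offset = index * _PAGE_SIZE
            size = min(_PAGE_SIZE, index_size - offset)
            ranges.append((base_offset + offset, size))
        return ranges

    def to_logical(physical_offset, base_offset):
        # Undo the shift before parsing the bytes that come back.
        return physical_offset - base_offset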
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py 2010-02-17 17:11:16 +0000
+++ b/bzrlib/index.py 2010-03-05 17:56:55 +0000
@@ -382,7 +382,7 @@
suitable for production use. :XXX
"""
- def __init__(self, transport, name, size, unlimited_cache=False):
+ def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
"""Open an index called name on transport.
:param transport: A bzrlib.transport.Transport.
@@ -394,6 +394,8 @@
avoided by having it supplied. If size is None, then bisection
support will be disabled and accessing the index will just stream
all the data.
+ :param offset: The byte offset in the file at which the index data
+ begins; defaults to 0 (the start of the file).
"""
self._transport = transport
self._name = name
@@ -416,6 +418,7 @@
self._size = size
# The number of bytes we've read so far in trying to process this file
self._bytes_read = 0
+ self._base_offset = offset
def __eq__(self, other):
"""Equal when self and other were created with the same parameters."""
@@ -444,6 +447,10 @@
mutter('Reading entire index %s', self._transport.abspath(self._name))
if stream is None:
stream = self._transport.get(self._name)
+ if self._base_offset != 0:
+ # This is wasteful, but it is better than dealing with
+ # adjusting all the offsets, etc.
+ stream = StringIO(stream.read()[self._base_offset:])
self._read_prefix(stream)
self._expected_elements = 3 + self._key_length
line_count = 0
@@ -1190,11 +1197,22 @@
self._buffer_all()
return
+ base_offset = self._base_offset
+ if base_offset != 0:
+ # Rewrite the ranges for the offset
+ readv_ranges = [(start+base_offset, size)
+ for start, size in readv_ranges]
readv_data = self._transport.readv(self._name, readv_ranges, True,
- self._size)
+ self._size + self._base_offset)
# parse
for offset, data in readv_data:
+ offset -= base_offset
self._bytes_read += len(data)
+ if offset < 0:
+ # transport.readv() expanded the request to include extra data
+ # which isn't part of this index
+ data = data[-offset:]
+ offset = 0
if offset == 0 and len(data) == self._size:
# We read the whole range, most likely because the
# Transport upcast our readv ranges into one long request
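The offset < 0 branch above exists because some transports coalesce readv()
requests and hand back more data than was asked for; any bytes before the
index start have to be sliced away. The same trim in isolation (an
illustrative sketch, not bzrlib code):

    def trim_to_index(offset, data, base_offset):
        # Convert a physical readv result to index-relative terms.
        offset -= base_offset
        if offset < 0:
            # The expanded read began before the index data; drop the
            # leading bytes that belong to whatever precedes the index.
            data = data[-offset:]
            offset = 0
        return offset, data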
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2010-03-11 06:35:52 +0000
+++ b/bzrlib/osutils.py 2010-03-25 17:04:08 +0000
@@ -1812,8 +1812,9 @@
If src is None, the containing directory is used as source. If chown
fails, the error is ignored and a warning is printed.
"""
- has_chown = getattr(os, 'chown')
- if has_chown is None: return
+ chown = getattr(os, 'chown', None)
+ if chown is None:
+ return
if src == None:
src = os.path.dirname(dst)
@@ -1822,7 +1823,7 @@
try:
s = os.stat(src)
- os.chown(dst, s.st_uid, s.st_gid)
+ chown(dst, s.st_uid, s.st_gid)
except OSError, e:
trace.warning("Unable to copy ownership from '%s' to '%s': IOError: %s." % (src, dst, e))
=== modified file 'bzrlib/tests/test_btree_index.py'
--- a/bzrlib/tests/test_btree_index.py 2010-02-23 07:43:11 +0000
+++ b/bzrlib/tests/test_btree_index.py 2010-03-25 17:09:53 +0000
@@ -611,6 +611,21 @@
size = trans.put_file('index', stream)
return btree_index.BTreeGraphIndex(trans, 'index', size)
+ def make_index_with_offset(self, ref_lists=1, key_elements=1, nodes=[],
+ offset=0):
+ builder = btree_index.BTreeBuilder(key_elements=key_elements,
+ reference_lists=ref_lists)
+ builder.add_nodes(nodes)
+ transport = self.get_transport('')
+ # NamedTemporaryFile dies on builder.finish().read(). weird.
+ temp_file = builder.finish()
+ content = temp_file.read()
+ del temp_file
+ size = len(content)
+ transport.put_bytes('index', (' '*offset)+content)
+ return btree_index.BTreeGraphIndex(transport, 'index', size=size,
+ offset=offset)
+
def test_clear_cache(self):
nodes = self.make_nodes(160, 2, 2)
index = self.make_index(ref_lists=2, key_elements=2, nodes=nodes)
@@ -686,6 +701,25 @@
transport._activity)
self.assertEqual(1173, size)
+ def test_with_offset_no_size(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=1234,
+ nodes=self.make_nodes(200, 1, 1))
+ index._size = None # throw away the size info
+ self.assertEqual(200, index.key_count())
+
+ def test_with_small_offset(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=1234,
+ nodes=self.make_nodes(200, 1, 1))
+ self.assertEqual(200, index.key_count())
+
+ def test_with_large_offset(self):
+ index = self.make_index_with_offset(key_elements=1, ref_lists=1,
+ offset=123456,
+ nodes=self.make_nodes(200, 1, 1))
+ self.assertEqual(200, index.key_count())
+
def test__read_nodes_no_size_one_page_reads_once(self):
self.make_index(nodes=[(('key',), 'value', ())])
trans = get_transport('trace+' + self.get_url())
=== modified file 'bzrlib/tests/test_index.py'
--- a/bzrlib/tests/test_index.py 2010-02-17 17:11:16 +0000
+++ b/bzrlib/tests/test_index.py 2010-03-05 17:56:55 +0000
@@ -388,6 +388,17 @@
size = trans.put_file('index', stream)
return GraphIndex(trans, 'index', size)
+ def make_index_with_offset(self, ref_lists=0, key_elements=1, nodes=[],
+ offset=0):
+ builder = GraphIndexBuilder(ref_lists, key_elements=key_elements)
+ for key, value, references in nodes:
+ builder.add_node(key, value, references)
+ content = builder.finish().read()
+ size = len(content)
+ trans = self.get_transport()
+ trans.put_bytes('index', (' '*offset) + content)
+ return GraphIndex(trans, 'index', size, offset=offset)
+
def test_clear_cache(self):
index = self.make_index()
# For now, we just want to make sure the api is available. As this is
@@ -399,6 +410,26 @@
trans.put_bytes('name', "not an index\n")
index = GraphIndex(trans, 'name', 13)
+ def test_with_offset(self):
+ nodes = self.make_nodes(200)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ self.assertEqual(200, index.key_count())
+
+ def test_buffer_all_with_offset(self):
+ nodes = self.make_nodes(200)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ index._buffer_all()
+ self.assertEqual(200, index.key_count())
+
+ def test_side_effect_buffering_with_offset(self):
+ nodes = self.make_nodes(20)
+ index = self.make_index_with_offset(offset=1234567, nodes=nodes)
+ index._transport.recommended_page_size = lambda:64*1024
+ subset_nodes = [nodes[0][0], nodes[10][0], nodes[19][0]]
+ entries = [n[1] for n in index.iter_entries(subset_nodes)]
+ self.assertEqual(sorted(subset_nodes), sorted(entries))
+ self.assertEqual(20, index.key_count())
+
def test_open_sets_parsed_map_empty(self):
index = self.make_index()
self.assertEqual([], index._parsed_byte_map)