Rev 5079: Add bootstrapping code so that we can start with a file on disk and extract the information from its tail, in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 4 22:43:50 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5079
revision-id: john at arbash-meinel.com-20100304224322-1y9ot2twh0tv9g0g
parent: john at arbash-meinel.com-20100304220843-say9gc02mpn9ty6w
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Thu 2010-03-04 16:43:22 -0600
message:
Add bootstrapping code so that we can start with a file on disk and extract the
information from its tail.
-------------- next part --------------
=== modified file 'bzrlib/sack.py'
--- a/bzrlib/sack.py 2010-03-04 22:08:43 +0000
+++ b/bzrlib/sack.py 2010-03-04 22:43:22 +0000
@@ -29,6 +29,9 @@
)
from bzrlib.transport import file_view
+_HEADER_BASE = '\nBazaar Sack v'
+_VERSION = 1
+
class TrailingIndexBuilder(object):
"""This is the final bit that gets written to the sack content.
@@ -44,12 +47,10 @@
# Note that the header intentionally starts with '\n' so that it separates
# from the rest of the data if you open it in a text editor
- _HEADER_BASE = '\nBazaar Sack v'
- _VERSION = 1
def __init__(self, start_offset):
self.start_offset = start_offset
- self.version = self.__class__._VERSION
+ self.version = _VERSION
self._index_builder = btree_index.BTreeBuilder(reference_lists=0,
key_elements=1)
@@ -60,8 +61,13 @@
self._index_builder.add_node((index_type,), '%d %d' % (start, length))
def finish(self):
+ # TODO: Perhaps this should be more like BTreeBuilder and return a
+ # file-like object, alternatively it should take a 'writer'
+ # function so that we can stream the data into it. For now,
+ # we don't expect the TrailingIndex to be big enough to worry
+ # about memory pressure, etc.
chunks = []
- chunks.append('%s%d\n' % (self._HEADER_BASE, self.version))
+ chunks.append('%s%d\n' % (_HEADER_BASE, self.version))
chunks.append(self._index_builder.finish().read())
self._index_builder = None
chunks.append(struct.pack('!QI', self.start_offset, self.version))
@@ -69,25 +75,72 @@
+# Note: one downside of abstracting via the FileView is that even though all of
+# the content exists in a single file, we aren't able to make a single
+# 'readv()' across all of the sub-indexes, because we present each section
+# as if it were a separate file.
+
class TrailingIndex(object):
"""Track the root-structure for the Sack.
See TrailingIndexBuilder for the structure of this tail.
+
+ :ivar start_offset: The offset in the sack file where the trailing index
+ starts.
+ :ivar version: The recorded version of the content
"""
+
def __init__(self, version, start_offset):
self.start_offset = start_offset
self.version = version
+ self._end_of_file = None
+ self._base_transport = None
+ self._base_filename = None
+ # This is a BTreeGraphIndex mapping names => their offsets in the sack.
+ self._named_sections = None
+ # This is a transport.file_view.FileView which allows us to map names
+ # to offsets in the backing transport
+ self._file_view = None
+ self._section_file_map = {}
+
+ def _ensure_named_sections(self):
+ expected_header = '%s%d\n' % (_HEADER_BASE, self.version)
+ _, start = self._base_transport.readv(self._base_filename,
+ [(self.start_offset, len(expected_header))]).next()
+ assert start == expected_header
+ root_start = self.start_offset + len(expected_header)
+ root_end = self._end_of_file - 12
+ self._section_file_map = {'root-index': (root_start, root_end)}
+ self._file_view = file_view.FileView(self._base_transport,
+ self._base_filename, self._section_file_map)
+ self._named_sections = btree_index.BTreeGraphIndex(
+ self._file_view, 'root-index', root_end - root_start)
+ # Walk every entry so each named section is recorded in the file map
+ for _, key, value in self._named_sections.iter_all_entries():
+ start, length = map(int, value.split())
+ assert len(key) == 1
+ name, = key
+ self._section_file_map[name] = (start, start+length)
@staticmethod
- def from_tail(bytes):
+ def parse_tail_bytes(bytes):
"""Get the meta-info out of the last 12 bytes of content."""
offset, version = struct.unpack('!QI', bytes[-12:])
- tie = TrailingIndex(version, offset)
- return tie
+ ti = TrailingIndex(version, offset)
+ return ti
@classmethod
- def from_bytes(cls, bytes):
- pass
+ def from_transport(cls, transport, filename):
+ # This is a bootstrap method: if all you have is the sack file, read
+ # its tail and figure out where the internal indices are.
+ file_st = transport.stat(filename)
+ _,tail = transport.readv(filename, [(file_st.st_size-12, 12)]).next()
+ ti = TrailingIndex.parse_tail_bytes(tail)
+ ti._base_transport = transport
+ ti._base_filename = filename
+ ti._end_of_file = file_st.st_size
+ ti._ensure_named_sections()
+ return ti
class Sack(object):
@@ -104,6 +157,9 @@
| blob-content | index1 | index2 | tail-index |
- The individual indexes are likely to just be a BTreeIndex, the tail-index
- is a simplified descriptor defining where to find the other indices.
+ The individual indexes are likely to just be a BTreeGraphIndex, the
+ tail-index is a simplified descriptor defining where to find the other
+ indices.
"""
+
+ # TODO: Rename this to IndexedPack or something like that
=== modified file 'bzrlib/tests/test_sack.py'
--- a/bzrlib/tests/test_sack.py 2010-03-04 22:08:43 +0000
+++ b/bzrlib/tests/test_sack.py 2010-03-04 22:43:22 +0000
@@ -81,16 +81,30 @@
class TestTrailingIndex(tests.TestCase):
- def assertFromTail(self, start_offset, version, bytes):
- ti = sack.TrailingIndex.from_tail(bytes)
+ def assertTailBytes(self, start_offset, version, bytes):
+ ti = sack.TrailingIndex.parse_tail_bytes(bytes)
self.assertIsInstance(ti, sack.TrailingIndex)
self.assertEqual(start_offset, ti.start_offset)
self.assertEqual(version, ti.version)
- def test_from_tail(self):
- self.assertFromTail(12345, 1,
- '\x00\x00\x00\x00\x00\x00\x30\x39'
- '\x00\x00\x00\x01')
- self.assertFromTail(12345, 123,
- '\x00\x00\x00\x00\x00\x00\x30\x39'
- '\x00\x00\x00\x7b')
+ def test_parse_tail_bytes(self):
+ self.assertTailBytes(12345, 1,
+ '\x00\x00\x00\x00\x00\x00\x30\x39'
+ '\x00\x00\x00\x01')
+ self.assertTailBytes(12345, 123,
+ '\x00\x00\x00\x00\x00\x00\x30\x39'
+ '\x00\x00\x00\x7b')
+
+ def test_from_transport(self):
+ # We should be able to bootstrap all info starting with just a path on
+ # disk
+ builder = sack.TrailingIndexBuilder(start_offset=500)
+ builder.add_index_info('texts', 150, 350)
+ content = builder.finish()
+ t = memory.MemoryTransport('')
+ t.put_bytes('test.sack', ' '*500 + content)
+ ti = sack.TrailingIndex.from_transport(t, 'test.sack')
+ # We skip the 16-byte header at the beginning, and the 12-byte tail
+ self.assertEqual({'root-index': (516, 500+len(content)-12),
+ 'texts': (150, 500),
+ }, ti._section_file_map)
More information about the bazaar-commits
mailing list