Rev 5075: Start working on a self-contained pack implementation. in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 4 20:35:03 GMT 2010
At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack
------------------------------------------------------------
revno: 5075
revision-id: john at arbash-meinel.com-20100304203435-csu20otvt3mm1g3i
parent: pqm at pqm.ubuntu.com-20100303113037-51ffw5xyk93yzgl0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Thu 2010-03-04 14:34:35 -0600
message:
Start working on a self-contained pack implementation.
Basically, we put the indices at the end of the pack content, and add
a little mini header that describes where those indexes are.
Note that the mini-index isn't intended to be the primary read location.
Instead, we recommend that the content contained therein is aggregated
across many sacks into a meta-file (similar to pack-names for a repo).
-------------- next part --------------
=== added file 'bzrlib/sack.py'
--- a/bzrlib/sack.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/sack.py 2010-03-04 20:34:35 +0000
@@ -0,0 +1,109 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""A Sack is a self-contained pack file.
+
+The primary difference is that a Sack puts its indexes at the end of the file,
+rather than storing them in a separate disk structure.
+"""
+
+import struct
+
+
+class IndexInfo(object):
+ """Information that we track for a single index."""
+
+ def __init__(self):
+ self.start_offset = 0
+ self.length = 0
+ self.index_type = None # 'revisions', etc
+
+ def to_bytes(self):
+ pass
+
+
+class SackTie(object):
+ """This is the final bit that gets written to the sack content.
+
+ It describes the version of the file (also present at the beginning),
+ as well as some basic information about where data can be found.
+
+ The tie byte layout is basically:
+
+ | header | index count | index information
+ | 8-byte start-of-header | 4-byte version |
+
+ The last two records are using fixed-width MSB encoding, so that we always
+ know how much to parse. The rest is more free-form text.
+
+ :ivar start_offset: The beginning of the tie information
+ :ivar version: The serialized version of this content
+ :ivar index_info: Information about individual indexes
+ """
+
+ # Note that the header intentionally starts with '\n' so that it separates
+ # from the rest of the data if you open it in a text editor
+ _HEADER_BASE = '\nBazaar Sack v'
+
+ def __init__(self, version):
+ self.start_offset = 0
+ self.version = version
+ self.index_info = []
+
+ def to_bytes(self, start_offset):
+ chunks = []
+ chunks.append('%s%d\n'
+ 'num_indexes: %d\n'
+ % (self._HEADER_BASE, self.version,
+ len(self.index_info))
+ )
+ for ii in self.index_info:
+ chunks.append(ii.to_bytes())
+ chunks.append(struct.pack('!QI', start_offset, self.version))
+ # TODO: 'set self.start_offset' ?
+ return ''.join(chunks)
+
+ @classmethod
+ def from_tail(cls, bytes):
+ """Get the meta-info out of the last 12 bytes of content."""
+ offset, version = struct.unpack('!QI', bytes[-12:])
+ tie = cls(version)
+ tie.start_offset = offset
+ return tie
+
+ @classmethod
+ def from_bytes(cls, bytes):
+ pass
+
+
+
+class Sack(object):
+ """A self-contained pack file.
+
+ The content is fairly similar to a regular 'pack' file, except the indexes
+ are written to the end of the file. The whole file is thus
+ self-describing, but must be read backwards. For improved performance,
+ it is recommended that you store the locations of the indices in
+ yet-another index across the various sacks. (similar to how pack-names
+ stores the length of indexes and pack files.)
+
+ The basic structure is::
+
+ | blob-content | index1 | index2 | tail-index |
+
+ The individual indexes are likely to just be a BTreeIndex, the tail-index
+ is a simplified descriptor defining where to find the other indices.
+ """
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2010-02-18 02:15:48 +0000
+++ b/bzrlib/tests/__init__.py 2010-03-04 20:34:35 +0000
@@ -3714,6 +3714,7 @@
'bzrlib.tests.test_revisiontree',
'bzrlib.tests.test_rio',
'bzrlib.tests.test_rules',
+ 'bzrlib.tests.test_sack',
'bzrlib.tests.test_sampler',
'bzrlib.tests.test_script',
'bzrlib.tests.test_selftest',
=== added file 'bzrlib/tests/test_sack.py'
--- a/bzrlib/tests/test_sack.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_sack.py 2010-03-04 20:34:35 +0000
@@ -0,0 +1,84 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for the self-contained pack file."""
+
+import struct
+
+from bzrlib import (
+ errors,
+ sack,
+ tests,
+ )
+
+
+class TestSack(tests.TestCaseWithMemoryTransport):
+
+ def test_sack(self):
+ pass
+
+
+
+class TestSackTie(tests.TestCase):
+
+ def assertToBytes(self, expected, tie, start_offset):
+ content = tie.to_bytes(start_offset)
+ tail = content[-12:]
+ content = content[:-12]
+ offset, version = struct.unpack('!QI', tail)
+ self.assertEqual(tie.version, version)
+ self.assertEqual(start_offset, offset)
+ self.assertEqualDiff(expected, content)
+
+ def assertFromTail(self, start_offset, version, bytes):
+ tie = sack.SackTie.from_tail(bytes)
+ self.assertIsInstance(tie, sack.SackTie)
+ self.assertEqual(start_offset, tie.start_offset)
+ self.assertEqual(version, tie.version)
+
+ def test_to_bytes(self):
+ tie = sack.SackTie(1)
+ self.assertToBytes('\nBazaar Sack v1\n'
+ 'num_indexes: 0\n',
+ tie, 0)
+ self.assertToBytes('\nBazaar Sack v1\n'
+ 'num_indexes: 0\n',
+ tie, 1)
+ self.assertToBytes('\nBazaar Sack v1\n'
+ 'num_indexes: 0\n',
+ tie, 2**48-1)
+ self.assertToBytes('\nBazaar Sack v1\n'
+ 'num_indexes: 0\n',
+ tie, 2**64-1)
+
+ def test_from_tail(self):
+ self.assertFromTail(12345, 1,
+ '\nBazaar Sack v1\n'
+ 'num_indexes: 0\n'
+ '\x00\x00\x00\x00\x00\x00\x30\x39'
+ '\x00\x00\x00\x01')
+ self.assertFromTail(12345, 123,
+ '\nBazaar Sack v123\n'
+ 'num_indexes: 0\n'
+ '\x00\x00\x00\x00\x00\x00\x30\x39'
+ '\x00\x00\x00\x7b')
+
+
+class TestIndexInfo(tests.TestCase):
+
+ def assertToBytes(self, expected, index_info):
+ content = index_info.to_bytes()
+ self.assertEqualDiff(expected, content)
More information about the bazaar-commits
mailing list