Rev 5075: Start working on a self-contained pack implementation.

John Arbash Meinel john at
Thu Mar 4 20:35:03 GMT 2010


revno: 5075
revision-id: john at
parent: pqm at
committer: John Arbash Meinel <john at>
branch nick: 2.2.0b2-contained-pack
timestamp: Thu 2010-03-04 14:34:35 -0600
  Start working on a self-contained pack implementation.
  Basically, we put the indices at the end of the pack content, and add
  a little mini header that describes where those indexes are.
  Note that the mini-index isn't intended to be the primary read location.
  Instead, we recommend that the content contained therein is aggregated
  across many sacks into a meta-file (similar to pack-names for a repo).
-------------- next part --------------
=== added file 'bzrlib/'
--- a/bzrlib/	1970-01-01 00:00:00 +0000
+++ b/bzrlib/	2010-03-04 20:34:35 +0000
@@ -0,0 +1,109 @@
+# Copyright (C) 2010 Canonical Ltd
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+"""A Sack is a self-contained pack file.
+The primary difference is that a Sack puts its indexes at the end of the file,
+rather than storing them in a separate disk structure.
+"""
+import struct
+class IndexInfo(object):
+    """Information that we track for a single index.
+    All attributes start out at placeholder values; nothing in this class
+    fills them in yet.
+    :ivar start_offset: Initialised to 0 (presumably the byte offset of the
+        index inside the sack -- TODO confirm once a writer exists)
+    :ivar length: Initialised to 0 (presumably the index size in bytes --
+        TODO confirm)
+    :ivar index_type: Initialised to None; a name such as 'revisions'
+    """
+    def __init__(self):
+        self.start_offset = 0
+        self.length = 0
+        self.index_type = None # 'revisions', etc
+    def to_bytes(self):
+        # Placeholder: not implemented yet, so this returns None.
+        # SackTie.to_bytes() appends this return value to the list it joins,
+        # so a tie with any index_info entries cannot be serialised until
+        # this returns a string.
+        pass
+class SackTie(object):
+    """This is the final bit that gets written to the sack content.
+    It describes the version of the file (also present at the beginning),
+    as well as some basic information about where data can be found.
+    The tie byte layout is basically:
+    | header | index count | index information
+        | 8-byte start-of-header | 4-byte version |
+    The last two records use fixed-width MSB-first (big-endian) encoding
+    (struct format '!QI', 12 bytes in total), so that we always know how
+    much to parse. The rest is more free-form text.
+    :ivar start_offset: The beginning of the tie information
+    :ivar version: The serialized version of this content
+    :ivar index_info: Information about individual indexes
+    """
+    # Note that the header intentionally starts with '\n' so that it separates
+    # from the rest of the data if you open it in a text editor
+    _HEADER_BASE = '\nBazaar Sack v'
+    def __init__(self, version):
+        # start_offset stays 0 until from_tail() assigns it; to_bytes() takes
+        # the offset as an argument and does not store it here.
+        self.start_offset = 0
+        self.version = version
+        self.index_info = []
+    def to_bytes(self, start_offset):
+        """Serialise this tie to a single string.
+        The result is the text header (version and number of indexes), then
+        the to_bytes() output of every entry in self.index_info, then the
+        12-byte binary trailer holding start_offset and self.version.
+        :param start_offset: Value written into the 8-byte trailer field; it
+            must fit an unsigned 64-bit integer for struct.pack to accept it
+        :return: The joined string
+        """
+        chunks = []
+        chunks.append('%s%d\n'
+                      'num_indexes: %d\n'
+                      % (self._HEADER_BASE, self.version,
+                         len(self.index_info))
+                     )
+        for ii in self.index_info:
+            chunks.append(ii.to_bytes())
+        # '!QI' = network byte order, unsigned 8-byte offset + unsigned 4-byte
+        # version; this is what from_tail() reads back.
+        chunks.append(struct.pack('!QI', start_offset, self.version))
+        # TODO: 'set self.start_offset' ?
+        return ''.join(chunks)
+    @classmethod
+    def from_tail(cls, bytes):
+        """Get the meta-info out of the last 12 bytes of content.
+        Only the binary trailer is decoded: the text header is not parsed and
+        index_info on the returned object is left empty.
+        :param bytes: String ending with the 12-byte trailer (the name shadows
+            the builtin inside this method); struct.unpack needs exactly 12
+            bytes, so shorter input raises struct.error
+        :return: A new instance with version and start_offset set
+        """
+        offset, version = struct.unpack('!QI', bytes[-12:])
+        tie = cls(version)
+        tie.start_offset = offset
+        return tie
+    @classmethod
+    def from_bytes(cls, bytes):
+        # Placeholder: not implemented yet, so this returns None.
+        pass
+class Sack(object):
+    """A self-contained pack file.
+    The content is fairly similar to a regular 'pack' file, except the indexes
+    are written to the end of the file. The whole file is thus
+    self-describing, but must be read backwards. For improved performance,
+    it is recommended that you store the locations of the indices in
+    yet-another index across the various sacks. (similar to how pack-names
+    stores the length of indexes and pack files.)
+    The basic structure is::
+      | blob-content | index1 | index2 | tail-index |
+    The individual indexes are likely to just be a BTreeIndex, the tail-index
+    is a simplified descriptor defining where to find the other indices.
+    """

=== modified file 'bzrlib/tests/'
--- a/bzrlib/tests/	2010-02-18 02:15:48 +0000
+++ b/bzrlib/tests/	2010-03-04 20:34:35 +0000
@@ -3714,6 +3714,7 @@
+        'bzrlib.tests.test_sack',

=== added file 'bzrlib/tests/'
--- a/bzrlib/tests/	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/	2010-03-04 20:34:35 +0000
@@ -0,0 +1,84 @@
+# Copyright (C) 2010 Canonical Ltd
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+"""Tests for the self-contained pack file."""
+import struct
+from bzrlib import (
+    errors,
+    sack,
+    tests,
+    )
+class TestSack(tests.TestCaseWithMemoryTransport):
+    # Placeholder test case: sack.Sack has no behaviour to exercise yet.
+    def test_sack(self):
+        pass
+class TestSackTie(tests.TestCase):
+    # Checks SackTie serialisation (to_bytes) and trailer parsing (from_tail).
+    def assertToBytes(self, expected, tie, start_offset):
+        # Serialise the tie, split off the 12-byte '!QI' trailer, and check
+        # the decoded trailer values and the remaining text separately.
+        content = tie.to_bytes(start_offset)
+        tail = content[-12:]
+        content = content[:-12]
+        offset, version = struct.unpack('!QI', tail)
+        self.assertEqual(tie.version, version)
+        self.assertEqual(start_offset, offset)
+        self.assertEqualDiff(expected, content)
+    def assertFromTail(self, start_offset, version, bytes):
+        # Parse the trailer of 'bytes' and check the offset and version that
+        # from_tail() recovered.
+        tie = sack.SackTie.from_tail(bytes)
+        self.assertIsInstance(tie, sack.SackTie)
+        self.assertEqual(start_offset, tie.start_offset)
+        self.assertEqual(version, tie.version)
+    def test_to_bytes(self):
+        # The text header is the same for every offset; only the trailer
+        # changes. 2**64-1 is the largest value the 8-byte field can hold.
+        tie = sack.SackTie(1)
+        self.assertToBytes('\nBazaar Sack v1\n'
+                           'num_indexes: 0\n',
+                           tie, 0)
+        self.assertToBytes('\nBazaar Sack v1\n'
+                           'num_indexes: 0\n',
+                           tie, 1)
+        self.assertToBytes('\nBazaar Sack v1\n'
+                           'num_indexes: 0\n',
+                           tie, 2**48-1)
+        self.assertToBytes('\nBazaar Sack v1\n'
+                           'num_indexes: 0\n',
+                           tie, 2**64-1)
+    def test_from_tail(self):
+        # Trailer bytes: 0x3039 == 12345 (offset); 0x01 == 1 and 0x7b == 123
+        # (version).
+        self.assertFromTail(12345, 1,
+                            '\nBazaar Sack v1\n'
+                            'num_indexes: 0\n'
+                            '\x00\x00\x00\x00\x00\x00\x30\x39'
+                            '\x00\x00\x00\x01')
+        self.assertFromTail(12345, 123,
+                            '\nBazaar Sack v123\n'
+                            'num_indexes: 0\n'
+                            '\x00\x00\x00\x00\x00\x00\x30\x39'
+                            '\x00\x00\x00\x7b')
+class TestIndexInfo(tests.TestCase):
+    # Only a helper so far; no test method in this class calls it yet.
+    def assertToBytes(self, expected, index_info):
+        # Compare the serialised form of index_info with the expected string.
+        content = index_info.to_bytes()
+        self.assertEqualDiff(expected, content)

More information about the bazaar-commits mailing list