Rev 9: Initial stab at repository format support. in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
Robert Collins
robertc at robertcollins.net
Tue Jul 15 13:45:50 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
------------------------------------------------------------
revno: 9
revision-id: robertc at robertcollins.net-20080715124549-jt2k0bjdhtump2u1
parent: robertc at robertcollins.net-20080715112717-44vwju2fw9wph4s8
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Tue 2008-07-15 22:45:49 +1000
message:
Initial stab at repository format support.
added:
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
tests/test_repofmt.py test_repofmt.py-20080715094215-wp1qfvoo7093c8qr-2
modified:
__init__.py __init__.py-20080705181503-ccbxd6xuy1bdnrpu-6
groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
=== modified file '__init__.py'
--- a/__init__.py 2008-07-05 18:15:40 +0000
+++ b/__init__.py 2008-07-15 12:45:49 +0000
@@ -28,8 +28,70 @@
Documentation
=============
-See DESIGN in the groupcompress osurc.e
+See DESIGN in the groupcompress source.
"""
+
+
+
+from bzrlib.bzrdir import format_registry
+format_registry.register_metadir('gc-plain',
+ 'bzrlib.plugins.groupcompress.repofmt.RepositoryFormatPackGCPlain',
+ help='pack-0.92 with btree index and group compress. '
+ 'Please read '
+ 'http://doc.bazaar-vcs.org/latest/developers/development-repo.html '
+ 'before use.',
+ branch_format='bzrlib.branch.BzrBranchFormat6',
+ tree_format='bzrlib.workingtree.WorkingTreeFormat4',
+ hidden=False,
+ experimental=True,
+ )
+
+format_registry.register_metadir('gc-rich-root',
+ 'bzrlib.plugins.groupcompress.repofmt.RepositoryFormatPackGCRichRoot',
+ help='rich-root-pack with btree index and group compress. '
+ 'Please read '
+ 'http://doc.bazaar-vcs.org/latest/developers/development-repo.html '
+ 'before use.',
+ branch_format='bzrlib.branch.BzrBranchFormat6',
+ tree_format='bzrlib.workingtree.WorkingTreeFormat4',
+ hidden=False,
+ experimental=True,
+ )
+
+format_registry.register_metadir('gc-subtrees',
+    'bzrlib.plugins.groupcompress.repofmt.RepositoryFormatPackGCSubtrees',
+    help='pack-0.92-subtrees with btree index and group compress. '
+        'Please read '
+        'http://doc.bazaar-vcs.org/latest/developers/development-repo.html '
+        'before use.',
+    branch_format='bzrlib.branch.BzrBranchFormat6',
+    tree_format='bzrlib.workingtree.WorkingTreeFormat4',
+    hidden=False,
+    experimental=True,
+    )
+
+# Map each repository format string to its class; one import serves all.
+from bzrlib.repository import format_registry as repo_registry
+repo_registry.register_lazy(
+    'Bazaar development format - btree+gc (needs bzr.dev from 1.6)\n',
+    'bzrlib.plugins.groupcompress.repofmt',
+    'RepositoryFormatPackGCPlain',
+    )
+
+repo_registry.register_lazy(
+    'Bazaar development format - btree+gc-rich-root (needs bzr.dev from 1.6)\n',
+    'bzrlib.plugins.groupcompress.repofmt',
+    'RepositoryFormatPackGCRichRoot',
+    )
+
+repo_registry.register_lazy(
+    'Bazaar development format - btree+gc-subtrees (needs bzr.dev from 1.6)\n',
+    'bzrlib.plugins.groupcompress.repofmt',
+    'RepositoryFormatPackGCSubtrees',
+    )
+
+
+
def test_suite():
# Thunk across to load_tests for niceness with older bzr versions
from bzrlib.tests import TestLoader
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2008-07-15 11:27:17 +0000
+++ b/groupcompress.py 2008-07-15 12:45:49 +0000
@@ -39,6 +39,7 @@
split_lines,
)
from bzrlib.plugins.index2.btree_index import BTreeBuilder
+from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import (
adapter_registry,
AbsentContentFactory,
@@ -233,7 +234,7 @@
writer = pack.ContainerWriter(stream.write)
writer.begin()
index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
- deltas=delta, add_callback=graph_index.add_nodes)
+ add_callback=graph_index.add_nodes)
access = _DirectPackAccess({})
access.set_writer(writer, graph_index, (transport, 'newpack'))
result = GroupCompressVersionedFiles(index, access, delta)
@@ -570,14 +571,13 @@
class _GCGraphIndex(object):
"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
- def __init__(self, graph_index, is_locked, deltas=False, parents=True,
+ def __init__(self, graph_index, is_locked, parents=True,
add_callback=None):
"""Construct a _GCGraphIndex on a graph_index.
:param graph_index: An implementation of bzrlib.index.GraphIndex.
:param is_locked: A callback to check whether the object should answer
queries.
- :param deltas: Allow delta-compressed records.
:param parents: If True, record knits parents, if not do not record
parents.
:param add_callback: If not None, allow additions to the index and call
@@ -588,13 +588,7 @@
"""
self._add_callback = add_callback
self._graph_index = graph_index
- self._deltas = deltas
self._parents = parents
- if deltas and not parents:
- # XXX: TODO: Delta tree and parent graph should be conceptually
- # separate.
- raise errors.KnitCorrupt(self, "Cannot do delta compression without "
- "parent tracking.")
self.has_graph = parents
self._is_locked = is_locked
=== added file 'repofmt.py'
--- a/repofmt.py 1970-01-01 00:00:00 +0000
+++ b/repofmt.py 2008-07-15 12:45:49 +0000
@@ -0,0 +1,301 @@
+# groupcompress, a bzr plugin providing improved disk utilisation
+# Copyright (C) 2008 Canonical Limited.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""Repository formats using B+Tree indices and groupcompress compression."""
+
+import md5
+import time
+
+from bzrlib import debug, errors, pack, repository
+from bzrlib.index import GraphIndex, GraphIndexBuilder
+from bzrlib.repository import InterPackRepo
+from bzrlib.plugins.groupcompress.groupcompress import (
+ _GCGraphIndex,
+ GroupCompressVersionedFiles,
+ )
+from bzrlib.plugins.index2.btree_index import (
+ BTreeBuilder,
+ BTreeGraphIndex,
+ FixedMemoryGraphIndex,
+ )
+from bzrlib.osutils import rand_chars
+from bzrlib.repofmt.pack_repo import (
+ Pack,
+ NewPack,
+ KnitPackRepository,
+ RepositoryPackCollection,
+ RepositoryFormatPackDevelopment0,
+ RepositoryFormatPackDevelopment0Subtree,
+ RepositoryFormatKnitPack1,
+ RepositoryFormatKnitPack3,
+ RepositoryFormatKnitPack4,
+ Packer,
+ ReconcilePacker,
+ OptimisingPacker,
+ )
+from bzrlib import ui
+
+
+def open_pack(self):
+ return self._pack_collection.pack_factory(self._pack_collection._upload_transport,
+ self._pack_collection._index_transport,
+ self._pack_collection._pack_transport, upload_suffix=self.suffix,
+ file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
+
+
+Packer.open_pack = open_pack
+
+
+class GCPack(NewPack):
+    """A NewPack variant that builds its four indices with BTreeBuilder."""
+
+    def __init__(self, upload_transport, index_transport, pack_transport,
+        upload_suffix='', file_mode=None):
+        """Create a GCPack instance.
+
+        :param upload_transport: A writable transport for the pack to be
+            incrementally uploaded to.
+        :param index_transport: A writable transport for the pack's indices to
+            be written to when the pack is finished.
+        :param pack_transport: A writable transport for the pack to be renamed
+            to when the upload is complete. This *must* be the same as
+            upload_transport.clone('../packs').
+        :param upload_suffix: An optional suffix to be given to any temporary
+            files created during the pack creation. e.g '.autopack'
+        :param file_mode: An optional file mode to create the new files with.
+        """
+        # The relative locations of the packs are constrained, but all are
+        # passed in because the caller has them, so as to avoid object churn.
+        Pack.__init__(self,
+            # Revisions: parents list, no text compression.
+            BTreeBuilder(reference_lists=1),
+            # Inventory: compressed, with graph for compatibility with other
+            # existing bzrlib code.
+            BTreeBuilder(reference_lists=1),
+            # Texts: per file graph:
+            BTreeBuilder(reference_lists=1, key_elements=2),
+            # Signatures: Just blobs to store, no compression, no parents
+            # listing.
+            BTreeBuilder(reference_lists=0),
+            )
+        # where should the new pack be opened
+        self.upload_transport = upload_transport
+        # where are indices written out to
+        self.index_transport = index_transport
+        # where is the pack renamed to when it is finished?
+        self.pack_transport = pack_transport
+        # What file mode to upload the pack and indices with.
+        self._file_mode = file_mode
+        # tracks the content written to the .pack file.
+        self._hash = md5.new()
+        # a four-tuple with the length in bytes of the indices, once the pack
+        # is finalised. (rev, inv, text, sigs)
+        self.index_sizes = None
+        # How much data to cache when writing packs. Note that this is not
+        # synchronised with reads, because it's not in the transport layer, so
+        # is not safe unless the client knows it won't be reading from the pack
+        # under creation.
+        self._cache_limit = 0
+        # the temporary pack file name.
+        self.random_name = rand_chars(20) + upload_suffix
+        # when was this pack started ?
+        self.start_time = time.time()
+        # open an output stream for the data added to the pack.
+        self.write_stream = self.upload_transport.open_write_stream(
+            self.random_name, mode=self._file_mode)
+        if 'pack' in debug.debug_flags:
+            from bzrlib.trace import mutter  # not imported at module level
+            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
+                time.ctime(), self.upload_transport.base, self.random_name,
+                time.time() - self.start_time)
+        # A list of byte sequences to be written to the new pack, and the
+        # aggregate size of them. Stored as a list rather than separate
+        # variables so that the _write_data closure below can update them.
+        self._buffer = [[], 0]
+        # create a callable for adding data. robertc says- this is a closure
+        # rather than a method on the object so that the variables are
+        # locals, and faster than accessing object members.
+        def _write_data(bytes, flush=False, _buffer=self._buffer,
+            _write=self.write_stream.write, _update=self._hash.update):
+            _buffer[0].append(bytes)
+            _buffer[1] += len(bytes)
+            # buffer cap
+            if _buffer[1] > self._cache_limit or flush:
+                bytes = ''.join(_buffer[0])
+                _write(bytes)
+                _update(bytes)
+                _buffer[:] = [[], 0]
+        # expose this on self, for the occasion when clients want to add data.
+        self._write_data = _write_data
+        # a pack writer object to serialise pack records.
+        self._writer = pack.ContainerWriter(self._write_data)
+        self._writer.begin()
+        # what state is the pack in? (open, finished, aborted)
+        self._state = 'open'
+
+    def _replace_index_with_readonly(self, index_type):
+        setattr(self, index_type + '_index',
+            BTreeGraphIndex(self.index_transport,
+                self.index_name(index_type, self.name),
+                self.index_sizes[self.index_offset(index_type)]))
+
+
+RepositoryPackCollection.pack_factory = NewPack
+
+class GCRepositoryPackCollection(RepositoryPackCollection):
+
+ pack_factory = GCPack
+
+ def _make_index(self, name, suffix):
+ """Overridden to use BTreeGraphIndex objects."""
+ size_offset = self._suffix_offsets[suffix]
+ index_name = name + suffix
+ index_size = self._names[name][size_offset]
+ return BTreeGraphIndex(
+ self._index_transport, index_name, index_size)
+
+ def _start_write_group(self):
+ # Do not permit preparation for writing if we're not in a 'write lock'.
+ if not self.repo.is_write_locked():
+ raise errors.NotWriteLocked(self)
+ self._new_pack = self.pack_factory(self._upload_transport, self._index_transport,
+ self._pack_transport, upload_suffix='.pack',
+ file_mode=self.repo.bzrdir._get_file_mode())
+ # allow writing: queue writes to a new index
+ self.revision_index.add_writable_index(self._new_pack.revision_index,
+ self._new_pack)
+ self.inventory_index.add_writable_index(self._new_pack.inventory_index,
+ self._new_pack)
+ self.text_index.add_writable_index(self._new_pack.text_index,
+ self._new_pack)
+ self.signature_index.add_writable_index(self._new_pack.signature_index,
+ self._new_pack)
+
+ self.repo.inventories._index._add_callback = self.inventory_index.add_callback
+ self.repo.revisions._index._add_callback = self.revision_index.add_callback
+ self.repo.signatures._index._add_callback = self.signature_index.add_callback
+ self.repo.texts._index._add_callback = self.text_index.add_callback
+
+
+
+class GCPackRepository(KnitPackRepository):
+ """GC customisation of KnitPackRepository."""
+
+ def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
+ _serializer):
+ """Overridden to change pack collection class."""
+ KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
+ _commit_builder_class, _serializer)
+ # and now replace everything it did :)
+ index_transport = self._transport.clone('indices')
+ self._pack_collection = GCRepositoryPackCollection(self,
+ self._transport, index_transport,
+ self._transport.clone('upload'),
+ self._transport.clone('packs'))
+ self.inventories = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
+ add_callback=self._pack_collection.inventory_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.inventory_index.data_access)
+ self.revisions = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.revision_index.combined_index,
+ add_callback=self._pack_collection.revision_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.revision_index.data_access,
+ delta=False)
+ self.signatures = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.signature_index.combined_index,
+ add_callback=self._pack_collection.signature_index.add_callback,
+ parents=False, is_locked=self.is_locked),
+ access=self._pack_collection.signature_index.data_access,
+ delta=False)
+ self.texts = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.text_index.combined_index,
+ add_callback=self._pack_collection.text_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.text_index.data_access)
+ # True when the repository object is 'write locked' (as opposed to the
+ # physical lock only taken out around changes to the pack-names list.)
+ # Another way to represent this would be a decorator around the control
+ # files object that presents logical locks as physical ones - if this
+ # gets ugly consider that alternative design. RBC 20071011
+ self._write_lock_count = 0
+ self._transaction = None
+ # for tests
+ self._reconcile_does_inventory_gc = True
+ self._reconcile_fixes_text_parents = True
+ self._reconcile_backsup_inventory = False
+
+
+class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment0):
+ """A B+Tree index using pack repository."""
+
+ repository_class = GCPackRepository
+
+ def get_format_string(self):
+ """See RepositoryFormat.get_format_string()."""
+ return ("Bazaar development format - btree+gc "
+ "(needs bzr.dev from 1.6)\n")
+
+ def get_format_description(self):
+ """See RepositoryFormat.get_format_description()."""
+ return ("Development repository format - btree+groupcompress "
+ ", interoperates with pack-0.92\n")
+
+
+class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
+ """A B+Tree index using pack repository."""
+
+ repository_class = GCPackRepository
+
+ def get_format_string(self):
+ """See RepositoryFormat.get_format_string()."""
+ return ("Bazaar development format - btree+gc-rich-root "
+ "(needs bzr.dev from 1.6)\n")
+
+ def get_format_description(self):
+ """See RepositoryFormat.get_format_description()."""
+ return ("Development repository format - btree+groupcompress "
+ ", interoperates with rich-root-pack\n")
+
+
+class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment0Subtree):
+ """A B+Tree index using pack repository."""
+
+ repository_class = GCPackRepository
+
+ def get_format_string(self):
+ """See RepositoryFormat.get_format_string()."""
+ return ("Bazaar development format - btree+gc-subtrees "
+ "(needs bzr.dev from 1.6)\n")
+
+ def get_format_description(self):
+ """See RepositoryFormat.get_format_description()."""
+ return ("Development repository format - btree+groupcompress "
+ ", interoperates with pack-0.92-subtrees\n")
+
+
+def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
+    """Report InterPackRepo as unusable when either side is a GC format."""
+    fmts = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
+        RepositoryFormatPackGCSubtrees)
+    if isinstance(source._format, fmts) or isinstance(target._format, fmts):
+        return False
+    return orig_method(source, target)
+
+
+InterPackRepo.is_compatible = staticmethod(pack_incompatible)
=== added file 'tests/test_repofmt.py'
--- a/tests/test_repofmt.py 1970-01-01 00:00:00 +0000
+++ b/tests/test_repofmt.py 2008-07-15 12:45:49 +0000
@@ -0,0 +1,41 @@
+# groupcompress, a bzr plugin providing improved disk utilisation
+# Copyright (C) 2008 Canonical Limited.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""Tests for btree based repository operations."""
+
+import pprint
+import random
+import zlib
+
+from bzrlib import index, tests
+from bzrlib import errors as bzrerrors
+from bzrlib import ui
+from bzrlib.plugins import index2
+from bzrlib.plugins.index2 import btree_index, errors, repofmt
+from bzrlib.repository import Repository
+from bzrlib.repofmt.knitrepo import (
+ RepositoryFormatKnit3,
+ )
+from bzrlib.repofmt.pack_repo import (
+ RepositoryFormatKnitPack4,
+ )
+from bzrlib.tests import (
+ TestCaseWithTransport,
+ )
+from bzrlib.transport import get_transport
+
+
More information about the bazaar-commits
mailing list