Rev 7: Add --btree-plain repository format for testing. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
Robert Collins
robertc at robertcollins.net
Tue Jul 1 12:37:43 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
------------------------------------------------------------
revno: 7
revision-id: robertc at robertcollins.net-20080701113743-n0pc6x42arqd4zhd
parent: robertc at robertcollins.net-20080701095116-ge9gvqt09zie8x42
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Tue 2008-07-01 21:37:43 +1000
message:
Add --btree-plain repository format for testing.
added:
repofmt.py repofmt.py-20080701113732-m1iu3n94ikbxdelb-1
modified:
__init__.py __init__.py-20080624222253-p0x5f92uyh5hw734-5
btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
=== modified file '__init__.py'
--- a/__init__.py 2008-06-30 22:10:08 +0000
+++ b/__init__.py 2008-07-01 11:37:43 +0000
@@ -21,8 +21,33 @@
=============
See `pydoc bzrlib.plugins.index2.index`.
+
+This plugin also provides an experimental repository format making use of the
+new index classes. Use `bzr init --btree-plain` to test it. Warning: if it
+eats your data, you get to do the stomach searching.
"""
+from bzrlib.bzrdir import format_registry
+format_registry.register_metadir('btree-plain',
+ 'bzrlib.plugins.index2.repofmt.RepositoryFormatPackBTreePlain',
+ help='Trivial rename of pack-0.92 with B+Tree based index. '
+ 'Please read '
+ 'http://doc.bazaar-vcs.org/latest/developers/development-repo.html '
+ 'before use.',
+ branch_format='bzrlib.branch.BzrBranchFormat6',
+ tree_format='bzrlib.workingtree.WorkingTreeFormat4',
+ hidden=False,
+ experimental=True,
+ )
+
+from bzrlib.repository import format_registry as repo_registry
+repo_registry.register_lazy(
+ 'Bazaar development format - btree (needs bzr.dev from 1.6)\n',
+ 'bzrlib.plugins.index2.repofmt',
+ 'RepositoryFormatPackBTreePlain',
+ )
+
+
def test_suite():
# Thunk across to load_tests for niceness with older bzr versions
from bzrlib.tests import TestLoader
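
For reference, a minimal sketch of exercising the alias registered above from
Python rather than via the command line (this assumes the plugin is loaded and
the 1.6-era bzrdir API; the target path is made up):

    from bzrlib import bzrdir

    # Resolve the 'btree-plain' alias through the same registry the plugin
    # registers into, then initialise a standalone branch with that format.
    format = bzrdir.format_registry.make_bzrdir('btree-plain')
    branch = bzrdir.BzrDir.create_branch_convenience('/tmp/btree-test',
        format=format)
    print branch.repository._format.get_format_string()
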
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-01 09:51:16 +0000
+++ b/btree_index.py 2008-07-01 11:37:43 +0000
@@ -363,6 +363,8 @@
return
if self._key_count is None:
self._cache_nodes([0])
+ if not self._key_count:
+ return
for key in keys:
# repeated single-bisection : 'make it work'
# FUTURE: look within the current node for the next key,
@@ -437,7 +439,7 @@
"""
signature = bytes[0:len(self._signature())]
if not signature == self._signature():
- raise bzrerrors.BadIndexFormatSignature(self._name, GraphIndex)
+ raise bzrerrors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
lines = bytes[len(self._signature()):].splitlines()
options_line = lines[0]
if not options_line.startswith(_OPTION_NODE_REFS):
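
The first hunk above guards iter_entries() against an index whose header says
it contains zero keys. Roughly the behaviour it protects, as a sketch assuming
the plugin's BTreeBuilder.finish()/BTreeGraphIndex API and bzrlib's in-memory
transport:

    from bzrlib.transport.memory import MemoryTransport
    from bzrlib.plugins.index2.btree_index import BTreeBuilder, BTreeGraphIndex

    # Serialise an index with no keys, then look something up in it; with the
    # guard in place this yields nothing instead of bisecting into a leaf node
    # that was never written.
    transport = MemoryTransport()
    size = transport.put_file('empty.idx',
        BTreeBuilder(reference_lists=0).finish())
    index = BTreeGraphIndex(transport, 'empty.idx', size)
    assert list(index.iter_entries([('absent-key',)])) == []
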
=== added file 'repofmt.py'
--- a/repofmt.py 1970-01-01 00:00:00 +0000
+++ b/repofmt.py 2008-07-01 11:37:43 +0000
@@ -0,0 +1,394 @@
+# index2, a bzr plugin providing experimental index types.
+# Copyright (C) 2008 Canonical Limited.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""Repostory formats using B+Tree indices.
+
+Currently only a plain format is present, this is enough to do basic testing
+on.
+"""
+
+import md5
+import time
+
+from bzrlib import debug, errors, pack, trace
+from bzrlib.trace import mutter
+from bzrlib.plugins.index2.btree_index import BTreeGraphIndex, BTreeBuilder
+from bzrlib.knit import (
+ _KnitGraphIndex,
+ KnitVersionedFiles,
+ )
+from bzrlib.osutils import rand_chars
+from bzrlib.repofmt.pack_repo import (
+ Pack,
+ NewPack,
+ KnitPackRepository,
+ RepositoryPackCollection,
+ RepositoryFormatPackDevelopment0,
+ Packer,
+ ReconcilePacker,
+ OptimisingPacker,
+ )
+
+
+def open_pack(self):
+ return self._pack_collection.pack_factory(self._pack_collection._upload_transport,
+ self._pack_collection._index_transport,
+ self._pack_collection._pack_transport, upload_suffix=self.suffix,
+ file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
+
+Packer.open_pack = open_pack
+
+
+class InMemoryBTree(BTreeBuilder):
+ """TODO - make InMemoryGraphIndex more reusable.
+
+ (not that all-in-mem is a good idea).
+ """
+
+ def add_nodes(self, nodes):
+ """Add nodes to the index.
+
+        :param nodes: An iterable of (key, value, node_refs) entries to add.
+ """
+ if self.reference_lists:
+ for (key, value, node_refs) in nodes:
+ self.add_node(key, value, node_refs)
+ else:
+ for (key, value) in nodes:
+ self.add_node(key, value)
+
+ def iter_all_entries(self):
+        """Iterate over all keys within the index.
+
+        :return: An iterable of (index, key, value, reference_lists). There is no
+ defined order for the result iteration - it will be in the most
+ efficient order for the index (in this case dictionary hash order).
+ """
+ if 'evil' in debug.debug_flags:
+ trace.mutter_callsite(3,
+ "iter_all_entries scales with size of history.")
+ if self.reference_lists:
+ for key, (absent, references, value) in self._nodes.iteritems():
+ if not absent:
+ yield self, key, value, references
+ else:
+ for key, (absent, references, value) in self._nodes.iteritems():
+ if not absent:
+ yield self, key, value
+
+ def iter_entries(self, keys):
+ """Iterate over keys within the index.
+
+ :param keys: An iterable providing the keys to be retrieved.
+ :return: An iterable of (index, key, value, reference_lists). There is no
+ defined order for the result iteration - it will be in the most
+ efficient order for the index (keys iteration order in this case).
+ """
+ keys = set(keys)
+ if self.reference_lists:
+ for key in keys.intersection(self._keys):
+ node = self._nodes[key]
+ if not node[0]:
+ yield self, key, node[2], node[1]
+ else:
+ for key in keys.intersection(self._keys):
+ node = self._nodes[key]
+ if not node[0]:
+ yield self, key, node[2]
+
+ def iter_entries_prefix(self, keys):
+ """Iterate over keys within the index using prefix matching.
+
+ Prefix matching is applied within the tuple of a key, not to within
+ the bytestring of each key element. e.g. if you have the keys ('foo',
+ 'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
+ only the former key is returned.
+
+ :param keys: An iterable providing the key prefixes to be retrieved.
+ Each key prefix takes the form of a tuple the length of a key, but
+ with the last N elements 'None' rather than a regular bytestring.
+ The first element cannot be 'None'.
+ :return: An iterable as per iter_all_entries, but restricted to the
+ keys with a matching prefix to those supplied. No additional keys
+ will be returned, and every match that is in the index will be
+ returned.
+ """
+        # XXX: Too much duplication with the GraphIndex class; consider finding
+ # a good place to pull out the actual common logic.
+ keys = set(keys)
+ if not keys:
+ return
+ if self._key_length == 1:
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise errors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise errors.BadIndexKey(key)
+ node = self._nodes[key]
+ if node[0]:
+ continue
+ if self.reference_lists:
+ yield self, key, node[2], node[1]
+ else:
+ yield self, key, node[2]
+ return
+ for key in keys:
+ # sanity check
+ if key[0] is None:
+ raise errors.BadIndexKey(key)
+ if len(key) != self._key_length:
+ raise errors.BadIndexKey(key)
+ # find what it refers to:
+ key_dict = self._nodes_by_key
+ elements = list(key)
+ # find the subdict to return
+ try:
+ while len(elements) and elements[0] is not None:
+ key_dict = key_dict[elements[0]]
+ elements.pop(0)
+ except KeyError:
+                # a non-existent lookup.
+ continue
+ if len(elements):
+ dicts = [key_dict]
+ while dicts:
+ key_dict = dicts.pop(-1)
+ # can't be empty or would not exist
+ item, value = key_dict.iteritems().next()
+ if type(value) == dict:
+ # push keys
+ dicts.extend(key_dict.itervalues())
+ else:
+ # yield keys
+ for value in key_dict.itervalues():
+ yield (self, ) + value
+ else:
+ yield (self, ) + key_dict
+
+ def key_count(self):
+ """Return an estimate of the number of keys in this index.
+
+ For InMemoryGraphIndex the estimate is exact.
+ """
+ return len(self._keys)
+
+ def validate(self):
+        """In-memory indexes have no known corruption at the moment."""
+
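
A quick usage sketch of the builder above (illustrative only, the keys and
values are made up): nodes go in as (key_tuple, value, reference_lists) and
come back from iter_entries() as (index, key, value, references):

    index = InMemoryBTree(reference_lists=1)
    index.add_nodes([
        (('rev-1',), 'value-1', ((),)),
        (('rev-2',), 'value-2', ((('rev-1',),),)),
        ])
    for idx, key, value, refs in index.iter_entries([('rev-2',)]):
        # key == ('rev-2',), value == 'value-2', refs == ((('rev-1',),),)
        print key, value, refs
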
+
+class BTreePack(NewPack):
+
+ def __init__(self, upload_transport, index_transport, pack_transport,
+ upload_suffix='', file_mode=None):
+ """Create a NewPack instance.
+
+ :param upload_transport: A writable transport for the pack to be
+ incrementally uploaded to.
+ :param index_transport: A writable transport for the pack's indices to
+ be written to when the pack is finished.
+ :param pack_transport: A writable transport for the pack to be renamed
+ to when the upload is complete. This *must* be the same as
+ upload_transport.clone('../packs').
+ :param upload_suffix: An optional suffix to be given to any temporary
+            files created during the pack creation. e.g. '.autopack'
+ :param file_mode: An optional file mode to create the new files with.
+ """
+ # The relative locations of the packs are constrained, but all are
+ # passed in because the caller has them, so as to avoid object churn.
+ Pack.__init__(self,
+ # Revisions: parents list, no text compression.
+ InMemoryBTree(reference_lists=1),
+ # Inventory: We want to map compression only, but currently the
+ # knit code hasn't been updated enough to understand that, so we
+ # have a regular 2-list index giving parents and compression
+ # source.
+ InMemoryBTree(reference_lists=2),
+ # Texts: compression and per file graph, for all fileids - so two
+ # reference lists and two elements in the key tuple.
+ InMemoryBTree(reference_lists=2, key_elements=2),
+ # Signatures: Just blobs to store, no compression, no parents
+ # listing.
+ InMemoryBTree(reference_lists=0),
+ )
+ # where should the new pack be opened
+ self.upload_transport = upload_transport
+ # where are indices written out to
+ self.index_transport = index_transport
+ # where is the pack renamed to when it is finished?
+ self.pack_transport = pack_transport
+ # What file mode to upload the pack and indices with.
+ self._file_mode = file_mode
+ # tracks the content written to the .pack file.
+ self._hash = md5.new()
+ # a four-tuple with the length in bytes of the indices, once the pack
+ # is finalised. (rev, inv, text, sigs)
+ self.index_sizes = None
+ # How much data to cache when writing packs. Note that this is not
+ # synchronised with reads, because it's not in the transport layer, so
+ # is not safe unless the client knows it won't be reading from the pack
+ # under creation.
+ self._cache_limit = 0
+ # the temporary pack file name.
+ self.random_name = rand_chars(20) + upload_suffix
+ # when was this pack started ?
+ self.start_time = time.time()
+ # open an output stream for the data added to the pack.
+ self.write_stream = self.upload_transport.open_write_stream(
+ self.random_name, mode=self._file_mode)
+ if 'pack' in debug.debug_flags:
+ mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
+ time.ctime(), self.upload_transport.base, self.random_name,
+ time.time() - self.start_time)
+ # A list of byte sequences to be written to the new pack, and the
+ # aggregate size of them. Stored as a list rather than separate
+ # variables so that the _write_data closure below can update them.
+ self._buffer = [[], 0]
+ # create a callable for adding data
+ #
+ # robertc says- this is a closure rather than a method on the object
+ # so that the variables are locals, and faster than accessing object
+ # members.
+ def _write_data(bytes, flush=False, _buffer=self._buffer,
+ _write=self.write_stream.write, _update=self._hash.update):
+ _buffer[0].append(bytes)
+ _buffer[1] += len(bytes)
+ # buffer cap
+ if _buffer[1] > self._cache_limit or flush:
+ bytes = ''.join(_buffer[0])
+ _write(bytes)
+ _update(bytes)
+ _buffer[:] = [[], 0]
+ # expose this on self, for the occasion when clients want to add data.
+ self._write_data = _write_data
+ # a pack writer object to serialise pack records.
+ self._writer = pack.ContainerWriter(self._write_data)
+ self._writer.begin()
+ # what state is the pack in? (open, finished, aborted)
+ self._state = 'open'
+
+ def _replace_index_with_readonly(self, index_type):
+ setattr(self, index_type + '_index',
+ BTreeGraphIndex(self.index_transport,
+ self.index_name(index_type, self.name),
+ self.index_sizes[self.index_offset(index_type)]))
+
+
+RepositoryPackCollection.pack_factory = NewPack
+
+class BTreeRepositoryPackCollection(RepositoryPackCollection):
+
+ pack_factory = BTreePack
+
+ def _make_index(self, name, suffix):
+ """Overridden to use BTreeGraphIndex objects."""
+ size_offset = self._suffix_offsets[suffix]
+ index_name = name + suffix
+ index_size = self._names[name][size_offset]
+ return BTreeGraphIndex(
+ self._index_transport, index_name, index_size)
+
+ def _start_write_group(self):
+ # Do not permit preparation for writing if we're not in a 'write lock'.
+ if not self.repo.is_write_locked():
+ raise errors.NotWriteLocked(self)
+ self._new_pack = self.pack_factory(self._upload_transport, self._index_transport,
+ self._pack_transport, upload_suffix='.pack',
+ file_mode=self.repo.bzrdir._get_file_mode())
+ # allow writing: queue writes to a new index
+ self.revision_index.add_writable_index(self._new_pack.revision_index,
+ self._new_pack)
+ self.inventory_index.add_writable_index(self._new_pack.inventory_index,
+ self._new_pack)
+ self.text_index.add_writable_index(self._new_pack.text_index,
+ self._new_pack)
+ self.signature_index.add_writable_index(self._new_pack.signature_index,
+ self._new_pack)
+
+ self.repo.inventories._index._add_callback = self.inventory_index.add_callback
+ self.repo.revisions._index._add_callback = self.revision_index.add_callback
+ self.repo.signatures._index._add_callback = self.signature_index.add_callback
+ self.repo.texts._index._add_callback = self.text_index.add_callback
+
+
+
+class BTreePackRepository(KnitPackRepository):
+ """BTree customisation of KnitPackRepository."""
+
+ def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
+ _serializer):
+ """Overridden to change pack collection class."""
+ KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
+ _commit_builder_class, _serializer)
+ # and now replace everything it did :)
+ index_transport = self._transport.clone('indices')
+ self._pack_collection = BTreeRepositoryPackCollection(self,
+ self._transport, index_transport,
+ self._transport.clone('upload'),
+ self._transport.clone('packs'))
+ self.inventories = KnitVersionedFiles(
+ _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
+ add_callback=self._pack_collection.inventory_index.add_callback,
+ deltas=True, parents=True, is_locked=self.is_locked),
+ data_access=self._pack_collection.inventory_index.data_access,
+ max_delta_chain=200)
+ self.revisions = KnitVersionedFiles(
+ _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
+ add_callback=self._pack_collection.revision_index.add_callback,
+ deltas=False, parents=True, is_locked=self.is_locked),
+ data_access=self._pack_collection.revision_index.data_access,
+ max_delta_chain=0)
+ self.signatures = KnitVersionedFiles(
+ _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
+ add_callback=self._pack_collection.signature_index.add_callback,
+ deltas=False, parents=False, is_locked=self.is_locked),
+ data_access=self._pack_collection.signature_index.data_access,
+ max_delta_chain=0)
+ self.texts = KnitVersionedFiles(
+ _KnitGraphIndex(self._pack_collection.text_index.combined_index,
+ add_callback=self._pack_collection.text_index.add_callback,
+ deltas=True, parents=True, is_locked=self.is_locked),
+ data_access=self._pack_collection.text_index.data_access,
+ max_delta_chain=200)
+ # True when the repository object is 'write locked' (as opposed to the
+ # physical lock only taken out around changes to the pack-names list.)
+ # Another way to represent this would be a decorator around the control
+ # files object that presents logical locks as physical ones - if this
+ # gets ugly consider that alternative design. RBC 20071011
+ self._write_lock_count = 0
+ self._transaction = None
+ # for tests
+ self._reconcile_does_inventory_gc = True
+ self._reconcile_fixes_text_parents = True
+ self._reconcile_backsup_inventory = False
+
+
+class RepositoryFormatPackBTreePlain(RepositoryFormatPackDevelopment0):
+ """A B+Tree index using pack repository."""
+
+ repository_class = BTreePackRepository
+
+ def get_format_string(self):
+ """See RepositoryFormat.get_format_string()."""
+ return ("Bazaar development format - btree "
+ "(needs bzr.dev from 1.6)\n")
+
+ def get_format_description(self):
+ """See RepositoryFormat.get_format_description()."""
+ return ("Development repository format - btree, interoperates with "
+ "pack-0.92\n")
+
+
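
To round this out, a sketch of how the lazy registration in __init__.py
resolves to this format: bzrlib reads the format string from
.bzr/repository/format and looks it up in the repository format registry,
importing repofmt.py on first use (assumes the plugin is loaded and the
1.6-era registry, which instantiates the format class on lookup):

    from bzrlib.repository import format_registry as repo_registry

    format = repo_registry.get(
        'Bazaar development format - btree (needs bzr.dev from 1.6)\n')
    print format.get_format_description()
    # Development repository format - btree, interoperates with pack-0.92
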