Rev 26: Working better --gc-plain-chk. in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
Robert Collins
robertc at robertcollins.net
Tue Feb 10 22:03:25 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
------------------------------------------------------------
revno: 26
revision-id: robertc at robertcollins.net-20090210220324-r7353rfrbk0nd2iv
parent: robertc at robertcollins.net-20090210213517-c1lwf3rlcsz4oat5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2009-02-11 09:03:24 +1100
message:
Working better --gc-plain-chk.
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2009-02-03 01:26:50 +0000
+++ b/groupcompress.py 2009-02-10 22:03:24 +0000
@@ -158,7 +158,9 @@
"""Compress lines with label key.
:param key: A key tuple. It is stored in the output
- for identification of the text during decompression.
+ for identification of the text during decompression. If the last
+ element is 'None' it is replaced with the sha1 of the text -
+ e.g. sha1:xxxxxxx.
:param lines: The lines to be compressed. Must be split
on \n, with the \n preserved.
:param expected_sha: If non-None, the sha the lines are believed to
@@ -168,6 +170,8 @@
the group output so far.
"""
sha1 = sha_strings(lines)
+ if key[-1] is None:
+ key = key[:-1] + ('sha1:' + sha1,)
label = '\x00'.join(key)
# setup good encoding for trailing \n support.
if not lines or lines[-1].endswith('\n'):
@@ -406,8 +410,9 @@
def _check_add(self, key, lines, random_id, check_content):
"""check that version_id and lines are safe to add."""
version_id = key[-1]
- if contains_whitespace(version_id):
- raise InvalidRevisionId(version_id, self)
+ if version_id is not None:
+ if contains_whitespace(version_id):
+ raise InvalidRevisionId(version_id, self)
self.check_not_reserved_id(version_id)
# TODO: If random_id==False and the key is already present, we should
# probably check that the existing content is identical to what is
@@ -586,9 +591,13 @@
record.get_bytes_as(record.storage_kind))
found_sha1, end_point = self._compressor.compress(record.key,
split_lines(bytes), record.sha1)
- self._unadded_refs[record.key] = record.parents
+ if record.key[-1] is None:
+ key = record.key[:-1] + ('sha1:' + found_sha1,)
+ else:
+ key = record.key
+ self._unadded_refs[key] = record.parents
yield found_sha1
- keys_to_add.append((record.key, '%d %d' % (basis_end, end_point),
+ keys_to_add.append((key, '%d %d' % (basis_end, end_point),
(record.parents,)))
basis_end = end_point
if basis_end > 1024 * 1024 * 20:
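
The two hunks above share one convention: a key whose last element is None has that element replaced by a 'sha1:<hexdigest>' token computed from the text being compressed. The sketch below illustrates just that convention in isolation; sha_strings is a stand-in for bzrlib.osutils.sha_strings and expand_key is a hypothetical helper, not code from the plugin.

from hashlib import sha1

def sha_strings(lines):
    # Stand-in for bzrlib.osutils.sha_strings: hex sha1 over the lines.
    s = sha1()
    for line in lines:
        s.update(line)
    return s.hexdigest()

def expand_key(key, lines):
    # Hypothetical helper mirroring compress()/_insert_record_stream() above:
    # a trailing None in the key tuple becomes 'sha1:<hexdigest>' of the text.
    if key[-1] is None:
        return key[:-1] + ('sha1:' + sha_strings(lines),)
    return key

# ('file-id', None) -> ('file-id', 'sha1:<hexdigest>'); fully specified keys
# pass through unchanged.
print(expand_key(('file-id', None), [b'hello world\n']))
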
=== modified file 'repofmt.py'
--- a/repofmt.py 2009-02-10 21:35:17 +0000
+++ b/repofmt.py 2009-02-10 22:03:24 +0000
@@ -48,6 +48,7 @@
)
try:
from bzrlib.repofmt.pack_repo import (
+ CHKInventoryRepository,
RepositoryFormatPackDevelopment4,
RepositoryFormatPackDevelopment4Subtree,
)
@@ -277,6 +278,17 @@
add_callback=self._pack_collection.text_index.add_callback,
parents=True, is_locked=self.is_locked),
access=self._pack_collection.text_index.data_access)
+ if chk_support and _format.supports_chks:
+ # No graph, no compression: references from chks are between
+ # different objects, not temporal versions of the same object; and
+ # without some sort of temporal structure, knit compression would just fail.
+ self.chk_bytes = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.chk_index.combined_index,
+ add_callback=self._pack_collection.chk_index.add_callback,
+ parents=False, is_locked=self.is_locked),
+ access=self._pack_collection.chk_index.data_access)
+ else:
+ self.chk_bytes = None
# True when the repository object is 'write locked' (as opposed to the
# physical lock only taken out around changes to the pack-names list.)
# Another way to represent this would be a decorator around the control
@@ -290,6 +302,79 @@
self._reconcile_backsup_inventory = False
+if chk_support:
+ class GCCHKPackRepository(CHKInventoryRepository):
+ """GC customisation of CHKInventoryRepository."""
+
+ def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
+ _serializer):
+ """Overridden to change pack collection class."""
+ KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
+ _commit_builder_class, _serializer)
+ # and now replace everything it did :)
+ index_transport = self._transport.clone('indices')
+ if chk_support:
+ self._pack_collection = GCRepositoryPackCollection(self,
+ self._transport, index_transport,
+ self._transport.clone('upload'),
+ self._transport.clone('packs'),
+ _format.index_builder_class,
+ _format.index_class,
+ use_chk_index=self._format.supports_chks,
+ )
+ else:
+ self._pack_collection = GCRepositoryPackCollection(self,
+ self._transport, index_transport,
+ self._transport.clone('upload'),
+ self._transport.clone('packs'),
+ _format.index_builder_class,
+ _format.index_class)
+ self.inventories = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
+ add_callback=self._pack_collection.inventory_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.inventory_index.data_access)
+ self.revisions = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.revision_index.combined_index,
+ add_callback=self._pack_collection.revision_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.revision_index.data_access,
+ delta=False)
+ self.signatures = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.signature_index.combined_index,
+ add_callback=self._pack_collection.signature_index.add_callback,
+ parents=False, is_locked=self.is_locked),
+ access=self._pack_collection.signature_index.data_access,
+ delta=False)
+ self.texts = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.text_index.combined_index,
+ add_callback=self._pack_collection.text_index.add_callback,
+ parents=True, is_locked=self.is_locked),
+ access=self._pack_collection.text_index.data_access)
+ if chk_support and _format.supports_chks:
+ # No graph, no compression: references from chks are between
+ # different objects, not temporal versions of the same object; and
+ # without some sort of temporal structure, knit compression would just fail.
+ self.chk_bytes = GroupCompressVersionedFiles(
+ _GCGraphIndex(self._pack_collection.chk_index.combined_index,
+ add_callback=self._pack_collection.chk_index.add_callback,
+ parents=False, is_locked=self.is_locked),
+ access=self._pack_collection.chk_index.data_access)
+ else:
+ self.chk_bytes = None
+ # True when the repository object is 'write locked' (as opposed to the
+ # physical lock only taken out around changes to the pack-names list.)
+ # Another way to represent this would be a decorator around the control
+ # files object that presents logical locks as physical ones - if this
+ # gets ugly consider that alternative design. RBC 20071011
+ self._write_lock_count = 0
+ self._transaction = None
+ # for tests
+ self._reconcile_does_inventory_gc = True
+ self._reconcile_fixes_text_parents = True
+ self._reconcile_backsup_inventory = False
+
+
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
"""A B+Tree index using pack repository."""
@@ -342,7 +427,7 @@
class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment4):
"""A CHK+group compress pack repository."""
- repository_class = GCPackRepository
+ repository_class = GCCHKPackRepository
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
@@ -359,8 +444,11 @@
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
+ """Be incompatible with the regular fetch code."""
formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
RepositoryFormatPackGCSubtrees)
+ if chk_support:
+ formats = (RepositoryFormatPackGCPlain,)
if isinstance(source._format, formats) or isinstance(target._format, formats):
return False
else:
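
The pack_incompatible hook above exists so that repositories in the gc formats never take the stock InterPackRepo fetch path and instead fall back to the generic fetch code. A minimal sketch of that wrapper pattern, assuming bzrlib's InterPackRepo.is_compatible(source, target) staticmethod, is below; make_incompatible and the commented usage are illustrative stand-ins rather than the plugin's exact code.

def make_incompatible(blocked_formats, orig_method):
    # Build a replacement for InterPackRepo.is_compatible that refuses to
    # match when either repository uses one of blocked_formats, so the
    # optimised pack-to-pack fetch is skipped for those formats.
    def is_compatible(source, target):
        if isinstance(source._format, blocked_formats):
            return False
        if isinstance(target._format, blocked_formats):
            return False
        return orig_method(source, target)
    return staticmethod(is_compatible)

# Illustrative usage, mirroring what repofmt.py does with pack_incompatible:
#   from bzrlib.repofmt.pack_repo import InterPackRepo
#   InterPackRepo.is_compatible = make_incompatible(
#       (RepositoryFormatPackGCPlain,), InterPackRepo.is_compatible)
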