Rev 26: Working better --gc-plain-chk. in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk

Robert Collins robertc at robertcollins.net
Tue Feb 10 22:03:25 GMT 2009


At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk

------------------------------------------------------------
revno: 26
revision-id: robertc at robertcollins.net-20090210220324-r7353rfrbk0nd2iv
parent: robertc at robertcollins.net-20090210213517-c1lwf3rlcsz4oat5
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2009-02-11 09:03:24 +1100
message:
  Working better --gc-plain-chk.
=== modified file 'groupcompress.py'
--- a/groupcompress.py	2009-02-03 01:26:50 +0000
+++ b/groupcompress.py	2009-02-10 22:03:24 +0000
@@ -158,7 +158,9 @@
         """Compress lines with label key.
 
         :param key: A key tuple. It is stored in the output
-            for identification of the text during decompression.
+            for identification of the text during decompression. If the last
+            element is 'None' it is replaced with the sha1 of the text -
+            e.g. sha1:xxxxxxx.
         :param lines: The lines to be compressed. Must be split
             on \n, with the \n preserved.
         :param expected_sha: If non-None, the sha the lines are believed to
@@ -168,6 +170,8 @@
             the group output so far.
         """
         sha1 = sha_strings(lines)
+        if key[-1] is None:
+            key = key[:-1] + ('sha1:' + sha1,)
         label = '\x00'.join(key)
         # setup good encoding for trailing \n support.
         if not lines or lines[-1].endswith('\n'):
@@ -406,8 +410,9 @@
     def _check_add(self, key, lines, random_id, check_content):
         """check that version_id and lines are safe to add."""
         version_id = key[-1]
-        if contains_whitespace(version_id):
-            raise InvalidRevisionId(version_id, self)
+        if version_id is not None:
+            if contains_whitespace(version_id):
+                raise InvalidRevisionId(version_id, self)
         self.check_not_reserved_id(version_id)
         # TODO: If random_id==False and the key is already present, we should
         # probably check that the existing content is identical to what is
@@ -586,9 +591,13 @@
                     record.get_bytes_as(record.storage_kind))
             found_sha1, end_point = self._compressor.compress(record.key,
                 split_lines(bytes), record.sha1)
-            self._unadded_refs[record.key] = record.parents
+            if record.key[-1] is None:
+                key = record.key[:-1] + ('sha1:' + found_sha1,)
+            else:
+                key = record.key
+            self._unadded_refs[key] = record.parents
             yield found_sha1
-            keys_to_add.append((record.key, '%d %d' % (basis_end, end_point),
+            keys_to_add.append((key, '%d %d' % (basis_end, end_point),
                 (record.parents,)))
             basis_end = end_point
             if basis_end > 1024 * 1024 * 20:

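For readers skimming the hunk above: when a key tuple ends in None,
compress() now fills that slot with the text's own sha1, prefixed with
'sha1:', so the record still gets a unique label. A minimal sketch of that
normalisation, assuming bzrlib's sha_strings() is equivalent to hashlib's
sha1 over the joined lines (the helper name expand_key is illustrative, not
part of the plugin):

    from hashlib import sha1

    def expand_key(key, lines):
        # Mirror of the change to GroupCompressor.compress(): a trailing
        # None is replaced with a content-derived id.
        if key[-1] is None:
            digest = sha1(''.join(lines)).hexdigest()
            key = key[:-1] + ('sha1:' + digest,)
        return key

    # ('file-id', None) -> ('file-id', 'sha1:<hexdigest>'); fully specified
    # keys pass through unchanged, as in _check_add() above.
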
=== modified file 'repofmt.py'
--- a/repofmt.py	2009-02-10 21:35:17 +0000
+++ b/repofmt.py	2009-02-10 22:03:24 +0000
@@ -48,6 +48,7 @@
     )
 try:
     from bzrlib.repofmt.pack_repo import (
+    CHKInventoryRepository,
     RepositoryFormatPackDevelopment4,
     RepositoryFormatPackDevelopment4Subtree,
     )
@@ -277,6 +278,17 @@
                 add_callback=self._pack_collection.text_index.add_callback,
                 parents=True, is_locked=self.is_locked),
             access=self._pack_collection.text_index.data_access)
+        if chk_support and _format.supports_chks:
+            # No graph, no compression: references from chks are between
+            # different objects, not temporal versions of the same object;
+            # without some sort of temporal structure, knit compression will just fail.
+            self.chk_bytes = GroupCompressVersionedFiles(
+                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
+                    add_callback=self._pack_collection.chk_index.add_callback,
+                    parents=False, is_locked=self.is_locked),
+                access=self._pack_collection.chk_index.data_access)
+        else:
+            self.chk_bytes = None
         # True when the repository object is 'write locked' (as opposed to the
         # physical lock only taken out around changes to the pack-names list.) 
         # Another way to represent this would be a decorator around the control
@@ -290,6 +302,79 @@
         self._reconcile_backsup_inventory = False
 
 
+if chk_support:
+    class GCCHKPackRepository(CHKInventoryRepository):
+        """GC customisation of CHKInventoryRepository."""
+
+        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
+            _serializer):
+            """Overridden to change pack collection class."""
+            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
+                _commit_builder_class, _serializer)
+            # and now replace everything it did :)
+            index_transport = self._transport.clone('indices')
+            if chk_support:
+                self._pack_collection = GCRepositoryPackCollection(self,
+                    self._transport, index_transport,
+                    self._transport.clone('upload'),
+                    self._transport.clone('packs'),
+                    _format.index_builder_class,
+                    _format.index_class,
+                    use_chk_index=self._format.supports_chks,
+                    )
+            else:
+                self._pack_collection = GCRepositoryPackCollection(self,
+                    self._transport, index_transport,
+                    self._transport.clone('upload'),
+                    self._transport.clone('packs'),
+                    _format.index_builder_class,
+                    _format.index_class)
+            self.inventories = GroupCompressVersionedFiles(
+                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
+                    add_callback=self._pack_collection.inventory_index.add_callback,
+                    parents=True, is_locked=self.is_locked),
+                access=self._pack_collection.inventory_index.data_access)
+            self.revisions = GroupCompressVersionedFiles(
+                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
+                    add_callback=self._pack_collection.revision_index.add_callback,
+                    parents=True, is_locked=self.is_locked),
+                access=self._pack_collection.revision_index.data_access,
+                delta=False)
+            self.signatures = GroupCompressVersionedFiles(
+                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
+                    add_callback=self._pack_collection.signature_index.add_callback,
+                    parents=False, is_locked=self.is_locked),
+                access=self._pack_collection.signature_index.data_access,
+                delta=False)
+            self.texts = GroupCompressVersionedFiles(
+                _GCGraphIndex(self._pack_collection.text_index.combined_index,
+                    add_callback=self._pack_collection.text_index.add_callback,
+                    parents=True, is_locked=self.is_locked),
+                access=self._pack_collection.text_index.data_access)
+            if chk_support and _format.supports_chks:
+                # No graph, no compression: references from chks are between
+                # different objects, not temporal versions of the same object;
+                # without some sort of temporal structure, knit compression will just fail.
+                self.chk_bytes = GroupCompressVersionedFiles(
+                    _GCGraphIndex(self._pack_collection.chk_index.combined_index,
+                        add_callback=self._pack_collection.chk_index.add_callback,
+                        parents=False, is_locked=self.is_locked),
+                    access=self._pack_collection.chk_index.data_access)
+            else:
+                self.chk_bytes = None
+            # True when the repository object is 'write locked' (as opposed to the
+            # physical lock only taken out around changes to the pack-names list.) 
+            # Another way to represent this would be a decorator around the control
+            # files object that presents logical locks as physical ones - if this
+            # gets ugly consider that alternative design. RBC 20071011
+            self._write_lock_count = 0
+            self._transaction = None
+            # for tests
+            self._reconcile_does_inventory_gc = True
+            self._reconcile_fixes_text_parents = True
+            self._reconcile_backsup_inventory = False
+
+
 class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
     """A B+Tree index using pack repository."""
 
@@ -342,7 +427,7 @@
     class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment4):
         """A CHK+group compress pack repository."""
 
-        repository_class = GCPackRepository
+        repository_class = GCCHKPackRepository
 
         def get_format_string(self):
             """See RepositoryFormat.get_format_string()."""
@@ -359,8 +444,11 @@
 
 
 def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
+    """Be incompatible with the regular fetch code."""
     formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
         RepositoryFormatPackGCSubtrees)
+    if chk_support:
+        formats = (RepositoryFormatPackGCPlain,)
     if isinstance(source._format, formats) or isinstance(target._format, formats):
         return False
     else:

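The pack_incompatible() hook above exists so that the stock InterPackRepo
fetch optimisation refuses source/target pairs involving the group-compress
formats, letting bzr fall back to the generic fetch path; orig_method,
captured as a default argument, still answers for every other pair. How the
hook gets installed is not part of this hunk; a plausible wiring, stated
here as an assumption rather than what the plugin necessarily does, is
simply to rebind the staticmethod:

    # Hypothetical installation of the override; the actual plugin code may
    # differ in detail.
    InterPackRepo.is_compatible = staticmethod(pack_incompatible)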


