Rev 2832: Move pack rename-into-place into NewPack.finish and document hash-collision cases somewhat better. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Tue Oct 16 08:10:13 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2832
revision-id: robertc at robertcollins.net-20071016071003-xbsuqdh3inxt3ks1
parent: robertc at robertcollins.net-20071016062834-ugk0v4877wniqwtj
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Tue 2007-10-16 17:10:03 +1000
message:
  Move pack rename-into-place into NewPack.finish and document hash-collision cases somewhat better.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-10-16 06:28:34 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-10-16 07:10:03 +0000
@@ -191,7 +191,8 @@
         :param index_transport: A writable transport for the pack's indices to
             be written to when the pack is finished.
         :param pack_transport: A writable transport for the pack to be renamed
-            to when the upload is complete.
+            to when the upload is complete. This *must* be the same as
+            upload_transport.clone('../packs').
         :param upload_suffix: An optional suffix to be given to any temporary
             files created during the pack creation. e.g '.autopack'
         """
@@ -285,6 +286,8 @@
             self._write_data('', flush=True)
         self.name = self._hash.hexdigest()
         # write indices
+        # XXX: should rename each index too rather than just uploading blind
+        # under the chosen name.
         self.index_sizes = [None, None, None, None]
         self._write_index('revision', self.revision_index, 'revision')
         self._write_index('inventory', self.inventory_index, 'inventory')
@@ -292,6 +295,19 @@
         self._write_index('signature', self.signature_index,
             'revision signatures')
         self.write_stream.close()
+        # Note that this will clobber an existing pack with the same name,
+        # without checking for hash collisions. While this is undesirable this
+        # is something that can be rectified in a subsequent release. One way
+        # to rectify it may be to leave the pack at the original name, writing
+        # its pack-names entry as something like 'HASH: index-sizes
+        # temporary-name'. Allocate that and check for collisions, if it is
+        # collision free then rename it into place. If clients know this scheme
+        # they can handle missing-file errors by:
+        #  - try for HASH.pack
+        #  - try for temporary-name
+        #  - refresh the pack-list to see if the pack is now absent
+        self.upload_transport.rename(self.random_name,
+                '../packs/' + self.name + '.pack')
 
     def make_index(self, index_type):
         """Construct a GraphIndex object for this packs index 'index_type'."""
@@ -659,26 +675,20 @@
         if not new_pack.data_inserted():
             new_pack.abort()
             return None
-        # finish the pack
         new_pack.finish()
-        # add to the repository
         self.allocate(new_pack)
-        # rename into place. XXX: should rename each index too rather than just
-        # uploading blind under the chosen name.
-        self._upload_transport.rename(new_pack.random_name, '../packs/' + new_pack.name + '.pack')
-        result = new_pack
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
                 time.ctime(), self._upload_transport.base, new_pack.random_name,
-                result.pack_transport, result.name,
+                new_pack.pack_transport, new_pack.name,
                 time.time() - new_pack.start_time)
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: finished: %s%s t+%6.3fs',
                 time.ctime(), self._upload_transport.base, new_pack.random_name,
                 time.time() - new_pack.start_time)
-        return result
+        return new_pack
 
     def _execute_pack_operations(self, pack_operations):
         """Execute a series of pack operations.
@@ -903,7 +913,10 @@
         """
         self.ensure_loaded()
         if a_new_pack.name in self._names:
-            raise errors.DuplicateKey(a_new_pack.name)
+            # a collision with the packs we know about (not the only possible
+            # collision, see NewPack.finish() for some discussion). Remove our
+            # prior reference to it.
+            self._remove_pack_by_name(a_new_pack.name)
         self._names[a_new_pack.name] = tuple(a_new_pack.index_sizes)
         self.add_pack_to_memory(a_new_pack)
 
@@ -1141,14 +1154,6 @@
     def _commit_write_group(self):
         if self._new_pack.data_inserted():
             self._new_pack.finish()
-            self._upload_transport.rename(self.repo._open_pack_tuple[1],
-                '../packs/' + self._new_pack.name + '.pack')
-            # If this fails, its a hash collision. We should:
-            # - determine if its a collision or
-            # - the same content or
-            # - the existing name is not the actual hash - e.g.
-            #   its a deliberate attack or data corruption has
-            #   occuring during the write of that file.
             self.allocate(self._new_pack)
             self.repo._open_pack_tuple = None
             self._new_pack = None



More information about the bazaar-commits mailing list