Rev 2744: Record the size of the index files in the pack-names index, in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Tue Aug 28 03:18:23 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2744
revision-id: robertc at robertcollins.net-20070828021813-4p3w6xklwghhzwg9
parent: robertc at robertcollins.net-20070827042539-g0pv3c8wltzju47e
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Tue 2007-08-28 12:18:13 +1000
message:
  Record the size of the index files in the pack-names index.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-08-24 22:36:01 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-08-28 02:18:13 +0000
@@ -316,50 +316,56 @@
             )):
             self.repo._upload_transport.delete(random_name)
             return None
-        # add to names
-        self.allocate(new_name)
-        # rename into place
-        write_stream.close()
-        self.repo._upload_transport.rename(random_name, '../packs/' + new_name + '.pack')
         result = Pack()
         result.name = new_name
         result.transport = self.repo._upload_transport.clone('../packs/')
-        if 'fetch' in debug.debug_flags:
-            # XXX: size might be interesting?
-            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
-                time.ctime(), self.repo._upload_transport.base, random_name,
-                result.transport, result.name,
-                time.time() - start_time)
         # write indices
         index_transport = self.repo._upload_transport.clone('../indices')
         rev_index_name = self.repo._revision_store.name_to_revision_index_name(new_name)
-        index_transport.put_file(rev_index_name, revision_index.finish())
+        revision_index_length = index_transport.put_file(rev_index_name,
+            revision_index.finish())
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: wrote revision index: %s%s t+%6.3fs',
                 time.ctime(), self.repo._upload_transport.base, random_name,
                 time.time() - start_time)
         inv_index_name = self.repo._inv_thunk.name_to_inv_index_name(new_name)
-        index_transport.put_file(inv_index_name, inv_index.finish())
+        inventory_index_length = index_transport.put_file(inv_index_name,
+            inv_index.finish())
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: wrote inventory index: %s%s t+%6.3fs',
                 time.ctime(), self.repo._upload_transport.base, random_name,
                 time.time() - start_time)
         text_index_name = self.repo.weave_store.name_to_text_index_name(new_name)
-        index_transport.put_file(text_index_name, text_index.finish())
+        text_index_length = index_transport.put_file(text_index_name,
+            text_index.finish())
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: wrote file texts index: %s%s t+%6.3fs',
                 time.ctime(), self.repo._upload_transport.base, random_name,
                 time.time() - start_time)
         signature_index_name = self.repo._revision_store.name_to_signature_index_name(new_name)
-        index_transport.put_file(signature_index_name, signature_index.finish())
+        signature_index_length = index_transport.put_file(signature_index_name,
+            signature_index.finish())
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
             mutter('%s: create_pack: wrote revision signatures index: %s%s t+%6.3fs',
                 time.ctime(), self.repo._upload_transport.base, random_name,
                 time.time() - start_time)
+        # add to names
+        self.allocate(new_name, revision_index_length, inventory_index_length,
+            text_index_length, signature_index_length)
+        # rename into place. XXX: should rename each index too rather than just
+        # uploading blind under the chosen name.
+        write_stream.close()
+        self.repo._upload_transport.rename(random_name, '../packs/' + new_name + '.pack')
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
+                time.ctime(), self.repo._upload_transport.base, random_name,
+                result.transport, result.name,
+                time.time() - start_time)
         result.revision_index = revision_index
         result.inventory_index = inv_index
         result.text_index = text_index
@@ -384,7 +390,7 @@
             # have a progress bar?
             self._combine_packs(pack_details)
             for pack_detail in pack_details:
-                self._remove_pack_name(pack_detail[1])
+                self._remove_pack_by_name(pack_detail[1])
         # record the newly available packs and stop advertising the old
         # packs
         self.save()
@@ -571,14 +577,31 @@
 
     def ensure_loaded(self):
         if self._names is None:
-            self._names = set(node[1][0] for node in 
-                GraphIndex(self.transport, 'pack-names').iter_all_entries())
-
-    def allocate(self, name):
+            self._names = {}
+            for index, key, value in \
+                GraphIndex(self.transport, 'pack-names').iter_all_entries():
+                name = key[0]
+                sizes = [int(digits) for digits in value.split(' ')]
+                self._names[name] = sizes
+
+    def allocate(self, name, revision_index_length, inventory_index_length,
+        text_index_length, signature_index_length):
+        """Allocate name in the list of packs.
+
+        :param name: The basename - e.g. the md5 hash hexdigest.
+        :param revision_index_length: The length of the revision index in
+            bytes.
+        :param inventory_index_length: The length of the inventory index in
+            bytes.
+        :param text_index_length: The length of the text index in bytes.
+        :param signature_index_length: The length of the signature index in
+            bytes.
+        """
         self.ensure_loaded()
         if name in self._names:
             raise errors.DuplicateKey(name)
-        self._names.add(name)
+        self._names[name] = (revision_index_length, inventory_index_length,
+            text_index_length, signature_index_length)
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -596,7 +619,7 @@
 
     def names(self):
         """Provide an order to the underlying names."""
-        return sorted(self._names)
+        return sorted(self._names.keys())
 
     def _obsolete_packs(self, pack_details):
         """Move a number of packs which have been obsoleted out of the way.
@@ -636,9 +659,9 @@
                 result.append(size)
         return list(reversed(result))
 
-    def _remove_pack_name(self, name):
+    def _remove_pack_by_name(self, name):
         # strip .pack
-        self._names.remove(name[:-5])
+        self._names.pop(name[:-5])
 
     def reset(self):
         self._names = None
@@ -716,8 +739,8 @@
 
     def save(self):
         builder = GraphIndexBuilder()
-        for name in self._names:
-            builder.add_node((name, ), '')
+        for name, sizes in self._names.iteritems():
+            builder.add_node((name, ), ' '.join(str(size) for size in sizes))
         self.transport.put_file('pack-names', builder.finish())
 
     def setup(self):
@@ -836,12 +859,12 @@
             return True
         return False
 
-    def flush(self, new_name):
+    def flush(self, new_name, new_pack):
         """Write out pending indices."""
         # write a revision index (might be empty)
         new_index_name = self.name_to_revision_index_name(new_name)
-        self.transport.put_file(new_index_name,
-            self.repo._revision_write_index.finish())
+        new_pack.revision_index_length = self.transport.put_file(
+            new_index_name, self.repo._revision_write_index.finish())
         if self.repo._revision_all_indices is None:
             # create a pack map for the autopack code - XXX finish
             # making a clear managed list of packs, indices and use
@@ -862,8 +885,8 @@
 
         # write a signatures index (might be empty)
         new_index_name = self.name_to_signature_index_name(new_name)
-        self.transport.put_file(new_index_name,
-            self.repo._signature_write_index.finish())
+        new_pack.signature_index_length = self.transport.put_file(
+            new_index_name, self.repo._signature_write_index.finish())
         self.repo._signature_write_index = None
         if self.repo._signature_all_indices is not None:
             # sigatures 'knit' accessed : update it.
@@ -970,12 +993,12 @@
             indices.insert(0, self.repo._text_write_index)
         self.repo._text_all_indices = CombinedGraphIndex(indices)
 
-    def flush(self, new_name):
+    def flush(self, new_name, new_pack):
         """Write the index out to new_name."""
         # write a revision index (might be empty)
         new_index_name = self.name_to_text_index_name(new_name)
-        self.transport.put_file(new_index_name,
-            self.repo._text_write_index.finish())
+        new_pack.text_index_length = self.transport.put_file(
+            new_index_name, self.repo._text_write_index.finish())
         self.repo._text_write_index = None
         if self.repo._text_all_indices is not None:
             # text 'knits' have been used, replace the mutated memory index
@@ -1085,12 +1108,12 @@
         self.repo._inv_all_indices = CombinedGraphIndex(indices)
         self.repo._inv_pack_map = pack_map
 
-    def flush(self, new_name):
+    def flush(self, new_name, new_pack):
         """Write the index out to new_name."""
         # write an index (might be empty)
         new_index_name = self.name_to_inv_index_name(new_name)
-        self.transport.put_file(new_index_name,
-            self.repo._inv_write_index.finish())
+        new_pack.inventory_index_length = self.transport.put_file(
+            new_index_name, self.repo._inv_write_index.finish())
         self.repo._inv_write_index = None
         if self.repo._inv_all_indices is not None:
             # inv 'knit' has been used, replace the mutated memory index
@@ -1216,12 +1239,6 @@
         if data_inserted:
             self._open_pack_writer.end()
             new_name = self._open_pack_hash.hexdigest()
-            # If this fails, its a hash collision. We should:
-            # - determine if its a collision or
-            # - the same content or
-            # - the existing name is not the actual hash - e.g.
-            #   its a deliberate attack or data corruption has
-            #   occuring during the write of that file.
             new_pack = Pack()
             new_pack.name = new_name
             new_pack.transport = self._upload_transport.clone('../packs/')
@@ -1230,13 +1247,21 @@
             # new_pack.inventory_index = 
             # new_pack.text_index = 
             # new_pack.signature_index = 
-            self._packs.allocate(new_name)
-            self.weave_store.flush(new_name)
-            self._inv_thunk.flush(new_name)
-            self._revision_store.flush(new_name)
+            self.weave_store.flush(new_name, new_pack)
+            self._inv_thunk.flush(new_name, new_pack)
+            self._revision_store.flush(new_name, new_pack)
             self._write_stream.close()
             self._upload_transport.rename(self._open_pack_tuple[1],
                 '../packs/' + new_name + '.pack')
+            # If this fails, it's a hash collision. We should:
+            # - determine if it's a collision or
+            # - the same content or
+            # - the existing name is not the actual hash - e.g.
+            #   it's a deliberate attack, or data corruption
+            #   occurred during the write of that file.
+            self._packs.allocate(new_name, new_pack.revision_index_length,
+                new_pack.inventory_index_length, new_pack.text_index_length,
+                new_pack.signature_index_length)
             self._open_pack_tuple = None
             if not self._packs.autopack():
                 self._packs.save()
@@ -1343,19 +1368,29 @@
         if data_inserted:
             self._open_pack_writer.end()
             new_name = self._open_pack_hash.hexdigest()
+            new_pack = Pack()
+            new_pack.name = new_name
+            new_pack.transport = self._upload_transport.clone('../packs/')
+            # To populate:
+            # new_pack.revision_index = 
+            # new_pack.inventory_index = 
+            # new_pack.text_index = 
+            # new_pack.signature_index = 
+            self.weave_store.flush(new_name, new_pack)
+            self._inv_thunk.flush(new_name, new_pack)
+            self._revision_store.flush(new_name, new_pack)
+            self._write_stream.close()
+            self._upload_transport.rename(self._open_pack_tuple[1],
+                '../packs/' + new_name + '.pack')
             # If this fails, its a hash collision. We should:
             # - determine if its a collision or
             # - the same content or
             # - the existing name is not the actual hash - e.g.
             #   its a deliberate attack or data corruption has
             #   occuring during the write of that file.
-            self._packs.allocate(new_name)
-            self.weave_store.flush(new_name)
-            self._inv_thunk.flush(new_name)
-            self._revision_store.flush(new_name)
-            self._write_stream.close()
-            self._upload_transport.rename(self._open_pack_tuple[1],
-                '../packs/' + new_name + '.pack')
+            self._packs.allocate(new_name, new_pack.revision_index_length,
+                new_pack.inventory_index_length, new_pack.text_index_length,
+                new_pack.signature_index_length)
             self._open_pack_tuple = None
             if not self._packs.autopack():
                 self._packs.save()

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2007-08-12 22:22:13 +0000
+++ b/bzrlib/tests/test_repository.py	2007-08-28 02:18:13 +0000
@@ -591,57 +591,23 @@
         self.assertFalse(t.has('no-working-trees'))
         self.check_databases(t)
 
-    def test_add_revision_creates_dot_rix(self):
-        """Adding a revision makes a 0.rix (Revision IndeX) file."""
-        format = self.get_format()
-        tree = self.make_branch_and_tree('.', format=format)
-        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
-        self.assertEqual([],
-            list(GraphIndex(trans, 'pack-names').iter_all_entries()))
-        tree.commit('foobarbaz')
-        index = GraphIndex(trans, 'pack-names')
-        self.assertEqual(1, len(list(index.iter_all_entries())))
-        name = list(index.iter_all_entries())[0][1][0]
-        self.assertTrue(trans.has('indices/%s.rix' % name))
-
-    def test_add_revision_creates_dot_six(self):
-        """Adding a revision makes a 0.six (Signature IndeX) file."""
-        format = self.get_format()
-        tree = self.make_branch_and_tree('.', format=format)
-        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
-        self.assertEqual([],
-            list(GraphIndex(trans, 'pack-names').iter_all_entries()))
-        tree.commit('foobarbaz')
-        index = GraphIndex(trans, 'pack-names')
-        self.assertEqual(1, len(list(index.iter_all_entries())))
-        name = list(index.iter_all_entries())[0][1][0]
-        self.assertTrue(trans.has('indices/%s.six' % name))
-
-    def test_add_revision_creates_dot_iix(self):
-        """Adding a revision makes a 0.iix (Inventory IndeX) file."""
-        format = self.get_format()
-        tree = self.make_branch_and_tree('.', format=format)
-        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
-        self.assertEqual([],
-            list(GraphIndex(trans, 'pack-names').iter_all_entries()))
-        tree.commit('foobarbaz')
-        index = GraphIndex(trans, 'pack-names')
-        self.assertEqual(1, len(list(index.iter_all_entries())))
-        name = list(index.iter_all_entries())[0][1][0]
-        self.assertTrue(trans.has('indices/%s.iix' % name))
-
-    def test_add_revision_creates_dot_tix(self):
-        """Adding a revision makes a 0.tix (Text IndeX) file."""
-        format = self.get_format()
-        tree = self.make_branch_and_tree('.', format=format)
-        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
-        self.assertEqual([],
-            list(GraphIndex(trans, 'pack-names').iter_all_entries()))
-        tree.commit('foobarbaz')
-        index = GraphIndex(trans, 'pack-names')
-        self.assertEqual(1, len(list(index.iter_all_entries())))
-        name = list(index.iter_all_entries())[0][1][0]
-        self.assertTrue(trans.has('indices/%s.tix' % name))
+    def test_add_revision_creates_pack_indices(self):
+        format = self.get_format()
+        tree = self.make_branch_and_tree('.', format=format)
+        trans = tree.branch.repository.bzrdir.get_repository_transport(None)
+        self.assertEqual([],
+            list(GraphIndex(trans, 'pack-names').iter_all_entries()))
+        tree.commit('foobarbaz')
+        index = GraphIndex(trans, 'pack-names')
+        self.assertEqual(1, len(list(index.iter_all_entries())))
+        node = list(index.iter_all_entries())[0]
+        name = node[1][0]
+        # the pack sizes should be listed in the index
+        pack_value = node[2]
+        sizes = [int(digits) for digits in pack_value.split(' ')]
+        for size, suffix in zip(sizes, ['.rix', '.iix', '.tix', '.six']):
+            stat = trans.stat('indices/%s%s' % (name, suffix))
+            self.assertEqual(size, stat.st_size)
 
     def test_pulling_nothing_leads_to_no_new_names(self):
         format = self.get_format()



More information about the bazaar-commits mailing list