Rev 4986: Merge the fix for bug #507557 in http://bazaar.launchpad.net/~jameinel/bzr/2.1.0rc1-dev

John Arbash Meinel john at arbash-meinel.com
Thu Jan 21 21:13:34 GMT 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.1.0rc1-dev

------------------------------------------------------------
revno: 4986 [merge]
revision-id: john at arbash-meinel.com-20100121211309-5archfb1gss3wb36
parent: john at arbash-meinel.com-20100121210954-rrwx9mp9p147vke2
parent: john at arbash-meinel.com-20100121192640-de9zrkpyfpimwhj1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1.0rc1-dev
timestamp: Thu 2010-01-21 15:13:09 -0600
message:
  Merge the fix for bug #507557
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'NEWS'
--- a/NEWS	2010-01-21 21:09:54 +0000
+++ b/NEWS	2010-01-21 21:13:09 +0000
@@ -87,6 +87,13 @@
 * ``bzr unshelve`` has improved messages about what it is doing.
   (Neil Martinsen-Burrell, #496917)
 
+* Concurrent autopacking is more resilient to already-renamed pack files.
+  If we find that a file we are about to obsolete is already obsoleted, we
+  do not try to rename it, and we leave the file in ``obsolete_packs``.
+  The code is also fault tolerant if a file goes missing, assuming that
+  another process already removed the file.
+  (John Arbash Meinel, Gareth White, #507557)
+
 * Fix "Too many concurrent requests" in reconcile when network connection
   fails.  (Andrew Bennetts, #503878)
 
@@ -100,9 +107,6 @@
 * FTP transports support Unicode paths by encoding/decoding them as utf8.
   (Vincent Ladeuil, #472161)
 
-* Give a clearer message if the lockdir disappears after being apparently
-  successfully taken.  (Martin Pool, #498378)
-
 * Listen to the SIGWINCH signal to update the terminal width.
   (Vincent Ladeuil, #316357)
 

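The NEWS entry above boils down to a simple discipline: a failed rename during
obsoletion is treated as evidence that a concurrent autopack won the race, not
as an error. A minimal standalone sketch of that pattern (plain os calls stand
in for bzrlib's transport API; obsolete_pack_file is a hypothetical helper,
not a bzrlib function):

    import errno
    import os

    def obsolete_pack_file(pack_dir, obsolete_dir, file_name):
        # Move file_name aside into obsolete_dir.  If a concurrent autopack
        # already renamed or deleted it, os.rename() fails with ENOENT; we
        # treat that as "someone else did the work" and skip the file.
        src = os.path.join(pack_dir, file_name)
        dst = os.path.join(obsolete_dir, file_name)
        try:
            os.rename(src, dst)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            # Another process obsoleted this pack first; nothing to do.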
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2010-01-04 02:25:11 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2010-01-21 21:13:09 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008, 2009 Canonical Ltd
+# Copyright (C) 2008, 2009, 2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -705,10 +705,11 @@
                 self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
         # packs
-        result = self._save_pack_names(clear_obsolete_packs=True)
-        # Move the old packs out of the way now they are no longer referenced.
-        for revision_count, packs in pack_operations:
-            self._obsolete_packs(packs)
+        to_be_obsoleted = []
+        for _, packs in pack_operations:
+            to_be_obsoleted.extend(packs)
+        result = self._save_pack_names(clear_obsolete_packs=True,
+                                       obsolete_packs=to_be_obsoleted)
         return result
 
 

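Both repository formats get the same reordering: rather than renaming the old
packs immediately after the pack-names file is saved, the packs to obsolete
are handed to _save_pack_names, which writes the new pack-names file first and
then renames only the packs that were not already sitting in obsolete_packs.
A condensed sketch of the new flow (simplified; _write_new_pack_names is a
hypothetical stand-in for the index-builder code in the patch below):

    def _save_pack_names_sketch(self, clear_obsolete_packs=False,
                                obsolete_packs=None):
        already_obsolete = []
        self.lock_names()
        try:
            # Commit the new pack-names file first, under the names lock.
            self._write_new_pack_names()
            if clear_obsolete_packs:
                preserve = set(o.name for o in obsolete_packs or [])
                # _clear_obsolete_packs now reports every pack it saw, so
                # we learn which of ours a concurrent process already moved.
                already_obsolete = self._clear_obsolete_packs(preserve)
        finally:
            self._unlock_names()
        # Rename only the packs nobody else has obsoleted yet.
        self._obsolete_packs([o for o in (obsolete_packs or [])
                              if o.name not in already_obsolete])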
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2010-01-17 15:07:39 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2010-01-21 21:13:09 +0000
@@ -1545,10 +1545,11 @@
                 self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
         # packs
-        result = self._save_pack_names(clear_obsolete_packs=True)
-        # Move the old packs out of the way now they are no longer referenced.
-        for revision_count, packs in pack_operations:
-            self._obsolete_packs(packs)
+        to_be_obsoleted = []
+        for _, packs in pack_operations:
+            to_be_obsoleted.extend(packs)
+        result = self._save_pack_names(clear_obsolete_packs=True,
+                                       obsolete_packs=to_be_obsoleted)
         return result
 
     def _flush_new_pack(self):
@@ -1788,8 +1789,13 @@
         :param return: None.
         """
         for pack in packs:
-            pack.pack_transport.rename(pack.file_name(),
-                '../obsolete_packs/' + pack.file_name())
+            try:
+                pack.pack_transport.rename(pack.file_name(),
+                    '../obsolete_packs/' + pack.file_name())
+            except (errors.PathError, errors.TransportError), e:
+                # TODO: Should these be warnings or mutters?
+                mutter("couldn't rename obsolete pack, skipping it:\n%s"
+                       % (e,))
             # TODO: Probably needs to know all possible indices for this pack
             # - or maybe list the directory and move all indices matching this
             # name whether we recognize it or not?
@@ -1797,8 +1803,12 @@
             if self.chk_index is not None:
                 suffixes.append('.cix')
             for suffix in suffixes:
-                self._index_transport.rename(pack.name + suffix,
-                    '../obsolete_packs/' + pack.name + suffix)
+                try:
+                    self._index_transport.rename(pack.name + suffix,
+                        '../obsolete_packs/' + pack.name + suffix)
+                except (errors.PathError, errors.TransportError), e:
+                    mutter("couldn't rename obsolete index, skipping it:\n%s"
+                           % (e,))
 
     def pack_distribution(self, total_revisions):
         """Generate a list of the number of revisions to put in each pack.
@@ -1876,6 +1886,7 @@
         disk_nodes = set()
         for index, key, value in self._iter_disk_pack_index():
             disk_nodes.add((key, value))
+        orig_disk_nodes = set(disk_nodes)
 
         # do a two-way diff against our original content
         current_nodes = set()
@@ -1894,7 +1905,7 @@
         disk_nodes.difference_update(deleted_nodes)
         disk_nodes.update(new_nodes)
 
-        return disk_nodes, deleted_nodes, new_nodes
+        return disk_nodes, deleted_nodes, new_nodes, orig_disk_nodes
 
     def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
         """Given the correct set of pack files, update our saved info.
@@ -1940,7 +1951,7 @@
                 added.append(name)
         return removed, added, modified
 
-    def _save_pack_names(self, clear_obsolete_packs=False):
+    def _save_pack_names(self, clear_obsolete_packs=False, obsolete_packs=None):
         """Save the list of packs.
 
         This will take out the mutex around the pack names list for the
@@ -1950,12 +1961,16 @@
 
         :param clear_obsolete_packs: If True, clear out the contents of the
             obsolete_packs directory.
+        :param obsolete_packs: Packs that are obsolete once the new pack-names
+            file has been written.
         :return: A list of the names saved that were not previously on disk.
         """
+        already_obsolete = []
         self.lock_names()
         try:
             builder = self._index_builder_class()
-            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+            (disk_nodes, deleted_nodes, new_nodes,
+             orig_disk_nodes) = self._diff_pack_names()
             # TODO: handle same-name, index-size-changes here -
             # e.g. use the value from disk, not ours, *unless* we're the one
             # changing it.
@@ -1963,14 +1978,25 @@
                 builder.add_node(key, value)
             self.transport.put_file('pack-names', builder.finish(),
                 mode=self.repo.bzrdir._get_file_mode())
-            # move the baseline forward
             self._packs_at_load = disk_nodes
             if clear_obsolete_packs:
-                self._clear_obsolete_packs()
+                to_preserve = None
+                if obsolete_packs:
+                    to_preserve = set([o.name for o in obsolete_packs])
+                already_obsolete = self._clear_obsolete_packs(to_preserve)
         finally:
             self._unlock_names()
         # synchronise the memory packs list with what we just wrote:
         self._syncronize_pack_names_from_disk_nodes(disk_nodes)
+        if obsolete_packs:
+            # TODO: We could add one more condition here. "if o.name not in
+            #       orig_disk_nodes and o != the new_pack we haven't written to
+            #       disk yet. However, the new pack object is not easily
+            #       accessible here (it would have to be passed through the
+            #       autopacking code, etc.)
+            obsolete_packs = [o for o in obsolete_packs
+                              if o.name not in already_obsolete]
+            self._obsolete_packs(obsolete_packs)
         return [new_node[0][0] for new_node in new_nodes]
 
     def reload_pack_names(self):
@@ -1991,13 +2017,12 @@
         if first_read:
             return True
         # out the new value.
-        disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+        (disk_nodes, deleted_nodes, new_nodes,
+         orig_disk_nodes) = self._diff_pack_names()
         # _packs_at_load is meant to be the explicit list of names in
         # 'pack-names' at then start. As such, it should not contain any
         # pending names that haven't been written out yet.
-        pack_names_nodes = disk_nodes.difference(new_nodes)
-        pack_names_nodes.update(deleted_nodes)
-        self._packs_at_load = pack_names_nodes
+        self._packs_at_load = orig_disk_nodes
         (removed, added,
          modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
         if removed or added or modified:
@@ -2012,15 +2037,28 @@
             raise
         raise errors.RetryAutopack(self.repo, False, sys.exc_info())
 
-    def _clear_obsolete_packs(self):
+    def _clear_obsolete_packs(self, preserve=None):
         """Delete everything from the obsolete-packs directory.
+
+        :return: A list of pack identifiers (the filename without '.pack') that
+            were found in obsolete_packs.
         """
+        found = []
         obsolete_pack_transport = self.transport.clone('obsolete_packs')
+        if preserve is None:
+            preserve = set()
         for filename in obsolete_pack_transport.list_dir('.'):
+            name, ext = osutils.splitext(filename)
+            if ext == '.pack':
+                found.append(name)
+            if name in preserve:
+                continue
             try:
                 obsolete_pack_transport.delete(filename)
             except (errors.PathError, errors.TransportError), e:
-                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
+                warning("couldn't delete obsolete pack, skipping it:\n%s"
+                        % (e,))
+        return found
 
     def _start_write_group(self):
         # Do not permit preparation for writing if we're not in a 'write lock'.

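The new return value of _clear_obsolete_packs is what makes the skip above
possible: it reports the base name of every '.pack' file it finds, whether or
not it deletes it, and the preserve set shields packs the caller is about to
obsolete itself. The same logic over a plain directory, as a hedged sketch
with os.listdir in place of the transport API:

    import os

    def clear_obsolete_packs(obsolete_dir, preserve=None):
        # Delete everything in obsolete_dir except entries whose base name
        # is in preserve; return the base names of all '.pack' files seen.
        if preserve is None:
            preserve = set()
        found = []
        for filename in os.listdir(obsolete_dir):
            name, ext = os.path.splitext(filename)
            if ext == '.pack':
                found.append(name)
            if name in preserve:
                continue
            try:
                os.unlink(os.path.join(obsolete_dir, filename))
            except OSError as e:
                # Mirror the patch: log the failure and keep going.
                print("couldn't delete obsolete pack, skipping it: %s" % (e,))
        return found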
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2010-01-17 15:07:39 +0000
+++ b/bzrlib/tests/test_repository.py	2010-01-21 21:13:09 +0000
@@ -1144,6 +1144,31 @@
         packs.ensure_loaded()
         return tree, r, packs, [rev1, rev2, rev3]
 
+    def test__clear_obsolete_packs(self):
+        packs = self.get_packs()
+        obsolete_pack_trans = packs.transport.clone('obsolete_packs')
+        obsolete_pack_trans.put_bytes('a-pack.pack', 'content\n')
+        obsolete_pack_trans.put_bytes('a-pack.rix', 'content\n')
+        obsolete_pack_trans.put_bytes('a-pack.iix', 'content\n')
+        obsolete_pack_trans.put_bytes('another-pack.pack', 'foo\n')
+        obsolete_pack_trans.put_bytes('not-a-pack.rix', 'foo\n')
+        res = packs._clear_obsolete_packs()
+        self.assertEqual(['a-pack', 'another-pack'], sorted(res))
+        self.assertEqual([], obsolete_pack_trans.list_dir('.'))
+
+    def test__clear_obsolete_packs_preserve(self):
+        packs = self.get_packs()
+        obsolete_pack_trans = packs.transport.clone('obsolete_packs')
+        obsolete_pack_trans.put_bytes('a-pack.pack', 'content\n')
+        obsolete_pack_trans.put_bytes('a-pack.rix', 'content\n')
+        obsolete_pack_trans.put_bytes('a-pack.iix', 'content\n')
+        obsolete_pack_trans.put_bytes('another-pack.pack', 'foo\n')
+        obsolete_pack_trans.put_bytes('not-a-pack.rix', 'foo\n')
+        res = packs._clear_obsolete_packs(preserve=set(['a-pack']))
+        self.assertEqual(['a-pack', 'another-pack'], sorted(res))
+        self.assertEqual(['a-pack.iix', 'a-pack.pack', 'a-pack.rix'],
+                         sorted(obsolete_pack_trans.list_dir('.')))
+
     def test__max_pack_count(self):
         """The maximum pack count is a function of the number of revisions."""
         # no revisions - one pack, so that we can have a revision free repo
@@ -1174,6 +1199,28 @@
         self.assertContainsRe(repr(packs),
             'RepositoryPackCollection(.*Repository(.*))')
 
+    def test__obsolete_packs(self):
+        tree, r, packs, revs = self.make_packs_and_alt_repo(write_lock=True)
+        names = packs.names()
+        pack = packs.get_pack_by_name(names[0])
+        # Schedule this one for removal
+        packs._remove_pack_from_memory(pack)
+        # Simulate a concurrent update by renaming the .pack file and one of
+        # the indices
+        packs.transport.rename('packs/%s.pack' % (names[0],),
+                               'obsolete_packs/%s.pack' % (names[0],))
+        packs.transport.rename('indices/%s.iix' % (names[0],),
+                               'obsolete_packs/%s.iix' % (names[0],))
+        # Now trigger the obsoletion, and ensure that all the remaining files
+        # are still renamed
+        packs._obsolete_packs([pack])
+        self.assertEqual([n + '.pack' for n in names[1:]],
+                         sorted(packs._pack_transport.list_dir('.')))
+        # names[0] should not be present in the index anymore
+        self.assertEqual(names[1:],
+            sorted(set([osutils.splitext(n)[0] for n in
+                        packs._index_transport.list_dir('.')])))
+
     def test_pack_distribution_zero(self):
         packs = self.get_packs()
         self.assertEqual([0], packs.pack_distribution(0))
@@ -1367,7 +1414,7 @@
         removed_pack = packs.get_pack_by_name(to_remove_name)
         packs._remove_pack_from_memory(removed_pack)
         names = packs.names()
-        all_nodes, deleted_nodes, new_nodes = packs._diff_pack_names()
+        all_nodes, deleted_nodes, new_nodes, _ = packs._diff_pack_names()
         new_names = set([x[0][0] for x in new_nodes])
         self.assertEqual(names, sorted([x[0][0] for x in all_nodes]))
         self.assertEqual(set(names) - set(orig_names), new_names)
@@ -1378,7 +1425,7 @@
         reloaded_names = packs.names()
         self.assertEqual(orig_at_load, packs._packs_at_load)
         self.assertEqual(names, reloaded_names)
-        all_nodes, deleted_nodes, new_nodes = packs._diff_pack_names()
+        all_nodes, deleted_nodes, new_nodes, _ = packs._diff_pack_names()
         new_names = set([x[0][0] for x in new_nodes])
         self.assertEqual(names, sorted([x[0][0] for x in all_nodes]))
         self.assertEqual(set(names) - set(orig_names), new_names)
@@ -1386,6 +1433,21 @@
         self.assertEqual([to_remove_name],
                          sorted([x[0][0] for x in deleted_nodes]))
 
+    def test_autopack_obsoletes_new_pack(self):
+        tree, r, packs, revs = self.make_packs_and_alt_repo(write_lock=True)
+        packs._max_pack_count = lambda x: 1
+        packs.pack_distribution = lambda x: [10]
+        r.start_write_group()
+        r.revisions.insert_record_stream([versionedfile.FulltextContentFactory(
+            ('bogus-rev',), (), None, 'bogus-content\n')])
+        # This should trigger an autopack, which will combine everything into a
+        # single pack file.
+        new_names = r.commit_write_group()
+        names = packs.names()
+        self.assertEqual(1, len(names))
+        self.assertEqual([names[0] + '.pack'],
+                         packs._pack_transport.list_dir('.'))
+
     def test_autopack_reloads_and_stops(self):
         tree, r, packs, revs = self.make_packs_and_alt_repo(write_lock=True)
         # After we have determined what needs to be autopacked, trigger a
@@ -1403,6 +1465,38 @@
         self.assertEqual(tree.branch.repository._pack_collection.names(),
                          packs.names())
 
+    def test__save_pack_names(self):
+        tree, r, packs, revs = self.make_packs_and_alt_repo(write_lock=True)
+        names = packs.names()
+        pack = packs.get_pack_by_name(names[0])
+        packs._remove_pack_from_memory(pack)
+        packs._save_pack_names(obsolete_packs=[pack])
+        cur_packs = packs._pack_transport.list_dir('.')
+        self.assertEqual([n + '.pack' for n in names[1:]], sorted(cur_packs))
+        # obsolete_packs will also have stuff like .rix and .iix present.
+        obsolete_packs = packs.transport.list_dir('obsolete_packs')
+        obsolete_names = set([osutils.splitext(n)[0] for n in obsolete_packs])
+        self.assertEqual([pack.name], sorted(obsolete_names))
+
+    def test__save_pack_names_already_obsoleted(self):
+        tree, r, packs, revs = self.make_packs_and_alt_repo(write_lock=True)
+        names = packs.names()
+        pack = packs.get_pack_by_name(names[0])
+        packs._remove_pack_from_memory(pack)
+        # We are going to simulate a concurrent autopack by manually obsoleting
+        # the pack directly.
+        packs._obsolete_packs([pack])
+        packs._save_pack_names(clear_obsolete_packs=True,
+                               obsolete_packs=[pack])
+        cur_packs = packs._pack_transport.list_dir('.')
+        self.assertEqual([n + '.pack' for n in names[1:]], sorted(cur_packs))
+        # Note that while we set clear_obsolete_packs=True, it should not
+        # delete a pack file that we have also scheduled for obsoletion.
+        obsolete_packs = packs.transport.list_dir('obsolete_packs')
+        obsolete_names = set([osutils.splitext(n)[0] for n in obsolete_packs])
+        self.assertEqual([pack.name], sorted(obsolete_names))
+
+
 
 class TestPack(TestCaseWithTransport):
     """Tests for the Pack object."""


