Rev 47: Groupcompress now supports 'autopack' and 'pack'. in http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/trunk
John Arbash Meinel
john at arbash-meinel.com
Wed Feb 25 23:00:30 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr-groupcompress/trunk
------------------------------------------------------------
revno: 47
revision-id: john at arbash-meinel.com-20090225225958-jnsftmx4zcmmlo2a
parent: john at arbash-meinel.com-20090225222123-frjzu1meidd6qafi
parent: john at arbash-meinel.com-20090225221429-l0is3qxy1hvzuhes
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2009-02-25 16:59:58 -0600
message:
Groupcompress now supports 'autopack' and 'pack'.
It does this by just creating a new pack file, wrapping a GCVersionedFiles
around it, and streaming in the data in 'gc-optimal' ordering.
This actually seems to work fairly well. (A condensed sketch of the flow follows the file list below.)
modified:
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
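In outline, the new repack path wraps a GroupCompressVersionedFiles around the
freshly created pack and streams every record into it. Here is a condensed,
purely illustrative sketch of that core idea; names such as new_pack,
new_index, source_vf, repo, and keys stand in for the objects that are built
inside _execute_pack_operations in the diff below:

    # Condensed sketch of the repack flow; see the diff below for the real
    # code. All names here are bound inside _execute_pack_operations.
    target_access = knit._DirectPackAccess({})
    target_access.set_writer(new_pack._writer, new_index,
                             new_pack.access_tuple())
    target_vf = GroupCompressVersionedFiles(
        _GCGraphIndex(new_index, add_callback=new_index.add_nodes,
                      parents=source_vf._index._parents,
                      is_locked=repo.is_locked),
        access=target_access)
    # 'gc-optimal' ordering delivers related texts together, which should
    # give the group-compressor better input and hence a smaller pack.
    stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
    target_vf.insert_record_stream(stream)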
------------------------------------------------------------
revno: 45.1.1
revision-id: john at arbash-meinel.com-20090225221429-l0is3qxy1hvzuhes
parent: john at arbash-meinel.com-20090225221102-z0qgfbrfwwe2yscf
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: experimental
timestamp: Wed 2009-02-25 16:14:29 -0600
message:
A first-cut at implementing an auto-pack by copying everything.
modified:
repofmt.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
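For reference, the pack_operations argument that _execute_pack_operations
(added in the diff below) receives is a list of
[revision_count, packs_to_combine] pairs. A purely illustrative example
(pack_a and pack_b are hypothetical existing pack objects):

    # Illustration only -- pack_a and pack_b are hypothetical packs;
    # entries with an empty pack list are skipped as no-ops.
    pack_operations = [
        [150, [pack_a, pack_b]],  # combine these packs (150 revisions total)
        [0, []],                  # no-op left over from the planning logic
    ]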
-------------- next part --------------
=== modified file 'repofmt.py'
--- a/repofmt.py 2009-02-19 20:45:00 +0000
+++ b/repofmt.py 2009-02-25 22:59:58 +0000
@@ -20,7 +20,14 @@
 import md5
 import time
 
-from bzrlib import debug, errors, pack, repository
+from bzrlib import (
+    debug,
+    errors,
+    knit,
+    pack,
+    repository,
+    ui,
+    )
 from bzrlib.btree_index import (
     BTreeBuilder,
     BTreeGraphIndex,
@@ -229,8 +236,96 @@
         self.repo.signatures._index._add_callback = self.signature_index.add_callback
         self.repo.texts._index._add_callback = self.text_index.add_callback
 
-    def _do_autopack(self):
-        return False
+    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
+                                 reload_func=None):
+        """Execute a series of pack operations.
+
+        :param pack_operations: A list of [revision_count, packs_to_combine].
+        :param _packer_class: The class of packer to use (default: Packer).
+        :return: None.
+        """
+        for revision_count, packs in pack_operations:
+            # we may have no-ops from the setup logic
+            if len(packs) == 0:
+                continue
+            # Create a new temp VersionedFile instance based on these packs,
+            # and then just fetch everything into the target
+
+            # XXX: Find a way to 'set_optimize' on the newly created pack
+            #      indexes
+            # def open_pack(self):
+            #     """Open a pack for the pack we are creating."""
+            #     new_pack = super(OptimisingPacker, self).open_pack()
+            #     # Turn on the optimization flags for all the index builders.
+            #     new_pack.revision_index.set_optimize(for_size=True)
+            #     new_pack.inventory_index.set_optimize(for_size=True)
+            #     new_pack.text_index.set_optimize(for_size=True)
+            #     new_pack.signature_index.set_optimize(for_size=True)
+            #     return new_pack
+            to_copy = [('revision_index', 'revisions'),
+                       ('inventory_index', 'inventories'),
+                       ('text_index', 'texts'),
+                       ('signature_index', 'signatures'),
+                       ]
+            if getattr(self, 'chk_index', None) is not None:
+                to_copy.insert(2, ('chk_index', 'chk_bytes'))
+
+            # Shouldn't we start_write_group around this?
+            if self._new_pack is not None:
+                raise errors.BzrError('call to %s.pack() while another pack is'
+                                      ' being written.'
+                                      % (self.__class__.__name__,))
+            new_pack = self.pack_factory(self, 'autopack',
+                                         self.repo.bzrdir._get_file_mode())
+            new_pack.set_write_cache_size(1024*1024)
+            # TODO: A better alternative is to probably use Packer.open_pack(), and
+            #       then create a GroupCompressVersionedFiles() around the
+            #       target pack to insert into.
+            pb = ui.ui_factory.nested_progress_bar()
+            try:
+                for idx, (index_name, vf_name) in enumerate(to_copy):
+                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
+                    keys = set()
+                    new_index = getattr(new_pack, index_name)
+                    new_index.set_optimize(for_size=True)
+                    for pack in packs:
+                        source_index = getattr(pack, index_name)
+                        keys.update(e[1] for e in source_index.iter_all_entries())
+                    source_vf = getattr(self.repo, vf_name)
+                    target_access = knit._DirectPackAccess({})
+                    target_access.set_writer(new_pack._writer, new_index,
+                                             new_pack.access_tuple())
+                    target_vf = GroupCompressVersionedFiles(
+                        _GCGraphIndex(new_index,
+                                      add_callback=new_index.add_nodes,
+                                      parents=source_vf._index._parents,
+                                      is_locked=self.repo.is_locked),
+                        access=target_access,
+                        delta=source_vf._delta)
+                    stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
+                    target_vf.insert_record_stream(stream)
+                new_pack._check_references() # shouldn't be needed
+            except:
+                pb.finished()
+                new_pack.abort()
+                raise
+            else:
+                pb.finished()
+            if not new_pack.data_inserted():
+                raise AssertionError('We copied from pack files,'
+                                     ' but had no data copied')
+                # we need to abort somehow, because we don't want to remove
+                # the other packs
+            new_pack.finish()
+            self.allocate(new_pack)
+            for pack in packs:
+                self._remove_pack_from_memory(pack)
+        # record the newly available packs and stop advertising the old
+        # packs
+        self._save_pack_names(clear_obsolete_packs=True)
+        # Move the old packs out of the way now they are no longer referenced.
+        for revision_count, packs in pack_operations:
+            self._obsolete_packs(packs)
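With this in place, an explicit repack of a groupcompress repository can be
driven the usual way, e.g. 'bzr pack' from the command line, or from Python.
An untested sketch (assumes a repository in the plugin's format; for pack
repositories, Repository.pack() should end up in the pack collection's
_execute_pack_operations() shown above):

    from bzrlib import repository

    repo = repository.Repository.open('path/to/gc-repo')
    repo.lock_write()
    try:
        repo.pack()  # repacks everything via _execute_pack_operations()
    finally:
        repo.unlock()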