Rev 4642: Switching from 'groupcompress' order to 'unordered' causes the fragmentation issue in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-402645-fragmentation
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 24 20:34:32 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-402645-fragmentation
------------------------------------------------------------
revno: 4642
revision-id: john at arbash-meinel.com-20090824193413-zlz0wc07x99gxs3b
parent: pqm at pqm.ubuntu.com-20090824182846-ac4l3skw47g0tzx0
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-402645-fragmentation
timestamp: Mon 2009-08-24 14:34:13 -0500
message:
Switching from 'groupcompress' order to 'unordered' makes the fragmentation
issue go away.
That is probably worthwhile *today*, but the real fix is to repack on the fly
once some figure of merit has been reached.
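
The 'repack on the fly' fix is not part of this revision. As a rough sketch
only (the function name and threshold below are assumptions for discussion,
not bzrlib code), such a heuristic could decide to rebuild a block when the
texts actually referenced cover too little of it:

# Illustrative sketch only: name and threshold are made up, not bzrlib API.

WASTE_THRESHOLD = 0.5   # repack when less than half of a block is referenced


def should_repack(block_content_length, referenced_lengths):
    """Guess whether a compressed block is fragmented enough to repack.

    block_content_length: total uncompressed length of the existing block.
    referenced_lengths: lengths of the texts a request actually touches.
    """
    if block_content_length <= 0:
        return False
    used = float(sum(referenced_lengths))
    return used / block_content_length < WASTE_THRESHOLD


# e.g. only ~400KiB of a 4MiB block is wanted, so repacking looks worthwhile
assert should_repack(4 * 1024 * 1024, [200 * 1024, 200 * 1024])

The real work would be picking the figure of merit and wiring it into
_LazyGroupContentManager.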
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-08-19 16:23:39 +0000
+++ b/bzrlib/groupcompress.py 2009-08-24 19:34:13 +0000
@@ -464,6 +464,8 @@
             self.storage_kind)
 
 
+_recent_blocks = set()
+
 class _LazyGroupContentManager(object):
     """This manages a group of _LazyGroupCompressFactory objects."""
 
@@ -531,8 +533,11 @@
         # expand, since we do full compression again. Perhaps based on a
         # request that ends up poorly ordered?
         delta = time.time() - tstart
+        if old_length in _recent_blocks:
+            import pdb; pdb.set_trace()
+        _recent_blocks.add(old_length)
         self._block = new_block
-        trace.mutter('creating new compressed block on-the-fly in %.3fs'
+        trace.note('creating new compressed block on-the-fly in %.3fs'
                      ' %d bytes => %d bytes', delta, old_length,
                      self._block._content_length)
 
@@ -1307,6 +1312,8 @@
         missing.difference_update(unadded_keys)
         (fallback_parent_map, key_to_source_map,
          source_result) = self._find_from_fallback(missing)
+        trace.note('getting record stream for %s keys, in %r order, from %s'
+                   % (len(keys), ordering, self._index))
         if ordering in ('topological', 'groupcompress'):
             # would be better to not globally sort initially but instead
             # start with one key, recurse to its oldest parent, then grab
@@ -1339,6 +1346,7 @@
         # one-at-a-time.) This could be done at insert_record_stream()
         # time, but it probably would decrease the number of
         # bytes-on-the-wire for fetch.
+        recent_read_memos = set()
         for source, keys in source_keys:
             if source is self:
                 for key in keys:
@@ -1357,6 +1365,9 @@
                         # We are starting a new block. If we have a
                         # manager, we have found everything that fits for
                         # now, so yield records
+                        if read_memo in recent_read_memos:
+                            import pdb; pdb.set_trace()
+                        recent_read_memos.add(read_memo)
                         if manager is not None:
                             for factory in manager.get_record_stream():
                                 yield factory
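
The _recent_blocks and recent_read_memos changes above are debugging
scaffolding rather than part of the fix: they remember the old block length
and the read memo of blocks already rebuilt or read during a single stream,
and drop into pdb when one comes around again. Stripped of the bzrlib
context, the pattern is just (illustrative names only):

# Illustrative only: remember identifiers already handled and break into
# the debugger on a repeat, the symptom of the fragmentation being chased.
_seen = set()

def check_repeat(identifier):
    if identifier in _seen:
        import pdb; pdb.set_trace()
    _seen.add(identifier)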
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-08-18 05:18:52 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-08-24 19:34:13 +0000
@@ -932,7 +932,7 @@
         super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
         self._revision_keys = None
         self._text_keys = None
-        self._text_fetch_order = 'groupcompress'
+        # self._text_fetch_order = 'unordered'
         self._chk_id_roots = None
         self._chk_p_id_roots = None
 
@@ -949,7 +949,7 @@
             p_id_roots_set = set()
             source_vf = self.from_repository.inventories
             stream = source_vf.get_record_stream(inventory_keys,
-                                                 'groupcompress', True)
+                                                 'unordered', True)
             for record in stream:
                 if record.storage_kind == 'absent':
                     if allow_absent:
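
For reference, both orderings go through the same
VersionedFiles.get_record_stream(keys, ordering, include_delta_closure)
call; only the ordering string differs. A minimal sketch of pulling the
inventory texts in 'unordered' order, assuming the current directory is a
branch whose repository uses the groupcompress (2a-style) format:

# Minimal sketch, assuming '.' is a branch in a groupcompress repository;
# 'unordered' is the ordering this revision switches the inventory stream to.
from bzrlib import branch

b = branch.Branch.open('.')
repo = b.repository
repo.lock_read()
try:
    keys = [(rev_id,) for rev_id in repo.all_revision_ids()]
    stream = repo.inventories.get_record_stream(keys, 'unordered', True)
    for record in stream:
        if record.storage_kind == 'absent':
            continue
        record.get_bytes_as('fulltext')
finally:
    repo.unlock()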