Rev 4643: Use unordered fetches to avoid fragmentation (bug #402645) in http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-402645-fragmentation

John Arbash Meinel john at arbash-meinel.com
Mon Aug 24 21:10:38 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.0b1-402645-fragmentation

------------------------------------------------------------
revno: 4643
revision-id: john at arbash-meinel.com-20090824201018-3qthdqr0pzlaarm2
parent: john at arbash-meinel.com-20090824193413-zlz0wc07x99gxs3b
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.0b1-402645-fragmentation
timestamp: Mon 2009-08-24 15:10:18 -0500
message:
  Use unordered fetches to avoid fragmentation (bug #402645)
  
  The real goal is to get a stable groupcompress ordering, and advanced
  heuristics to repack on-the-fly as necessary.
  However, for now, this at least avoids the current bugs.
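
For readers not familiar with the fetch path, here is a rough sketch (not part of
this patch; the real call sites live in bzrlib's fetch code) of what switching the
ordering hint means. The store names and key set below are hypothetical
placeholders for whichever VersionedFiles stores are being copied:

    def fetch_texts(source_texts, target_texts, keys):
        """Illustrative only: copy text records between two stores."""
        # Before this change the request used 'groupcompress' ordering, which
        # split ("fragmented") existing compressed blocks to honour the order
        # but never recombined them afterwards:
        #   stream = source_texts.get_record_stream(keys, 'groupcompress', False)

        # 'unordered' streams blocks roughly as they are stored, avoiding the
        # fragmentation described in bug #402645.
        stream = source_texts.get_record_stream(keys, 'unordered', False)
        target_texts.insert_record_stream(stream)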
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2009-08-24 18:28:46 +0000
+++ b/NEWS	2009-08-24 20:10:18 +0000
@@ -27,6 +27,12 @@
 Bug Fixes
 *********
 
+* Fetches were being requested in 'groupcompress' order, but weren't
+  recombining the groups. Thus they would 'fragment' to get the correct
+  order, but not 'recombine' to actually benefit from it. Until we get
+  recombining to work, switching to 'unordered' fetches avoids the
+  fragmentation. (John Arbash Meinel, #402645)
+
 * Fix a pycurl related test failure on karmic by recognizing an error
   raised by newer versions of pycurl.
   (Vincent Ladeuil, #306264)

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-08-24 19:34:13 +0000
+++ b/bzrlib/groupcompress.py	2009-08-24 20:10:18 +0000
@@ -464,8 +464,6 @@
                                                self.storage_kind)
 
 
-_recent_blocks = set()
-
 class _LazyGroupContentManager(object):
     """This manages a group of _LazyGroupCompressFactory objects."""
 
@@ -533,11 +531,8 @@
         #       expand, since we do full compression again. Perhaps based on a
         #       request that ends up poorly ordered?
         delta = time.time() - tstart
-        if old_length in _recent_blocks:
-            import pdb; pdb.set_trace()
-        _recent_blocks.add(old_length)
         self._block = new_block
-        trace.note('creating new compressed block on-the-fly in %.3fs'
+        trace.mutter('creating new compressed block on-the-fly in %.3fs'
                      ' %d bytes => %d bytes', delta, old_length,
                      self._block._content_length)
 
@@ -1312,8 +1307,6 @@
         missing.difference_update(unadded_keys)
         (fallback_parent_map, key_to_source_map,
          source_result) = self._find_from_fallback(missing)
-        trace.note('getting record stream for %s keys, in %r order, from %s'
-                   % (len(keys), ordering, self._index))
         if ordering in ('topological', 'groupcompress'):
             # would be better to not globally sort initially but instead
             # start with one key, recurse to its oldest parent, then grab
@@ -1346,7 +1339,6 @@
         #       one-at-a-time.) This could be done at insert_record_stream()
         #       time, but it probably would decrease the number of
         #       bytes-on-the-wire for fetch.
-        recent_read_memos = set()
         for source, keys in source_keys:
             if source is self:
                 for key in keys:
@@ -1365,9 +1357,6 @@
                             # We are starting a new block. If we have a
                             # manager, we have found everything that fits for
                             # now, so yield records
-                            if read_memo in recent_read_memos:
-                                import pdb; pdb.set_trace()
-                            recent_read_memos.add(read_memo)
                             if manager is not None:
                                 for factory in manager.get_record_stream():
                                     yield factory
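
For context on why an out-of-order request hurts, here is an illustrative sketch
(not bzrlib code) of the batching idea in the loop above: keys that share a
compressed block are gathered under one manager and flushed whenever the block
changes, so an ordering that keeps jumping between blocks touches each block many
times. The names 'block_of' and 'flush' below are hypothetical stand-ins for the
read_memo/manager machinery in groupcompress.py:

    def stream_grouped_by_block(keys, block_of, flush):
        """Illustrative only: yield records batched per compressed block."""
        current = None
        batch = []
        for key in keys:
            block = block_of(key)
            if block != current:
                # Starting a new block: everything gathered for the previous
                # block can be yielded now.
                if batch:
                    for record in flush(current, batch):
                        yield record
                current, batch = block, []
            batch.append(key)
        if batch:
            for record in flush(current, batch):
                yield record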


