Rev 3894: Some testing to see if we can decrease the peak memory consumption a bit. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/refcycles

John Arbash Meinel john at arbash-meinel.com
Fri Mar 20 15:03:55 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/refcycles

------------------------------------------------------------
revno: 3894
revision-id: john at arbash-meinel.com-20090320150205-kcmh70biyo76p0kn
parent: john at arbash-meinel.com-20090320032107-bm9wg421rtcacy5i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: refcycles
timestamp: Fri 2009-03-20 10:02:05 -0500
message:
  Some testing to see if we can decrease the peak memory consumption a bit.
  It looks like we can; it just needs some more performance work, testing, etc.
-------------- next part --------------
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-19 03:06:02 +0000
+++ b/bzrlib/groupcompress.py	2009-03-20 15:02:05 +0000
@@ -339,6 +339,8 @@
         :param sha1: TODO (should we validate only when sha1 is supplied?)
         :return: The bytes for the content
         """
+        if start == end == 0:
+            return None, ''
         # Make sure we have enough bytes for this record
         # TODO: if we didn't want to track the end of this entry, we could
         #       _ensure_content(start+enough_bytes_for_type_and_length), and
@@ -494,6 +496,7 @@
         #       get_bytes_as call? After Manager.get_record_stream() returns
         #       the object?
         self._manager = manager
+        self._bytes = None
         self.storage_kind = 'groupcompress-block'
         if not first:
             self.storage_kind = 'groupcompress-block-ref'
@@ -512,14 +515,19 @@
                 return self._manager._wire_bytes()
             else:
                 return ''
+            self._manager = None # safe?
         if storage_kind in ('fulltext', 'chunked'):
-            self._manager._prepare_for_extract()
-            block = self._manager._block
-            _, bytes = block.extract(self.key, self._start, self._end)
+            if self._bytes is None:
+                # Grab the raw bytes for this entry, and break the ref-cycle
+                self._manager._prepare_for_extract()
+                block = self._manager._block
+                _, bytes = block.extract(self.key, self._start, self._end)
+                self._bytes = bytes
+                self._manager = None
             if storage_kind == 'fulltext':
-                return bytes
+                return self._bytes
             else:
-                return [bytes]
+                return [self._bytes]
         raise errors.UnavailableRepresentation(self.key, storage_kind,
             self.storage_kind)
 
@@ -1298,6 +1306,8 @@
         for key in missing:
             yield AbsentContentFactory(key)
         manager = None
+        last_block = None
+        last_memo = None
         # TODO: This works fairly well at batching up existing groups into a
         #       streamable format, and possibly allowing for taking one big
         #       group and splitting it when it isn't fully utilized.
@@ -1321,7 +1331,13 @@
                         yield FulltextContentFactory(key, parents, sha1, bytes)
                     else:
                         index_memo, _, parents, (method, _) = locations[key]
-                        block = self._get_block(index_memo)
+                        read_memo = index_memo[0:3]
+                        if last_memo == read_memo:
+                            block = last_block
+                        else:
+                            block = self._get_block(index_memo)
+                            last_block = block
+                            last_memo = read_memo
                         start, end = index_memo[3:5]
                         if manager is None:
                             manager = _LazyGroupContentManager(block)

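The groupcompress.py changes attack peak memory from two sides: a content factory now caches its extracted text in self._bytes and drops self._manager so the reference cycle back to the group block can be collected (note that the new "self._manager = None # safe?" line in the wire-bytes branch sits after both return statements, so as written it does not appear to run), and the record-stream loop remembers the last block it read, keyed on index_memo[0:3], so consecutive keys from the same group do not trigger a second read. Below is a minimal, hypothetical sketch of that last-block reuse; BlockReader and iter_blocks are illustrative stand-ins, not bzrlib API.

class BlockReader(object):
    """Toy reader that pretends each distinct group costs one disk read."""

    def __init__(self):
        self.reads = 0

    def read_block(self, read_memo):
        self.reads += 1
        return 'block-for-%r' % (read_memo,)


def iter_blocks(reader, index_memos):
    """Yield (index_memo, block), re-reading only when the group changes."""
    last_memo = None
    last_block = None
    for index_memo in index_memos:
        read_memo = index_memo[0:3]   # identifies the group on disk, as in the diff
        if read_memo == last_memo:
            block = last_block        # same group as the previous key: reuse it
        else:
            block = reader.read_block(read_memo)
            last_memo = read_memo
            last_block = block
        yield index_memo, block


if __name__ == '__main__':
    reader = BlockReader()
    memos = [('pack-1', 0, 100, 0, 10),
             ('pack-1', 0, 100, 10, 20),   # same group, no extra read
             ('pack-1', 200, 300, 0, 5)]   # new group, one more read
    for index_memo, block in iter_blocks(reader, memos):
        pass
    assert reader.reads == 2
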
=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py	2008-12-09 22:31:56 +0000
+++ b/bzrlib/lru_cache.py	2009-03-20 15:02:05 +0000
@@ -151,8 +151,12 @@
     def clear(self):
         """Clear out all of the cache."""
         # Clean up in LRU order
-        while self._cache:
-            self._remove_lru()
+        for key in self._cache.keys():
+            self._remove(key)
+        assert not self._cache
+        assert not self._cleanup
+        self._queue = deque()
+        self._refcount = {}
 
     def resize(self, max_cache, after_cleanup_count=None):
         """Change the number of entries that will be cached."""
@@ -247,6 +251,10 @@
         val = LRUCache._remove(self, key)
         self._value_size -= self._compute_size(val)
 
+    def clear(self):
+        LRUCache.clear(self)
+        self._value_size = 0
+
     def resize(self, max_size, after_cleanup_size=None):
         """Change the number of bytes that will be cached."""
         self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)

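On the lru_cache.py side, clear() now removes entries by key and then rebuilds the recency queue and refcount map in one step, rather than evicting one entry at a time in LRU order, and LRUSizeCache.clear() additionally resets its tracked byte total. The following is a simplified, self-contained model of that approach; it omits bzrlib's cleanup-callback handling and is not the real LRUCache implementation.

from collections import deque


class TinyLRUCache(object):
    """Toy LRU cache: a dict plus a recency queue that may hold duplicates."""

    def __init__(self, max_cache=100):
        self._max_cache = max_cache
        self._cache = {}        # key -> value
        self._queue = deque()   # recency queue; may contain a key many times
        self._refcount = {}     # key -> number of times it sits in the queue

    def __setitem__(self, key, value):
        self._cache[key] = value
        self._queue.append(key)
        self._refcount[key] = self._refcount.get(key, 0) + 1
        if len(self._cache) > self._max_cache:
            self._remove_lru()

    def __getitem__(self, key):
        value = self._cache[key]
        self._queue.append(key)            # record the access
        self._refcount[key] += 1
        return value

    def _remove(self, key):
        del self._cache[key]

    def _remove_lru(self):
        # Pop queue entries until we find a key whose last reference this is.
        while True:
            key = self._queue.popleft()
            self._refcount[key] -= 1
            if not self._refcount[key]:
                del self._refcount[key]
                self._remove(key)
                return

    def clear(self):
        # Remove by key, then reset the bookkeeping wholesale, as in the diff.
        for key in list(self._cache.keys()):
            self._remove(key)
        assert not self._cache
        self._queue = deque()
        self._refcount = {}


if __name__ == '__main__':
    cache = TinyLRUCache(max_cache=2)
    cache['a'] = 1
    cache['b'] = 2
    cache['c'] = 3            # evicts the least recently used key ('a')
    cache.clear()
    assert len(cache._cache) == 0
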
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-03-19 20:31:57 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-03-20 15:02:05 +0000
@@ -190,6 +190,8 @@
                 if pb is not None:
                     pb.update(message, idx + 1, len(keys))
                 yield record
+                record._manager = None
+                record._bytes = None
         return pb_stream()
 
     def _get_filtered_inv_stream(self, source_vf, keys, message, pb=None):
@@ -216,6 +218,8 @@
                     p_id_roots_set.add(key)
                     self._chk_p_id_roots.append(key)
                 yield record
+                record._manager = None
+                record._bytes = None
             # We have finished processing all of the inventory records, we
             # don't need these sets anymore
             id_roots_set.clear()
@@ -301,6 +305,8 @@
                         if pb is not None:
                             pb.update('chk node', counter[0], total_keys)
                         yield record
+                        record._manager = None
+                        record._bytes = None
                 yield next_stream()
                 # Double check that we won't be emitting any keys twice
                 # If we get rid of the pre-calculation of all keys, we could
@@ -312,7 +318,7 @@
                 # next_keys = next_keys.intersection(remaining_keys)
                 cur_keys = []
                 for prefix in sorted(keys_by_search_prefix):
-                    cur_keys.extend(keys_by_search_prefix[prefix])
+                    cur_keys.extend(keys_by_search_prefix.pop(prefix))
         for stream in _get_referenced_stream(self._chk_id_roots,
                                              self._gather_text_refs):
             yield stream
@@ -395,11 +401,17 @@
             self.revision_keys = source_vf.keys()
         self._copy_stream(source_vf, target_vf, self.revision_keys,
                           'revisions', self._get_progress_stream, 1)
+        for index in source_vf._index._graph_index._indices:
+            index._leaf_node_cache.clear()
+        # target_vf._index._graph_index._spill_mem_keys_to_disk()
 
     def _copy_inventory_texts(self):
         source_vf, target_vf = self._build_vfs('inventory', True, True)
         self._copy_stream(source_vf, target_vf, self.revision_keys,
                           'inventories', self._get_filtered_inv_stream, 2)
+        for index in source_vf._index._graph_index._indices:
+            index._leaf_node_cache.clear()
+        # target_vf._index._graph_index._spill_mem_keys_to_disk()
 
     def _copy_chk_texts(self):
         source_vf, target_vf = self._build_vfs('chk', False, False)
@@ -421,6 +433,9 @@
                     pass
         finally:
             child_pb.finished()
+        for index in source_vf._index._graph_index._indices:
+            index._leaf_node_cache.clear()
+        # target_vf._index._graph_index._spill_mem_keys_to_disk()
 
     def _copy_text_texts(self):
         source_vf, target_vf = self._build_vfs('text', True, True)
@@ -432,6 +447,9 @@
         text_keys = source_vf.keys()
         self._copy_stream(source_vf, target_vf, text_keys,
                           'text', self._get_progress_stream, 4)
+        for index in source_vf._index._graph_index._indices:
+            index._leaf_node_cache.clear()
+        # target_vf._index._graph_index._spill_mem_keys_to_disk()
 
     def _copy_signature_texts(self):
         source_vf, target_vf = self._build_vfs('signature', False, False)
@@ -439,6 +457,9 @@
         signature_keys.intersection(self.revision_keys)
         self._copy_stream(source_vf, target_vf, signature_keys,
                           'signatures', self._get_progress_stream, 5)
+        for index in source_vf._index._graph_index._indices:
+            index._leaf_node_cache.clear()
+        # target_vf._index._graph_index._spill_mem_keys_to_disk()
 
     def _create_pack_from_packs(self):
         self.pb.update('repacking', 0, 7)
@@ -451,6 +472,7 @@
         self._copy_text_texts()
         self._copy_signature_texts()
         self.new_pack._check_references()
+        trace.debug_memory('after fetch')
         if not self._use_pack(self.new_pack):
             self.new_pack.abort()
             return None

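The groupcompress_repo.py changes repeat one pattern in each stream wrapper: yield the record, then immediately set record._manager and record._bytes to None so the extracted text and its back-reference to the group block can be freed before the next record is produced. After each copy phase the leaf-node caches of the source indices are cleared for the same reason, and trace.debug_memory('after fetch') reports the effect. A minimal, hypothetical sketch of the yield-then-release pattern follows; _FakeRecord and progress_stream are illustrative only and do not mirror the bzrlib classes exactly.

def progress_stream(substream, pb=None, message='copying'):
    """Wrap a record stream, dropping per-record state once it is consumed."""
    for idx, record in enumerate(substream):
        if pb is not None:
            pb.update(message, idx + 1)
        yield record
        # Control only returns here after the consumer has dealt with the
        # record, so its cached bytes and its back-reference to the group
        # manager are no longer needed and can be released.
        record._manager = None
        record._bytes = None


class _FakeRecord(object):
    """Stand-in for a content factory holding extracted bytes."""

    def __init__(self, key, text_bytes):
        self.key = key
        self._bytes = text_bytes
        self._manager = object()   # stands in for the lazy group manager


if __name__ == '__main__':
    records = [_FakeRecord(('key-%d' % i,), b'some text') for i in range(3)]
    seen = []
    for record in progress_stream(iter(records)):
        seen.append(record._bytes)          # consume before the next yield
    assert seen == [b'some text'] * 3
    assert all(r._bytes is None for r in records)
    assert all(r._manager is None for r in records)

Because the attributes are cleared only when the generator resumes, the consumer must be done with each record before asking for the next one, which is how a record stream is normally consumed; the per-phase leaf-node cache clearing in the diff applies the same idea at a coarser granularity, dropping data once a whole copy phase no longer needs it.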

