Rev 6: Cap group size at 20MB internal buffer. (Probably way too big). in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk

Robert Collins robertc at robertcollins.net
Tue Jul 8 08:24:48 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk

------------------------------------------------------------
revno: 6
revision-id: robertc at robertcollins.net-20080708072446-zncm2ymr5sj64zcu
parent: robertc at robertcollins.net-20080708045101-i45nqjdifdquuyhc
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Tue 2008-07-08 17:24:46 +1000
message:
  Cap group size at 20MB internal buffer. (Probably way too big).
modified:
  groupcompress.py               groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
=== modified file 'groupcompress.py'
--- a/groupcompress.py	2008-07-08 04:51:01 +0000
+++ b/groupcompress.py	2008-07-08 07:24:46 +0000
@@ -476,11 +476,20 @@
                 adapters[adapter_key] = adapter
                 return adapter
         adapters = {}
-        compressor = GroupCompressor(self._delta)
         # This will go up to fulltexts for gc to gc fetching, which isn't
         # ideal.
+        compressor = GroupCompressor(self._delta)
         keys_to_add = []
         basis_end = 0
+        groups = 1
+        def flush():
+            compressed = zlib.compress(''.join(compressor.lines))
+            index, start, length = self._access.add_raw_records(
+                [(None, len(compressed))], compressed)[0]
+            nodes = []
+            for key, reads, refs in keys_to_add:
+                nodes.append((key, "%d %d %s" % (start, length, reads), refs))
+            self._index.add_records(nodes, random_id=random_id)
         for record in stream:
             # Raise an error when a record is missing.
             if record.storage_kind == 'absent':
@@ -498,13 +507,12 @@
             keys_to_add.append((record.key, '%d %d' % (basis_end, end_point),
                 (record.parents,)))
             basis_end = end_point
-        compressed = zlib.compress(''.join(compressor.lines))
-        index, start, length = self._access.add_raw_records(
-            [(None, len(compressed))], compressed)[0]
-        nodes = []
-        for key, reads, refs in keys_to_add:
-            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
-        self._index.add_records(nodes, random_id=random_id)
+            if basis_end > 1024 * 1024 * 20:
+                flush()
+                compressor = GroupCompressor(self._delta)
+                keys_to_add = []
+                basis_end = 0
+                groups += 1
 
     def iter_lines_added_or_present_in_keys(self, keys, pb=None):
         """Iterate over the lines in the versioned files from keys.

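For context, the change converts the single end-of-stream compress-and-write into a repeating flush: records are accumulated into a group, and once the uncompressed buffer passes 20MB the group is compressed with zlib, written out as one raw record, its keys are indexed against the group's start position, and the per-group state is reset. Below is a minimal standalone sketch of the same pattern; the storage object and its write/index methods are hypothetical stand-ins for the real self._access / self._index pair, not the actual bzrlib API:

import zlib

GROUP_SIZE_CAP = 20 * 1024 * 1024  # the 20MB cap introduced by this revision

def insert_record_stream(stream, storage):
    # Uncompressed text accumulated for the current group, plus the
    # (key, offset, length) entries waiting for an index write.
    buffered = []
    pending = []
    group_size = 0

    def flush():
        # Compress the whole group as a single zlib stream, write it out,
        # then index every key against the group's start position.
        compressed = zlib.compress(b''.join(buffered))
        start = storage.write(compressed)               # hypothetical API
        for key, offset, length in pending:
            storage.index(key, start, offset, length)   # hypothetical API

    for key, text in stream:
        buffered.append(text)
        pending.append((key, group_size, len(text)))
        group_size += len(text)
        if group_size > GROUP_SIZE_CAP:
            flush()
            # Rebind the per-group state; the closure reads the enclosing
            # scope's names, so the next flush() sees the fresh lists.
            buffered = []
            pending = []
            group_size = 0
    if pending:
        # Flush the trailing partial group. The diff above only flushes
        # inside the loop, so records arriving after the last 20MB
        # boundary would seem to need a final flush like this one.
        flush()

Compressing many texts as one zlib stream is what gives groupcompress its ratio, so the cap trades compression against the memory held in compressor.lines and the cost of inflating a group to read a single text back; as the commit message itself says, 20MB is a first guess that is probably way too big.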


