Rev 3656: Now that we have real data, remove the copy() code. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

John Arbash Meinel john at arbash-meinel.com
Thu Aug 21 20:36:00 BST 2008


At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/btree

------------------------------------------------------------
revno: 3656
revision-id: john at arbash-meinel.com-20080821193558-0a4qni76jso98gxn
parent: john at arbash-meinel.com-20080821192346-4mtm95v5g4kkxbyu
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: btree
timestamp: Thu 2008-08-21 14:35:58 -0500
message:
  Now that we have real data, remove the copy() code.
  It didn't really benefit us over just doing another repack.
  There was a small benefit when the compressor is genuinely full,
  but that path is rarely encountered because we stop repacking by effort
  rather than by space. It also stabilizes the tests for all
  platforms, because the copy() code would alter the packing
  slightly.
modified:
  bzrlib/chunk_writer.py         chunk_writer.py-20080630234519-6ggn4id17nipovny-1
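
For reference, the code being removed below leaned on zlib's compressobj.copy(),
which snapshots the compression state so a speculative add can be rolled back if it
busts the budget. A minimal standalone sketch of that idea (plain zlib, with a
hypothetical CAPACITY and helper name, not bzrlib's ChunkWriter API):

import zlib

CAPACITY = 4096  # illustrative chunk budget only

def speculative_add(compressor, bytes_list, data):
    """Try to fit `data`; roll back via copy() if it would overflow."""
    snapshot = compressor.copy()            # save the compression state
    out = compressor.compress(data)
    out += compressor.flush(zlib.Z_SYNC_FLUSH)
    if sum(map(len, bytes_list)) + len(out) + 10 > CAPACITY:
        return snapshot, False              # over budget: caller keeps the snapshot
    bytes_list.append(out)
    return compressor, True

The commit drops that fallback: when the chunk is over budget we now just recompress
everything from scratch, and the number of repacks is already capped by effort
(_max_repack) rather than by how full the compressor is.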
-------------- next part --------------
=== modified file 'bzrlib/chunk_writer.py'
--- a/bzrlib/chunk_writer.py	2008-08-21 19:23:46 +0000
+++ b/bzrlib/chunk_writer.py	2008-08-21 19:35:58 +0000
@@ -85,9 +85,6 @@
         self.unused_bytes = None
         self.reserved_size = reserved
         self.min_compress_size = self._default_min_compression_size
-        self.num_zsync = 0
-        self.compressor_has_copy = (getattr(self.compressor, 'copy', None)
-                                    is not None)
 
     def finish(self):
         """Finish the chunk.
@@ -132,17 +129,14 @@
             out = compress(accepted_bytes)
             if out:
                 append(out)
-        alt_compressed = None
         if extra_bytes:
-            if self.compressor_has_copy:
-                alt_compressed = (list(bytes_out), compressor.copy())
             out = compress(extra_bytes)
             if out:
                 append(out)
             out = compressor.flush(Z_SYNC_FLUSH)
             if out:
                 append(out)
-        return bytes_out, compressor, alt_compressed
+        return bytes_out, compressor
 
     def write(self, bytes):
         """Write some bytes to the chunk.
@@ -180,82 +174,35 @@
             self.seen_bytes = next_seen_size
         else:
             if not reserved and self.num_repack >= self._max_repack:
-            # if (not reserved
-            #     and (self.num_repack > self._max_repack
-            #          or (self._max_repack == self._max_repack
-            #              and not self.compressor_has_copy))):
-            #     # We have packed too many times already.
+                # We already know we don't want to try to fit more
                 return True
-            if not reserved and self.num_repack == self._max_repack:
-                assert self.compressor_has_copy
-                # We are trying to sneak in a few more keys before we run out
-                # of room, so copy the compressor. If we bust, we stop right
-                # now
-                copy = self.compressor.copy()
-                out = self.compressor.compress(bytes)
-                out += self.compressor.flush(Z_SYNC_FLUSH)
-                total_len = sum(map(len, self.bytes_list)) + len(out)
-                if total_len + 10 > capacity:
-                    self.compressor = copy
-                    # Don't try any more
-                    self.num_repack += 1
-                    return True
-                # It is tempting to use the copied compressor here, because it
-                # is more tightly packed. It gets us to the maximum packing
-                # value. However, it adds about the same overhead as setting
-                # _max_repack to a higher value
-                # self.compressor = copy
-                # out = self.compressor.compress(bytes)
-                self.bytes_in.append(bytes)
-                if out:
-                    self.bytes_list.append(out)
-                return False
             # This may or may not fit, try to add it with Z_SYNC_FLUSH
             out = self.compressor.compress(bytes)
-            if out:
-                self.bytes_list.append(out)
-            out = self.compressor.flush(Z_SYNC_FLUSH)
-            if out:
-                self.bytes_list.append(out)
-            self.num_zsync += 1
-            # TODO: We may want to cache total_len, as the 'sum' call seems to
-            #       be showing up a bit on lsprof output
+            out += self.compressor.flush(Z_SYNC_FLUSH)
+            if out:
+                self.bytes_list.append(out)
             total_len = sum(map(len, self.bytes_list))
-            # Give us some extra room for a final Z_FINISH call.
+            # total_len + 10 is to give some room for Z_FINISH
             if total_len + 10 > capacity:
                 # We are over budget, try to squeeze this in without any
                 # Z_SYNC_FLUSH calls
                 self.num_repack += 1
-                if False and self.num_repack >= self._max_repack:
-                    this_len = None
-                    alt_compressed = None
-                else:
-                    (bytes_out, compressor,
-                     alt_compressed) = self._recompress_all_bytes_in(bytes)
-                    this_len = sum(map(len, bytes_out))
+                bytes_out, compressor = self._recompress_all_bytes_in(bytes)
+                this_len = sum(map(len, bytes_out))
                 if this_len is None or this_len + 10 > capacity:
                     # No way we can add anymore, we need to re-pack because our
-                    # compressor is now out of sync
-                    if alt_compressed is None:
-                        bytes_out, compressor, _ = self._recompress_all_bytes_in()
-                    else:
-                        bytes_out, compressor = alt_compressed
+                    # compressor is now out of sync.
+                    # This seems to be rarely triggered over
+                    #   num_repack > _max_repack
+                    bytes_out, compressor = self._recompress_all_bytes_in()
                     self.compressor = compressor
                     self.bytes_list = bytes_out
                     self.unused_bytes = bytes
                     return True
                 else:
                     # This fits when we pack it tighter, so use the new packing
-                    if alt_compressed is not None:
-                        # We know it will fit, so put it into another
-                        # compressor without Z_SYNC_FLUSH
-                        bytes_out, compressor = alt_compressed
-                        compressor.compress(bytes)
-                        self.num_zsync = 0
-                    else:
-                        # There is one Z_SYNC_FLUSH call in
-                        # _recompress_all_bytes_in
-                        self.num_zsync = 1
+                    # There is one Z_SYNC_FLUSH call in
+                    # _recompress_all_bytes_in
                     self.compressor = compressor
                     self.bytes_in.append(bytes)
                     self.bytes_list = bytes_out
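
Taken together, the surviving write() path is: try the bytes with a Z_SYNC_FLUSH,
and if the running total (plus ~10 bytes of headroom for the final Z_FINISH) goes
over capacity, repack everything once and either keep the tighter packing or declare
the chunk full. A rough, self-contained sketch of that flow (toy class and names,
not the real bzrlib ChunkWriter; it skips the min_compress_size fast path):

import zlib

class TinyChunkWriter(object):
    """Toy illustration of the post-change flow; not the bzrlib API."""

    _max_repack = 2          # stop repacking by effort, not by remaining space

    def __init__(self, capacity):
        self.capacity = capacity
        self.compressor = zlib.compressobj()
        self.bytes_in = []       # accepted, uncompressed chunks
        self.bytes_list = []     # compressed output so far
        self.unused_bytes = None
        self.num_repack = 0

    def _recompress_all_bytes_in(self, extra_bytes=None):
        # Mirrors the simplified helper above: one pass, at most one Z_SYNC_FLUSH.
        compressor = zlib.compressobj()
        bytes_out = []
        for accepted_bytes in self.bytes_in:
            out = compressor.compress(accepted_bytes)
            if out:
                bytes_out.append(out)
        if extra_bytes:
            out = compressor.compress(extra_bytes)
            if out:
                bytes_out.append(out)
            out = compressor.flush(zlib.Z_SYNC_FLUSH)
            if out:
                bytes_out.append(out)
        return bytes_out, compressor

    def write(self, data):
        """Return True when the chunk is full and `data` did not fit."""
        if self.num_repack >= self._max_repack:
            return True
        out = self.compressor.compress(data)
        out += self.compressor.flush(zlib.Z_SYNC_FLUSH)
        if out:
            self.bytes_list.append(out)
        total_len = sum(map(len, self.bytes_list))
        if total_len + 10 > self.capacity:   # +10 leaves room for the final Z_FINISH
            self.num_repack += 1
            bytes_out, compressor = self._recompress_all_bytes_in(data)
            if sum(map(len, bytes_out)) + 10 > self.capacity:
                # Still over budget even tightly packed: repack without `data`
                # and hand it back to the caller via unused_bytes.
                bytes_out, compressor = self._recompress_all_bytes_in()
                self.compressor = compressor
                self.bytes_list = bytes_out
                self.unused_bytes = data
                return True
            # It fits when packed tighter; adopt the repacked state.
            self.compressor = compressor
            self.bytes_list = bytes_out
        self.bytes_in.append(data)
        return False

Usage would look roughly like:

writer = TinyChunkWriter(capacity=4096)
if writer.write(b'row-1\n' * 200):
    pass  # chunk full: finish it and carry writer.unused_bytes into a new chunk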


