Rev 4764: Interning the most common 'value' entries saves about 1-2% of peak memory. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Thu Oct 8 05:11:02 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4764
revision-id: john at arbash-meinel.com-20091008041043-579p0as1c3xq1q6w
parent: john at arbash-meinel.com-20091007211747-tlgi6myzn54wdyxa
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 23:10:43 -0500
message:
  Interning the most common 'value' entries saves about 1-2% of peak memory.
  
  Namely, interning the zero-length content values (those ending in ' 0 0').
  Note that most values have a common prefix now, and an uncommon suffix...
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-10-07 21:17:47 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-10-08 04:10:43 +0000
@@ -79,6 +79,7 @@
         pos = pos - 1
     return NULL
 
+
 # TODO: Import this from _dirstate_helpers when it is merged
 cdef object safe_string_from_size(char *s, Py_ssize_t size):
     if size < 0:
@@ -196,6 +197,7 @@
         cdef char *ref_ptr
         cdef char *next_start
         cdef int loop_counter
+        cdef Py_ssize_t str_len
 
         self._start = self._cur_str
         # Find the next newline
@@ -231,10 +233,23 @@
             # Invalid line
             raise AssertionError("Failed to find the value area")
         else:
-            # capture the value string
-            value = safe_string_from_size(temp_ptr + 1, last - temp_ptr - 1)
+            # Because of how conversions were done, we ended up with *lots* of
+            # values that are identical. These are all of the 0-length nodes
+            # that are referred to by the TREE_ROOT (and likely some other
+            # directory nodes). For example, bzr has 25k references to
+            # something like '12607215 328306 0 0', which ends up consuming 1MB
+            # of memory, just for those strings.
+            str_len = last - temp_ptr - 1
+            if (str_len > 4
+                and strncmp(" 0 0", last - 4, 4) == 0):
+                # This drops peak mem for bzr.dev from 87.4MB => 86.2MB
+                # For Launchpad 236MB => 232MB
+                value = safe_interned_string_from_size(temp_ptr + 1, str_len)
+            else:
+                value = safe_string_from_size(temp_ptr + 1, str_len)
             # shrink the references end point
             last = temp_ptr
+
         if self.ref_list_length:
             ref_lists = StaticTuple_New(self.ref_list_length)
             loop_counter = 0



More information about the bazaar-commits mailing list