Rev 4764: Interning the most common 'value' entries saves about 1-2% of peak memory in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Thu Oct 8 05:11:02 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4764
revision-id: john at arbash-meinel.com-20091008041043-579p0as1c3xq1q6w
parent: john at arbash-meinel.com-20091007211747-tlgi6myzn54wdyxa
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 23:10:43 -0500
message:
Interning the most common 'value' entries saves about 1-2% of peak memory.
Namely, interning the value strings of 0-length content nodes (the ones ending in ' 0 0').
Note that most values have a common prefix now, and an uncommon suffix...
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-10-07 21:17:47 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-10-08 04:10:43 +0000
@@ -79,6 +79,7 @@
pos = pos - 1
return NULL
+
# TODO: Import this from _dirstate_helpers when it is merged
cdef object safe_string_from_size(char *s, Py_ssize_t size):
if size < 0:
@@ -196,6 +197,7 @@
cdef char *ref_ptr
cdef char *next_start
cdef int loop_counter
+ cdef Py_ssize_t str_len
self._start = self._cur_str
# Find the next newline
@@ -231,10 +233,23 @@
# Invalid line
raise AssertionError("Failed to find the value area")
else:
- # capture the value string
- value = safe_string_from_size(temp_ptr + 1, last - temp_ptr - 1)
+ # Because of how conversions were done, we ended up with *lots* of
+ # values that are identical. These are all of the 0-length nodes
+ # that are referred to by the TREE_ROOT (and likely some other
+ # directory nodes.) For example, bzr has 25k references to
+ # something like '12607215 328306 0 0', which ends up consuming 1MB
+ # of memory, just for those strings.
+ str_len = last - temp_ptr - 1
+ if (str_len > 4
+ and strncmp(" 0 0", last - 4, 4) == 0):
+ # This drops peak mem for bzr.dev from 87.4MB => 86.2MB
+ # For Launchpad 236MB => 232MB
+ value = safe_interned_string_from_size(temp_ptr + 1, str_len)
+ else:
+ value = safe_string_from_size(temp_ptr + 1, str_len)
# shrink the references end point
last = temp_ptr
+
if self.ref_list_length:
ref_lists = StaticTuple_New(self.ref_list_length)
loop_counter = 0
More information about the bazaar-commits
mailing list