Rev 4727: Prototype of not interning sha1: strings. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Wed Sep 30 22:25:35 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4727
revision-id: john at arbash-meinel.com-20090930212526-imvbkyikd9d3r4o6
parent: john at arbash-meinel.com-20090930211748-8er5cdw0cf5l01mo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-09-30 16:25:26 -0500
message:
Prototype of not interning sha1: strings.
This ends up reducing memory from 268480KB to 260972KB (a saving of about 7.5MB) for all of launchpad.
Most of that saving comes from the interned-string dict dropping from 12MB to 3MB, since
it now holds 100k keys instead of 500k keys.
On the plus side, we can be fairly confident that we don't access sha1 keys
as plain strings.
If we disable interning for the sha1 keys themselves, memory grows by about 8MB,
but the time drops from 30s to 27s.
Memory would grow even more, but we are trading a 24MB _interned_keys dict for a 12MB one.
So what we really need is to get the custom interning dict, so that we
don't waste so much memory there.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-09-30 21:17:48 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-09-30 21:25:26 +0000
@@ -56,8 +56,6 @@
int strncmp(char *s1, char *s2, size_t n)
cdef extern from "_static_tuple_c.h":
- cdef struct StaticTuple:
- pass
void **StaticTuple_API
int import_static_tuple()
# ctypedef object (*st_new_type)(Py_ssize_t)
@@ -112,8 +110,8 @@
# This sets up the StaticTuple C_API functionality
if import_static_tuple() == -1 or StaticTuple_API == NULL:
raise ImportError('failed to import_static_tuple()')
-cdef object _ST
-_ST = _static_tuple_c.StaticTuple
+cdef object StaticTuple
+StaticTuple = _static_tuple_c.StaticTuple
cdef class BTreeLeafParser:
@@ -182,15 +180,19 @@
last - self._start)))
raise AssertionError(failure_string)
# capture the key string
- key_element = safe_interned_string_from_size(self._start,
+ if ((temp_ptr - self._start) == 45
+ and strncmp(self._start, 'sha1:', 5) == 0):
+ key_element = safe_string_from_size(self._start,
+ temp_ptr - self._start)
+ else:
+ key_element = safe_interned_string_from_size(self._start,
temp_ptr - self._start)
# advance our pointer
self._start = temp_ptr + 1
Py_INCREF(key_element)
- # PyTuple_SET_ITEM(key, loop_counter, key_element)
StaticTuple_SET_ITEM(key, loop_counter, key_element)
- # return _keys_type_c.Key(*key)
- return StaticTuple_intern(key)
+ key = StaticTuple_intern(key)
+ return key
cdef int process_line(self) except -1:
"""Process a line in the bytes."""
@@ -274,18 +276,18 @@
# key runs to the end
temp_ptr = ref_ptr
PyList_Append(ref_list, self.extract_key(temp_ptr))
- ref_list = _ST(*ref_list).intern()
+ ref_list = StaticTuple_intern(StaticTuple(*ref_list))
PyList_Append(ref_lists, ref_list)
# prepare for the next reference list
self._start = next_start
- ref_lists = _ST(*ref_lists)
- node_value = _ST(value, ref_lists)
+ ref_lists = StaticTuple(*ref_lists)
+ node_value = StaticTuple(value, ref_lists)
else:
if last != self._start:
# unexpected reference data present
return -1
- node_value = _ST(value, _ST())
- PyList_Append(self.keys, _ST(key, node_value))
+ node_value = StaticTuple(value, StaticTuple())
+ PyList_Append(self.keys, StaticTuple(key, node_value))
return 0
def parse(self):
More information about the bazaar-commits
mailing list