Rev 4727: Prototype of not interning sha1: strings. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Wed Sep 30 22:25:35 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4727
revision-id: john at arbash-meinel.com-20090930212526-imvbkyikd9d3r4o6
parent: john at arbash-meinel.com-20090930211748-8er5cdw0cf5l01mo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-09-30 16:25:26 -0500
message:
  Prototype of not interning sha1: strings.
  
  This reduces memory use from 268480KB to 260972KB (about 7.5MB saved) for all
  of launchpad. Most of the saving comes from the interned-string dict shrinking
  from 12MB to 3MB, since it now holds 100k keys instead of 500k keys.
  
  On the plus side, we can be fairly confident that we don't access sha1 keys
  as plain strings.
  If we disable interning for the sha1 keys themselves, we bloat by about 8MB,
  but the time drops from 30s to 27s.
  We would bloat more, but we are trading a 24MB _interned_keys dict for a 12MB one.
  So what we really need is the custom interning dict, so that we
  don't waste so much memory there.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-09-30 21:17:48 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-09-30 21:25:26 +0000
@@ -56,8 +56,6 @@
     int strncmp(char *s1, char *s2, size_t n)
 
 cdef extern from "_static_tuple_c.h":
-    cdef struct StaticTuple:
-        pass
     void **StaticTuple_API
     int import_static_tuple()
     # ctypedef object (*st_new_type)(Py_ssize_t)
@@ -112,8 +110,8 @@
 # This sets up the StaticTuple C_API functionality
 if import_static_tuple() == -1 or StaticTuple_API == NULL:
     raise ImportError('failed to import_static_tuple()')
-cdef object _ST
-_ST = _static_tuple_c.StaticTuple
+cdef object StaticTuple
+StaticTuple = _static_tuple_c.StaticTuple
 
 
 cdef class BTreeLeafParser:
@@ -182,15 +180,19 @@
                                                    last - self._start)))
                     raise AssertionError(failure_string)
             # capture the key string
-            key_element = safe_interned_string_from_size(self._start,
+            if ((temp_ptr - self._start) == 45
+                and strncmp(self._start, 'sha1:', 5) == 0):
+                key_element = safe_string_from_size(self._start,
+                                                    temp_ptr - self._start)
+            else:
+                key_element = safe_interned_string_from_size(self._start,
                                                          temp_ptr - self._start)
             # advance our pointer
             self._start = temp_ptr + 1
             Py_INCREF(key_element)
-            # PyTuple_SET_ITEM(key, loop_counter, key_element)
             StaticTuple_SET_ITEM(key, loop_counter, key_element)
-        # return _keys_type_c.Key(*key)
-        return StaticTuple_intern(key)
+        key = StaticTuple_intern(key)
+        return key
 
     cdef int process_line(self) except -1:
         """Process a line in the bytes."""
@@ -274,18 +276,18 @@
                         # key runs to the end
                         temp_ptr = ref_ptr
                     PyList_Append(ref_list, self.extract_key(temp_ptr))
-                ref_list = _ST(*ref_list).intern()
+                ref_list = StaticTuple_intern(StaticTuple(*ref_list))
                 PyList_Append(ref_lists, ref_list)
                 # prepare for the next reference list
                 self._start = next_start
-            ref_lists = _ST(*ref_lists)
-            node_value = _ST(value, ref_lists)
+            ref_lists = StaticTuple(*ref_lists)
+            node_value = StaticTuple(value, ref_lists)
         else:
             if last != self._start:
                 # unexpected reference data present
                 return -1
-            node_value = _ST(value, _ST())
-        PyList_Append(self.keys, _ST(key, node_value))
+            node_value = StaticTuple(value, StaticTuple())
+        PyList_Append(self.keys, StaticTuple(key, node_value))
         return 0
 
     def parse(self):



More information about the bazaar-commits mailing list