Rev 4076: Use PyString_InternInPlace to intern() the various parts of keys that are processed. in lp:///~jameinel/bzr/intern_keys

John Arbash Meinel john at arbash-meinel.com
Wed Mar 4 02:51:03 GMT 2009


At lp:///~jameinel/bzr/intern_keys

------------------------------------------------------------
revno: 4076
revision-id: john at arbash-meinel.com-20090304025054-fze01hr79xjv21x5
parent: pqm at pqm.ubuntu.com-20090303085413-35seprvnu885xorz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: intern_keys
timestamp: Tue 2009-03-03 20:50:54 -0600
message:
  Use PyString_InternInPlace to intern() the various parts of keys that are processed.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2009-03-03 08:54:13 +0000
+++ b/NEWS	2009-03-04 02:50:54 +0000
@@ -142,6 +142,11 @@
       plugins to have hooks called on non-stacked instances.
       (Robert Collins, #334187)
 
+    * ``BTreeIndex`` now interns the strings inside keys that it
+      processes. This should show some decrease in memory consumption when
+      doing operations that load large portions of the keyspace.
+      (John Arbash Meinel)
+
     * ``BzrDir.cloning_metadir`` now has a RPC call. (Robert Collins)
 
     * ``BzrDirFormat.__str__`` now uses the human readable description

=== modified file 'bzrlib/_btree_serializer_c.pyx'
--- a/bzrlib/_btree_serializer_c.pyx	2008-09-24 04:15:03 +0000
+++ b/bzrlib/_btree_serializer_c.pyx	2009-03-04 02:50:54 +0000
@@ -27,20 +27,24 @@
 cdef extern from "Python.h":
     ctypedef int Py_ssize_t # Required for older pyrex versions
     ctypedef struct PyObject:
+        Py_ssize_t ob_refcnt
         pass
     int PyList_Append(object lst, object item) except -1
 
     char *PyString_AsString(object p) except NULL
     object PyString_FromStringAndSize(char *, Py_ssize_t)
+    PyObject *PyString_FromStringAndSize_ptr "PyString_FromStringAndSize" (char *, Py_ssize_t)
     int PyString_CheckExact(object s)
     int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *)
     Py_ssize_t PyString_Size(object p)
     Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *)
     char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *)
     int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len)
+    void PyString_InternInPlace(PyObject **)
     int PyTuple_CheckExact(object t)
     Py_ssize_t PyTuple_GET_SIZE(object t)
     PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index)
+    void Py_DECREF_ptr "Py_DECREF" (PyObject *)
 
 cdef extern from "string.h":
     void *memcpy(void *dest, void *src, size_t n)
@@ -74,6 +78,21 @@
     return PyString_FromStringAndSize(s, size)
 
 
+cdef object safe_interned_string_from_size(char *s, Py_ssize_t size):
+    cdef PyObject *py_str
+    if size < 0:
+        raise AssertionError(
+            'tried to create a string with an invalid size: %d @0x%x'
+            % (size, <int>s))
+    py_str = PyString_FromStringAndSize_ptr(s, size)
+    PyString_InternInPlace(&py_str)
+    result = <object>py_str
+    # Casting a PyObject* to an <object> triggers an INCREF from Pyrex, so we
+    # DECREF it to avoid getting immortal strings
+    Py_DECREF_ptr(py_str)
+    return result
+
+
 cdef class BTreeLeafParser:
     """Parse the leaf nodes of a BTree index.
 
@@ -142,8 +161,8 @@
             # TODO: Consider using PyIntern_FromString, the only caveat is that
             # it assumes a NULL-terminated string, so we have to check if
             # temp_ptr[0] == c'\0' or some other char.
-            key_element = safe_string_from_size(self._start,
-                                                temp_ptr - self._start)
+            key_element = safe_interned_string_from_size(self._start,
+                                                         temp_ptr - self._start)
             # advance our pointer
             self._start = temp_ptr + 1
             PyList_Append(key_segments, key_element)

=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py	2009-02-18 05:40:39 +0000
+++ b/bzrlib/btree_index.py	2009-03-04 02:50:54 +0000
@@ -585,7 +585,7 @@
         for line in lines[2:]:
             if line == '':
                 break
-            nodes.append(tuple(line.split('\0')))
+            nodes.append(tuple(map(intern, line.split('\0'))))
         return nodes
 
 



More information about the bazaar-commits mailing list