Rev 4076: Use PyString_InternInPlace to intern() the various parts of keys that are processed. in lp:///~jameinel/bzr/intern_keys
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 4 02:51:03 GMT 2009
At lp:///~jameinel/bzr/intern_keys
------------------------------------------------------------
revno: 4076
revision-id: john at arbash-meinel.com-20090304025054-fze01hr79xjv21x5
parent: pqm at pqm.ubuntu.com-20090303085413-35seprvnu885xorz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: intern_keys
timestamp: Tue 2009-03-03 20:50:54 -0600
message:
Use PyString_InternInPlace to intern() the various parts of keys that are processed.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2009-03-03 08:54:13 +0000
+++ b/NEWS 2009-03-04 02:50:54 +0000
@@ -142,6 +142,11 @@
plugins to have hooks called on non-stacked instances.
(Robert Collins, #334187)
+ * ``BTreeIndex`` now interns the strings inside keys that it
+ processes. This should show some decrease in memory consumption when
+ doing operations that load large portions of the keyspace.
+ (John Arbash Meinel)
+
* ``BzrDir.cloning_metadir`` now has a RPC call. (Robert Collins)
* ``BzrDirFormat.__str__`` now uses the human readable description
=== modified file 'bzrlib/_btree_serializer_c.pyx'
--- a/bzrlib/_btree_serializer_c.pyx 2008-09-24 04:15:03 +0000
+++ b/bzrlib/_btree_serializer_c.pyx 2009-03-04 02:50:54 +0000
@@ -27,20 +27,24 @@
cdef extern from "Python.h":
ctypedef int Py_ssize_t # Required for older pyrex versions
ctypedef struct PyObject:
+ Py_ssize_t ob_refcnt
pass
int PyList_Append(object lst, object item) except -1
char *PyString_AsString(object p) except NULL
object PyString_FromStringAndSize(char *, Py_ssize_t)
+ PyObject *PyString_FromStringAndSize_ptr "PyString_FromStringAndSize" (char *, Py_ssize_t)
int PyString_CheckExact(object s)
int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *)
Py_ssize_t PyString_Size(object p)
Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *)
char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *)
int PyString_AsStringAndSize_ptr(PyObject *, char **buf, Py_ssize_t *len)
+ void PyString_InternInPlace(PyObject **)
int PyTuple_CheckExact(object t)
Py_ssize_t PyTuple_GET_SIZE(object t)
PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index)
+ void Py_DECREF_ptr "Py_DECREF" (PyObject *)
cdef extern from "string.h":
void *memcpy(void *dest, void *src, size_t n)
@@ -74,6 +78,21 @@
return PyString_FromStringAndSize(s, size)
+cdef object safe_interned_string_from_size(char *s, Py_ssize_t size):
+ cdef PyObject *py_str
+ if size < 0:
+ raise AssertionError(
+ 'tried to create a string with an invalid size: %d @0x%x'
+ % (size, <int>s))
+ py_str = PyString_FromStringAndSize_ptr(s, size)
+ PyString_InternInPlace(&py_str)
+ result = <object>py_str
+ # Casting a PyObject* to an <object> triggers an INCREF from Pyrex, so we
+ # DECREF it to avoid getting immortal strings
+ Py_DECREF_ptr(py_str)
+ return result
+
+
cdef class BTreeLeafParser:
"""Parse the leaf nodes of a BTree index.
@@ -142,8 +161,8 @@
# TODO: Consider using PyIntern_FromString, the only caveat is that
# it assumes a NULL-terminated string, so we have to check if
# temp_ptr[0] == c'\0' or some other char.
- key_element = safe_string_from_size(self._start,
- temp_ptr - self._start)
+ key_element = safe_interned_string_from_size(self._start,
+ temp_ptr - self._start)
# advance our pointer
self._start = temp_ptr + 1
PyList_Append(key_segments, key_element)
=== modified file 'bzrlib/btree_index.py'
--- a/bzrlib/btree_index.py 2009-02-18 05:40:39 +0000
+++ b/bzrlib/btree_index.py 2009-03-04 02:50:54 +0000
@@ -585,7 +585,7 @@
for line in lines[2:]:
if line == '':
break
- nodes.append(tuple(line.split('\0')))
+ nodes.append(tuple(map(intern, line.split('\0'))))
return nodes
More information about the bazaar-commits
mailing list