Rev 4751: Things are ~ working again. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Tue Oct 6 22:35:40 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4751
revision-id: john at arbash-meinel.com-20091006213529-k02tyvu39jnxr03k
parent: john at arbash-meinel.com-20091006181406-jzq86882myq4kwvk
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Tue 2009-10-06 16:35:29 -0500
message:
  Things are ~ working again.
  
  Man this is getting messy, I may have to change my mind again.
  
  I tried to export the StaticTuple type only through _static_tuple_pyx.pyx
  by using a little bit of trickery. However, you end up with a circular
  import issue, and also I forgot to track down one place where I needed
  to rename '_static_tuple_c' => '_static_tuple_type_c'.
  
  The idea was that _static_tuple_type.c would *only* define the type,
  and not any extra info. This way the code could be compiled with either
  cython or pyrex and still get the 'better' StaticTuple object.
  
  It ended up, overall, just being a multi-hour mess trying to get the
  dependencies sorted out. By using a .pxd file, at least the basic
  circular import problem was sorted out.
  
  However at this point, you *have* to import _static_tuple_pyx before
  _static_tuple_type_c or you get a segfault, and you have to import the
  latter if you want to get direct access to the class.
  
  So at this point I feel like I either need to:
   1) Go back to the way it was, and get rid of the circular import
   2) Finish the rest of the steps, bring everything into Cython
      and say 'if you want the memory improvements, then you have
      to compile with cython.'
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore	2009-10-06 18:14:06 +0000
+++ b/.bzrignore	2009-10-06 21:35:29 +0000
@@ -58,9 +58,9 @@
 bzrlib/_known_graph_pyx.c
 bzrlib/_readdir_pyx.c
 bzrlib/_rio_pyx.c
-bzrlib/_static_tuple_interned_pyx.c
-bzrlib/_static_tuple_interned_pyx.h
-bzrlib/_static_tuple_interned_pyx_api.h
+bzrlib/_static_tuple_pyx.c
+bzrlib/_static_tuple_pyx.h
+bzrlib/_static_tuple_pyx_api.h
 bzrlib/_walkdirs_win32.c
 # built extension modules
 bzrlib/_*.dll

=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-10-06 18:14:06 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-10-06 21:35:29 +0000
@@ -59,8 +59,8 @@
 
 # It seems we need to import the definitions so that the pyrex compiler has
 # local names to access them.
-from _static_tuple_c cimport StaticTuple, \
-    import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+from _static_tuple_pyx cimport StaticTuple, \
+    STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
     StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
 
 
@@ -102,11 +102,8 @@
     Py_DECREF_ptr(py_str)
     return result
 
-from bzrlib import _static_tuple_c
-cdef object _ST
-_ST = _static_tuple_c.StaticTuple
-# This sets up the StaticTuple C_API functionality
-import_static_tuple_c()
+from bzrlib import _static_tuple_pyx
+# import_static_tuple_pyx()
 
 
 cdef class BTreeLeafParser:

=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx	2009-10-02 15:51:03 +0000
+++ b/bzrlib/_chk_map_pyx.pyx	2009-10-06 21:35:29 +0000
@@ -61,16 +61,10 @@
 
 # It seems we need to import the definitions so that the pyrex compiler has
 # local names to access them.
-from _static_tuple_c cimport StaticTuple,\
-    import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+from _static_tuple_pyx cimport StaticTuple, StaticTuple_New, \
     StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
 
 
-from bzrlib import _static_tuple_c
-# This sets up the StaticTuple C_API functionality
-if import_static_tuple_c() != 0:
-    raise ImportError('der borken')
-
 cdef object _LeafNode
 _LeafNode = None
 cdef object _InternalNode

=== renamed file 'bzrlib/_static_tuple_interned_pyx.pxd' => 'bzrlib/_static_tuple_pyx.pxd'
--- a/bzrlib/_static_tuple_interned_pyx.pxd	2009-10-02 20:46:25 +0000
+++ b/bzrlib/_static_tuple_pyx.pxd	2009-10-06 21:35:29 +0000
@@ -16,13 +16,14 @@
 
 """Interface definition of a class to intern StaticTuple objects."""
 
-cdef extern from "python-compat.h":
-    ctypedef long Py_ssize_t
-
 cdef extern from "Python.h":
     ctypedef struct PyObject:
         pass
 
+
+from bzrlib._static_tuple_type_c cimport StaticTuple, StaticTuple_SET_ITEM, \
+    StaticTuple_GET_ITEM, STATIC_TUPLE_INTERNED_FLAG, STATIC_TUPLE_ALL_STRING
+
 cdef public api class StaticTupleInterner [object StaticTupleInternerObject,
                                            type StaticTupleInterner_type]:
 
@@ -38,7 +39,11 @@
     cpdef int discard(self, key) except -1
     cdef int _insert_clean(self, PyObject *key) except -1
     cpdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
-
 # TODO: might want to export the C api here, though it is all available from
 #       the class object...
 cdef api object StaticTupleInterner_Add(object self, object key)
+
+cdef api StaticTuple StaticTuple_New(Py_ssize_t)
+cdef api StaticTuple StaticTuple_Intern(StaticTuple)
+cdef api int StaticTuple_CheckExact(object)
+

=== renamed file 'bzrlib/_static_tuple_interned_pyx.pyx' => 'bzrlib/_static_tuple_pyx.pyx'
--- a/bzrlib/_static_tuple_interned_pyx.pyx	2009-10-02 21:14:41 +0000
+++ b/bzrlib/_static_tuple_pyx.pyx	2009-10-06 21:35:29 +0000
@@ -25,11 +25,14 @@
     PyObject *Py_NotImplemented
     void Py_INCREF(PyObject *)
     void Py_DECREF(PyObject *)
+    ctypedef struct PyVarObject:
+        pass
     ctypedef struct PyTypeObject:
         hashfunc tp_hash
         richcmpfunc tp_richcompare
 
     PyTypeObject *Py_TYPE(PyObject *)
+    PyVarObject * _PyObject_NewVar(PyTypeObject *, Py_ssize_t) except NULL
         
     void *PyMem_Malloc(size_t nbytes)
     void PyMem_Free(void *)
@@ -40,7 +43,6 @@
 _dummy_obj = object()
 _dummy = <PyObject *>_dummy_obj
 
-
 cdef inline int _is_equal(PyObject *this, long this_hash, PyObject *other):
     cdef long other_hash
     cdef PyObject *res
@@ -546,3 +548,76 @@
         key[0] = table[i]
     return 1
 
+
+cdef StaticTuple _empty_tuple
+def _get_empty_tuple():
+    """Return the 'empty tuple'
+
+    This is the singleton StaticTuple that has no content.
+    """
+    return _empty_tuple
+
+
+cdef api StaticTuple StaticTuple_New(Py_ssize_t size):
+    """Create a new StaticTuple object with the number of slots specified."""
+    cdef PyObject *tmp
+    cdef StaticTuple stuple
+
+    if size < 0:
+        raise ValueError('size must be > 0')
+
+    if (size == 0 and _empty_tuple is not None):
+        return _empty_tuple
+    # Note that we use PyObject_NewVar because we want to allocate a variable
+    # width entry. However we *aren't* truly a PyVarObject because we don't
+    # use a long for ob_size. Instead we use a plain 'size' that is an int,
+    # and will be overloaded with flags in the future.
+    # As such we do the alloc, and then have to clean up anything it does
+    # incorrectly. Consider switching to PyObject_MALLOC directly
+    tmp = <PyObject *>_PyObject_NewVar(<PyTypeObject*>StaticTuple, size)
+    stuple = <StaticTuple>tmp
+    Py_DECREF(tmp) # The cast to <StaticTuple> causes an INCREF
+    stuple.size = size;
+    stuple.flags = 0;
+    stuple._unused0 = 0
+    stuple._unused1 = 0
+    if size > 0:
+        memset(stuple.items, 0, sizeof(PyObject *) * size)
+    return stuple
+
+cdef api int StaticTuple_CheckExact(object s):
+    return isinstance(s, StaticTuple)
+
+
+cdef api StaticTupleInterner _interned_tuples
+def _get_interned_tuples():
+    """Get a copy of the _interned_tuples object.
+
+    Note that this object should *never* be mutated. Doing so could cause
+    segfaults, etc.
+    """
+    return _interned_tuples
+
+
+cdef api StaticTuple StaticTuple_Intern(StaticTuple self):
+    if _interned_tuples is None:
+        return self
+    if self.flags & STATIC_TUPLE_INTERNED_FLAG:
+        return self
+    # StaticTupleInterner_Add returns whatever object is present at self
+    # or the new object if it needs to add it.
+    # 
+    unique_key = _interned_tuples.add(self)
+    if unique_key is not self:
+        # There was already a key here, just return it
+        return unique_key
+    # This is now interned, mark it as such, and adjust the refcount
+    self.flags |= STATIC_TUPLE_INTERNED_FLAG
+    Py_DECREF(<PyObject *>self)
+    return self
+
+
+_interned_tuples = StaticTupleInterner()
+_empty_tuple = _interned_tuples.add(StaticTuple())
+_empty_tuple.flags |= STATIC_TUPLE_ALL_STRING
+

=== renamed file 'bzrlib/_static_tuple_c.c' => 'bzrlib/_static_tuple_type_c.c'
--- a/bzrlib/_static_tuple_c.c	2009-10-02 21:14:41 +0000
+++ b/bzrlib/_static_tuple_type_c.c	2009-10-06 21:35:29 +0000
@@ -15,14 +15,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/* Must be defined before importing _static_tuple_c.h so that we get the right
- * linkage.
- */
-#define STATIC_TUPLE_MODULE
-
-#include "_static_tuple_c.h"
-#include "_export_c_api.h"
-#include "_static_tuple_interned_pyx_api.h"
+#include "Python.h"
+#include "_static_tuple_type_c.h"
+#include "_static_tuple_pyx_api.h"
 
 #include "python-compat.h"
 
@@ -36,7 +31,6 @@
 
 
 /* The one and only StaticTuple with no values */
-static StaticTuple *_empty_tuple = NULL;
 static PyObject *_interned_tuples = NULL;
 
 
@@ -71,41 +65,13 @@
 
 static char StaticTuple_as_tuple_doc[] = "as_tuple() => tuple";
 
-static StaticTuple *
-StaticTuple_Intern(StaticTuple *self)
+static PyObject *
+_StaticTuple_intern(StaticTuple *self)
 {
-    PyObject *unique_key = NULL;
-
-    if (_interned_tuples == NULL) {
-        Py_INCREF(self);
-        return self;
-    }
-    if (_StaticTuple_is_interned(self)) {
-        // Already interned
-        Py_INCREF(self);
-        return self;
-    }
-    /* StaticTupleInterner_Add returns whatever object is present at self
-     * or the new object if it needs to add it.
-     */
-    unique_key = StaticTupleInterner_Add(_interned_tuples, (PyObject *)self);
-    if (!unique_key) {
-        // Suppress any error and just return the object
-        PyErr_Clear();
-        return self;
-    }
-    if (unique_key != (PyObject *)self) {
-        // There was already a key at that location
-        return (StaticTuple *)unique_key;
-    }
-    self->flags |= STATIC_TUPLE_INTERNED_FLAG;
-    // The two references in the dict do not count, so that the StaticTuple object
-    // does not become immortal just because it was interned.
-    Py_REFCNT(self) -= 1;
-    return self;
+    return (PyObject *)StaticTuple_Intern(self);
 }
 
-static char StaticTuple_Intern_doc[] = "intern() => unique StaticTuple\n"
+static char StaticTuple_intern_doc[] = "intern() => unique StaticTuple\n"
     "Return a 'canonical' StaticTuple object.\n"
     "Similar to intern() for strings, this makes sure there\n"
     "is only one StaticTuple object for a given value\n."
@@ -132,45 +98,6 @@
 }
 
 
-/* Similar to PyTuple_New() */
-static StaticTuple *
-StaticTuple_New(Py_ssize_t size)
-{
-    StaticTuple *stuple;
-    if (size < 0) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-
-    if (size == 0 && _empty_tuple != NULL) {
-        Py_INCREF(_empty_tuple);
-        return _empty_tuple;
-    }
-    /* Note that we use PyObject_NewVar because we want to allocate a variable
-     * width entry. However we *aren't* truly a PyVarObject because we don't
-     * use a long for ob_size. Instead we use a plain 'size' that is an int,
-     * and will be overloaded with flags in the future.
-     * As such we do the alloc, and then have to clean up anything it does
-     * incorrectly.
-     */
-    stuple = PyObject_NewVar(StaticTuple, &StaticTuple_Type, size);
-    if (stuple == NULL) {
-        return NULL;
-    }
-    stuple->size = size;
-    stuple->flags = 0;
-    stuple->_unused0 = 0;
-    stuple->_unused1 = 0;
-    if (size > 0) {
-        memset(stuple->items, 0, sizeof(PyObject *) * size);
-    }
-#if STATIC_TUPLE_HAS_HASH
-    stuple->hash = -1;
-#endif
-    return stuple;
-}
-
-
 static PyObject *
 StaticTuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@@ -203,7 +130,7 @@
         obj = PyTuple_GET_ITEM(args, i);
         if (!PyString_CheckExact(obj)) {
             is_all_str = 0;
-            if (!StaticTuple_CheckExact(obj)) {
+            if (!_StaticTuple_CheckExact(obj)) {
                 PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
                     " requires that all key bits are strings or StaticTuple.");
                 /* TODO: What is the proper way to dealloc ? */
@@ -322,7 +249,7 @@
     PyObject *v_obj, *w_obj;
     richcmpfunc string_richcompare;
 
-    if (!StaticTuple_CheckExact(v)) {
+    if (!_StaticTuple_CheckExact(v)) {
         /* This has never triggered, according to python-dev it seems this
          * might trigger if '__op__' is defined but '__rop__' is not, sort of
          * case. Such as "None == StaticTuple()"
@@ -332,7 +259,7 @@
         return Py_NotImplemented;
     }
     vk = (StaticTuple *)v;
-    if (StaticTuple_CheckExact(w)) {
+    if (_StaticTuple_CheckExact(w)) {
         /* The most common case */
         wk = (StaticTuple*)w;
     } else if (PyTuple_Check(w)) {
@@ -396,8 +323,8 @@
         w_obj = StaticTuple_GET_ITEM(wk, i);
         if (PyString_CheckExact(v_obj) && PyString_CheckExact(w_obj)) {
             result = string_richcompare(v_obj, w_obj, Py_EQ);
-        } else if (StaticTuple_CheckExact(v_obj) &&
-                   StaticTuple_CheckExact(w_obj))
+        } else if (_StaticTuple_CheckExact(v_obj) &&
+                   _StaticTuple_CheckExact(w_obj))
         {
             /* Both are StaticTuple types, so recurse */
             result = StaticTuple_richcompare(v_obj, w_obj, Py_EQ);
@@ -464,8 +391,8 @@
     if (PyString_CheckExact(v_obj) && PyString_CheckExact(w_obj))
     {
         return string_richcompare(v_obj, w_obj, op);
-    } else if (StaticTuple_CheckExact(v_obj) &&
-               StaticTuple_CheckExact(w_obj))
+    } else if (_StaticTuple_CheckExact(v_obj) &&
+               _StaticTuple_CheckExact(w_obj))
     {
         /* Both are StaticTuple types, so recurse */
         return StaticTuple_richcompare(v_obj, w_obj, op);
@@ -543,7 +470,8 @@
 
 static PyMethodDef StaticTuple_methods[] = {
     {"as_tuple", (PyCFunction)StaticTuple_as_tuple, METH_NOARGS, StaticTuple_as_tuple_doc},
-    {"intern", (PyCFunction)StaticTuple_Intern, METH_NOARGS, StaticTuple_Intern_doc},
+    // set after loading _static_tuple_pyx
+    {"intern", (PyCFunction)_StaticTuple_intern, METH_NOARGS, StaticTuple_intern_doc},
     {"_is_interned", (PyCFunction)StaticTuple__is_interned, METH_NOARGS,
      StaticTuple__is_interned_doc},
     {NULL, NULL} /* sentinel */
@@ -612,72 +540,6 @@
 };
 
 
-static char KeyIntern_doc[] = "";
-
-static PyMethodDef KeyIntern_methods[] = {
-    // {"as_tuple", (PyCFunction)Keys_as_tuple, METH_NOARGS, Keys_as_tuple_doc},
-    {NULL, NULL} /* sentinel */
-};
-
-// static PySequenceMethods KeyIntern_as_sequence = {
-//     0, //(lenfunc)Keys_length,           /* sq_length */
-//     0,                              /* sq_concat */
-//     0,                              /* sq_repeat */
-//     0, //(ssizeargfunc)Keys_item,        /* sq_item */
-//     0,                              /* sq_slice */
-//     0,                              /* sq_ass_item */
-//     0,                              /* sq_ass_slice */
-//     0,                              /* sq_contains */
-// };
-
-// static PyTypeObject KeyIntern_Type = {
-//     PyObject_HEAD_INIT(NULL)
-//     0,                                           /* ob_size */
-//     "KeyIntern",                                 /* tp_name */
-//     sizeof(KeyIntern) - sizeof(Key *),           /* tp_basicsize */
-//     sizeof(Key *),                               /* tp_itemsize */
-//     0, //(destructor)Keys_dealloc,               /* tp_dealloc */
-//     0,                                           /* tp_print */
-//     0,                                           /* tp_getattr */
-//     0,                                           /* tp_setattr */
-//     0,                                           /* tp_compare */
-//     // TODO: implement repr() and possibly str()
-//     0, //(reprfunc)Keys_repr,                         /* tp_repr */
-//     0,                                           /* tp_as_number */
-//     &KeyIntern_as_sequence,                      /* tp_as_sequence */
-//     0,                                           /* tp_as_mapping */
-//     0, //(hashfunc)Keys_hash,                         /* tp_hash */
-//     0,                                           /* tp_call */
-//     0,                                           /* tp_str */
-//     PyObject_GenericGetAttr,                     /* tp_getattro */
-//     0,                                           /* tp_setattro */
-//     0,                                           /* tp_as_buffer */
-//     Py_TPFLAGS_DEFAULT,                          /* tp_flags*/
-//     0, // Keys_doc,                                    /* tp_doc */
-//     /* See Key_traverse for why we have this, even though we aren't GC */
-//     0, //(traverseproc)Keys_traverse,                 /* tp_traverse */
-//     0,                                           /* tp_clear */
-//     // TODO: implement richcompare, we should probably be able to compare vs an
-//     //       tuple, as well as versus another Keys object.
-//     0, //Keys_richcompare,                            /* tp_richcompare */
-//     0,                                           /* tp_weaklistoffset */
-//     // We could implement this as returning tuples of keys...
-//     0,                                           /* tp_iter */
-//     0,                                           /* tp_iternext */
-//     KeyIntern_methods,                           /* tp_methods */
-//     0,                                           /* tp_members */
-//     0,                                           /* tp_getset */
-//     0,                                           /* tp_base */
-//     0,                                           /* tp_dict */
-//     0,                                           /* tp_descr_get */
-//     0,                                           /* tp_descr_set */
-//     0,                                           /* tp_dictoffset */
-//     0,                                           /* tp_init */
-//     0,                                           /* tp_alloc */
-//     0, //Keys_new,                                    /* tp_new */
-// };
-
-
 static PyMethodDef static_tuple_c_methods[] = {
 //    {"unique_lcs_c", py_unique_lcs, METH_VARARGS},
 //    {"recurse_matches_c", py_recurse_matches, METH_VARARGS},
@@ -696,60 +558,45 @@
 }
 
 
-static void
-setup_empty_tuple(PyObject *m)
-{
-    StaticTuple *stuple;
-    if (_interned_tuples == NULL) {
-        fprintf(stderr, "You need to call setup_interned_tuples() before"
-                " setup_empty_tuple, because we intern it.\n");
-    }
-    // We need to create the empty tuple
-    stuple = (StaticTuple *)StaticTuple_New(0);
-    stuple->flags = STATIC_TUPLE_ALL_STRING;
-    _empty_tuple = StaticTuple_Intern(stuple);
-    assert(_empty_tuple == stuple);
-    // At this point, refcnt is 2: 1 from New(), and 1 from the return from
-    // intern(). We will keep 1 for the _empty_tuple global, and use the other
-    // for the module reference.
-    PyModule_AddObject(m, "_empty_tuple", (PyObject *)_empty_tuple);
-}
-
-static int
-_StaticTuple_CheckExact(PyObject *obj)
-{
-    return StaticTuple_CheckExact(obj);
-}
-
-static void
-setup_c_api(PyObject *m)
-{
-    _export_function(m, "StaticTuple_New", StaticTuple_New,
-        "StaticTuple *(Py_ssize_t)");
-    _export_function(m, "StaticTuple_Intern", StaticTuple_Intern,
-        "StaticTuple *(StaticTuple *)");
-    _export_function(m, "_StaticTuple_CheckExact", _StaticTuple_CheckExact,
-        "int(PyObject *)");
-}
-
-
 PyMODINIT_FUNC
-init_static_tuple_c(void)
+init_static_tuple_type_c(void)
 {
     PyObject* m;
+    fprintf(stderr, "init_static_tuple_type_c\n");
 
-    if (PyType_Ready(&StaticTuple_Type) < 0)
+    if (PyType_Ready(&StaticTuple_Type) < 0) {
+        fprintf(stderr, "StaticTuple_Type not ready\n");
         return;
+    }
+    fprintf(stderr, "StaticTuple_Type ready\n");
 
-    m = Py_InitModule3("_static_tuple_c", static_tuple_c_methods,
+    m = Py_InitModule3("_static_tuple_type_c", static_tuple_c_methods,
                        "C implementation of a StaticTuple structure");
     if (m == NULL)
       return;
 
     Py_INCREF(&StaticTuple_Type);
     PyModule_AddObject(m, "StaticTuple", (PyObject *)&StaticTuple_Type);
-    import_bzrlib___static_tuple_interned_pyx();
-    setup_interned_tuples(m);
-    setup_empty_tuple(m);
-    setup_c_api(m);
+    fprintf(stderr, "added the StaticTuple type, importing _static_tuple_pyx\n");
+    if (import_bzrlib___static_tuple_pyx() == -1) {
+        PyObject *m2;
+        fprintf(stderr, "Failed to import_bzrlib___static_tuple_pyx.\n");
+        // PyErr_SetString(PyExc_ImportError,
+        //                 "Failed to import _static_tuple_pyx");
+
+        // m2 = PyImport_ImportModule("bzrlib._static_tuple_pyx");
+        // if (m2 == NULL) {
+        //     fprintf(stderr, "Failed to import bzrlib._static_tuple_pyx\n");
+        // }
+        // Py_INCREF(&StaticTuple_Type);
+        // if (PyModule_AddObject(m2, "StaticTuple", (PyObject
+        //     *)&StaticTuple_Type) == -1) {
+        //     fprintf(stderr, "Failed to add StaticTuple to bzrlib._static_tuple_pyx\n");
+        // }
+        return;
+    }
+    if (PyErr_Occurred()) {
+        fprintf(stderr, "an exception has occurred\n");
+    }
+    fprintf(stderr, "imported successfully\n");
 }

=== renamed file 'bzrlib/_static_tuple_c.h' => 'bzrlib/_static_tuple_type_c.h'
--- a/bzrlib/_static_tuple_c.h	2009-10-02 15:51:03 +0000
+++ b/bzrlib/_static_tuple_type_c.h	2009-10-06 21:35:29 +0000
@@ -17,8 +17,6 @@
 
 #ifndef _STATIC_TUPLE_H_
 #define _STATIC_TUPLE_H_
-#include <Python.h>
-#include <string.h>
 
 #define STATIC_TUPLE_HAS_HASH 0
 /* Caching the hash adds memory, but allows us to save a little time during
@@ -61,54 +59,9 @@
 } StaticTuple;
 extern PyTypeObject StaticTuple_Type;
 
-typedef struct {
-    PyObject_VAR_HEAD
-    PyObject *table[0];
-} KeyIntern;
-
+#define _StaticTuple_CheckExact(obj) (Py_TYPE(obj) == &StaticTuple_Type)
 #define StaticTuple_SET_ITEM(key, offset, val) \
     ((((StaticTuple*)(key))->items[(offset)]) = ((PyObject *)(val)))
 #define StaticTuple_GET_ITEM(key, offset) (((StaticTuple*)key)->items[offset])
 
-
-#ifdef STATIC_TUPLE_MODULE
-/* Used when compiling _static_tuple_c.c */
-
-static StaticTuple * StaticTuple_New(Py_ssize_t);
-static StaticTuple * StaticTuple_Intern(StaticTuple *self);
-#define StaticTuple_CheckExact(op) (Py_TYPE(op) == &StaticTuple_Type)
-
-#else
-/* Used as the foreign api */
-
-#include "_import_c_api.h"
-
-static StaticTuple *(*StaticTuple_New)(Py_ssize_t);
-static StaticTuple *(*StaticTuple_Intern)(StaticTuple *);
-static PyTypeObject *_p_StaticTuple_Type;
-
-#define StaticTuple_CheckExact(op) (Py_TYPE(op) == _p_StaticTuple_Type)
-static int (*_StaticTuple_CheckExact)(PyObject *);
-
-
-/* Return -1 and set exception on error, 0 on success */
-static int
-import_static_tuple_c(void)
-{
-    struct function_description functions[] = {
-        {"StaticTuple_New", (void **)&StaticTuple_New,
-            "StaticTuple *(Py_ssize_t)"},
-        {"StaticTuple_Intern", (void **)&StaticTuple_Intern,
-            "StaticTuple *(StaticTuple *)"},
-        {"_StaticTuple_CheckExact", (void **)&_StaticTuple_CheckExact,
-            "int(PyObject *)"},
-        {NULL}};
-    struct type_description types[] = {
-        {"StaticTuple", &_p_StaticTuple_Type},
-        {NULL}};
-    return _import_extension_module("bzrlib._static_tuple_c",
-        functions, types);
-}
-
-#endif // !STATIC_TUPLE_MODULE
 #endif // !_STATIC_TUPLE_H_

=== renamed file 'bzrlib/_static_tuple_c.pxd' => 'bzrlib/_static_tuple_type_c.pxd'
--- a/bzrlib/_static_tuple_c.pxd	2009-10-02 15:51:03 +0000
+++ b/bzrlib/_static_tuple_type_c.pxd	2009-10-06 21:35:29 +0000
@@ -14,32 +14,23 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
-"""The interface definition file for the StaticTuple class."""
-
-
 cdef extern from "Python.h":
-    ctypedef int Py_ssize_t # Required for older pyrex versions
     ctypedef struct PyObject:
         pass
 
-cdef extern from "_static_tuple_c.h":
-    ctypedef class bzrlib._static_tuple_c.StaticTuple [object StaticTuple]:
+cdef extern from "_static_tuple_type_c.h":
+    ctypedef class bzrlib._static_tuple_type_c.StaticTuple [object StaticTuple]:
         cdef unsigned char size
         cdef unsigned char flags
-        # We don't need to define _unused attributes, because the raw
-        # StaticTuple structure will be referenced
-        # cdef unsigned char _unused0
-        # cdef unsigned char _unused1
+        cdef unsigned char _unused0
+        cdef unsigned char _unused1
         cdef PyObject *items[0]
-
-    int import_static_tuple_c() except -1
-    # ctypedef object (*st_new_type)(Py_ssize_t)
-    # st_new_type st_new
     int STATIC_TUPLE_ALL_STRING
+    int STATIC_TUPLE_INTERNED_FLAG
 
-    StaticTuple StaticTuple_New(Py_ssize_t)
-    StaticTuple StaticTuple_Intern(StaticTuple)
     # Steals a reference and Val must be a PyStringObject, no checking is done
     void StaticTuple_SET_ITEM(StaticTuple key, Py_ssize_t offset, object val)
     object StaticTuple_GET_ITEM(StaticTuple key, Py_ssize_t offset)
-    int StaticTuple_CheckExact(object)
+
+
+

=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2009-10-02 21:55:39 +0000
+++ b/bzrlib/index.py	2009-10-06 21:35:29 +0000
@@ -40,7 +40,8 @@
     debug,
     errors,
     )
-from bzrlib._static_tuple_c import StaticTuple
+import bzrlib._static_tuple_pyx
+from bzrlib._static_tuple_type_c import StaticTuple
 
 _HEADER_READV = (0, 200)
 _OPTION_KEY_ELEMENTS = "key_elements="

=== modified file 'bzrlib/tests/test__static_tuple.py'
--- a/bzrlib/tests/test__static_tuple.py	2009-10-02 21:14:41 +0000
+++ b/bzrlib/tests/test__static_tuple.py	2009-10-06 21:35:29 +0000
@@ -34,8 +34,8 @@
     ]
     suite = loader.suiteClass()
     if CompiledStaticTuple.available():
-        from bzrlib import _static_tuple_c
-        scenarios.append(('C', {'module': _static_tuple_c}))
+        from bzrlib import _static_tuple_pyx
+        scenarios.append(('C', {'module': _static_tuple_pyx}))
     else:
         # the compiled module isn't available, so we add a failing test
         class FailWithoutFeature(tests.TestCase):
@@ -50,13 +50,13 @@
 
     def _probe(self):
         try:
-            import bzrlib._static_tuple_c
+            import bzrlib._static_tuple_pyx
         except ImportError:
             return False
         return True
 
     def feature_name(self):
-        return 'bzrlib._static_tuple_c'
+        return 'bzrlib._static_tuple_pyx'
 
 CompiledStaticTuple = _CompiledStaticTuple()
 
@@ -372,3 +372,255 @@
         if self.module is _static_tuple_py:
             return
         self.assertIsNot(None, self.module._C_API)
+
+
+
+class TestStaticTupleInterned(tests.TestCase):
+
+    _test_needs_features = [CompiledStaticTuple]
+
+    def assertIn(self, obj, container):
+        self.assertTrue(obj in container,
+            '%s not found in %s' % (obj, container))
+
+    def assertNotIn(self, obj, container):
+        self.assertTrue(obj not in container,
+            'We found %s in %s' % (obj, container))
+
+    def assertFillState(self, used, fill, mask, obj):
+        self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))
+
+    def assertRefcount(self, count, obj):
+        """Assert that the refcount for obj is what we expect.
+
+        Note that this automatically adjusts for the fact that calling
+        assertRefcount actually creates a new pointer, as does calling
+        sys.getrefcount. So pass the expected value *before* the call.
+        """
+        # I don't understand why it is count+3 here, but it seems to be
+        # correct. If I check in the calling function, with:
+        # self.assertEqual(count+1, sys.getrefcount(obj))
+        # Then it works fine. Something about passing it to assertRefcount is
+        # actually double-incrementing (and decrementing) the refcount
+        self.assertEqual(count+3, sys.getrefcount(obj))
+
+    def test_initial(self):
+        obj = _module.StaticTupleInterner()
+        self.assertEqual(0, len(obj))
+        st = StaticTuple('foo', 'bar')
+        self.assertFillState(0, 0, 0x3ff, obj)
+
+    def test__lookup(self):
+        # The tuple hash function is rather good at entropy. For all integers
+        # 0=>1023, hash((i,)) & 1023 maps to a unique output, and hash((i,j))
+        # maps to all 1024 fields evenly.
+        # However, hash((c,d))& 1023 for characters has an uneven distribution
+        # of collisions, for example:
+        #  ('a', 'a'), ('f', '4'), ('p', 'r'), ('q', '1'), ('F', 'T'),
+        #  ('Q', 'Q'), ('V', 'd'), ('7', 'C')
+        # all collide @ 643
+        obj = _module.StaticTupleInterner()
+        offset, val = obj._test_lookup(StaticTuple('a', 'a'))
+        self.assertEqual(643, offset)
+        self.assertEqual('<null>', val)
+        offset, val = obj._test_lookup(StaticTuple('f', '4'))
+        self.assertEqual(643, offset)
+        self.assertEqual('<null>', val)
+        offset, val = obj._test_lookup(StaticTuple('p', 'r'))
+        self.assertEqual(643, offset)
+        self.assertEqual('<null>', val)
+
+    def test_get_set_del_with_collisions(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('a', 'a')
+        k2 = StaticTuple('f', '4') # collides
+        k3 = StaticTuple('p', 'r')
+        k4 = StaticTuple('q', '1')
+        self.assertEqual((643, '<null>'), obj._test_lookup(k1))
+        self.assertEqual((643, '<null>'), obj._test_lookup(k2))
+        self.assertEqual((643, '<null>'), obj._test_lookup(k3))
+        self.assertEqual((643, '<null>'), obj._test_lookup(k4))
+        obj.add(k1)
+        self.assertIn(k1, obj)
+        self.assertNotIn(k2, obj)
+        self.assertNotIn(k3, obj)
+        self.assertNotIn(k4, obj)
+        self.assertEqual((643, k1), obj._test_lookup(k1))
+        self.assertEqual((787, '<null>'), obj._test_lookup(k2))
+        self.assertEqual((787, '<null>'), obj._test_lookup(k3))
+        self.assertEqual((787, '<null>'), obj._test_lookup(k4))
+        self.assertIs(k1, obj[k1])
+        obj.add(k2)
+        self.assertIs(k2, obj[k2])
+        self.assertEqual((643, k1), obj._test_lookup(k1))
+        self.assertEqual((787, k2), obj._test_lookup(k2))
+        self.assertEqual((660, '<null>'), obj._test_lookup(k3))
+        # Even though k4 collides for the first couple of iterations, the hash
+        # perturbation uses the full width hash (not just the masked value), so
+        # it now diverges
+        self.assertEqual((180, '<null>'), obj._test_lookup(k4))
+        self.assertEqual((643, k1), obj._test_lookup(('a', 'a')))
+        self.assertEqual((787, k2), obj._test_lookup(('f', '4')))
+        self.assertEqual((660, '<null>'), obj._test_lookup(('p', 'r')))
+        self.assertEqual((180, '<null>'), obj._test_lookup(('q', '1')))
+        obj.add(k3)
+        self.assertIs(k3, obj[k3])
+        self.assertIn(k1, obj)
+        self.assertIn(k2, obj)
+        self.assertIn(k3, obj)
+        self.assertNotIn(k4, obj)
+
+        del obj[k1]
+        self.assertEqual((643, '<dummy>'), obj._test_lookup(k1))
+        self.assertEqual((787, k2), obj._test_lookup(k2))
+        self.assertEqual((660, k3), obj._test_lookup(k3))
+        self.assertEqual((643, '<dummy>'), obj._test_lookup(k4))
+        self.assertNotIn(k1, obj)
+        self.assertIn(k2, obj)
+        self.assertIn(k3, obj)
+        self.assertNotIn(k4, obj)
+
+    def test_add(self):
+        obj = _module.StaticTupleInterner()
+        self.assertFillState(0, 0, 0x3ff, obj)
+        k1 = StaticTuple('foo')
+        self.assertRefcount(1, k1)
+        self.assertIs(k1, obj.add(k1))
+        self.assertFillState(1, 1, 0x3ff, obj)
+        self.assertRefcount(2, k1)
+        ktest = obj[k1]
+        self.assertRefcount(3, k1)
+        self.assertIs(k1, ktest)
+        del ktest
+        self.assertRefcount(2, k1)
+        k2 = StaticTuple('foo')
+        self.assertRefcount(1, k2)
+        self.assertIsNot(k1, k2)
+        # doesn't add anything, so the counters shouldn't be adjusted
+        self.assertIs(k1, obj.add(k2))
+        self.assertFillState(1, 1, 0x3ff, obj)
+        self.assertRefcount(2, k1) # not changed
+        self.assertRefcount(1, k2) # not incremented
+        self.assertIs(k1, obj[k1])
+        self.assertIs(k1, obj[k2])
+        self.assertRefcount(2, k1)
+        self.assertRefcount(1, k2)
+        # Deleting an entry should remove the fill, but not the used
+        del obj[k1]
+        self.assertFillState(0, 1, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k3)
+        self.assertIs(k3, obj.add(k3))
+        self.assertFillState(1, 2, 0x3ff, obj)
+        self.assertRefcount(2, k3)
+        self.assertIs(k2, obj.add(k2))
+        self.assertFillState(2, 2, 0x3ff, obj)
+        self.assertRefcount(1, k1)
+        self.assertRefcount(2, k2)
+        self.assertRefcount(2, k3)
+
+    def test_discard(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('foo')
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k1)
+        self.assertRefcount(1, k2)
+        self.assertRefcount(1, k3)
+        obj.add(k1)
+        self.assertRefcount(2, k1)
+        self.assertEqual(0, obj.discard(k3))
+        self.assertRefcount(1, k3)
+        obj.add(k3)
+        self.assertRefcount(2, k3)
+        self.assertEqual(1, obj.discard(k3))
+        self.assertRefcount(1, k3)
+
+    def test__delitem__(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('foo')
+        k3 = StaticTuple('bar')
+        self.assertRefcount(1, k1)
+        self.assertRefcount(1, k2)
+        self.assertRefcount(1, k3)
+        obj.add(k1)
+        self.assertRefcount(2, k1)
+        self.assertRaises(KeyError, obj.__delitem__, k3)
+        self.assertRefcount(1, k3)
+        obj.add(k3)
+        self.assertRefcount(2, k3)
+        del obj[k3]
+        self.assertRefcount(1, k3)
+
+    def test__resize(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('foo')
+        k2 = StaticTuple('bar')
+        k3 = StaticTuple('baz')
+        obj.add(k1)
+        obj.add(k2)
+        obj.add(k3)
+        del obj[k2]
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(1024, obj._resize(500))
+        # Doesn't change the size, but does change the content
+        self.assertFillState(2, 2, 0x3ff, obj)
+        obj.add(k2)
+        del obj[k3]
+        self.assertFillState(2, 3, 0x3ff, obj)
+        self.assertEqual(4096, obj._resize(4095))
+        self.assertFillState(2, 2, 0xfff, obj)
+        self.assertIn(k1, obj)
+        self.assertIn(k2, obj)
+        self.assertNotIn(k3, obj)
+        obj.add(k2)
+        self.assertIn(k2, obj)
+        del obj[k2]
+        self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
+        self.assertFillState(1, 2, 0xfff, obj)
+        self.assertEqual(2048, obj._resize(1024))
+        self.assertFillState(1, 1, 0x7ff, obj)
+        self.assertEqual((591, '<null>'), obj._test_lookup(k2))
+
+    def test_add_and_remove_lots_of_items(self):
+        obj = _module.StaticTupleInterner()
+        chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
+        for i in chars:
+            for j in chars:
+                k = StaticTuple(i, j)
+                obj.add(k)
+        num = len(chars)*len(chars)
+        self.assertFillState(num, num, 0x1fff, obj)
+        # Now delete all of the entries and it should shrink again
+        for i in chars:
+            for j in chars:
+                k = StaticTuple(i, j)
+                obj.discard(k)
+        # It should be back to 1024 wide mask, though there may still be some
+        # dummy values in there
+        self.assertFillState(0, obj.fill, 0x3ff, obj)
+        # but there should be fewer than 1/5th dummy entries
+        self.assertTrue(obj.fill < 1024 / 5)
+
+    def test__iter__(self):
+        obj = _module.StaticTupleInterner()
+        k1 = StaticTuple('1')
+        k2 = StaticTuple('1', '2')
+        k3 = StaticTuple('3', '4')
+        obj.add(k1)
+        obj.add(k2)
+        obj.add(k3)
+        all = set()
+        for key in obj:
+            all.add(key)
+        self.assertEqual(sorted([k1, k2, k3]), sorted(all))
+        iterator = iter(obj)
+        iterator.next()
+        obj.add(StaticTuple('foo'))
+        # Set changed size
+        self.assertRaises(RuntimeError, iterator.next)
+        # And even removing an item still causes it to fail
+        del obj[k2]
+        self.assertRaises(RuntimeError, iterator.next)

=== modified file 'setup.py'
--- a/setup.py	2009-10-06 18:14:06 +0000
+++ b/setup.py	2009-10-06 21:35:29 +0000
@@ -294,9 +294,9 @@
 add_pyrex_extension('bzrlib._chk_map_pyx', libraries=[z_lib])
 ext_modules.append(Extension('bzrlib._patiencediff_c',
                              ['bzrlib/_patiencediff_c.c']))
-add_pyrex_extension('bzrlib._static_tuple_interned_pyx')
-ext_modules.append(Extension('bzrlib._static_tuple_c',
-                             ['bzrlib/_static_tuple_c.c']))
+ext_modules.append(Extension('bzrlib._static_tuple_type_c',
+                             ['bzrlib/_static_tuple_type_c.c']))
+add_pyrex_extension('bzrlib._static_tuple_pyx')
 add_pyrex_extension('bzrlib._btree_serializer_pyx')
 
 



More information about the bazaar-commits mailing list