Rev 4756: Some code cleanup passes. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

John Arbash Meinel john at arbash-meinel.com
Wed Oct 7 16:57:40 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple

------------------------------------------------------------
revno: 4756
revision-id: john at arbash-meinel.com-20091007155725-vq1jsr92sk1vmidx
parent: john at arbash-meinel.com-20091007154832-lpipxg46lynh9wmr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 10:57:25 -0500
message:
  Some code cleanup passes.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-10-07 15:27:24 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-10-07 15:57:25 +0000
@@ -60,7 +60,7 @@
 # It seems we need to import the definitions so that the pyrex compiler has
 # local names to access them.
 from _static_tuple_c cimport StaticTuple, \
-    import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+    import_static_tuple_c, StaticTuple_New, \
     StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
 
 
@@ -186,7 +186,6 @@
             self._start = temp_ptr + 1
             Py_INCREF(key_element)
             StaticTuple_SET_ITEM(key, loop_counter, key_element)
-        # key->flags = key->flags | STATIC_TUPLE_ALL_STRING
         key = StaticTuple_Intern(key)
         return key
 

=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx	2009-10-07 15:27:24 +0000
+++ b/bzrlib/_chk_map_pyx.pyx	2009-10-07 15:57:25 +0000
@@ -62,11 +62,10 @@
 # It seems we need to import the definitions so that the pyrex compiler has
 # local names to access them.
 from _static_tuple_c cimport StaticTuple,\
-    import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+    import_static_tuple_c, StaticTuple_New, \
     StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
 
 
-from bzrlib import _static_tuple_c
 # This sets up the StaticTuple C_API functionality
 if import_static_tuple_c() != 0:
     raise ImportError('der borken')

=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c	2009-10-07 15:48:32 +0000
+++ b/bzrlib/_static_tuple_c.c	2009-10-07 15:57:25 +0000
@@ -177,7 +177,6 @@
     StaticTuple *self;
     PyObject *obj = NULL;
     Py_ssize_t i, len = 0;
-    int is_all_str;
 
     if (type != &StaticTuple_Type) {
         PyErr_SetString(PyExc_TypeError, "we only support creating StaticTuple");
@@ -198,11 +197,9 @@
     if (self == NULL) {
         return NULL;
     }
-    is_all_str = 1;
     for (i = 0; i < len; ++i) {
         obj = PyTuple_GET_ITEM(args, i);
         if (!PyString_CheckExact(obj)) {
-            is_all_str = 0;
             if (!StaticTuple_CheckExact(obj)) {
                 PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
                     " requires that all key bits are strings or StaticTuple.");
@@ -214,9 +211,6 @@
         Py_INCREF(obj);
         self->items[i] = obj;
     }
-    if (is_all_str) {
-        self->flags |= STATIC_TUPLE_ALL_STRING;
-    }
     return (PyObject *)self;
 }
 
@@ -252,29 +246,16 @@
 #endif
 	x = 0x345678L;
 	p = self->items;
-    if (self->flags & STATIC_TUPLE_ALL_STRING
-        && self->flags & STATIC_TUPLE_DID_HASH) {
-        /* If we know that we only reference strings, and we've already
-         * computed the hash one time before, then we know all the strings will
-         * have valid hash entries, and we can just compute, no branching
-         * logic.
-         */
-        while (--len >= 0) {
-            y = ((PyStringObject*)(*p))->ob_shash;
-            x = (x ^ y) * mult;
-            /* the cast might truncate len; that doesn't change hash stability */
-            mult += (long)(82520L + len + len);
-            p++;
-        }
-    } else {
-        while (--len >= 0) {
-            y = PyObject_Hash(*p++);
-            if (y == -1) /* failure */
-                return -1;
-            x = (x ^ y) * mult;
-            /* the cast might truncate len; that doesn't change hash stability */
-            mult += (long)(82520L + len + len);
-        }
+    // TODO: We could set specific flags if we know that, for example, all the
+    //       keys are strings. I haven't seen a real-world benefit to that yet,
+    //       though.
+    while (--len >= 0) {
+        y = PyObject_Hash(*p++);
+        if (y == -1) /* failure */
+            return -1;
+        x = (x ^ y) * mult;
+        /* the cast might truncate len; that doesn't change hash stability */
+        mult += (long)(82520L + len + len);
     }
 	x += 97531L;
 	if (x == -1)
@@ -287,7 +268,6 @@
     }
     self->hash = x;
 #endif
-    self->flags |= STATIC_TUPLE_DID_HASH;
 	return x;
 }
 
@@ -638,7 +618,6 @@
     }
     // We need to create the empty tuple
     stuple = (StaticTuple *)StaticTuple_New(0);
-    stuple->flags = STATIC_TUPLE_ALL_STRING;
     _empty_tuple = StaticTuple_Intern(stuple);
     assert(_empty_tuple == stuple);
     // At this point, refcnt is 2: 1 from New(), and 1 from the return from

=== modified file 'bzrlib/_static_tuple_c.h'
--- a/bzrlib/_static_tuple_c.h	2009-10-07 15:27:24 +0000
+++ b/bzrlib/_static_tuple_c.h	2009-10-07 15:57:25 +0000
@@ -28,26 +28,24 @@
  * Note that the entries themselves are strings, which already cache their
  * hashes. So while there is a 1.5:1 difference in the time for hash(), it is
  * already a function which is quite fast. Probably the only reason we might
- * want to do so, is if we implement a KeyIntern dict that assumes it is
- * available, and can then drop the 'hash' value from the item pointers. Then
- * again, if Key_hash() is fast enough, we may not even care about that.
+ * want to do so, is if we customized SimpleSet to the point that the item
+ * pointers were exactly certain types, and then accessed table[i]->hash
+ * directly. So far StaticTuple_hash() is fast enough to not warrant the memory
+ * difference.
  */
 
 /* This defines a single variable-width key.
  * It is basically the same as a tuple, but
  * 1) Lighter weight in memory
- * 2) Only supports strings.
- * It is mostly used as a helper. Note that Keys() is a similar structure for
- * lists of Key objects. Its main advantage, though, is that it inlines all of
- * the Key objects so that you have 1 python object overhead for N Keys, rather
- * than N objects.
+ * 2) Only supports strings or other static types (that don't reference other
+ *    objects.)
  */
 
 #define STATIC_TUPLE_INTERNED_FLAG 0x01
-#define STATIC_TUPLE_ALL_STRING    0x02
-#define STATIC_TUPLE_DID_HASH      0x04
 typedef struct {
     PyObject_HEAD
+    // We could go with unsigned short here, and support 64k width tuples
+    // without any memory impact, might be worthwhile
     unsigned char size;
     unsigned char flags;
     unsigned char _unused0;

=== modified file 'bzrlib/_static_tuple_c.pxd'
--- a/bzrlib/_static_tuple_c.pxd	2009-10-07 15:27:24 +0000
+++ b/bzrlib/_static_tuple_c.pxd	2009-10-07 15:57:25 +0000
@@ -26,20 +26,15 @@
     ctypedef class bzrlib._static_tuple_c.StaticTuple [object StaticTuple]:
         cdef unsigned char size
         cdef unsigned char flags
-        # We don't need to define _unused attributes, because the raw
-        # StaticTuple structure will be referenced
-        # cdef unsigned char _unused0
-        # cdef unsigned char _unused1
         cdef PyObject *items[0]
 
+    # Must be called before using any of the C api, as it sets the function
+    # pointers in memory.
     int import_static_tuple_c() except -1
-    # ctypedef object (*st_new_type)(Py_ssize_t)
-    # st_new_type st_new
-    int STATIC_TUPLE_ALL_STRING
-
     StaticTuple StaticTuple_New(Py_ssize_t)
     StaticTuple StaticTuple_Intern(StaticTuple)
-    # Steals a reference and Val must be a PyStringObject, no checking is done
+
+    # Steals a reference and val must be a valid type, no checking is done
     void StaticTuple_SET_ITEM(StaticTuple key, Py_ssize_t offset, object val)
     object StaticTuple_GET_ITEM(StaticTuple key, Py_ssize_t offset)
     int StaticTuple_CheckExact(object)



More information about the bazaar-commits mailing list