Rev 4756: Some code cleanup passes. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 7 16:57:40 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4756
revision-id: john at arbash-meinel.com-20091007155725-vq1jsr92sk1vmidx
parent: john at arbash-meinel.com-20091007154832-lpipxg46lynh9wmr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-10-07 10:57:25 -0500
message:
Some code cleanup passes.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2009-10-07 15:57:25 +0000
@@ -60,7 +60,7 @@
# It seems we need to import the definitions so that the pyrex compiler has
# local names to access them.
from _static_tuple_c cimport StaticTuple, \
- import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+ import_static_tuple_c, StaticTuple_New, \
StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
@@ -186,7 +186,6 @@
self._start = temp_ptr + 1
Py_INCREF(key_element)
StaticTuple_SET_ITEM(key, loop_counter, key_element)
- # key->flags = key->flags | STATIC_TUPLE_ALL_STRING
key = StaticTuple_Intern(key)
return key
=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_chk_map_pyx.pyx 2009-10-07 15:57:25 +0000
@@ -62,11 +62,10 @@
# It seems we need to import the definitions so that the pyrex compiler has
# local names to access them.
from _static_tuple_c cimport StaticTuple,\
- import_static_tuple_c, STATIC_TUPLE_ALL_STRING, StaticTuple_New, \
+ import_static_tuple_c, StaticTuple_New, \
StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
-from bzrlib import _static_tuple_c
# This sets up the StaticTuple C_API functionality
if import_static_tuple_c() != 0:
raise ImportError('der borken')
=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c 2009-10-07 15:48:32 +0000
+++ b/bzrlib/_static_tuple_c.c 2009-10-07 15:57:25 +0000
@@ -177,7 +177,6 @@
StaticTuple *self;
PyObject *obj = NULL;
Py_ssize_t i, len = 0;
- int is_all_str;
if (type != &StaticTuple_Type) {
PyErr_SetString(PyExc_TypeError, "we only support creating StaticTuple");
@@ -198,11 +197,9 @@
if (self == NULL) {
return NULL;
}
- is_all_str = 1;
for (i = 0; i < len; ++i) {
obj = PyTuple_GET_ITEM(args, i);
if (!PyString_CheckExact(obj)) {
- is_all_str = 0;
if (!StaticTuple_CheckExact(obj)) {
PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
" requires that all key bits are strings or StaticTuple.");
@@ -214,9 +211,6 @@
Py_INCREF(obj);
self->items[i] = obj;
}
- if (is_all_str) {
- self->flags |= STATIC_TUPLE_ALL_STRING;
- }
return (PyObject *)self;
}
@@ -252,29 +246,16 @@
#endif
x = 0x345678L;
p = self->items;
- if (self->flags & STATIC_TUPLE_ALL_STRING
- && self->flags & STATIC_TUPLE_DID_HASH) {
- /* If we know that we only reference strings, and we've already
- * computed the hash one time before, then we know all the strings will
- * have valid hash entries, and we can just compute, no branching
- * logic.
- */
- while (--len >= 0) {
- y = ((PyStringObject*)(*p))->ob_shash;
- x = (x ^ y) * mult;
- /* the cast might truncate len; that doesn't change hash stability */
- mult += (long)(82520L + len + len);
- p++;
- }
- } else {
- while (--len >= 0) {
- y = PyObject_Hash(*p++);
- if (y == -1) /* failure */
- return -1;
- x = (x ^ y) * mult;
- /* the cast might truncate len; that doesn't change hash stability */
- mult += (long)(82520L + len + len);
- }
+ // TODO: We could set specific flags if we know that, for example, all the
+ // keys are strings. I haven't seen a real-world benefit to that yet,
+ // though.
+ while (--len >= 0) {
+ y = PyObject_Hash(*p++);
+ if (y == -1) /* failure */
+ return -1;
+ x = (x ^ y) * mult;
+ /* the cast might truncate len; that doesn't change hash stability */
+ mult += (long)(82520L + len + len);
}
x += 97531L;
if (x == -1)
@@ -287,7 +268,6 @@
}
self->hash = x;
#endif
- self->flags |= STATIC_TUPLE_DID_HASH;
return x;
}
@@ -638,7 +618,6 @@
}
// We need to create the empty tuple
stuple = (StaticTuple *)StaticTuple_New(0);
- stuple->flags = STATIC_TUPLE_ALL_STRING;
_empty_tuple = StaticTuple_Intern(stuple);
assert(_empty_tuple == stuple);
// At this point, refcnt is 2: 1 from New(), and 1 from the return from
=== modified file 'bzrlib/_static_tuple_c.h'
--- a/bzrlib/_static_tuple_c.h 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_static_tuple_c.h 2009-10-07 15:57:25 +0000
@@ -28,26 +28,24 @@
* Note that the entries themselves are strings, which already cache their
* hashes. So while there is a 1.5:1 difference in the time for hash(), it is
* already a function which is quite fast. Probably the only reason we might
- * want to do so, is if we implement a KeyIntern dict that assumes it is
- * available, and can then drop the 'hash' value from the item pointers. Then
- * again, if Key_hash() is fast enough, we may not even care about that.
+ * want to do so, is if we customized SimpleSet to the point that the item
+ * pointers were exactly certain types, and then accessed table[i]->hash
+ * directly. So far StaticTuple_hash() is fast enough to not warrant the memory
+ * difference.
*/
/* This defines a single variable-width key.
* It is basically the same as a tuple, but
* 1) Lighter weight in memory
- * 2) Only supports strings.
- * It is mostly used as a helper. Note that Keys() is a similar structure for
- * lists of Key objects. Its main advantage, though, is that it inlines all of
- * the Key objects so that you have 1 python object overhead for N Keys, rather
- * than N objects.
+ * 2) Only supports strings or other static types (that don't reference other
+ * objects.)
*/
#define STATIC_TUPLE_INTERNED_FLAG 0x01
-#define STATIC_TUPLE_ALL_STRING 0x02
-#define STATIC_TUPLE_DID_HASH 0x04
typedef struct {
PyObject_HEAD
+ // We could go with unsigned short here, and support 64k width tuples
+ // without any memory impact, might be worthwhile
unsigned char size;
unsigned char flags;
unsigned char _unused0;
=== modified file 'bzrlib/_static_tuple_c.pxd'
--- a/bzrlib/_static_tuple_c.pxd 2009-10-07 15:27:24 +0000
+++ b/bzrlib/_static_tuple_c.pxd 2009-10-07 15:57:25 +0000
@@ -26,20 +26,15 @@
ctypedef class bzrlib._static_tuple_c.StaticTuple [object StaticTuple]:
cdef unsigned char size
cdef unsigned char flags
- # We don't need to define _unused attributes, because the raw
- # StaticTuple structure will be referenced
- # cdef unsigned char _unused0
- # cdef unsigned char _unused1
cdef PyObject *items[0]
+ # Must be called before using any of the C api, as it sets the function
+ # pointers in memory.
int import_static_tuple_c() except -1
- # ctypedef object (*st_new_type)(Py_ssize_t)
- # st_new_type st_new
- int STATIC_TUPLE_ALL_STRING
-
StaticTuple StaticTuple_New(Py_ssize_t)
StaticTuple StaticTuple_Intern(StaticTuple)
- # Steals a reference and Val must be a PyStringObject, no checking is done
+
+ # Steals a reference and val must be a valid type, no checking is done
void StaticTuple_SET_ITEM(StaticTuple key, Py_ssize_t offset, object val)
object StaticTuple_GET_ITEM(StaticTuple key, Py_ssize_t offset)
int StaticTuple_CheckExact(object)
More information about the bazaar-commits
mailing list