Rev 4725: Play around a bit with changing the hash function. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
John Arbash Meinel
john at arbash-meinel.com
Wed Sep 30 21:15:15 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple
------------------------------------------------------------
revno: 4725
revision-id: john at arbash-meinel.com-20090930201508-3qgsaulrjaql1c83
parent: john at arbash-meinel.com-20090930194334-9rmtwmwimmssqw19
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple
timestamp: Wed 2009-09-30 15:15:08 -0500
message:
Play around a bit with changing the hash function.
In this form, once hash has been called once, we know we are dealing
with only strings, all of which have already computed their hash.
As such, we can get rid of most of the internal checks and just
iterate over the values. This gets us down to 13.8ms (from 15
at worst case, but 9.7 if we cache self->hash).
We may not keep this, but it was an interesting experiment.
-------------- next part --------------
=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c 2009-09-30 19:43:34 +0000
+++ b/bzrlib/_static_tuple_c.c 2009-09-30 20:15:08 +0000
@@ -177,6 +177,7 @@
StaticTuple *self;
PyObject *obj = NULL;
Py_ssize_t i, len = 0;
+ int is_all_str;
if (type != &StaticTuple_Type) {
PyErr_SetString(PyExc_TypeError, "we only support creating StaticTuple");
@@ -197,18 +198,25 @@
if (self == NULL) {
return NULL;
}
+ is_all_str = 1;
for (i = 0; i < len; ++i) {
obj = PyTuple_GET_ITEM(args, i);
- if (!PyString_CheckExact(obj) && !StaticTuple_CheckExact(obj)) {
- PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
- " requires that all key bits are strings or StaticTuple.");
- /* TODO: What is the proper way to dealloc ? */
- type->tp_dealloc((PyObject *)self);
- return NULL;
+ if (!PyString_CheckExact(obj)) {
+ is_all_str = 0;
+ if (!StaticTuple_CheckExact(obj)) {
+ PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
+ " requires that all key bits are strings or StaticTuple.");
+ /* TODO: What is the proper way to dealloc ? */
+ type->tp_dealloc((PyObject *)self);
+ return NULL;
+ }
}
Py_INCREF(obj);
self->key_bits[i] = obj;
}
+ if (is_all_str) {
+ self->flags |= STATIC_TUPLE_ALL_STRING;
+ }
return (PyObject *)self;
}
@@ -235,7 +243,6 @@
register long x, y;
Py_ssize_t len = self->size;
PyObject **p;
- // hashfunc string_hash;
long mult = 1000003L;
#if STATIC_TUPLE_HAS_HASH
@@ -245,15 +252,30 @@
#endif
x = 0x345678L;
p = self->key_bits;
- while (--len >= 0) {
- y = Py_TYPE(*p)->tp_hash(*p);
- if (y == -1) /* failure */
- return -1;
- x = (x ^ y) * mult;
- /* the cast might truncate len; that doesn't change hash stability */
- mult += (long)(82520L + len + len);
- p++;
- }
+ if (self->flags & STATIC_TUPLE_ALL_STRING
+ && self->flags & STATIC_TUPLE_DID_HASH) {
+ /* If we know that we only reference strings, and we've already
+ * computed the hash one time before, then we know all the strings will
+ * have valid hash entries, and we can just compute, no branching
+ * logic.
+ */
+ while (--len >= 0) {
+ y = ((PyStringObject*)(*p))->ob_shash;
+ x = (x ^ y) * mult;
+ /* the cast might truncate len; that doesn't change hash stability */
+ mult += (long)(82520L + len + len);
+ p++;
+ }
+ } else {
+ while (--len >= 0) {
+ y = PyObject_Hash(*p++);
+ if (y == -1) /* failure */
+ return -1;
+ x = (x ^ y) * mult;
+ /* the cast might truncate len; that doesn't change hash stability */
+ mult += (long)(82520L + len + len);
+ }
+ }
x += 97531L;
if (x == -1)
x = -2;
@@ -265,6 +287,7 @@
}
self->hash = x;
#endif
+ self->flags |= STATIC_TUPLE_DID_HASH;
return x;
}
@@ -500,7 +523,7 @@
StaticTuple_traverse(StaticTuple *self, visitproc visit, void *arg)
{
Py_ssize_t i;
- for (i = Py_SIZE(self); --i >= 0;) {
+ for (i = self->size; --i >= 0;) {
Py_VISIT(self->key_bits[i]);
}
return 0;
@@ -689,7 +712,8 @@
return;
}
key->size = 0;
- key->flags = 0;
+ /* This is all strings, because it has no entries */
+ key->flags = STATIC_TUPLE_ALL_STRING;
key->_unused0 = 0;
key->_unused1 = 0;
#if STATIC_TUPLE_HAS_HASH
=== modified file 'bzrlib/_static_tuple_c.h'
--- a/bzrlib/_static_tuple_c.h 2009-09-30 18:00:42 +0000
+++ b/bzrlib/_static_tuple_c.h 2009-09-30 20:15:08 +0000
@@ -49,6 +49,8 @@
*/
#define STATIC_TUPLE_INTERNED_FLAG 0x01
+#define STATIC_TUPLE_ALL_STRING 0x02
+#define STATIC_TUPLE_DID_HASH 0x04
typedef struct {
PyObject_HEAD
unsigned char size;
More information about the bazaar-commits
mailing list