Rev 4701: It seems that optimizing Keys_hash is not the way to go for 'bzr log' performance. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
John Arbash Meinel
john at arbash-meinel.com
Sat Sep 12 06:03:46 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption
------------------------------------------------------------
revno: 4701
revision-id: john at arbash-meinel.com-20090912050326-k7nuwxtce50dc4yc
parent: john at arbash-meinel.com-20090911214856-181w4blik8mp1kag
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Sat 2009-09-12 00:03:26 -0500
message:
It seems that optimizing Keys_hash is not the way to go for 'bzr log' performance.
Baseline bzr.dev time: 702ms
Time with Key: 760ms
Time with Key_richcompare: 751ms
Time with Key_hash: 750ms
Time with Key.hash (stashed hash): 750ms.
Probably need some profiling, though hard to get accurate
profiling at this level.
-------------- next part --------------
=== modified file 'bzrlib/_keys_type_c.c'
--- a/bzrlib/_keys_type_c.c 2009-09-11 21:48:56 +0000
+++ b/bzrlib/_keys_type_c.c 2009-09-12 05:03:26 +0000
@@ -39,6 +39,7 @@
*/
typedef struct {
PyObject_VAR_HEAD
+ long hash;
PyStringObject *key_bits[1];
} Key;
extern PyTypeObject KeyType;
@@ -122,6 +123,7 @@
if (self == NULL) {
return NULL;
}
+ self->hash = -1;
self->ob_size = len;
for (i = 0; i < len; ++i) {
obj = PyTuple_GET_ITEM(args, i);
@@ -155,16 +157,38 @@
static long
Key_hash(Key *self)
{
- PyObject *as_tuple = NULL;
- long hash = -1;
+ /* adapted from tuplehash(), is the specific hash value considered
+ * 'stable'?
+ */
+ register long x, y;
+ Py_ssize_t len = self->ob_size;
+ PyStringObject **p;
+ hashfunc string_hash;
+ long mult = 1000003L;
- as_tuple = Key_as_tuple(self);
- if (as_tuple == NULL) {
- return -1;
+ if (self->hash != -1) {
+ return self->hash;
}
- hash = PyTuple_Type.tp_hash(as_tuple);
- Py_DECREF(as_tuple);
- return hash;
+ x = 0x345678L;
+ p = self->key_bits;
+ string_hash = PyString_Type.tp_hash;
+ while (--len >= 0) {
+ y = (*p)->ob_shash;
+ if (y == -1) { /* not computed yet */
+ y = string_hash((PyObject *)(*p));
+ }
+ if (y == -1) /* failure */
+ return -1;
+ x = (x ^ y) * mult;
+ /* the cast might truncate len; that doesn't change hash stability */
+ mult += (long)(82520L + len + len);
+ p++;
+ }
+ x += 97531L;
+ if (x == -1)
+ x = -2;
+ self->hash = x;
+ return x;
}
static PyObject *
@@ -759,7 +783,7 @@
Py_TPFLAGS_DEFAULT, /* tp_flags*/
Keys_doc, /* tp_doc */
/* See Key_traverse for why we have this, even though we aren't GC */
- Keys_traverse, /* tp_traverse */
+ (traverseproc)Keys_traverse, /* tp_traverse */
0, /* tp_clear */
// TODO: implement richcompare, we should probably be able to compare vs an
// tuple, as well as versus another Keys object.
More information about the bazaar-commits
mailing list