Rev 4740: Stop using hash() because of bugs wrt pyrex 0.9.8.5 in http://bazaar.launchpad.net/~jameinel/bzr/2.1-pyrex-64bit

John Arbash Meinel john at arbash-meinel.com
Tue Oct 13 17:44:54 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-pyrex-64bit

------------------------------------------------------------
revno: 4740
revision-id: john at arbash-meinel.com-20091013164443-b92lnyiir2ucyguj
parent: pqm at pqm.ubuntu.com-20091013092944-37m2zia1k83g061y
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-pyrex-64bit
timestamp: Tue 2009-10-13 11:44:43 -0500
message:
  Stop using hash() because of bugs wrt pyrex 0.9.8.5
  
  I also replaced the direct Py_TYPE(obj).tp_hash(obj) calls, so
  PyObject_Hash() is now used everywhere. This simplifies the code a little
  bit, as I can declare that it returns -1 on an exception, rather than
  having to manually check the return value.
  
  What is really strange is that pyrex 0.9.7.2 gets it right, so this is an
  odd regression to have. cython 0.11.3 also gets it right, but I don't know
  whether all versions of cython handle it correctly.
  
  
  The main problem is that we are mixing hashes truncated to 32 bits with
  full 64-bit hashes, and then comparing them ('other_hash == this_hash').
  If we always used the 32-bit form, or always used the 64-bit form, we
  would be okay for our purposes. I'm focusing on the latter.
-------------- next part --------------
=== modified file 'bzrlib/_simple_set_pyx.pyx'
--- a/bzrlib/_simple_set_pyx.pyx	2009-10-12 16:48:36 +0000
+++ b/bzrlib/_simple_set_pyx.pyx	2009-10-13 16:44:43 +0000
@@ -37,6 +37,9 @@
 
     PyTypeObject *Py_TYPE(PyObject *)
     int PyObject_IsTrue(PyObject *)
+    # Note: *Don't* use hash(), Pyrex 0.9.8.5 thinks it returns an 'int', and
+    #       thus silently truncates to 32-bits on 64-bit machines.
+    long PyObject_Hash(PyObject *) except -1
         
     void *PyMem_Malloc(size_t nbytes)
     void PyMem_Free(void *)
@@ -61,11 +64,7 @@
 
     if this == other:
         return 1
-    other_hash = Py_TYPE(other).tp_hash(other)
-    if other_hash == -1:
-        # Even though other successfully hashed in the past, it seems to have
-        # changed its mind, and failed this time, so propogate the failure.
-        return -1
+    other_hash = PyObject_Hash(other)
     if other_hash != this_hash:
         return 0
 
@@ -203,9 +202,7 @@
         mask = self._mask
         table = self._table
 
-        the_hash = Py_TYPE(key).tp_hash(key)
-        if the_hash == -1:
-            return -1
+        the_hash = PyObject_Hash(key)
         i = the_hash
         for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
             slot = &table[i & mask]
@@ -458,13 +455,14 @@
     cdef long key_hash
     cdef PyObject **table, **slot, *cur, **free_slot, *py_key
 
-    # hash is a signed long(), we are using an offset at unsigned size_t
-    key_hash = hash(key)
+    py_key = <PyObject *>key
+    # Note: avoid using hash(obj) because of a bug w/ pyrex 0.9.8.5 and 64-bit
+    #       (it treats hash() as returning an 'int' rather than a 'long')
+    key_hash = PyObject_Hash(py_key)
     i = <size_t>key_hash
     mask = self._mask
     table = self._table
     free_slot = NULL
-    py_key = <PyObject *>key
     for n_lookup from 0 <= n_lookup <= <size_t>mask: # Don't loop forever
         slot = &table[i & mask]
         cur = slot[0]



More information about the bazaar-commits mailing list