Rev 4768: Review feedback from Andrew Bennetts. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-no-use

John Arbash Meinel john at arbash-meinel.com
Mon Oct 12 18:51:02 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-no-use

------------------------------------------------------------
revno: 4768
revision-id: john at arbash-meinel.com-20091012175058-c2rosf9sz6dvk0a9
parent: john at arbash-meinel.com-20091012170705-g1v51yp2kfve4f0r
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple-no-use
timestamp: Mon 2009-10-12 12:50:58 -0500
message:
  Review feedback from Andrew Bennetts.
-------------- next part --------------
=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c	2009-10-07 19:31:39 +0000
+++ b/bzrlib/_static_tuple_c.c	2009-10-12 17:50:58 +0000
@@ -74,7 +74,7 @@
 static StaticTuple *
 StaticTuple_Intern(StaticTuple *self)
 {
-    PyObject *unique_key = NULL;
+    PyObject *canonical_tuple = NULL;
 
     if (_interned_tuples == NULL || _StaticTuple_is_interned(self)) {
         Py_INCREF(self);
@@ -83,20 +83,18 @@
     /* SimpleSet_Add returns whatever object is present at self
      * or the new object if it needs to add it.
      */
-    unique_key = SimpleSet_Add(_interned_tuples, (PyObject *)self);
-    if (!unique_key) {
-        // Suppress any error and just return the object
-        PyErr_Clear();
-        Py_INCREF(self);
-        return self;
+    canonical_tuple = SimpleSet_Add(_interned_tuples, (PyObject *)self);
+    if (!canonical_tuple) {
+        // Some sort of exception, propagate it.
+        return NULL;
     }
-    if (unique_key != (PyObject *)self) {
-        // There was already a key at that location
-        return (StaticTuple *)unique_key;
+    if (canonical_tuple != (PyObject *)self) {
+        // There was already a tuple with that value
+        return (StaticTuple *)canonical_tuple;
     }
     self->flags |= STATIC_TUPLE_INTERNED_FLAG;
-    // The two references in the dict do not count, so that the StaticTuple object
-    // does not become immortal just because it was interned.
+    // The two references in the dict do not count, so that the StaticTuple
+    // object does not become immortal just because it was interned.
     Py_REFCNT(self) -= 1;
     return self;
 }
@@ -187,7 +185,7 @@
     if (len < 0 || len > 255) {
         /* Too big or too small */
         PyErr_SetString(PyExc_ValueError, "StaticTuple.__init__(...)"
-            " takes from 0 to 255 key bits");
+            " takes from 0 to 255 items");
         return NULL;
     }
     self = (StaticTuple *)StaticTuple_New(len);
@@ -199,8 +197,7 @@
         if (!PyString_CheckExact(obj)) {
             if (!StaticTuple_CheckExact(obj)) {
                 PyErr_SetString(PyExc_TypeError, "StaticTuple.__init__(...)"
-                    " requires that all key bits are strings or StaticTuple.");
-                /* TODO: What is the proper way to dealloc ? */
+                    " requires that all items are strings or StaticTuple.");
                 type->tp_dealloc((PyObject *)self);
                 return NULL;
             }
@@ -236,21 +233,21 @@
     /* adapted from tuplehash(), is the specific hash value considered
      * 'stable'?
      */
-	register long x, y;
-	Py_ssize_t len = self->size;
-	PyObject **p;
-	long mult = 1000003L;
+    register long x, y;
+    Py_ssize_t len = self->size;
+    PyObject **p;
+    long mult = 1000003L;
 
 #if STATIC_TUPLE_HAS_HASH
     if (self->hash != -1) {
         return self->hash;
     }
 #endif
-	x = 0x345678L;
-	p = self->items;
+    x = 0x345678L;
+    p = self->items;
     // TODO: We could set specific flags if we know that, for example, all the
-    //       keys are strings. I haven't seen a real-world benefit to that yet,
-    //       though.
+    //       items are strings. I haven't seen a real-world benefit to that
+    //       yet, though.
     while (--len >= 0) {
         y = PyObject_Hash(*p++);
         if (y == -1) /* failure */
@@ -259,18 +256,13 @@
         /* the cast might truncate len; that doesn't change hash stability */
         mult += (long)(82520L + len + len);
     }
-	x += 97531L;
-	if (x == -1)
-		x = -2;
+    x += 97531L;
+    if (x == -1)
+        x = -2;
 #if STATIC_TUPLE_HAS_HASH
-    if (self->hash != -1) {
-        if (self->hash != x) {
-            fprintf(stderr, "hash changed: %d => %d\n", self->hash, x);
-        }
-    }
     self->hash = x;
 #endif
-	return x;
+    return x;
 }
 
 static PyObject *
@@ -281,25 +273,39 @@
     
     vt = StaticTuple_as_tuple((StaticTuple *)v);
     if (vt == NULL) {
-        goto Done;
+        goto done;
     }
     if (!PyTuple_Check(wt)) {
         PyErr_BadInternalCall();
-        result = NULL;
-        goto Done;
+        goto done;
     }
     /* Now we have 2 tuples to compare, do it */
     result = PyTuple_Type.tp_richcompare(vt, wt, op);
-Done:
+done:
     Py_XDECREF(vt);
     return result;
 }
 
+/** Compare two objects to determine if they are equivalent.
+ * The basic flow is as follows
+ *  1) First make sure that both objects are StaticTuple instances. If they
+ *     aren't then cast self to a tuple, and have the tuple do the comparison.
+ *  2) Special case comparison to Py_None, because it happens to occur fairly
+ *     often in the test suite.
+ *  3) Special case when v and w are the same pointer. As we know the answer to
+ *     all queries without walking individual items.
+ *  4) For all operations, we then walk the items to find the first paired
+ *     items that are not equal.
+ *  5) If all items found are equal, we then check the length of self and
+ *     other to determine equality.
+ *  6) If an item differs, then we apply "op" to those last two items. (eg.
+ *     StaticTuple(A, B) > StaticTuple(A, C) iff B > C)
+ */
 
 static PyObject *
 StaticTuple_richcompare(PyObject *v, PyObject *w, int op)
 {
-    StaticTuple *vk, *wk;
+    StaticTuple *v_st, *w_st;
     Py_ssize_t vlen, wlen, min_len, i;
     PyObject *v_obj, *w_obj;
     richcmpfunc string_richcompare;
@@ -313,10 +319,10 @@
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
-    vk = (StaticTuple *)v;
+    v_st = (StaticTuple *)v;
     if (StaticTuple_CheckExact(w)) {
         /* The most common case */
-        wk = (StaticTuple*)w;
+        w_st = (StaticTuple*)w;
     } else if (PyTuple_Check(w)) {
         /* One of v or w is a tuple, so we go the 'slow' route and cast up to
          * tuples to compare.
@@ -325,17 +331,17 @@
          *       We probably want to optimize comparing self to other when
          *       other is a tuple.
          */
-        return StaticTuple_richcompare_to_tuple(vk, w, op);
+        return StaticTuple_richcompare_to_tuple(v_st, w, op);
     } else if (w == Py_None) {
         // None is always less than the object
-		switch (op) {
-		case Py_NE:case Py_GT:case Py_GE:
+        switch (op) {
+        case Py_NE:case Py_GT:case Py_GE:
             Py_INCREF(Py_True);
             return Py_True;
         case Py_EQ:case Py_LT:case Py_LE:
             Py_INCREF(Py_False);
             return Py_False;
-		}
+        }
     } else {
         /* We don't special case this comparison, we just let python handle
          * it.
@@ -344,38 +350,38 @@
          return Py_NotImplemented;
     }
     /* Now we know that we have 2 StaticTuple objects, so let's compare them.
-     * This code is somewhat borrowed from tuplerichcompare, except we know our
+     * This code is inspired from tuplerichcompare, except we know our
      * objects are limited in scope, so we can inline some comparisons.
      */
     if (v == w) {
         /* Identical pointers, we can shortcut this easily. */
-		switch (op) {
-		case Py_EQ:case Py_LE:case Py_GE:
+        switch (op) {
+        case Py_EQ:case Py_LE:case Py_GE:
             Py_INCREF(Py_True);
             return Py_True;
-		case Py_NE:case Py_LT:case Py_GT:
+        case Py_NE:case Py_LT:case Py_GT:
             Py_INCREF(Py_False);
             return Py_False;
-		}
+        }
     }
     /* TODO: if STATIC_TUPLE_INTERNED_FLAG is set on both objects and they are
      *       not the same pointer, then we know they aren't the same object
      *       without having to do sub-by-sub comparison.
      */
 
-    /* It will be rare that we compare tuples of different lengths, so we don't
-     * start by optimizing the length comparision, same as the tuple code
-     * TODO: Interning may change this, because we'll be comparing lots of
-     *       different StaticTuple objects in the intern dict
+    /* The only time we are likely to compare items of different lengths is in
+     * something like the interned_keys set. However, the hash is good enough
+     * that it is rare. Note that 'tuple_richcompare' also does not compare
+     * lengths here.
      */
-    vlen = vk->size;
-    wlen = wk->size;
-	min_len = (vlen < wlen) ? vlen : wlen;
+    vlen = v_st->size;
+    wlen = w_st->size;
+    min_len = (vlen < wlen) ? vlen : wlen;
     string_richcompare = PyString_Type.tp_richcompare;
     for (i = 0; i < min_len; i++) {
         PyObject *result = NULL;
-        v_obj = StaticTuple_GET_ITEM(vk, i);
-        w_obj = StaticTuple_GET_ITEM(wk, i);
+        v_obj = StaticTuple_GET_ITEM(v_st, i);
+        w_obj = StaticTuple_GET_ITEM(w_st, i);
         if (PyString_CheckExact(v_obj) && PyString_CheckExact(w_obj)) {
             result = string_richcompare(v_obj, w_obj, Py_EQ);
         } else if (StaticTuple_CheckExact(v_obj) &&
@@ -415,28 +421,28 @@
         }
         Py_DECREF(result);
     }
-	if (i >= vlen || i >= wlen) {
+    if (i >= vlen || i >= wlen) {
         /* We walked off one of the lists, but everything compared equal so
          * far. Just compare the size.
          */
-		int cmp;
-		PyObject *res;
-		switch (op) {
-		case Py_LT: cmp = vlen <  wlen; break;
-		case Py_LE: cmp = vlen <= wlen; break;
-		case Py_EQ: cmp = vlen == wlen; break;
-		case Py_NE: cmp = vlen != wlen; break;
-		case Py_GT: cmp = vlen >  wlen; break;
-		case Py_GE: cmp = vlen >= wlen; break;
-		default: return NULL; /* cannot happen */
-		}
-		if (cmp)
-			res = Py_True;
-		else
-			res = Py_False;
-		Py_INCREF(res);
-		return res;
-	}
+        int cmp;
+        PyObject *res;
+        switch (op) {
+        case Py_LT: cmp = vlen <  wlen; break;
+        case Py_LE: cmp = vlen <= wlen; break;
+        case Py_EQ: cmp = vlen == wlen; break;
+        case Py_NE: cmp = vlen != wlen; break;
+        case Py_GT: cmp = vlen >  wlen; break;
+        case Py_GE: cmp = vlen >= wlen; break;
+        default: return NULL; /* cannot happen */
+        }
+        if (cmp)
+            res = Py_True;
+        else
+            res = Py_False;
+        Py_INCREF(res);
+        return res;
+    }
     /* The last item differs, shortcut the Py_NE case */
     if (op == Py_NE) {
         Py_INCREF(Py_True);
@@ -477,15 +483,22 @@
 }
 
 static char StaticTuple__is_interned_doc[] = "_is_interned() => True/False\n"
-    "Check to see if this key has been interned.\n";
+    "Check to see if this tuple has been interned.\n";
 
 
 static PyObject *
 StaticTuple_item(StaticTuple *self, Py_ssize_t offset)
 {
     PyObject *obj;
-    if (offset < 0 || offset >= self->size) {
-        PyErr_SetString(PyExc_IndexError, "StaticTuple index out of range");
+    /* We cast to (int) to avoid worrying about whether Py_ssize_t is a
+     * long long, etc. offsets should never be >2**31 anyway.
+     */
+    if (offset < 0) {
+        PyErr_Format(PyExc_IndexError, "StaticTuple_item does not support"
+            " negative indices: %d\n", (int)offset);
+    } else if (offset >= self->size) {
+        PyErr_Format(PyExc_IndexError, "StaticTuple index out of range"
+            " %d >= %d", (int)offset, (int)self->size);
         return NULL;
     }
     obj = (PyObject *)self->items[offset];
@@ -519,9 +532,12 @@
 
 static char StaticTuple_doc[] =
     "C implementation of a StaticTuple structure."
-    "\n This is used as StaticTuple(key_bit_1, key_bit_2, key_bit_3, ...)"
-    "\n This is similar to tuple, just less flexible in what it"
-    "\n supports, but also lighter memory consumption.";
+    "\n This is used as StaticTuple(item1, item2, item3)"
+    "\n This is similar to tuple, less flexible in what it"
+    "\n supports, but also lighter memory consumption."
+    "\n Note that the constructor mimics the () form of tuples"
+    "\n Rather than the 'tuple()' constructor."
+    "\n  eg. StaticTuple(a, b) == (a, b) == tuple((a, b))";
 
 static PyMethodDef StaticTuple_methods[] = {
     {"as_tuple", (PyCFunction)StaticTuple_as_tuple, METH_NOARGS, StaticTuple_as_tuple_doc},
@@ -542,6 +558,11 @@
     0,                              /* sq_contains */
 };
 
+/* TODO: Implement StaticTuple_as_mapping.
+ * The only thing we really want to support from there is mp_subscript, so that
+ * we could support extended slicing (foo[::2]). Not worth it yet, though.
+ */
+
 
 PyTypeObject StaticTuple_Type = {
     PyObject_HEAD_INIT(NULL)

=== modified file 'bzrlib/tests/test__static_tuple.py'
--- a/bzrlib/tests/test__static_tuple.py	2009-10-07 19:34:22 +0000
+++ b/bzrlib/tests/test__static_tuple.py	2009-10-12 17:50:58 +0000
@@ -140,6 +140,13 @@
         self.assertEqual('foo', k[0])
         self.assertEqual('z', k[6])
         self.assertEqual('z', k[-1])
+        self.assertRaises(IndexError, k.__getitem__, 7)
+        self.assertRaises(IndexError, k.__getitem__, 256+7)
+        self.assertRaises(IndexError, k.__getitem__, 12024)
+        # Python's [] resolver handles the negative arguments, so we can't
+        # really test StaticTuple_item() with negative values.
+        self.assertRaises(TypeError, k.__getitem__, 'not-an-int')
+        self.assertRaises(TypeError, k.__getitem__, '5')
 
     def test_refcount(self):
         f = 'fo' + 'oo'
@@ -276,12 +283,22 @@
         k = self.module.StaticTuple('foo', 'bar', 'baz', 'bing')
         self.assertEqual(('foo', 'bar'), k[:2])
         self.assertEqual(('baz',), k[2:-1])
+        try:
+            val = k[::2]
+        except TypeError:
+            # C implementation raises a TypeError, we don't need the
+            # implementation yet, so allow this to pass
+            pass
+        else:
+            # Python implementation uses a regular Tuple, so make sure it gives
+            # the right result
+            self.assertEqual(('foo', 'baz'), val)
 
     def test_referents(self):
         # We implement tp_traverse so that things like 'meliae' can measure the
         # amount of referenced memory. Unfortunately gc.get_referents() first
-        # checks the IS_GC flag before it traverses anything. So there isn't a
-        # way to expose it that I can see.
+        # checks the IS_GC flag before it traverses anything. We could write a
+        # helper func, but that won't work for the generic implementation...
         self.requireFeature(Meliae)
         from meliae import scanner
         strs = ['foo', 'bar', 'baz', 'bing']



More information about the bazaar-commits mailing list