Rev 4771: Bring in bzr.dev 4761, which includes CHKMap and CHKInventory tweaks (in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map)
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 21 17:52:36 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map
------------------------------------------------------------
revno: 4771 [merge]
revision-id: john at arbash-meinel.com-20091021165218-rxk2tt2ks7amc6m9
parent: john at arbash-meinel.com-20091021164916-5eband755ip01y1n
parent: pqm at pqm.ubuntu.com-20091021163017-itbys2a178vt5605
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple-chk-map
timestamp: Wed 2009-10-21 11:52:18 -0500
message:
Bring in bzr.dev 4761 which includes CHKMap and CHKInventory tweaks.
It also brings in StaticTuple concatenation, and the ability to hold None, etc.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/_static_tuple_c.c _keys_type_c.c-20090908204220-aa346ccw4l37jzt7-1
bzrlib/_static_tuple_py.py _keys_type_py.py-20090908213415-o1ww98k9a8aqm0bm-1
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/tests/test__static_tuple.py test__keys_type.py-20090908204220-aa346ccw4l37jzt7-2
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2009-10-17 04:43:14 +0000
+++ b/NEWS 2009-10-21 14:32:16 +0000
@@ -58,14 +58,15 @@
used as the interning structure for StaticTuple objects.
(John Arbash Meinel)
-* ``bzrlib._static_tuple_pyx.StaticTuple`` is now available and used by
+* ``bzrlib._static_tuple_c.StaticTuple`` is now available and used by
the btree index parser. This class functions similarly to ``tuple``
- objects. However, it can only point at other ``StaticTuple`` instances
- or strings. This allows us to remove it from the garbage collector (it
- cannot be in a cycle), it also allows us to intern the objects. In
+ objects. However, it can only point to a limited collection of types.
+ (Currently StaticTuple, str, unicode, None, bool, int, long, float, and
+ not subclasses). This allows us to remove it from the garbage collector
+ (it cannot be in a cycle); it also allows us to intern the objects. In
testing, this can reduce peak memory by 20-40%, and significantly
improve performance by removing objects from being inspected by the
- garbage collector. (John Arbash Meinel)
+ garbage collector. (John Arbash Meinel)
* ``GroupCompressBlock._ensure_content()`` will now release the
``zlib.decompressobj()`` when the first request is for all of the
@@ -75,6 +76,11 @@
of internal state and buffers. (For branching bzr.dev this drops peak
memory from 382MB => 345MB.) (John Arbash Meinel)
+* When streaming content between ``2a`` format repositories, we now clear
+ caches from earlier versioned files. (So 'revisions' is cleared when we
+ start reading 'inventories', etc.) This can have a significant impact on
+ peak memory for initial copies (~200MB). (John Arbash Meinel)
+
Testing
*******
=== modified file 'bzrlib/_static_tuple_c.c'
--- a/bzrlib/_static_tuple_c.c 2009-10-20 22:13:23 +0000
+++ b/bzrlib/_static_tuple_c.c 2009-10-21 16:52:18 +0000
@@ -145,6 +145,12 @@
return NULL;
}
+ if (size < 0 || size > 255) {
+ /* Too big or too small */
+ PyErr_SetString(PyExc_ValueError, "StaticTuple(...)"
+ " takes from 0 to 255 items");
+ return NULL;
+ }
if (size == 0 && _empty_tuple != NULL) {
Py_INCREF(_empty_tuple);
return _empty_tuple;
@@ -220,6 +226,38 @@
}
+/* Check that all items we point to are 'valid' */
+static int
+StaticTuple_check_items(StaticTuple *self)
+{
+ int i;
+ PyObject *obj;
+
+ for (i = 0; i < self->size; ++i) {
+ obj = self->items[i];
+ if (obj == NULL) {
+ PyErr_SetString(PyExc_RuntimeError, "StaticTuple(...)"
+ " should not have a NULL entry.");
+ return 0;
+ }
+ if (PyString_CheckExact(obj)
+ || StaticTuple_CheckExact(obj)
+ || obj == Py_None
+ || PyBool_Check(obj)
+ || PyInt_CheckExact(obj)
+ || PyLong_CheckExact(obj)
+ || PyFloat_CheckExact(obj)
+ || PyUnicode_CheckExact(obj)
+ ) continue;
+ PyErr_Format(PyExc_TypeError, "StaticTuple(...)"
+ " requires that all items are one of"
+ " str, StaticTuple, None, bool, int, long, float, or unicode"
+ " not %s.", Py_TYPE(obj)->tp_name);
+ return 0;
+ }
+ return 1;
+}
+
static PyObject *
StaticTuple_new_constructor(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
@@ -236,30 +274,19 @@
return NULL;
}
len = PyTuple_GET_SIZE(args);
- if (len < 0 || len > 255) {
- /* Too big or too small */
- PyErr_SetString(PyExc_ValueError, "StaticTuple.__init__(...)"
- " takes from 0 to 255 items");
- return NULL;
- }
self = (StaticTuple *)StaticTuple_New(len);
if (self == NULL) {
return NULL;
}
for (i = 0; i < len; ++i) {
obj = PyTuple_GET_ITEM(args, i);
- if (!PyString_CheckExact(obj)) {
- if (!StaticTuple_CheckExact(obj)) {
- PyErr_Format(PyExc_TypeError, "StaticTuple.__init__(...)"
- " requires that all items are strings or StaticTuple"
- " not %s", Py_TYPE(obj)->tp_name);
- type->tp_dealloc((PyObject *)self);
- return NULL;
- }
- }
Py_INCREF(obj);
self->items[i] = obj;
}
+ if (!StaticTuple_check_items(self)) {
+ type->tp_dealloc((PyObject *)self);
+ return NULL;
+ }
return (PyObject *)self;
}
@@ -458,27 +485,14 @@
/* Both are StaticTuple types, so recurse */
result = StaticTuple_richcompare(v_obj, w_obj, Py_EQ);
} else {
- /* Not the same type, obviously they won't compare equal */
- break;
+ /* Fall back to generic richcompare */
+ result = PyObject_RichCompare(v_obj, w_obj, Py_EQ);
}
if (result == NULL) {
return NULL; /* There seems to be an error */
}
- if (result == Py_NotImplemented) {
- Py_DECREF(result);
- /* One side must have had a string and the other a StaticTuple.
- * This clearly means that they are not equal.
- */
- if (op == Py_EQ) {
- Py_INCREF(Py_False);
- return Py_False;
- }
- result = PyObject_RichCompare(v_obj, w_obj, Py_EQ);
- }
if (result == Py_False) {
- /* This entry is not identical
- * Shortcut for Py_EQ
- */
+ // This entry is not identical, Shortcut for Py_EQ
if (op == Py_EQ) {
return result;
}
@@ -532,8 +546,7 @@
/* Both are StaticTuple types, so recurse */
return StaticTuple_richcompare(v_obj, w_obj, op);
} else {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
+ return PyObject_RichCompare(v_obj, w_obj, op);
}
}
@@ -561,6 +574,59 @@
static PyObject *
+StaticTuple_add(PyObject *v, PyObject *w)
+{
+ Py_ssize_t i, len_v, len_w;
+ PyObject *item;
+ StaticTuple *result;
+ /* StaticTuples and plain tuples may be added (concatenated) to
+ * StaticTuples.
+ */
+ if (StaticTuple_CheckExact(v)) {
+ len_v = ((StaticTuple*)v)->size;
+ } else if (PyTuple_Check(v)) {
+ len_v = PyTuple_GET_SIZE(v);
+ } else {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ if (StaticTuple_CheckExact(w)) {
+ len_w = ((StaticTuple*)w)->size;
+ } else if (PyTuple_Check(w)) {
+ len_w = PyTuple_GET_SIZE(w);
+ } else {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ result = StaticTuple_New(len_v + len_w);
+ if (result == NULL)
+ return NULL;
+ for (i = 0; i < len_v; ++i) {
+ // This returns a new reference, which we then 'steal' with
+ // StaticTuple_SET_ITEM
+ item = PySequence_GetItem(v, i);
+ if (item == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ StaticTuple_SET_ITEM(result, i, item);
+ }
+ for (i = 0; i < len_w; ++i) {
+ item = PySequence_GetItem(w, i);
+ if (item == NULL) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ StaticTuple_SET_ITEM(result, i+len_v, item);
+ }
+ if (!StaticTuple_check_items(result)) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ return (PyObject *)result;
+}
+
+static PyObject *
StaticTuple_item(StaticTuple *self, Py_ssize_t offset)
{
PyObject *obj;
@@ -625,6 +691,29 @@
{NULL, NULL} /* sentinel */
};
+
+static PyNumberMethods StaticTuple_as_number = {
+ (binaryfunc) StaticTuple_add, /* nb_add */
+ 0, /* nb_subtract */
+ 0, /* nb_multiply */
+ 0, /* nb_divide */
+ 0, /* nb_remainder */
+ 0, /* nb_divmod */
+ 0, /* nb_power */
+ 0, /* nb_negative */
+ 0, /* nb_positive */
+ 0, /* nb_absolute */
+ 0, /* nb_nonzero */
+ 0, /* nb_invert */
+ 0, /* nb_lshift */
+ 0, /* nb_rshift */
+ 0, /* nb_and */
+ 0, /* nb_xor */
+ 0, /* nb_or */
+ 0, /* nb_coerce */
+};
+
+
static PySequenceMethods StaticTuple_as_sequence = {
(lenfunc)StaticTuple_length, /* sq_length */
0, /* sq_concat */
@@ -655,7 +744,7 @@
0, /* tp_setattr */
0, /* tp_compare */
(reprfunc)StaticTuple_repr, /* tp_repr */
- 0, /* tp_as_number */
+ &StaticTuple_as_number, /* tp_as_number */
&StaticTuple_as_sequence, /* tp_as_sequence */
0, /* tp_as_mapping */
(hashfunc)StaticTuple_hash, /* tp_hash */
@@ -664,7 +753,10 @@
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags*/
+ /* Py_TPFLAGS_CHECKTYPES tells the number operations that they shouldn't
+ * try to 'coerce' but instead stuff like 'add' will check its arguments.
+ */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES, /* tp_flags*/
StaticTuple_doc, /* tp_doc */
/* gc.get_referents checks the IS_GC flag before it calls tp_traverse
* And we don't include this object in the garbage collector because we
@@ -768,18 +860,15 @@
*/
set_module = PyImport_ImportModule("bzrlib._simple_set_pyx");
if (set_module == NULL) {
- // fprintf(stderr, "Failed to import bzrlib._simple_set_pyx\n");
goto end;
}
/* Add the _simple_set_pyx into sys.modules at the appropriate location. */
sys_module = PyImport_ImportModule("sys");
if (sys_module == NULL) {
- // fprintf(stderr, "Failed to import sys\n");
goto end;
}
modules = PyObject_GetAttrString(sys_module, "modules");
if (modules == NULL || !PyDict_Check(modules)) {
- // fprintf(stderr, "Failed to find sys.modules\n");
goto end;
}
PyDict_SetItemString(modules, "_simple_set_pyx", set_module);
@@ -818,3 +907,5 @@
setup_empty_tuple(m);
setup_c_api(m);
}
+
+// vim: tabstop=4 sw=4 expandtab
=== modified file 'bzrlib/_static_tuple_py.py'
--- a/bzrlib/_static_tuple_py.py 2009-10-13 18:00:16 +0000
+++ b/bzrlib/_static_tuple_py.py 2009-10-21 05:02:35 +0000
@@ -33,8 +33,11 @@
def __init__(self, *args):
"""Create a new 'StaticTuple'"""
for bit in args:
- if type(bit) not in (str, StaticTuple):
- raise TypeError('key bits must be strings or StaticTuple')
+ if type(bit) not in (str, StaticTuple, unicode, int, long, float,
+ None.__class__, bool):
+ raise TypeError('StaticTuple can only point to'
+ ' StaticTuple, str, unicode, int, long, float, bool, or'
+ ' None not %s' % (type(bit),))
num_keys = len(args)
if num_keys < 0 or num_keys > 255:
raise ValueError('must have 1 => 256 key bits')
@@ -45,6 +48,10 @@
def __repr__(self):
return '%s%s' % (self.__class__.__name__, tuple.__repr__(self))
+ def __add__(self, other):
+ """Concatenate self with other"""
+ return StaticTuple.from_sequence(tuple.__add__(self,other))
+
def as_tuple(self):
return self
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-10-21 16:49:16 +0000
+++ b/bzrlib/chk_map.py 2009-10-21 16:52:18 +0000
@@ -84,6 +84,8 @@
class CHKMap(object):
"""A persistent map from string to string backed by a CHK store."""
+ __slots__ = ('_store', '_root_node', '_search_key_func')
+
def __init__(self, store, root_key, search_key_func=None):
"""Create a CHKMap object.
@@ -567,6 +569,10 @@
adding the header bytes, and without prefix compression.
"""
+ __slots__ = ('_key', '_len', '_maximum_size', '_key_width',
+ '_raw_size', '_items', '_search_prefix', '_search_key_func'
+ )
+
def __init__(self, key_width=1):
"""Create a node.
@@ -661,6 +667,8 @@
the key/value pairs.
"""
+ __slots__ = ('_common_serialised_prefix', '_serialise_key')
+
def __init__(self, search_key_func=None):
Node.__init__(self)
# All of the keys in this leaf node share this common prefix
@@ -958,6 +966,8 @@
LeafNode or InternalNode.
"""
+ __slots__ = ('_node_width',)
+
def __init__(self, prefix='', search_key_func=None):
Node.__init__(self)
# The size of an internalnode with default values and no children.
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-10-20 22:15:41 +0000
+++ b/bzrlib/inventory.py 2009-10-21 16:52:18 +0000
@@ -1921,11 +1921,11 @@
raise errors.BzrError('Duplicate key in inventory: %r\n%r'
% (key, bytes))
info[key] = value
- revision_id = info['revision_id']
- root_id = info['root_id']
- search_key_name = info.get('search_key_name', 'plain')
- parent_id_basename_to_file_id = info.get(
- 'parent_id_basename_to_file_id', None)
+ revision_id = intern(info['revision_id'])
+ root_id = intern(info['root_id'])
+ search_key_name = intern(info.get('search_key_name', 'plain'))
+ parent_id_basename_to_file_id = intern(info.get(
+ 'parent_id_basename_to_file_id', None))
if not parent_id_basename_to_file_id.startswith('sha1:'):
raise ValueError('parent_id_basename_to_file_id should be a sha1'
' key not %r' % (parent_id_basename_to_file_id,))
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-10-16 07:10:11 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-10-21 03:50:26 +0000
@@ -2089,7 +2089,7 @@
('signatures', self.repo.signatures),
):
missing = versioned_file.get_missing_compression_parent_keys()
- all_missing.update([(prefix,) + tuple(key) for key in missing])
+ all_missing.update([(prefix,) + key for key in missing])
if all_missing:
raise errors.BzrCheckError(
"Repository %s has missing compression parent(s) %r "
=== modified file 'bzrlib/tests/test__static_tuple.py'
--- a/bzrlib/tests/test__static_tuple.py 2009-10-17 00:34:28 +0000
+++ b/bzrlib/tests/test__static_tuple.py 2009-10-21 14:27:00 +0000
@@ -104,7 +104,43 @@
args_300 = ['a']*300
self.assertRaises(ValueError, self.module.StaticTuple, *args_300)
# not a string
- self.assertRaises(TypeError, self.module.StaticTuple, 10)
+ self.assertRaises(TypeError, self.module.StaticTuple, object())
+
+ def test_concat(self):
+ st1 = self.module.StaticTuple('foo')
+ st2 = self.module.StaticTuple('bar')
+ st3 = self.module.StaticTuple('foo', 'bar')
+ st4 = st1 + st2
+ self.assertEqual(st3, st4)
+ self.assertIsInstance(st4, self.module.StaticTuple)
+
+ def test_concat_with_tuple(self):
+ st1 = self.module.StaticTuple('foo')
+ t2 = ('bar',)
+ st3 = self.module.StaticTuple('foo', 'bar')
+ st4 = self.module.StaticTuple('bar', 'foo')
+ st5 = st1 + t2
+ st6 = t2 + st1
+ self.assertEqual(st3, st5)
+ self.assertIsInstance(st5, self.module.StaticTuple)
+ self.assertEqual(st4, st6)
+ if self.module is _static_tuple_py:
+ # _static_tuple_py has StaticTuple(tuple), so tuple thinks it
+ # already knows how to concatenate, as such we can't "inject" our
+ # own concatenation...
+ self.assertIsInstance(st6, tuple)
+ else:
+ self.assertIsInstance(st6, self.module.StaticTuple)
+
+ def test_concat_with_bad_tuple(self):
+ st1 = self.module.StaticTuple('foo')
+ t2 = (object(),)
+ # Using st1.__add__ doesn't give the same results as doing the '+' form
+ self.assertRaises(TypeError, lambda: st1 + t2)
+
+ def test_concat_with_non_tuple(self):
+ st1 = self.module.StaticTuple('foo')
+ self.assertRaises(TypeError, lambda: st1 + 10)
def test_as_tuple(self):
k = self.module.StaticTuple('foo')
@@ -177,11 +213,56 @@
self.assertFalse(k1 < k2)
self.assertFalse(k1 > k2)
+ def test_holds_None(self):
+ k1 = self.module.StaticTuple(None)
+ # You cannot subclass None anyway
+
+ def test_holds_int(self):
+ k1 = self.module.StaticTuple(1)
+ class subint(int):
+ pass
+ # But not a subclass, because subint could introduce refcycles
+ self.assertRaises(TypeError, self.module.StaticTuple, subint(2))
+
+ def test_holds_long(self):
+ k1 = self.module.StaticTuple(2L**65)
+ class sublong(long):
+ pass
+ # But not a subclass
+ self.assertRaises(TypeError, self.module.StaticTuple, sublong(1))
+
+ def test_holds_float(self):
+ k1 = self.module.StaticTuple(1.2)
+ class subfloat(float):
+ pass
+ self.assertRaises(TypeError, self.module.StaticTuple, subfloat(1.5))
+
+ def test_holds_str(self):
+ k1 = self.module.StaticTuple('astring')
+ class substr(str):
+ pass
+ self.assertRaises(TypeError, self.module.StaticTuple, substr('a'))
+
+ def test_holds_unicode(self):
+ k1 = self.module.StaticTuple(u'\xb5')
+ class subunicode(unicode):
+ pass
+ self.assertRaises(TypeError, self.module.StaticTuple,
+ subunicode(u'\xb5'))
+
+ def test_hold_bool(self):
+ k1 = self.module.StaticTuple(True)
+ k2 = self.module.StaticTuple(False)
+ # Cannot subclass bool
+
def test_compare_same_obj(self):
k1 = self.module.StaticTuple('foo', 'bar')
self.assertCompareEqual(k1, k1)
k2 = self.module.StaticTuple(k1, k1)
self.assertCompareEqual(k2, k2)
+ k3 = self.module.StaticTuple('foo', 1, None, u'\xb5', 1.2, 2**65, True,
+ k1)
+ self.assertCompareEqual(k3, k3)
def test_compare_equivalent_obj(self):
k1 = self.module.StaticTuple('foo', 'bar')
@@ -190,6 +271,14 @@
k3 = self.module.StaticTuple(k1, k2)
k4 = self.module.StaticTuple(k2, k1)
self.assertCompareEqual(k1, k2)
+ k5 = self.module.StaticTuple('foo', 1, None, u'\xb5', 1.2, 2**65, True,
+ k1)
+ k6 = self.module.StaticTuple('foo', 1, None, u'\xb5', 1.2, 2**65, True,
+ k1)
+ self.assertCompareEqual(k5, k6)
+ k7 = self.module.StaticTuple(None)
+ k8 = self.module.StaticTuple(None)
+ self.assertCompareEqual(k7, k8)
def test_compare_similar_obj(self):
k1 = self.module.StaticTuple('foo' + ' bar', 'bar' + ' baz')
@@ -240,6 +329,15 @@
k3 = self.module.StaticTuple(k1, k2)
k4 = self.module.StaticTuple(k2, k1)
self.assertCompareDifferent(k3, k4)
+ k5 = self.module.StaticTuple(1)
+ k6 = self.module.StaticTuple(2)
+ self.assertCompareDifferent(k5, k6)
+ k7 = self.module.StaticTuple(1.2)
+ k8 = self.module.StaticTuple(2.4)
+ self.assertCompareDifferent(k7, k8)
+ k9 = self.module.StaticTuple(u's\xb5')
+ k10 = self.module.StaticTuple(u's\xe5')
+ self.assertCompareDifferent(k9, k10)
def test_compare_some_different(self):
k1 = self.module.StaticTuple('foo', 'bar')
@@ -248,6 +346,9 @@
k3 = self.module.StaticTuple(k1, k1)
k4 = self.module.StaticTuple(k1, k2)
self.assertCompareDifferent(k3, k4)
+ k5 = self.module.StaticTuple('foo', None)
+ self.assertCompareDifferent(k5, k1)
+ self.assertCompareDifferent(k5, k2)
def test_compare_diff_width(self):
k1 = self.module.StaticTuple('foo')
@@ -257,6 +358,18 @@
k4 = self.module.StaticTuple(k1, k2)
self.assertCompareDifferent(k3, k4)
+ def test_compare_different_types(self):
+ k1 = self.module.StaticTuple('foo', 'bar')
+ k2 = self.module.StaticTuple('foo', 1, None, u'\xb5', 1.2, 2**65, True,
+ k1)
+ self.assertCompareNoRelation(k1, k2)
+ k3 = self.module.StaticTuple('foo')
+ self.assertCompareDifferent(k3, k1)
+ k4 = self.module.StaticTuple(None)
+ self.assertCompareDifferent(k4, k1)
+ k5 = self.module.StaticTuple(1)
+ self.assertCompareNoRelation(k1, k5)
+
def test_compare_to_tuples(self):
k1 = self.module.StaticTuple('foo')
self.assertCompareEqual(k1, ('foo',))
@@ -306,6 +419,11 @@
as_tuple2 = (('foo', 'bar', 'baz', 'bing'),)
self.assertEqual(hash(k2), hash(as_tuple2))
+ k3 = self.module.StaticTuple('foo', 1, None, u'\xb5', 1.2, 2**65, True,
+ k)
+ as_tuple3 = ('foo', 1, None, u'\xb5', 1.2, 2**65, True, k)
+ self.assertEqual(hash(as_tuple3), hash(k3))
+
def test_slice(self):
k = self.module.StaticTuple('foo', 'bar', 'baz', 'bing')
self.assertEqual(('foo', 'bar'), k[:2])
More information about the bazaar-commits
mailing list