Rev 4777: Try out interning of some of the strings. in http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map
John Arbash Meinel
john at arbash-meinel.com
Wed Oct 21 21:50:38 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/2.1-static-tuple-chk-map
------------------------------------------------------------
revno: 4777
revision-id: john at arbash-meinel.com-20091021205004-q81ipjjhdy2fl3xy
parent: john at arbash-meinel.com-20091021194305-eso7vfaetsvmhgz6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-static-tuple-chk-map
timestamp: Wed 2009-10-21 15:50:04 -0500
message:
Try out interning of some of the strings.
Doesn't seem to have a serious impact.
If we only have a 2-deep tree (1 internal, 1 leaf), all the internal nodes
are 1-byte wide anyway, so they are already unique (single chars).
Interning the values is a slight net loss. A bit suprising. At an average
of 21 values per node, (and an average of 1 change per node) I would have
thought we would have more overlap in values.
I'll probably revert this as the meager to nil benefit doesn't outweigh
the complexity.
-------------- next part --------------
=== modified file 'bzrlib/_chk_map_pyx.pyx'
--- a/bzrlib/_chk_map_pyx.pyx 2009-10-21 19:43:05 +0000
+++ b/bzrlib/_chk_map_pyx.pyx 2009-10-21 20:50:04 +0000
@@ -17,7 +17,14 @@
#python2.4 support
cdef extern from "python-compat.h":
- pass
+ # PyString_InternInPlace takes a **, and mutate the pointer supplied.
+ # Neither Pyrex nor Cython handles that particularly well.
+ # We also cannot use intern() directly, because our strings have '\x00' in
+ # them, and pyrex and cython both translate intern() into
+ # PyString_InternFromString(PyString_AsString()) which assumes a regular
+ # null terminated C string
+ #
+ void INTERN_STRING(object)
cdef extern from *:
ctypedef unsigned int size_t
@@ -50,6 +57,8 @@
Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *s)
char *PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *s)
object PyString_FromStringAndSize(char*, Py_ssize_t)
+ void PyString_InternInPlace(PyObject **)
+ int PyString_CHECK_INTERNED(object)
# It seems we need to import the definitions so that the pyrex compiler has
# local names to access them.
@@ -292,7 +301,13 @@
# SET_ITEM 'steals' a reference
Py_INCREF(entry)
StaticTuple_SET_ITEM(entry_bits, i, entry)
+ # TODO: Consider interning the value lines
+ # Often there will be many leaf nodes that only have a single
+ # line or two that is different among many (60+) lines
value = PyString_FromStringAndSize(value_start, next_line - value_start)
+ INTERN_STRING(value)
+ if not PyString_CHECK_INTERNED(value):
+ raise AssertionError('i asked to intern, but it didn\'t work')
# The next entry bit needs the 'tail' from the prefix, and first part
# of the line
entry_start = line_start
@@ -400,15 +415,21 @@
next_null = <char *>_my_memrchr(cur, c'\0', next_line - cur)
if next_null == NULL:
raise ValueError('bad no null')
+ # TODO: intern item_prefix, it will almost always be redundant with all
+ # of the other internal node instances at this depth.
item_prefix = PyString_FromStringAndSize(NULL,
prefix_length + next_null - cur)
c_item_prefix = PyString_AS_STRING(item_prefix)
if prefix_length:
memcpy(c_item_prefix, prefix, prefix_length)
memcpy(c_item_prefix + prefix_length, cur, next_null - cur)
+ c_item_prefix = NULL # Done with this
flat_key = PyString_FromStringAndSize(next_null + 1,
next_line - next_null - 1)
flat_key = StaticTuple(flat_key).intern()
+ INTERN_STRING(item_prefix)
+ if not PyString_CHECK_INTERNED(item_prefix):
+ raise AssertionError('i asked to intern, but it didn\'t work')
PyDict_SetItem(items, item_prefix, flat_key)
cur = next_line + 1
assert len(items) > 0
=== modified file 'bzrlib/python-compat.h'
--- a/bzrlib/python-compat.h 2009-10-12 21:44:27 +0000
+++ b/bzrlib/python-compat.h 2009-10-21 20:50:04 +0000
@@ -84,4 +84,7 @@
# define Py_REFCNT(o) ((o)->ob_refcnt)
#endif
+
+#define INTERN_STRING(obj) (PyString_InternInPlace(&(obj)))
+
#endif /* _BZR_PYTHON_COMPAT_H */
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2009-10-21 19:07:30 +0000
+++ b/bzrlib/tests/test_chk_map.py 2009-10-21 20:50:04 +0000
@@ -1139,6 +1139,10 @@
{("a","a"):"content here", ("a", "b",):"more content",
("b", ""): 'boring content'},
maximum_size=10, key_width=2)
+ print
+ print chkmap._dump_tree()
+ print repr(chk_map._page_cache[chkmap.key()])
+ print repr(chk_map._page_cache[chkmap._root_node._items['a']._key])
self.assertEqual(
{("a", "a"): "content here", ("a", "b"): 'more content'},
self.to_dict(chkmap, [("a",)]))
More information about the bazaar-commits
mailing list