Rev 19: Framework for C parser extension, parameterised callgraph output, shuffle lsprofiling. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
Robert Collins
robertc at robertcollins.net
Thu Jul 3 04:45:59 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk
------------------------------------------------------------
revno: 19
revision-id: robertc at robertcollins.net-20080703034549-teew69qy3z91m1pt
parent: robertc at robertcollins.net-20080702221844-aw67l2n2hdt2hvw0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Thu 2008-07-03 13:45:49 +1000
message:
Framework for C parser extension, parameterised callgraph output, shuffle lsprofiling.
added:
.bzrignore bzrignore-20080703034434-q63sohljnxg5loze-1
_parse_btree_c.pyx _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
_parse_btree_py.py _parse_btree_py.py-20080703034413-3q25bklkenti3p8p-3
modified:
btree_index.py index.py-20080624222253-p0x5f92uyh5hw734-7
indexbench.py indexbench.py-20080702083855-5tju02y79rw7kkzh-1
setup.py setup.py-20080624222253-p0x5f92uyh5hw734-8
tests/test_btree_index.py test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== added file '.bzrignore'
--- a/.bzrignore 1970-01-01 00:00:00 +0000
+++ b/.bzrignore 2008-07-03 03:45:49 +0000
@@ -0,0 +1,2 @@
+_parse_btree_c.c
+build
=== added file '_parse_btree_c.pyx'
--- a/_parse_btree_c.pyx 1970-01-01 00:00:00 +0000
+++ b/_parse_btree_c.pyx 2008-07-03 03:45:49 +0000
@@ -0,0 +1,43 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Pyrex extensions to btree node parsing."""
+
+def _parse_leaf_lines(lines, key_length, ref_list_length):  # Parse leaf-node lines into a list of (key, (value, ref_lists)) pairs.
+ nodes = []
+ for line in lines[1:]:  # the first line is skipped (in the tests it is the "type=leaf" header)
+ if line == '':  # an empty line ends parsing
+ return nodes
+ elements = line.split('\0', key_length)  # at most key_length splits: key elements first, the remainder last
+ # keys are tuples
+ key = tuple(elements[:key_length])
+ line = elements[-1]  # remainder after the key: references text and value
+ references, value = line.rsplit('\0', 1)  # value is everything after the last NUL
+ if ref_list_length:
+ ref_lists = []
+ for ref_string in references.split('\t'):  # one tab-separated field per reference list
+ key_list = []
+ for ref in ref_string.split('\r'):  # references within one list are separated by \r
+ if ref:  # skip empty entries, so an empty field gives an empty list
+ key_list.append(tuple(ref.split('\0')))  # each reference is itself a NUL-separated key tuple
+ ref_list = tuple(key_list)
+ ref_lists.append(ref_list)
+ ref_lists = tuple(ref_lists)
+ node_value = (value, ref_lists)
+ else:
+ node_value = (value, ())  # no reference lists: the references text is ignored
+ nodes.append((key, node_value))
+ return nodes
=== added file '_parse_btree_py.py'
--- a/_parse_btree_py.py 1970-01-01 00:00:00 +0000
+++ b/_parse_btree_py.py 2008-07-03 03:45:49 +0000
@@ -0,0 +1,41 @@
+# index2, a bzr plugin providing experimental index types.
+# Copyright (C) 2008 Canonical Limited.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+"""B+Tree index parsing."""
+
+def _parse_leaf_lines(lines, key_length, ref_list_length):  # Parse leaf-node lines into a list of (key, (value, ref_lists)) pairs.
+ nodes = []
+ for line in lines[1:]:  # the first line is skipped (in the tests it is the "type=leaf" header)
+ if line == '':  # an empty line ends parsing
+ return nodes
+ elements = line.split('\0', key_length)  # at most key_length splits: key elements first, the remainder last
+ # keys are tuples
+ key = tuple(elements[:key_length])
+ line = elements[-1]  # remainder after the key: references text and value
+ references, value = line.rsplit('\0', 1)  # value is everything after the last NUL
+ if ref_list_length:
+ ref_lists = []
+ for ref_string in references.split('\t'):  # one tab-separated field per reference list
+ ref_lists.append(tuple([
+ tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref  # one key tuple per non-empty \r-separated reference
+ ]))
+ ref_lists = tuple(ref_lists)
+ node_value = (value, ref_lists)
+ else:
+ node_value = (value, ())  # no reference lists: the references text is ignored
+ nodes.append((key, node_value))
+ return nodes
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-02 22:18:44 +0000
+++ b/btree_index.py 2008-07-03 03:45:49 +0000
@@ -302,31 +302,8 @@
def __init__(self, bytes, key_length, ref_list_length):
"""Parse bytes to create a leaf node object."""
# splitlines mangles the \r delimiters.. don't use it.
- self.keys = dict(self._parse_lines(bytes.split('\n'), key_length,
- ref_list_length))
-
- def _parse_lines(self, lines, key_length, ref_list_length):
- nodes = []
- for line in lines[1:]:
- if line == '':
- return nodes
- elements = line.split('\0', key_length)
- # keys are tuples
- key = tuple(elements[:key_length])
- line = elements[-1]
- references, value = line.rsplit('\0', 1)
- if ref_list_length:
- ref_lists = []
- for ref_string in references.split('\t'):
- ref_lists.append(tuple([
- tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref
- ]))
- ref_lists = tuple(ref_lists)
- node_value = (value, ref_lists)
- else:
- node_value = (value, ())
- nodes.append((key, node_value))
- return nodes
+ self.keys = dict(_parse_btree._parse_leaf_lines(bytes.split('\n'),
+ key_length, ref_list_length))  # _parse_btree is bound at the bottom of this module: the C parser if importable, else the Python one
class _InternalNode(object):
@@ -812,3 +789,9 @@
node_count = sum(self._row_lengths) - 1
for node in self._read_nodes(range(1, node_count + 1)):
pass
+
+
+try:
+ from bzrlib.plugins.index2 import _parse_btree_c as _parse_btree  # prefer the compiled (Pyrex) parser when it has been built
+except ImportError:
+ from bzrlib.plugins.index2 import _parse_btree_py as _parse_btree  # otherwise fall back to the pure-Python parser
=== modified file 'indexbench.py'
--- a/indexbench.py 2008-07-02 20:17:01 +0000
+++ b/indexbench.py 2008-07-03 03:45:49 +0000
@@ -16,6 +16,7 @@
from bzrlib.revision import NULL_REVISION
from bzrlib.lsprof import profile
+use_calltree = True
key_count = 0
@@ -44,7 +45,11 @@
def profile_fixture(class_name, fixture_name, fixture, *args, **kwargs):
"""Profile a fixture."""
value, stats = profile(fixture, *args, **kwargs)
- fname = class_name + '.' + fixture_name + '.txt'
+ if use_calltree:  # module-level flag, overwritten from the 'calltree' option in the command's run()
+ suffix = '.callgrind'  # NOTE(review): presumably stats.save picks the callgrind format from the file name - confirm against bzrlib.lsprof
+ else:
+ suffix = '.txt'
+ fname = class_name + '.' + fixture_name + suffix
stats.save(fname)
return value
@@ -113,16 +118,7 @@
print "%s: iter_all_entries in %0.3f" % (label, finish - now)
# random shuffle, all keys (name_keys comes preshuffled)
if 'shuffle' in fixtures:
- drop_cache()
- reset_hit_counts()
- now = time.time()
- for name, index in iter_indices(names, target, factory):
- order = name_keys[name]
- shuffle(order)
- for key in order:
- index.iter_entries([key]).next()
- finish = time.time()
- print "%s: iter_random_one in %0.3f,%s" % (label, finish - now, hits())
+ run(label, 'iter_random_one', iter_random_one, label, drop_cache, names, target, factory, name_keys)
# text extraction simulation (follow a compression graph) for text_keys
if 'text' in fixtures:
text_names = [name for name in names if name.endswith('.tix')]
@@ -169,6 +165,19 @@
print "%s: -------Done---------" % (label,)
+def iter_random_one(label, drop_cache, names, target, factory, name_keys):  # Benchmark fixture: look up every key of every index one at a time and print the elapsed time.
+ drop_cache()
+ reset_hit_counts()
+ now = time.time()  # timing starts after the cache drop and hit-counter reset
+ for name, index in iter_indices(names, target, factory):
+ order = name_keys[name]
+ shuffle(order)  # NOTE(review): presumably random.shuffle, which reorders the caller's list in place - confirm
+ for key in order:
+ index.iter_entries([key]).next()  # single-key query; only the first result is fetched
+ finish = time.time()
+ print "%s: iter_random_one in %0.3f,%s" % (label, finish - now, hits())
+
+
def revision_search(label, drop_cache, names, target, factory, tip_revision_id):
rev_names = [name for name in names if name.endswith('.rix')]
# reopen indices
@@ -215,15 +224,18 @@
help='fixtures to test: one of all, shuffle, text, revision, miss'),
# lspro because --lsprof is a global option, and they interfere with each other.
Option('lspro', help='generate class.fixture.callgrind lsprof files'),
+ Option('calltree', help='generate KCachegrind calltrees when profiling.'),
]
takes_args = ['sample_branch']
def run(self, sample_branch, graph=True, btree=True, drop_cache=False,
- fixture=None, lspro=False):
+ fixture=None, lspro=False, calltree=True):
if not fixture:
fixture = ['all', 'shuffle', 'text', 'revision', 'miss']
+ global use_calltree
+ use_calltree = calltree
from bzrlib.branch import Branch
source_branch = Branch.open(sample_branch)
source = source_branch.repository._transport
=== modified file 'setup.py'
--- a/setup.py 2008-06-24 22:37:56 +0000
+++ b/setup.py 2008-07-03 03:45:49 +0000
@@ -1,11 +1,82 @@
-#!/usr/bin/env python2.4
+#!/usr/bin/env python
from distutils.core import setup
bzr_plugin_name = 'index2'
bzr_plugin_version = (1, 6, 0, 'dev', 0)
-if __name__ == 'main':
+
+from distutils.errors import CCompilerError, DistutilsPlatformError
+from distutils.extension import Extension
+ext_modules = []
+try:
+ from Pyrex.Distutils import build_ext  # used as the base class of build_ext_if_possible below
+except ImportError:
+ have_pyrex = False  # read by add_pyrex_extension to choose between .pyx and .c sources
+ # try to build the extension from the prior generated source.
+ print
+ print ("The python package 'Pyrex' is not available."
+ " If the .c files are available,")
+ print ("they will be built,"
+ " but modifying the .pyx files will not rebuild them.")
+ print
+ from distutils.command.build_ext import build_ext  # plain distutils command as the fallback base class
+else:
+ have_pyrex = True
+
+
+class build_ext_if_possible(build_ext):
+    """A build_ext command that turns build failures into warnings.
+
+    If extensions cannot be built on this platform, or an individual
+    extension fails to compile, a warning is logged and the build
+    continues, leaving the pure-Python versions to be used instead.
+    """
+
+    def run(self):
+        """Run the normal build_ext, warning instead of failing."""
+        # No import of 'log' is visible at the top of setup.py, so bring it
+        # into scope here; otherwise the handler below raises NameError
+        # instead of printing the warning.
+        from distutils import log
+        try:
+            build_ext.run(self)
+        except DistutilsPlatformError, e:
+            log.warn(str(e))
+            log.warn('Extensions cannot be built, '
+                     'will use the Python versions instead')
+
+    def build_extension(self, ext):
+        """Build one extension, warning instead of failing if it won't compile."""
+        from distutils import log
+        try:
+            build_ext.build_extension(self, ext)
+        except CCompilerError:
+            log.warn('Building of "%s" extension failed, '
+                     'will use the Python version instead' % (ext.name,))
+
+
+# Override the build_ext if we have Pyrex available
+unavailable_files = []
+
+
+def add_pyrex_extension(module_name, **kwargs):
+    """Add a pyrex module to build.
+
+    This will use Pyrex to auto-generate the .c file if it is available.
+    Otherwise it will fall back on the .c file. If the .c file is not
+    available, its name is recorded in unavailable_files and nothing is
+    added to ext_modules.
+
+    You can pass any extra options to Extension through kwargs. One example is
+    'libraries = []'.
+
+    :param module_name: The python path to the module. This will be used to
+        determine the .pyx and .c files to use.
+    :param kwargs: Extra keyword arguments, passed through to Extension.
+    """
+    # No 'import os' is visible at the top of setup.py; import it locally so
+    # the os.path.isfile() check below does not raise NameError.
+    import os
+    path = module_name.replace('.', '/')
+    pyrex_name = path + '.pyx'
+    c_name = path + '.c'
+    # Manually honour package_dir :(
+    module_name = 'bzrlib.plugins.index2.' + module_name
+    if have_pyrex:
+        ext_modules.append(Extension(module_name, [pyrex_name], **kwargs))
+    else:
+        if not os.path.isfile(c_name):
+            unavailable_files.append(c_name)
+        else:
+            ext_modules.append(Extension(module_name, [c_name], **kwargs))
+
+add_pyrex_extension('_parse_btree_c')
+
+
+if __name__ == '__main__':
setup(name="bzr index2",
version="1.6.0dev0",
description="bzr btree indices.",
@@ -16,4 +87,7 @@
packages=['bzrlib.plugins.index2',
'bzrlib.plugins.index2.tests',
],
- package_dir={'bzrlib.plugins.index2': '.'})
+ package_dir={'bzrlib.plugins.index2': '.'},
+ cmdclass={'build_ext': build_ext_if_possible},
+ ext_modules=ext_modules,
+ )
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-02 21:00:46 +0000
+++ b/tests/test_btree_index.py 2008-07-03 03:45:49 +0000
@@ -25,10 +25,32 @@
from bzrlib.plugins import index2
from bzrlib.plugins.index2 import errors, btree_index
from bzrlib.plugins.pybloom.pybloom import BloomSHA1
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import (
+ TestCaseWithTransport,
+ TestScenarioApplier,
+ adapt_tests,
+ condition_isinstance,
+ split_suite_by_condition,
+ )
from bzrlib.transport import get_transport
+def load_tests(standard_tests, module, loader):  # Parameterise the TestBTreeNodes tests over each available parser implementation.
+ # parameterise the TestBTreeNodes tests
+ node_tests, others = split_suite_by_condition(standard_tests,
+ condition_isinstance(TestBTreeNodes))
+ applier = TestScenarioApplier()
+ import bzrlib.plugins.index2._parse_btree_py as py_module
+ applier.scenarios = [('python', {'parse_btree': py_module})]  # 'parse_btree' is the attribute TestBTreeNodes.setUp installs into btree_index
+ try:
+ import bzrlib.plugins.index2._parse_btree_c as c_module
+ applier.scenarios.append(('C', {'parse_btree': c_module}))
+ except ImportError:
+ pass  # compiled parser not importable: only the 'python' scenario is used
+ adapt_tests(node_tests, applier, others)  # NOTE(review): presumably adds the per-scenario test copies to 'others' - confirm against bzrlib.tests
+ return others
+
+
class BTreeTestCase(TestCaseWithTransport):
# test names here are suffixed by the key length and reference list count
# that they test.
@@ -529,6 +551,15 @@
class TestBTreeNodes(BTreeTestCase):
+ def restore_parser(self):  # Cleanup hook: put back the parser module that setUp replaced.
+ btree_index._parse_btree = self.saved_parser
+
+ def setUp(self):  # Swap btree_index's parser for the one under test, restoring the original on cleanup.
+ BTreeTestCase.setUp(self)
+ self.saved_parser = btree_index._parse_btree
+ self.addCleanup(self.restore_parser)
+ btree_index._parse_btree = self.parse_btree  # parse_btree comes from the scenario dicts built in load_tests
+
def test_LeafNode_1_0(self):
node_bytes = ("type=leaf\n"
"0000000000000000000000000000000000000000\x00\x00value:0\n"
More information about the bazaar-commits
mailing list