Rev 19: Framework for C parser extension, parameterised callgraph output, shuffle lsprofiling. in http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

Robert Collins robertc at robertcollins.net
Thu Jul 3 04:45:59 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/index2/trunk

------------------------------------------------------------
revno: 19
revision-id: robertc at robertcollins.net-20080703034549-teew69qy3z91m1pt
parent: robertc at robertcollins.net-20080702221844-aw67l2n2hdt2hvw0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Thu 2008-07-03 13:45:49 +1000
message:
  Framework for C parser extension, parameterised callgraph output, shuffle lsprofiling.
added:
  .bzrignore                     bzrignore-20080703034434-q63sohljnxg5loze-1
  _parse_btree_c.pyx             _parse_btree_c.pyx-20080703034413-3q25bklkenti3p8p-2
  _parse_btree_py.py             _parse_btree_py.py-20080703034413-3q25bklkenti3p8p-3
modified:
  btree_index.py                 index.py-20080624222253-p0x5f92uyh5hw734-7
  indexbench.py                  indexbench.py-20080702083855-5tju02y79rw7kkzh-1
  setup.py                       setup.py-20080624222253-p0x5f92uyh5hw734-8
  tests/test_btree_index.py      test_index.py-20080624222253-p0x5f92uyh5hw734-13
=== added file '.bzrignore'
--- a/.bzrignore	1970-01-01 00:00:00 +0000
+++ b/.bzrignore	2008-07-03 03:45:49 +0000
@@ -0,0 +1,2 @@
+_parse_btree_c.c
+build

=== added file '_parse_btree_c.pyx'
--- a/_parse_btree_c.pyx	1970-01-01 00:00:00 +0000
+++ b/_parse_btree_c.pyx	2008-07-03 03:45:49 +0000
@@ -0,0 +1,43 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Pyrex extensions to btree node parsing."""
+
+def _parse_leaf_lines(lines, key_length, ref_list_length):
+    nodes = []
+    for line in lines[1:]:
+        if line == '':
+            return nodes
+        elements = line.split('\0', key_length)
+        # keys are tuples
+        key = tuple(elements[:key_length])
+        line = elements[-1]
+        references, value = line.rsplit('\0', 1)
+        if ref_list_length:
+            ref_lists = []
+            for ref_string in references.split('\t'):
+                key_list = []
+                for ref in ref_string.split('\r'):
+                    if ref:
+                        key_list.append(tuple(ref.split('\0')))
+                ref_list = tuple(key_list)
+                ref_lists.append(ref_list)
+            ref_lists = tuple(ref_lists)
+            node_value = (value, ref_lists)
+        else:
+            node_value = (value, ())
+        nodes.append((key, node_value))
+    return nodes

=== added file '_parse_btree_py.py'
--- a/_parse_btree_py.py	1970-01-01 00:00:00 +0000
+++ b/_parse_btree_py.py	2008-07-03 03:45:49 +0000
@@ -0,0 +1,41 @@
+# index2, a bzr plugin providing experimental index types.
+# Copyright (C) 2008 Canonical Limited.
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+# 
+
+"""B+Tree index parsing."""
+
+def _parse_leaf_lines(lines, key_length, ref_list_length):
+    nodes = []
+    for line in lines[1:]:
+        if line == '':
+            return nodes
+        elements = line.split('\0', key_length)
+        # keys are tuples
+        key = tuple(elements[:key_length])
+        line = elements[-1]
+        references, value = line.rsplit('\0', 1)
+        if ref_list_length:
+            ref_lists = []
+            for ref_string in references.split('\t'):
+                ref_lists.append(tuple([
+                    tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref
+                    ]))
+            ref_lists = tuple(ref_lists)
+            node_value = (value, ref_lists)
+        else:
+            node_value = (value, ())
+        nodes.append((key, node_value))
+    return nodes

=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-02 22:18:44 +0000
+++ b/btree_index.py	2008-07-03 03:45:49 +0000
@@ -302,31 +302,8 @@
     def __init__(self, bytes, key_length, ref_list_length):
         """Parse bytes to create a leaf node object."""
         # splitlines mangles the \r delimiters.. don't use it.
-        self.keys = dict(self._parse_lines(bytes.split('\n'), key_length,
-            ref_list_length))
-
-    def _parse_lines(self, lines, key_length, ref_list_length):
-        nodes = []
-        for line in lines[1:]:
-            if line == '':
-                return nodes
-            elements = line.split('\0', key_length)
-            # keys are tuples
-            key = tuple(elements[:key_length])
-            line = elements[-1]
-            references, value = line.rsplit('\0', 1)
-            if ref_list_length:
-                ref_lists = []
-                for ref_string in references.split('\t'):
-                    ref_lists.append(tuple([
-                        tuple(ref.split('\0')) for ref in ref_string.split('\r') if ref
-                        ]))
-                ref_lists = tuple(ref_lists)
-                node_value = (value, ref_lists)
-            else:
-                node_value = (value, ())
-            nodes.append((key, node_value))
-        return nodes
+        self.keys = dict(_parse_btree._parse_leaf_lines(bytes.split('\n'),
+            key_length, ref_list_length))
 
 
 class _InternalNode(object):
@@ -812,3 +789,9 @@
         node_count = sum(self._row_lengths) - 1
         for node in self._read_nodes(range(1, node_count + 1)):
             pass
+
+
+try:
+    from bzrlib.plugins.index2 import _parse_btree_c as _parse_btree
+except ImportError:
+    from bzrlib.plugins.index2 import _parse_btree_py as _parse_btree

=== modified file 'indexbench.py'
--- a/indexbench.py	2008-07-02 20:17:01 +0000
+++ b/indexbench.py	2008-07-03 03:45:49 +0000
@@ -16,6 +16,7 @@
 from bzrlib.revision import NULL_REVISION
 from bzrlib.lsprof import profile
 
+use_calltree = True
 
 key_count = 0
 
@@ -44,7 +45,11 @@
 def profile_fixture(class_name, fixture_name, fixture, *args, **kwargs):
     """Profile a fixture."""
     value, stats = profile(fixture, *args, **kwargs)
-    fname = class_name + '.' + fixture_name + '.txt'
+    if use_calltree:
+        suffix = '.callgrind'
+    else:
+        suffix = '.txt'
+    fname = class_name + '.' + fixture_name + suffix
     stats.save(fname)
     return value
 
@@ -113,16 +118,7 @@
         print "%s: iter_all_entries in %0.3f" % (label, finish - now)
 # random shuffle, all keys (name_keys comes preshuffled)
     if 'shuffle' in fixtures:
-        drop_cache()
-        reset_hit_counts()
-        now = time.time()
-        for name, index in iter_indices(names, target, factory):
-            order = name_keys[name]
-            shuffle(order)
-            for key in order:
-                index.iter_entries([key]).next()
-        finish = time.time()
-        print "%s: iter_random_one in %0.3f,%s" % (label, finish - now, hits())
+        run(label, 'iter_random_one', iter_random_one, label, drop_cache, names, target, factory, name_keys)
 # text extraction simulation (follow a compression graph) for text_keys
     if 'text' in fixtures:
         text_names = [name for name in names if name.endswith('.tix')]
@@ -169,6 +165,19 @@
     print "%s: -------Done---------" % (label,)
 
 
+def iter_random_one(label, drop_cache, names, target, factory, name_keys):
+    drop_cache()
+    reset_hit_counts()
+    now = time.time()
+    for name, index in iter_indices(names, target, factory):
+        order = name_keys[name]
+        shuffle(order)
+        for key in order:
+            index.iter_entries([key]).next()
+    finish = time.time()
+    print "%s: iter_random_one in %0.3f,%s" % (label, finish - now, hits())
+
+
 def revision_search(label, drop_cache, names, target, factory, tip_revision_id):
     rev_names = [name for name in names if name.endswith('.rix')]
     # reopen indices
@@ -215,15 +224,18 @@
             help='fixtures to test: one of all, shuffle, text, revision, miss'),
         # lspro because --lsprof is a global option, and they interfere with each other.
         Option('lspro', help='generate class.fixture.callgrind lsprof files'),
+        Option('calltree', help='generate KCachegrind calltrees when profiling.'),
         ]
 
 
     takes_args = ['sample_branch']
 
     def run(self, sample_branch, graph=True, btree=True, drop_cache=False,
-            fixture=None, lspro=False):
+            fixture=None, lspro=False, calltree=True):
         if not fixture:
             fixture = ['all', 'shuffle', 'text', 'revision', 'miss']
+        global use_calltree
+        use_calltree = calltree
         from bzrlib.branch import Branch
         source_branch = Branch.open(sample_branch)
         source = source_branch.repository._transport

=== modified file 'setup.py'
--- a/setup.py	2008-06-24 22:37:56 +0000
+++ b/setup.py	2008-07-03 03:45:49 +0000
@@ -1,11 +1,82 @@
-#!/usr/bin/env python2.4
+#!/usr/bin/env python
 from distutils.core import setup
 
 bzr_plugin_name = 'index2'
 
 bzr_plugin_version = (1, 6, 0, 'dev', 0)
 
-if __name__ == 'main':
+
+from distutils.errors import CCompilerError, DistutilsPlatformError
+from distutils.extension import Extension
+ext_modules = []
+try:
+    from Pyrex.Distutils import build_ext
+except ImportError:
+    have_pyrex = False
+    # try to build the extension from the previously generated source.
+    print
+    print ("The python package 'Pyrex' is not available."
+           " If the .c files are available,")
+    print ("they will be built,"
+           " but modifying the .pyx files will not rebuild them.")
+    print
+    from distutils.command.build_ext import build_ext
+else:
+    have_pyrex = True
+
+
+class build_ext_if_possible(build_ext):
+
+    def run(self):
+        try:
+            build_ext.run(self)
+        except DistutilsPlatformError, e:
+            log.warn(str(e))
+            log.warn('Extensions cannot be built, '
+                     'will use the Python versions instead')
+
+    def build_extension(self, ext):
+        try:
+            build_ext.build_extension(self, ext)
+        except CCompilerError:
+            log.warn('Building of "%s" extension failed, '
+                     'will use the Python version instead' % (ext.name,))
+
+
+# C sources add_pyrex_extension could not find (only checked without Pyrex)
+unavailable_files = []
+
+
+def add_pyrex_extension(module_name, **kwargs):
+    """Add a pyrex module to build.
+
+    This will use Pyrex to auto-generate the .c file if it is available.
+    Otherwise it will fall back on the .c file. If the .c file is not
+    available, it is recorded in unavailable_files and nothing is added.
+
+    You can pass any extra options to Extension through kwargs. One example is
+    'libraries = []'.
+
+    :param module_name: The python path to the module. This will be used to
+        determine the .pyx and .c files to use.
+    """
+    path = module_name.replace('.', '/')
+    pyrex_name = path + '.pyx'
+    c_name = path + '.c'
+    # Manually honour package_dir :(
+    module_name = 'bzrlib.plugins.index2.' + module_name
+    if have_pyrex:
+        ext_modules.append(Extension(module_name, [pyrex_name]))
+    else:
+        if not os.path.isfile(c_name):
+            unavailable_files.append(c_name)
+        else:
+            ext_modules.append(Extension(module_name, [c_name]))
+
+add_pyrex_extension('_parse_btree_c')
+
+
+if __name__ == '__main__':
     setup(name="bzr index2",
           version="1.6.0dev0",
           description="bzr btree indices.",
@@ -16,4 +87,7 @@
           packages=['bzrlib.plugins.index2',
                     'bzrlib.plugins.index2.tests',
                     ],
-          package_dir={'bzrlib.plugins.index2': '.'})
+          package_dir={'bzrlib.plugins.index2': '.'},
+          cmdclass={'build_ext': build_ext_if_possible},
+          ext_modules=ext_modules,
+          )

=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py	2008-07-02 21:00:46 +0000
+++ b/tests/test_btree_index.py	2008-07-03 03:45:49 +0000
@@ -25,10 +25,32 @@
 from bzrlib.plugins import index2
 from bzrlib.plugins.index2 import errors, btree_index
 from bzrlib.plugins.pybloom.pybloom import BloomSHA1
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import (
+    TestCaseWithTransport,
+    TestScenarioApplier,
+    adapt_tests,
+    condition_isinstance,
+    split_suite_by_condition,
+    )
 from bzrlib.transport import get_transport
 
 
+def load_tests(standard_tests, module, loader):
+    # parameterise the TestBTreeNodes tests
+    node_tests, others = split_suite_by_condition(standard_tests,
+        condition_isinstance(TestBTreeNodes))
+    applier = TestScenarioApplier()
+    import bzrlib.plugins.index2._parse_btree_py as py_module
+    applier.scenarios = [('python', {'parse_btree': py_module})]
+    try:
+        import bzrlib.plugins.index2._parse_btree_c as c_module
+        applier.scenarios.append(('C', {'parse_btree': c_module}))
+    except ImportError:
+        pass
+    adapt_tests(node_tests, applier, others)
+    return others
+
+
 class BTreeTestCase(TestCaseWithTransport):
     # test names here are suffixed by the key length and reference list count
     # that they test.
@@ -529,6 +551,15 @@
 
 class TestBTreeNodes(BTreeTestCase):
 
+    def restore_parser(self):
+        btree_index._parse_btree = self.saved_parser
+
+    def setUp(self):
+        BTreeTestCase.setUp(self)
+        self.saved_parser = btree_index._parse_btree
+        self.addCleanup(self.restore_parser)
+        btree_index._parse_btree = self.parse_btree
+
     def test_LeafNode_1_0(self):
         node_bytes = ("type=leaf\n"
             "0000000000000000000000000000000000000000\x00\x00value:0\n"




More information about the bazaar-commits mailing list