Rev 3898: Move everything into properly parameterized tests. in http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

Thu Dec 11 03:08:28 GMT 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked

------------------------------------------------------------
revno: 3898
revision-id: john at arbash-meinel.com-20081211030803-gctunob7zsten3qg
parent: john at arbash-meinel.com-20081211021859-3ds8cwdqiq387t83
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_record_stream_chunked
timestamp: Wed 2008-12-10 21:08:03 -0600
message:
  Move everything into properly parameterized tests.
  
  Also add tests that we preserve the object when it is already lines.
  
  The compiled form takes 450us on a 7.6k line file (NEWS).
  So for common cases, we should have virtually no overhead.
-------------- next part --------------
=== added file 'bzrlib/_chunks_to_lines_py.py'

--- a/bzrlib/_chunks_to_lines_py.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/_chunks_to_lines_py.py	2008-12-11 03:08:03 +0000
@@ -0,0 +1,57 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""The python implementation of chunks_to_lines"""
+
+
+def chunks_to_lines(chunks):
+    """Ensure that chunks is split cleanly into lines.
+
+    Each entry in the result should contain a single newline at the end, except
+    for the last entry, which may not have a final newline.
+
+    :param chunks: A list/tuple of strings. If chunks is already a list of
+        lines, then we will return it as-is.
+    :return: A list of strings.
+    """
+    # Optimize for a very common case when chunks are already lines
+    def fail():
+        raise IndexError
+    try:
+        # This is a bit ugly, but is the fastest way to check if all of the
+        # chunks are individual lines.
+        # You can't use function calls like .count(), .index(), or endswith()
+        # because they incur too much python overhead.
+        # It works because
+        #   if chunk is an empty string, it will raise IndexError, which will
+        #       be caught.
+        #   if chunk doesn't end with '\n' then we hit fail()
+        #   if there is more than one '\n' then we hit fail()
+        # timing shows this loop to take 2.58ms rather than 3.18ms for
+        # split_lines(''.join(chunks))
+        # Further, it means we get to preserve the original lines, rather than
+        # expanding memory
+        if not chunks:
+            return chunks
+        [(chunk[-1] == '\n' and '\n' not in chunk[:-1]) or fail()
+         for chunk in chunks[:-1]]
+        last = chunks[-1]
+        if last and '\n' not in last[:-1]:
+            return chunks
+    except IndexError:
+        pass
+    from bzrlib.osutils import split_lines
+    return split_lines(''.join(chunks))

=== modified file 'bzrlib/_chunks_to_lines_pyx.pyx'
--- a/bzrlib/_chunks_to_lines_pyx.pyx	2008-12-11 02:18:59 +0000
+++ b/bzrlib/_chunks_to_lines_pyx.pyx	2008-12-11 03:08:03 +0000
@@ -42,15 +42,22 @@
     cdef char *newline
     cdef char *c_last
     cdef Py_ssize_t the_len
+    cdef Py_ssize_t chunks_len
+    cdef Py_ssize_t cur
 
     # Check to see if the chunks are already lines
+    chunks_len = len(chunks)
+    if chunks_len == 0:
+        return chunks
+    cur = 0
     for chunk in chunks:
+        cur += 1
         PyString_AsStringAndSize(chunk, &c_str, &the_len)
         if the_len == 0:
             break
         c_last = c_str + the_len - 1
         newline = <char *>memchr(c_str, c'\n', the_len)
-        if newline == NULL or newline != c_last:
+        if newline != c_last and not (newline == NULL and cur == chunks_len):
             break
     else:
         return chunks

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2008-12-11 02:18:59 +0000
+++ b/bzrlib/osutils.py	2008-12-11 03:08:03 +0000
@@ -820,44 +820,10 @@
     return pathjoin(*p)
 
 
-def chunks_to_lines(chunks):
-    """Ensure that chunks is split cleanly into lines.
-
-    Each entry in the result should contain a single newline at the end. Except
-    for the last entry which may not have a final newline.
-
-    :param chunks: An iterable of strings
-    :return: A list of strings.
-    """
-    # Optimize for a very common case when chunks are already lines
-    def fail():
-        raise IndexError
-    try:
-        # This is a bit ugly, but is the fastest way to check if all of the
-        # chunks are individual lines.
-        # You can't use function calls like .count(), .index(), or endswith()
-        # because they incur too much python overhead.
-        # It works because
-        #   if chunk is an empty string, it will raise IndexError, which will
-        #       be caught.
-        #   if chunk doesn't end with '\n' then we hit fail()
-        #   if there is more than one '\n' then we hit fail()
-        # timing shows this loop to take 2.58ms rather than 3.18ms for
-        # split_lines(''.join(chunks))
-        # Further, it means we get to preserve the original lines, rather than
-        # expanding memory
-        [(chunk[-1] == '\n' and '\n' not in chunk[:-1]) or fail()
-         for chunk in chunks]
-        return chunks
-    except IndexError:
-        pass
-    return split_lines(''.join(chunks))
-
-
 try:
     from bzrlib._chunks_to_lines_pyx import chunks_to_lines
 except ImportError:
-    pass
+    from bzrlib._chunks_to_lines_py import chunks_to_lines
 
 
 def split_lines(s):

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2008-12-09 21:35:49 +0000
+++ b/bzrlib/tests/__init__.py	2008-12-11 03:08:03 +0000
@@ -2788,6 +2788,7 @@
                    'bzrlib.tests.test_bzrdir',
                    'bzrlib.tests.test_cache_utf8',
                    'bzrlib.tests.test_chunk_writer',
+                   'bzrlib.tests.test__chunks_to_lines',
                    'bzrlib.tests.test_commands',
                    'bzrlib.tests.test_commit',
                    'bzrlib.tests.test_commit_merge',

=== added file 'bzrlib/tests/test__chunks_to_lines.py'
--- a/bzrlib/tests/test__chunks_to_lines.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test__chunks_to_lines.py	2008-12-11 03:08:03 +0000
@@ -0,0 +1,112 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+"""Tests for chunks_to_lines."""
+
+from bzrlib import tests
+
+
+def load_tests(standard_tests, module, loader):
+    # parameterize all tests in this module
+    suite = loader.suiteClass()
+    applier = tests.TestScenarioApplier()
+    import bzrlib._chunks_to_lines_py as py_module
+    applier.scenarios = [('python', {'module': py_module})]
+    if CompiledChunksToLinesFeature.available():
+        import bzrlib._chunks_to_lines_pyx as c_module
+        applier.scenarios.append(('C', {'module': c_module}))
+    else:
+        # the compiled module isn't available, so we add a failing test
+        class FailWithoutFeature(tests.TestCase):
+            def test_fail(self):
+                self.requireFeature(CompiledChunksToLinesFeature)
+        suite.addTest(loader.loadTestsFromTestCase(FailWithoutFeature))
+    tests.adapt_tests(standard_tests, applier, suite)
+    return suite
+
+
+class _CompiledChunksToLinesFeature(tests.Feature):
+
+    def _probe(self):
+        try:
+            import bzrlib._chunks_to_lines_pyx
+        except ImportError:
+            return False
+        return True
+
+    def feature_name(self):
+        return 'bzrlib._chunks_to_lines_pyx'
+
+CompiledChunksToLinesFeature = _CompiledChunksToLinesFeature()
+
+
+class TestChunksToLines(tests.TestCase):
+
+    module = None # Filled in by test parameterization
+
+    def assertChunksToLines(self, lines, chunks, alreadly_lines=False):
+        result = self.module.chunks_to_lines(chunks)
+        self.assertEqual(lines, result)
+        if alreadly_lines:
+            self.assertIs(chunks, result)
+
+    def test_fulltext_chunk_to_lines(self):
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+                                 ['foo\nbar\r\nba\rz\n'])
+        self.assertChunksToLines(['foobarbaz\n'], ['foobarbaz\n'],
+                                 alreadly_lines=True)
+
+    def test_lines_to_lines(self):
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+                                 ['foo\n', 'bar\r\n', 'ba\rz\n'],
+                                 alreadly_lines=True)
+
+    def test_no_final_newline(self):
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\nbar\r\nba\rz'])
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\n', 'bar\r\n', 'ba\rz'],
+                                 alreadly_lines=True)
+        self.assertChunksToLines(('foo\n', 'bar\r\n', 'ba\rz'),
+                                 ('foo\n', 'bar\r\n', 'ba\rz'),
+                                 alreadly_lines=True)
+        self.assertChunksToLines([], [], alreadly_lines=True)
+        self.assertChunksToLines(['foobarbaz'], ['foobarbaz'],
+                                 alreadly_lines=True)
+        self.assertChunksToLines([], [''])
+
+    def test_mixed(self):
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\n', 'bar\r\nba\r', 'z'])
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\nb', 'a', 'r\r\nba\r', 'z'])
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\nbar\r\nba', '\r', 'z'])
+
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+                                 ['foo\n', '', 'bar\r\nba', '\r', 'z'])
+        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+                                 ['foo\n', 'bar\r\n', 'ba\rz\n', ''])
+
+    def test_not_lines(self):
+        # We should raise a TypeError, not crash
+        self.assertRaises(TypeError, self.module.chunks_to_lines,
+                          object())
+        self.assertRaises(TypeError, self.module.chunks_to_lines,
+                          [object()])
+        self.assertRaises(TypeError, self.module.chunks_to_lines,
+                          ['foo', object()])

=== modified file 'bzrlib/tests/test_osutils.py'
--- a/bzrlib/tests/test_osutils.py	2008-12-11 02:02:07 +0000
+++ b/bzrlib/tests/test_osutils.py	2008-12-11 03:08:03 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2005, 2006, 2007 Canonical Ltd
+# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -758,33 +758,19 @@
 
 class TestChunksToLines(TestCase):
 
-    def assertChunksToLines(self, lines, chunks):
-        self.assertEqual(lines, osutils.chunks_to_lines(chunks))
-
-    def test_fulltext_chunk_to_lines(self):
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
-                                 ['foo\nbar\r\nba\rz\n'])
-
-    def test_lines_to_lines(self):
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
-                                 ['foo\n', 'bar\r\n', 'ba\rz\n'])
-
-    def test_no_final_newline(self):
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\nbar\r\nba\rz'])
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\n', 'bar\r\n', 'ba\rz'])
-
-    def test_mixed(self):
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\n', 'bar\r\nba\r', 'z'])
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\nb', 'a', 'r\r\nba\r', 'z'])
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\nbar\r\nba', '\r', 'z'])
-
-        self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
-                                 ['foo\n', '', 'bar\r\nba', '\r', 'z'])
+    def test_smoketest(self):
+        self.assertEqual(['foo\n', 'bar\n', 'baz\n'],
+                         osutils.chunks_to_lines(['foo\nbar', '\nbaz\n']))
+        self.assertEqual(['foo\n', 'bar\n', 'baz\n'],
+                         osutils.chunks_to_lines(['foo\n', 'bar\n', 'baz\n']))
+
+    def test_is_compiled(self):
+        from bzrlib.tests.test__chunks_to_lines import CompiledChunksToLinesFeature
+        if CompiledChunksToLinesFeature.available():  # probe it, as load_tests does
+            from bzrlib._chunks_to_lines_pyx import chunks_to_lines
+        else:
+            from bzrlib._chunks_to_lines_py import chunks_to_lines
+        self.assertIs(chunks_to_lines, osutils.chunks_to_lines)
 
 
 class TestSplitLines(TestCase):