Rev 3898: Move everything into properly parameterized tests. in http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 11 03:08:28 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.11/get_record_stream_chunked
------------------------------------------------------------
revno: 3898
revision-id: john at arbash-meinel.com-20081211030803-gctunob7zsten3qg
parent: john at arbash-meinel.com-20081211021859-3ds8cwdqiq387t83
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: get_record_stream_chunked
timestamp: Wed 2008-12-10 21:08:03 -0600
message:
Move everything into properly parameterized tests.
Also add tests that we preserve the object when it is already lines.
The compiled form takes 450us on a 7.6k line file (NEWS).
So for common cases, we should have virtually no overhead.
-------------- next part --------------
=== added file 'bzrlib/_chunks_to_lines_py.py'
--- a/bzrlib/_chunks_to_lines_py.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/_chunks_to_lines_py.py 2008-12-11 03:08:03 +0000
@@ -0,0 +1,57 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""The python implementation of chunks_to_lines"""
+
+
+def chunks_to_lines(chunks):
+ """Ensure that chunks is split cleanly into lines.
+
+ Each entry in the result should end with a single newline, except for the
+ last entry, which may not have a final newline.
+
+ :param chunks: A list/tuple of strings. If chunks is already a list of
+ lines, then we will return it as-is.
+ :return: A list of strings.
+ """
+ # Optimize for a very common case when chunks are already lines
+ def fail():
+ raise IndexError
+ try:
+ # This is a bit ugly, but is the fastest way to check if all of the
+ # chunks are individual lines.
+ # You can't use function calls like .count(), .index(), or endswith()
+ # because they incur too much python overhead.
+ # It works because:
+ # - if chunk is an empty string, chunk[-1] raises IndexError, which
+ #   is caught;
+ # - if chunk doesn't end with '\n' then we hit fail();
+ # - if there is more than one '\n' then we hit fail().
+ # timing shows this loop to take 2.58ms rather than 3.18ms for
+ # split_lines(''.join(chunks))
+ # Further, it means we get to preserve the original lines, rather than
+ # expanding memory
+ if not chunks:
+ return chunks
+ [(chunk[-1] == '\n' and '\n' not in chunk[:-1]) or fail()
+ for chunk in chunks[:-1]]
+ last = chunks[-1]
+ if last and '\n' not in last[:-1]:
+ return chunks
+ except IndexError:
+ pass
+ from bzrlib.osutils import split_lines
+ return split_lines(''.join(chunks))
=== modified file 'bzrlib/_chunks_to_lines_pyx.pyx'
--- a/bzrlib/_chunks_to_lines_pyx.pyx 2008-12-11 02:18:59 +0000
+++ b/bzrlib/_chunks_to_lines_pyx.pyx 2008-12-11 03:08:03 +0000
@@ -42,15 +42,22 @@
cdef char *newline
cdef char *c_last
cdef Py_ssize_t the_len
+ cdef Py_ssize_t chunks_len
+ cdef Py_ssize_t cur
# Check to see if the chunks are already lines
+ chunks_len = len(chunks)
+ if chunks_len == 0:
+ return chunks
+ cur = 0
for chunk in chunks:
+ cur += 1
PyString_AsStringAndSize(chunk, &c_str, &the_len)
if the_len == 0:
break
c_last = c_str + the_len - 1
newline = <char *>memchr(c_str, c'\n', the_len)
- if newline == NULL or newline != c_last:
+ if newline != c_last and not (newline == NULL and cur == chunks_len):
break
else:
return chunks
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2008-12-11 02:18:59 +0000
+++ b/bzrlib/osutils.py 2008-12-11 03:08:03 +0000
@@ -820,44 +820,10 @@
return pathjoin(*p)
-def chunks_to_lines(chunks):
- """Ensure that chunks is split cleanly into lines.
-
- Each entry in the result should contain a single newline at the end. Except
- for the last entry which may not have a final newline.
-
- :param chunks: An iterable of strings
- :return: A list of strings.
- """
- # Optimize for a very common case when chunks are already lines
- def fail():
- raise IndexError
- try:
- # This is a bit ugly, but is the fastest way to check if all of the
- # chunks are individual lines.
- # You can't use function calls like .count(), .index(), or endswith()
- # because they incur too much python overhead.
- # It works because
- # if chunk is an empty string, it will raise IndexError, which will
- # be caught.
- # if chunk doesn't end with '\n' then we hit fail()
- # if there is more than one '\n' then we hit fail()
- # timing shows this loop to take 2.58ms rather than 3.18ms for
- # split_lines(''.join(chunks))
- # Further, it means we get to preserve the original lines, rather than
- # expanding memory
- [(chunk[-1] == '\n' and '\n' not in chunk[:-1]) or fail()
- for chunk in chunks]
- return chunks
- except IndexError:
- pass
- return split_lines(''.join(chunks))
-
-
try:
from bzrlib._chunks_to_lines_pyx import chunks_to_lines
except ImportError:
- pass
+ from bzrlib._chunks_to_lines_py import chunks_to_lines
def split_lines(s):
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2008-12-09 21:35:49 +0000
+++ b/bzrlib/tests/__init__.py 2008-12-11 03:08:03 +0000
@@ -2788,6 +2788,7 @@
'bzrlib.tests.test_bzrdir',
'bzrlib.tests.test_cache_utf8',
'bzrlib.tests.test_chunk_writer',
+ 'bzrlib.tests.test__chunks_to_lines',
'bzrlib.tests.test_commands',
'bzrlib.tests.test_commit',
'bzrlib.tests.test_commit_merge',
=== added file 'bzrlib/tests/test__chunks_to_lines.py'
--- a/bzrlib/tests/test__chunks_to_lines.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test__chunks_to_lines.py 2008-12-11 03:08:03 +0000
@@ -0,0 +1,112 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+"""Tests for chunks_to_lines."""
+
+from bzrlib import tests
+
+
+def load_tests(standard_tests, module, loader):
+ # parameterize all tests in this module
+ suite = loader.suiteClass()
+ applier = tests.TestScenarioApplier()
+ import bzrlib._chunks_to_lines_py as py_module
+ applier.scenarios = [('python', {'module': py_module})]
+ if CompiledChunksToLinesFeature.available():
+ import bzrlib._chunks_to_lines_pyx as c_module
+ applier.scenarios.append(('C', {'module': c_module}))
+ else:
+ # the compiled module isn't available, so we add a failing test
+ class FailWithoutFeature(tests.TestCase):
+ def test_fail(self):
+ self.requireFeature(CompiledChunksToLinesFeature)
+ suite.addTest(loader.loadTestsFromTestCase(FailWithoutFeature))
+ tests.adapt_tests(standard_tests, applier, suite)
+ return suite
+
+
+class _CompiledChunksToLinesFeature(tests.Feature):
+
+ def _probe(self):
+ try:
+ import bzrlib._chunks_to_lines_pyx
+ except ImportError:
+ return False
+ return True
+
+ def feature_name(self):
+ return 'bzrlib._chunks_to_lines_pyx'
+
+CompiledChunksToLinesFeature = _CompiledChunksToLinesFeature()
+
+
+class TestChunksToLines(tests.TestCase):
+
+ module = None # Filled in by test parameterization
+
+ def assertChunksToLines(self, lines, chunks, alreadly_lines=False):
+ result = self.module.chunks_to_lines(chunks)
+ self.assertEqual(lines, result)
+ if alreadly_lines:
+ self.assertIs(chunks, result)
+
+ def test_fulltext_chunk_to_lines(self):
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+ ['foo\nbar\r\nba\rz\n'])
+ self.assertChunksToLines(['foobarbaz\n'], ['foobarbaz\n'],
+ alreadly_lines=True)
+
+ def test_lines_to_lines(self):
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+ ['foo\n', 'bar\r\n', 'ba\rz\n'],
+ alreadly_lines=True)
+
+ def test_no_final_newline(self):
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\nbar\r\nba\rz'])
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\n', 'bar\r\n', 'ba\rz'],
+ alreadly_lines=True)
+ self.assertChunksToLines(('foo\n', 'bar\r\n', 'ba\rz'),
+ ('foo\n', 'bar\r\n', 'ba\rz'),
+ alreadly_lines=True)
+ self.assertChunksToLines([], [], alreadly_lines=True)
+ self.assertChunksToLines(['foobarbaz'], ['foobarbaz'],
+ alreadly_lines=True)
+ self.assertChunksToLines([], [''])
+
+ def test_mixed(self):
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\n', 'bar\r\nba\r', 'z'])
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\nb', 'a', 'r\r\nba\r', 'z'])
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\nbar\r\nba', '\r', 'z'])
+
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
+ ['foo\n', '', 'bar\r\nba', '\r', 'z'])
+ self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
+ ['foo\n', 'bar\r\n', 'ba\rz\n', ''])
+
+ def test_not_lines(self):
+ # We should raise a TypeError, not crash
+ self.assertRaises(TypeError, self.module.chunks_to_lines,
+ object())
+ self.assertRaises(TypeError, self.module.chunks_to_lines,
+ [object()])
+ self.assertRaises(TypeError, self.module.chunks_to_lines,
+ ['foo', object()])
=== modified file 'bzrlib/tests/test_osutils.py'
--- a/bzrlib/tests/test_osutils.py 2008-12-11 02:02:07 +0000
+++ b/bzrlib/tests/test_osutils.py 2008-12-11 03:08:03 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2005, 2006, 2007 Canonical Ltd
+# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -758,33 +758,19 @@
class TestChunksToLines(TestCase):
- def assertChunksToLines(self, lines, chunks):
- self.assertEqual(lines, osutils.chunks_to_lines(chunks))
-
- def test_fulltext_chunk_to_lines(self):
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
- ['foo\nbar\r\nba\rz\n'])
-
- def test_lines_to_lines(self):
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz\n'],
- ['foo\n', 'bar\r\n', 'ba\rz\n'])
-
- def test_no_final_newline(self):
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\nbar\r\nba\rz'])
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\n', 'bar\r\n', 'ba\rz'])
-
- def test_mixed(self):
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\n', 'bar\r\nba\r', 'z'])
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\nb', 'a', 'r\r\nba\r', 'z'])
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\nbar\r\nba', '\r', 'z'])
-
- self.assertChunksToLines(['foo\n', 'bar\r\n', 'ba\rz'],
- ['foo\n', '', 'bar\r\nba', '\r', 'z'])
+ def test_smoketest(self):
+ self.assertEqual(['foo\n', 'bar\n', 'baz\n'],
+ osutils.chunks_to_lines(['foo\nbar', '\nbaz\n']))
+ self.assertEqual(['foo\n', 'bar\n', 'baz\n'],
+ osutils.chunks_to_lines(['foo\n', 'bar\n', 'baz\n']))
+
+ def test_is_compiled(self):
+ from bzrlib.tests.test__chunks_to_lines import CompiledChunksToLinesFeature
+ if CompiledChunksToLinesFeature:
+ from bzrlib._chunks_to_lines_pyx import chunks_to_lines
+ else:
+ from bzrlib._chunks_to_lines_py import chunks_to_lines
+ self.assertIs(chunks_to_lines, osutils.chunks_to_lines)
class TestSplitLines(TestCase):
More information about the bazaar-commits
mailing list