Rev 3912: Start adding permutation tests for _groupcompress_py and _groupcompress_pyx in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

John Arbash Meinel john at arbash-meinel.com
Wed Mar 25 20:58:38 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam

------------------------------------------------------------
revno: 3912
revision-id: john at arbash-meinel.com-20090325205816-m8rellryp3b7u4f0
parent: john at arbash-meinel.com-20090325202406-d3na661pqwtw75dx
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Wed 2009-03-25 15:58:16 -0500
message:
  Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
  We need to implement make_delta() for the python version.
  (maybe, but it would be a good test for apply_delta, which we *do* care about)
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py	2009-03-25 20:24:06 +0000
+++ b/bzrlib/_groupcompress_py.py	2009-03-25 20:58:16 +0000
@@ -57,6 +57,83 @@
         except KeyError:
             return None
 
+    def _get_longest_match(self, pos, max_pos, locations):
+        """Get the longest possible match for the current position."""
+        range_start = pos
+        range_len = 0
+        copy_ends = None
+        while pos < max_pos:
+            if locations is None:
+                locations = self.get_idx_matches(pos)
+            if locations is None:
+                # No more matches, just return whatever we have, but we know that
+                # this last position is not going to match anything
+                pos += 1
+                break
+            else:
+                if copy_ends is None:
+                    # We are starting a new range
+                    copy_ends = [loc + 1 for loc in locations]
+                    range_len = 1
+                    locations = None # Consumed
+                else:
+                    # We are currently in the middle of a match
+                    next_locations = set(copy_ends).intersection(locations)
+                    if len(next_locations):
+                        # range continues
+                        copy_ends = [loc + 1 for loc in next_locations]
+                        range_len += 1
+                        locations = None # Consumed
+                    else:
+                        # But we are done with this match, we should be
+                        # starting a new one, though. We will pass back
+                        # 'locations' so that we don't have to do another
+                        # lookup.
+                        break
+            pos += 1
+        if copy_ends is None:
+            return None, pos, locations
+        return (((min(copy_ends) - range_len, range_start, range_len)),
+                pos, locations)
+
+    def get_matching_blocks(self, lines, soft=False):
+        """Return the ranges in lines which match self.lines.
+
+        :param lines: lines to compress
+        :return: A list of (old_start, new_start, length) tuples which reflect
+            a region in self.lines that is present in lines.  The last element
+            of the list is always (old_len, new_len, 0) to provide an end point
+            for generating instructions from the matching blocks list.
+        """
+        result = []
+        pos = 0
+        self.set_right_lines(lines)
+        locations = None
+        max_pos = len(lines)
+        result_append = result.append
+        min_match_bytes = 10
+        if soft:
+            min_match_bytes = 200
+        while pos < max_pos:
+            block, pos, locations = self._get_longest_match(pos, max_pos,
+                                                            locations)
+            if block is not None:
+                # Check to see if we are matching fewer than 5 characters,
+                # which is turned into a simple 'insert', rather than a copy
+                # If we have more than 5 lines, we definitely have more than 5
+                # chars
+                if block[-1] < min_match_bytes:
+                    # This block may be a 'short' block, check
+                    old_start, new_start, range_len = block
+                    matched_bytes = sum(map(len,
+                        lines[new_start:new_start + range_len]))
+                    if matched_bytes < min_match_bytes:
+                        block = None
+            if block is not None:
+                result_append(block)
+        result_append((len(self.lines), len(lines), 0))
+        return result
+
     def _get_matching_lines(self):
         """Return a dictionary showing matching lines."""
         matching = {}
@@ -87,42 +164,11 @@
         self._right_lines = lines
 
 
-def _get_longest_match(equivalence_table, pos, max_pos, locations):
-    """Get the longest possible match for the current position."""
-    range_start = pos
-    range_len = 0
-    copy_ends = None
-    while pos < max_pos:
-        if locations is None:
-            locations = equivalence_table.get_idx_matches(pos)
-        if locations is None:
-            # No more matches, just return whatever we have, but we know that
-            # this last position is not going to match anything
-            pos += 1
-            break
-        else:
-            if copy_ends is None:
-                # We are starting a new range
-                copy_ends = [loc + 1 for loc in locations]
-                range_len = 1
-                locations = None # Consumed
-            else:
-                # We are currently in the middle of a match
-                next_locations = set(copy_ends).intersection(locations)
-                if len(next_locations):
-                    # range continues
-                    copy_ends = [loc + 1 for loc in next_locations]
-                    range_len += 1
-                    locations = None # Consumed
-                else:
-                    # But we are done with this match, we should be
-                    # starting a new one, though. We will pass back 'locations'
-                    # so that we don't have to do another lookup.
-                    break
-        pos += 1
-    if copy_ends is None:
-        return None, pos, locations
-    return ((min(copy_ends) - range_len, range_start, range_len)), pos, locations
+
+def make_delta(source_bytes, target_bytes):
+    """Create a delta from source to target."""
+    line_locations = EquivalenceTable([])
+    return None
 
 
 def apply_delta(basis, delta):

=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py	2009-03-25 20:24:06 +0000
+++ b/bzrlib/groupcompress.py	2009-03-25 20:58:16 +0000
@@ -845,45 +845,6 @@
         self.lines = self.line_locations.lines
         self._present_prefixes = set()
 
-    def get_matching_blocks(self, lines, soft=False):
-        """Return the ranges in lines which match self.lines.
-
-        :param lines: lines to compress
-        :return: A list of (old_start, new_start, length) tuples which reflect
-            a region in self.lines that is present in lines.  The last element
-            of the list is always (old_len, new_len, 0) to provide a end point
-            for generating instructions from the matching blocks list.
-        """
-        result = []
-        pos = 0
-        line_locations = self.line_locations
-        line_locations.set_right_lines(lines)
-        locations = None
-        max_pos = len(lines)
-        result_append = result.append
-        min_match_bytes = 10
-        if soft:
-            min_match_bytes = 200
-        while pos < max_pos:
-            block, pos, locations = _get_longest_match(line_locations, pos,
-                                                       max_pos, locations)
-            if block is not None:
-                # Check to see if we are matching fewer than 5 characters,
-                # which is turned into a simple 'insert', rather than a copy
-                # If we have more than 5 lines, we definitely have more than 5
-                # chars
-                if block[-1] < min_match_bytes:
-                    # This block may be a 'short' block, check
-                    old_start, new_start, range_len = block
-                    matched_bytes = sum(map(len,
-                        lines[new_start:new_start + range_len]))
-                    if matched_bytes < min_match_bytes:
-                        block = None
-            if block is not None:
-                result_append(block)
-        result_append((len(self.lines), len(lines), 0))
-        return result
-
     # FIXME: implement nostore_sha
     def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
         """Compress lines with label key.
@@ -921,7 +882,7 @@
         # reserved for content type, content length, source_len, target_len
         out_lines = ['', '', '', '']
         index_lines = [False, False, False, False]
-        blocks = self.get_matching_blocks(new_lines, soft=soft)
+        blocks = self.line_locations.get_matching_blocks(new_lines, soft=soft)
         current_line_num = 0
         # We either copy a range (while there are reusable lines) or we
         # insert new lines. To find reusable lines we traverse
@@ -2031,7 +1992,6 @@
 from bzrlib._groupcompress_py import (
     apply_delta,
     EquivalenceTable,
-    _get_longest_match,
     )
 try:
     from bzrlib._groupcompress_pyx import (

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2009-03-25 17:29:07 +0000
+++ b/bzrlib/tests/__init__.py	2009-03-25 20:58:16 +0000
@@ -2931,7 +2931,7 @@
                    'bzrlib.tests.per_repository_reference',
                    'bzrlib.tests.test__chk_map',
                    'bzrlib.tests.test__dirstate_helpers',
-                   'bzrlib.tests.test__groupcompress_pyx',
+                   'bzrlib.tests.test__groupcompress',
                    'bzrlib.tests.test__walkdirs_win32',
                    'bzrlib.tests.test_ancestry',
                    'bzrlib.tests.test_annotate',

=== renamed file 'bzrlib/tests/test__groupcompress_pyx.py' => 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress_pyx.py	2009-03-24 19:36:34 +0000
+++ b/bzrlib/tests/test__groupcompress.py	2009-03-25 20:58:16 +0000
@@ -16,12 +16,28 @@
 
 """Tests for the pyrex extension of groupcompress"""
 
-from bzrlib import tests
-
-from bzrlib import groupcompress
-
-
-class _CompiledGroupCompress(tests.Feature):
+from bzrlib import (
+    groupcompress,
+    _groupcompress_py,
+    tests,
+    )
+
+
+def load_tests(standard_tests, module, loader):
+    """Parameterize tests across the Python and compiled implementations."""
+    to_adapt, result = tests.split_suite_by_condition(
+        standard_tests, tests.condition_isinstance(TestMakeAndApplyDelta))
+    scenarios = [
+        ('python', {'_gc_module': _groupcompress_py}),
+        ]
+    if CompiledGroupCompressFeature.available():
+        from bzrlib import _groupcompress_pyx
+        scenarios.append(('C',
+            {'_gc_module': _groupcompress_pyx}))
+    return tests.multiply_tests(to_adapt, scenarios, result)
+
+
+class _CompiledGroupCompressFeature(tests.Feature):
 
     def _probe(self):
         try:
@@ -34,7 +50,7 @@
     def feature_name(self):
         return 'bzrlib._groupcompress_pyx'
 
-CompiledGroupCompress = _CompiledGroupCompress()
+CompiledGroupCompressFeature = _CompiledGroupCompressFeature()
 
 _text1 = """\
 This is a bit
@@ -93,17 +109,9 @@
 same rabin hash
 """
 
-class Test_GroupCompress(tests.TestCase):
-    """Direct tests for the compiled extension."""
-
-    def setUp(self):
-        super(Test_GroupCompress, self).setUp()
-        self.requireFeature(CompiledGroupCompress)
-        from bzrlib import _groupcompress_pyx
-        self._gc_module = _groupcompress_pyx
-
-
-class TestMakeAndApplyDelta(Test_GroupCompress):
+class TestMakeAndApplyDelta(tests.TestCase):
+
+    _gc_module = None # Set by load_tests
 
     def setUp(self):
         super(TestMakeAndApplyDelta, self).setUp()
@@ -160,7 +168,16 @@
         self.assertEqual(_text1, target)
 
 
-class TestDeltaIndex(Test_GroupCompress):
+class TestDeltaIndex(tests.TestCase):
+
+    def setUp(self):
+        super(TestDeltaIndex, self).setUp()
+        # This test isn't multiplied, because we only have DeltaIndex for the
+        # compiled form
+        # We call this here, because _test_needs_features happens after setUp
+        self.requireFeature(CompiledGroupCompressFeature)
+        from bzrlib import _groupcompress_pyx
+        self._gc_module = _groupcompress_pyx
 
     def test_repr(self):
         di = self._gc_module.DeltaIndex('test text\n')

=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py	2009-03-25 20:24:06 +0000
+++ b/bzrlib/tests/test_groupcompress.py	2009-03-25 20:58:16 +0000
@@ -26,10 +26,7 @@
     versionedfile,
     )
 from bzrlib.osutils import sha_string
-from bzrlib.tests import (
-    TestCaseWithTransport,
-    multiply_tests,
-    )
+from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
 
 
 def load_tests(standard_tests, module, loader):
@@ -39,25 +36,10 @@
     scenarios = [
         ('python', {'compressor': groupcompress.PythonGroupCompressor}),
         ]
-    if CompiledGroupcompressFeature.available():
+    if CompiledGroupCompressFeature.available():
         scenarios.append(('C',
             {'compressor': groupcompress.PyrexGroupCompressor}))
-    return multiply_tests(to_adapt, scenarios, result)
-
-
-class _CompiledGroupcompressFeature(tests.Feature):
-
-    def _probe(self):
-        try:
-            import bzrlib._groupcompress_pyx
-        except ImportError:
-            return False
-        return True
-
-    def feature_name(self):
-        return "bzrlib._groupcompress_pyx"
-
-CompiledGroupcompressFeature = _CompiledGroupcompressFeature()
+    return tests.multiply_tests(to_adapt, scenarios, result)
 
 
 class TestGroupCompressor(tests.TestCase):
@@ -140,7 +122,7 @@
 
 class TestPyrexGroupCompressor(TestGroupCompressor):
 
-    _test_needs_features = [CompiledGroupcompressFeature]
+    _test_needs_features = [CompiledGroupCompressFeature]
     compressor = groupcompress.PyrexGroupCompressor
 
     def test_stats(self):



More information about the bazaar-commits mailing list