Rev 4046: Some direct tests for _group_keys_for_io in lp:///~jameinel/bzr/sort_knit_fetch

John Arbash Meinel john at arbash-meinel.com
Wed Feb 25 21:13:28 GMT 2009


At lp:///~jameinel/bzr/sort_knit_fetch

------------------------------------------------------------
revno: 4046
revision-id: john at arbash-meinel.com-20090225211322-qc94czk3s1g7nliq
parent: john at arbash-meinel.com-20090225202304-j52lrdrx8aw101uh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: sort_knit_fetch
timestamp: Wed 2009-02-25 15:13:22 -0600
message:
  Some direct tests for _group_keys_for_io
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2009-02-25 20:23:04 +0000
+++ b/bzrlib/knit.py	2009-02-25 21:13:22 +0000
@@ -1256,7 +1256,8 @@
                 prefix_order.append(prefix)
         return split_by_prefix, prefix_order
 
-    def _group_keys_for_io(self, keys, non_local_keys, positions):
+    def _group_keys_for_io(self, keys, non_local_keys, positions,
+                           _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
         """For the given keys, group them into 'best-sized' requests.
 
         The idea is to avoid making 1 request per file, but to never try to
@@ -1264,8 +1265,8 @@
         possible, we should try to group requests to the same pack file
         together.
 
-        :return: yield (keys, non_local) tuples that indicate what keys should
-            be fetched next.
+        :return: list of (keys, non_local) tuples that indicate what keys
+            should be fetched next.
         """
         # TODO: Ideally we would group on 2 factors. We want to extract texts
         #       from the same pack file together, and we want to extract all
@@ -1287,7 +1288,7 @@
             cur_size += this_size
             cur_keys.extend(keys)
             cur_non_local.update(non_local)
-            if cur_size > _STREAM_MIN_BUFFER_SIZE:
+            if cur_size > _min_buffer_size:
                 result.append((cur_keys, cur_non_local))
                 sizes.append(cur_size)
                 cur_keys = []

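For readers following along, here is a minimal standalone sketch of the
accumulate-until-threshold grouping that the new _min_buffer_size parameter
makes testable. The function name and its arguments are illustrative, not
bzrlib API: the real _group_keys_for_io also splits the keys by prefix first,
charges each group the bytes needed to build its texts (fulltexts plus
deltas), and carries the non_local bookkeeping that this sketch omits.

def group_requests_by_size(prefix_groups, min_buffer_size):
    """Combine per-prefix key groups into 'best-sized' read requests.

    :param prefix_groups: iterable of (keys, byte_size) pairs, one per
        prefix, already in the preferred read order.
    :param min_buffer_size: once a request's accumulated size exceeds this,
        flush it and start a new request (the last one may be smaller).
    :return: list of lists of keys, one list per request.
    """
    requests = []
    cur_keys = []
    cur_size = 0
    for keys, byte_size in prefix_groups:
        # Always take the whole prefix group; a single group larger than
        # the threshold simply becomes a request of its own.
        cur_keys.extend(keys)
        cur_size += byte_size
        if cur_size > min_buffer_size:
            requests.append(cur_keys)
            cur_keys = []
            cur_size = 0
    if cur_keys:
        requests.append(cur_keys)
    return requests
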
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2009-02-25 20:23:04 +0000
+++ b/bzrlib/tests/test_knit.py	2009-02-25 21:13:22 +0000
@@ -1889,12 +1889,61 @@
 
 class TestKnitVersionedFiles(KnitTests):
 
+    def assertGroupKeysForIo(self, exp_groups, keys, non_local_keys,
+                             positions, _min_buffer_size=None):
+        kvf = self.make_test_knit()
+        if _min_buffer_size is None:
+            _min_buffer_size = knit._STREAM_MIN_BUFFER_SIZE
+        self.assertEqual(exp_groups, kvf._group_keys_for_io(keys,
+                                        non_local_keys, positions,
+                                        _min_buffer_size=_min_buffer_size))
+
     def assertSplitByPrefix(self, expected_map, expected_prefix_order,
                             keys):
         split, prefix_order = KnitVersionedFiles._split_by_prefix(keys)
         self.assertEqual(expected_map, split)
         self.assertEqual(expected_prefix_order, prefix_order)
 
+    def test__group_keys_for_io(self):
+        ft_detail = ('fulltext', False)
+        ld_detail = ('line-delta', False)
+        f_a = ('f', 'a')
+        f_b = ('f', 'b')
+        f_c = ('f', 'c')
+        g_a = ('g', 'a')
+        g_b = ('g', 'b')
+        g_c = ('g', 'c')
+        positions = {
+            f_a: (ft_detail, (f_a, 0, 100), None),
+            f_b: (ld_detail, (f_b, 100, 21), f_a),
+            f_c: (ld_detail, (f_c, 180, 15), f_b),
+            g_a: (ft_detail, (g_a, 121, 35), None),
+            g_b: (ld_detail, (g_b, 156, 12), g_a),
+            g_c: (ld_detail, (g_c, 195, 13), g_a),
+            }
+        self.assertGroupKeysForIo([([f_a], set())],
+                                  [f_a], [], positions)
+        self.assertGroupKeysForIo([([f_a], set([f_a]))],
+                                  [f_a], [f_a], positions)
+        self.assertGroupKeysForIo([([f_a, f_b], set([]))],
+                                  [f_a, f_b], [], positions)
+        self.assertGroupKeysForIo([([f_a, f_b], set([f_b]))],
+                                  [f_a, f_b], [f_b], positions)
+        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
+                                  [f_a, g_a, f_b, g_b], [], positions)
+        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
+                                  [f_a, g_a, f_b, g_b], [], positions,
+                                  _min_buffer_size=150)
+        self.assertGroupKeysForIo([([f_a, f_b], set()), ([g_a, g_b], set())],
+                                  [f_a, g_a, f_b, g_b], [], positions,
+                                  _min_buffer_size=100)
+        self.assertGroupKeysForIo([([f_c], set()), ([g_b], set())],
+                                  [f_c, g_b], [], positions,
+                                  _min_buffer_size=125)
+        self.assertGroupKeysForIo([([g_b, f_c], set())],
+                                  [g_b, f_c], [], positions,
+                                  _min_buffer_size=125)
+
     def test__split_by_prefix(self):
         self.assertSplitByPrefix({'f': [('f', 'a'), ('f', 'b')],
                                   'g': [('g', 'b'), ('g', 'a')],

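As a sanity check against the new test data, the _min_buffer_size=100 case
works out like this with the sketch above (the byte counts come straight
from the positions dict: the 'f' records total 100 + 21 = 121 bytes, the
'g' records 35 + 12 = 47):

groups = group_requests_by_size(
    [([('f', 'a'), ('f', 'b')], 121),
     ([('g', 'a'), ('g', 'b')], 47)],
    min_buffer_size=100)
# 121 > 100, so the 'f' group is flushed as its own request and the 'g'
# group is emitted as the trailing remainder:
# groups == [[('f', 'a'), ('f', 'b')], [('g', 'a'), ('g', 'b')]]
# which lines up with the expected [([f_a, f_b], set()), ([g_a, g_b], set())].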