Rev 5374: Add some tests that key lookup works. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

John Arbash Meinel john at arbash-meinel.com
Wed Aug 4 02:52:29 BST 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

------------------------------------------------------------
revno: 5374
revision-id: john at arbash-meinel.com-20100804015214-5dbdub9p0qyc07dp
parent: john at arbash-meinel.com-20100804013103-mz61vkhuj1w3uozh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 20:52:14 -0500
message:
  Add some tests that key lookup works.
  
  Timing shows that looking up all 120 keys takes 205us, using bisect takes 184us.
  So better, but not great. I'm a bit surprised it isn't faster, but perhaps
  the comparison is a lot less of total time than the conversion to
  python objects.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2010-08-04 01:31:03 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2010-08-04 01:52:14 +0000
@@ -561,12 +561,21 @@
 
     cdef gc_chk_sha1_record* _lookup_record(self, char *sha1):
         """Find a gc_chk_sha1_record that matches the sha1 supplied."""
-        # For right now we iterate, in the future we should bisect, or create
-        # a local index, or use the sha1 as a hash into a local table, etc.
-        cdef int i
-        for i from 0 <= i < self.num_entries:
-            if memcmp(self.entries[i].sha1, sha1, 20) == 0:
-                return &self.entries[i]
+        cdef int lo, hi, mid, the_cmp
+
+        lo = 0
+        hi = self.num_entries
+
+        # Bisect until we find the right spot
+        while lo < hi:
+            mid = (lo + hi) / 2
+            the_cmp = memcmp(sha1, self.entries[mid].sha1, 20)
+            if the_cmp == 0:
+                return &self.entries[mid]
+            elif the_cmp < 0:
+                hi = mid
+            else:
+                lo = mid + 1
         return NULL
 
     def __contains__(self, key):

=== modified file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py	2010-08-04 01:31:03 +0000
+++ b/bzrlib/tests/test__btree_serializer.py	2010-08-04 01:52:14 +0000
@@ -138,8 +138,14 @@
 """
 
 _multi_key_content = """type=leaf
-sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
-sha1:abcd123456789012345678901234567890abcdef\x00\x005678 2345 3456 4567
+sha1:70c881d4a26984ddce795f6f71817c9cf4480e79\x00\x000 0 0 0
+sha1:7e240de74fb1ed08fa08d38063f6a6a91462a815\x00\x001 1 1 1
+sha1:86f7e437faa5a7fce15d1ddcb9eaeaea377667b8\x00\x002 2 2 2
+sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709\x00\x003 3 3 3
+sha1:df51e37c269aa94d38f93e537bf6e2020b21406c\x00\x004 4 4 4
+sha1:e0c9035898dd52fc65c41454cec9c4d2611bfb37\x00\x005 5 5 5
+sha1:e93b4e3c464ffd51732fbd6ded717e9efda28aad\x00\x006 6 6 6
+sha1:f7a9e24777ec23212c54d7a350bc5bea5477fdbb\x00\x007 7 7 7
 """
 
 class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
@@ -183,12 +189,8 @@
 
     def test_many_key_leaf(self):
         leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
-        self.assertEqual(2, len(leaf))
-        sha_key1 = ('sha1:' + _hex_form,)
-        sha_key2 = ('sha1:abcd123456789012345678901234567890abcdef',)
-        self.assertEqual([sha_key1, sha_key2], leaf.all_keys())
-        self.assertEqual([(sha_key1, ('1 2 3 4', ())),
-                          (sha_key2, ('5678 2345 3456 4567', ()))
-                         ], leaf.all_items())
-        self.assertTrue(sha_key1 in leaf)
-        self.assertTrue(sha_key2 in leaf)
+        self.assertEqual(8, len(leaf))
+        all_keys = leaf.all_keys()
+        self.assertEqual(8, len(leaf.all_keys()))
+        for idx, key in enumerate(all_keys):
+            self.assertEqual(str(idx), leaf[key][0].split()[0])



More information about the bazaar-commits mailing list