Rev 5374: Add some tests that key lookup works. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
John Arbash Meinel
john at arbash-meinel.com
Wed Aug 4 02:52:29 BST 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
------------------------------------------------------------
revno: 5374
revision-id: john at arbash-meinel.com-20100804015214-5dbdub9p0qyc07dp
parent: john at arbash-meinel.com-20100804013103-mz61vkhuj1w3uozh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 20:52:14 -0500
message:
Add some tests that key lookup works.
Timing shows that looking up all 120 keys takes 205us, using bisect takes 184us.
So better, but not great. I'm a bit surprised it isn't faster, but perhaps
the comparison is a lot less of total time than the conversion to
python objects.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2010-08-04 01:31:03 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2010-08-04 01:52:14 +0000
@@ -561,12 +561,21 @@
cdef gc_chk_sha1_record* _lookup_record(self, char *sha1):
"""Find a gc_chk_sha1_record that matches the sha1 supplied."""
- # For right now we iterate, in the future we should bisect, or create
- # a local index, or use the sha1 as a hash into a local table, etc.
- cdef int i
- for i from 0 <= i < self.num_entries:
- if memcmp(self.entries[i].sha1, sha1, 20) == 0:
- return &self.entries[i]
+ cdef int lo, hi, mid, the_cmp
+
+ lo = 0
+ hi = self.num_entries
+
+ # Bisect until we find the right spot
+ while lo < hi:
+ mid = (lo + hi) / 2
+ the_cmp = memcmp(sha1, self.entries[mid].sha1, 20)
+ if the_cmp == 0:
+ return &self.entries[mid]
+ elif the_cmp < 0:
+ hi = mid
+ else:
+ lo = mid + 1
return NULL
def __contains__(self, key):
=== modified file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py 2010-08-04 01:31:03 +0000
+++ b/bzrlib/tests/test__btree_serializer.py 2010-08-04 01:52:14 +0000
@@ -138,8 +138,14 @@
"""
_multi_key_content = """type=leaf
-sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
-sha1:abcd123456789012345678901234567890abcdef\x00\x005678 2345 3456 4567
+sha1:70c881d4a26984ddce795f6f71817c9cf4480e79\x00\x000 0 0 0
+sha1:7e240de74fb1ed08fa08d38063f6a6a91462a815\x00\x001 1 1 1
+sha1:86f7e437faa5a7fce15d1ddcb9eaeaea377667b8\x00\x002 2 2 2
+sha1:da39a3ee5e6b4b0d3255bfef95601890afd80709\x00\x003 3 3 3
+sha1:df51e37c269aa94d38f93e537bf6e2020b21406c\x00\x004 4 4 4
+sha1:e0c9035898dd52fc65c41454cec9c4d2611bfb37\x00\x005 5 5 5
+sha1:e93b4e3c464ffd51732fbd6ded717e9efda28aad\x00\x006 6 6 6
+sha1:f7a9e24777ec23212c54d7a350bc5bea5477fdbb\x00\x007 7 7 7
"""
class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
@@ -183,12 +189,8 @@
def test_many_key_leaf(self):
leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
- self.assertEqual(2, len(leaf))
- sha_key1 = ('sha1:' + _hex_form,)
- sha_key2 = ('sha1:abcd123456789012345678901234567890abcdef',)
- self.assertEqual([sha_key1, sha_key2], leaf.all_keys())
- self.assertEqual([(sha_key1, ('1 2 3 4', ())),
- (sha_key2, ('5678 2345 3456 4567', ()))
- ], leaf.all_items())
- self.assertTrue(sha_key1 in leaf)
- self.assertTrue(sha_key2 in leaf)
+ self.assertEqual(8, len(leaf))
+ all_keys = leaf.all_keys()
+ self.assertEqual(8, len(leaf.all_keys()))
+ for idx, key in enumerate(all_keys):
+ self.assertEqual(str(idx), leaf[key][0].split()[0])
More information about the bazaar-commits
mailing list