Rev 5370: Found some more bugs. I wasn't incrementing one of the pointers, in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
John Arbash Meinel
john at arbash-meinel.com
Wed Aug 4 00:41:16 BST 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
------------------------------------------------------------
revno: 5370
revision-id: john at arbash-meinel.com-20100803234101-742wpbkuyhqc9tpp
parent: john at arbash-meinel.com-20100803221022-dz367g1mvwqi7cq4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 18:41:01 -0500
message:
Found some more bugs. I wasn't incrementing one of the pointers,
so I was parsing all the records, but not saving them correctly.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2010-08-03 22:10:22 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2010-08-03 23:41:01 +0000
@@ -354,12 +354,17 @@
cdef int _unhexlify_sha1(char *as_hex, char *as_bin):
- """Take the hex sha1 in as_hex and make it binary in as_bin"""
+ """Take the hex sha1 in as_hex and make it binary in as_bin
+
+ Same as binascii.unhexlify, but working on C strings, not Python objects.
+ """
cdef int top
cdef int bot
cdef int i, j
cdef char *cur
+ # binascii does this using isupper() and tolower() and ?: syntax. I'm
+ # guessing a simple lookup array should be faster.
j = 0
for i from 0 <= i < 20:
top = _unhexbuf[<unsigned char>(as_hex[j])]
@@ -404,6 +409,63 @@
return as_hex
+cdef int _key_to_sha1(key, char *sha1):
+ """Map a key into its sha1 content.
+
+ :param key: A tuple of style ('sha1:abcd...',)
+ :param sha1: A char buffer of 20 bytes
+ :return: 1 if this could be converted, 0 otherwise
+ """
+ cdef char *c_val
+ if not PyTuple_CheckExact(key) and not StaticTuple_CheckExact(key):
+ return 0
+ if len(key) != 1:
+ return 0
+ val = key[0]
+ if not PyString_CheckExact(val) or PyString_GET_SIZE(val) != 45:
+ return 0
+ c_val = PyString_AS_STRING(val)
+ if strncmp(c_val, 'sha1:', 5) != 0:
+ return 0
+ if not _unhexlify_sha1(c_val + 5, sha1):
+ return 0
+ return 1
+
+
+def _test_key_to_sha1(key):
+ """Map a key to a simple sha1 string.
+
+ This is a testing thunk to the C function.
+ """
+ as_bin_sha = PyString_FromStringAndSize(NULL, 20)
+ if _key_to_sha1(key, PyString_AS_STRING(as_bin_sha)):
+ return as_bin_sha
+ return None
+
+
+cdef StaticTuple _sha1_to_key(char *sha1):
+ """Compute a ('sha1:abcd',) key for a given sha1."""
+ cdef StaticTuple key
+ cdef object hexxed
+ cdef char *c_buf
+ hexxed = PyString_FromStringAndSize(NULL, 45)
+ c_buf = PyString_AS_STRING(hexxed)
+ memcpy(c_buf, 'sha1:', 5)
+ _hexlify_sha1(sha1, c_buf+5)
+ key = StaticTuple_New(1)
+ Py_INCREF(hexxed)
+ StaticTuple_SET_ITEM(key, 0, hexxed)
+ # key = StaticTuple_Intern(key)
+ return key
+
+
+def _test_sha1_to_key(sha1_bin):
+ """Test thunk to check the sha1 mapping."""
+ if not PyString_CheckExact(sha1_bin) or PyString_GET_SIZE(sha1_bin) != 20:
+ raise ValueError('sha1_bin must be a str of exactly 20 bytes')
+ return _sha1_to_key(PyString_AS_STRING(sha1_bin))
+
+
cdef class GCCHKSHA1LeafNode:
"""Track all the entries for a given leaf node."""
@@ -432,52 +494,15 @@
property min_key:
def __get__(self):
if self.num_entries > 0:
- return self._sha1_to_key(self.entries[0].sha1)
+ return _sha1_to_key(self.entries[0].sha1)
return None
property max_key:
def __get__(self):
if self.num_entries > 0:
- return self._sha1_to_key(self.entries[self.num_entries-1].sha1)
+ return _sha1_to_key(self.entries[self.num_entries-1].sha1)
return None
- cdef int _key_to_sha1(self, key, char *sha1):
- """Map a key into its sha1 content.
-
- :param key: A tuple of style ('sha1:abcd...',)
- :param sha1: A char buffer of 20 bytes
- :return: 1 if this could be converted, 0 otherwise
- """
- cdef char *c_val
- if not PyTuple_CheckExact(key) and not StaticTuple_CheckExact(key):
- return 0
- if len(key) != 1:
- return 0
- val = key[0]
- if not PyString_CheckExact(val) or PyString_GET_SIZE(val) != 45:
- return 0
- c_val = PyString_AS_STRING(val)
- if not strncmp(c_val, 'sha1:', 5):
- return 0
- if not _unhexlify_sha1(c_val + 5, sha1):
- return 0
- return 1
-
- cdef StaticTuple _sha1_to_key(self, char *sha1):
- """Compute a ('sha1:abcd',) key for a given sha1."""
- cdef StaticTuple key
- cdef object hexxed
- cdef char *c_buf
- hexxed = PyString_FromStringAndSize(NULL, 45)
- c_buf = PyString_AS_STRING(hexxed)
- memcpy(c_buf, 'sha1:', 5)
- _hexlify_sha1(sha1, c_buf+5)
- key = StaticTuple_New(1)
- Py_INCREF(hexxed)
- StaticTuple_SET_ITEM(key, 0, hexxed)
- key = StaticTuple_Intern(key)
- return key
-
cdef StaticTuple _record_to_value_and_refs(self,
gc_chk_sha1_record *record):
"""Extract the refs and value part of this record."""
@@ -486,8 +511,9 @@
value_and_refs = StaticTuple_New(2)
# This is really inefficient to go from a logical state back to a
# string, but it makes things work a bit better internally for now.
- value = PyString_FromFormat('%llu %lu %lu %lu',
- record.block_offset, record.block_length,
+ value = PyString_FromFormat('%lu %lu %lu %lu',
+ <unsigned long>record.block_offset,
+ record.block_length,
record.record_start, record.record_end)
Py_INCREF(value)
StaticTuple_SET_ITEM(value_and_refs, 0, value)
@@ -504,7 +530,7 @@
cdef StaticTuple key
cdef StaticTuple value_and_refs
cdef object value
- key = self._sha1_to_key(record.sha1)
+ key = _sha1_to_key(record.sha1)
item = StaticTuple_New(2)
Py_INCREF(key)
StaticTuple_SET_ITEM(item, 0, key)
@@ -526,7 +552,7 @@
def __contains__(self, key):
cdef char sha1[20]
cdef gc_chk_sha1_record *record
- if not self._key_to_sha1(key, sha1):
+ if not _key_to_sha1(key, sha1):
# If it isn't a sha1 key, then it won't be in this leaf node
return False
return self._lookup_record(sha1) != NULL
@@ -534,7 +560,7 @@
def __getitem__(self, key):
cdef char sha1[20]
cdef gc_chk_sha1_record *record = NULL
- if self._key_to_sha1(key, sha1):
+ if _key_to_sha1(key, sha1):
record = self._lookup_record(sha1)
if record == NULL:
raise KeyError('key %r is not present' % (key,))
@@ -547,7 +573,7 @@
cdef int i
cdef list result = []
for i from 0 <= i < self.num_entries:
- result.append(self._sha1_to_key(self.entries[i].sha1))
+ result.append(_sha1_to_key(self.entries[i].sha1))
return result
def all_items(self):
@@ -627,6 +653,12 @@
if c_cur == c_next or c_next[0] != c'\n':
raise ValueError('Failed to parse record end')
c_cur = c_next + 1
+ cur_record += 1
+ entry += 1
+ if (entry != self.num_entries
+ or c_cur != c_end
+ or cur_record != self.entries + self.num_entries):
+ raise ValueError('Something went wrong while parsing.')
# Pass 3: build the offset map
# The idea with the offset map is that we should be able to quickly
# jump to the key that matches a gives sha1. We know that the keys are
=== modified file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py 2010-08-03 22:10:22 +0000
+++ b/bzrlib/tests/test__btree_serializer.py 2010-08-03 23:41:01 +0000
@@ -32,6 +32,7 @@
super(TestBtreeSerializer, self).setUp()
self.module = compiled_btreeparser_feature.module
+
class TestHexAndUnhex(TestBtreeSerializer):
def assertHexlify(self, as_binary):
@@ -75,9 +76,68 @@
self.assertFailUnhexlify('12345678901234567890123456789012345678X9')
+_hex_form = '123456789012345678901234567890abcdefabcd'
+
+class Test_KeyToSha1(TestBtreeSerializer):
+
+ def assertKeyToSha1(self, expected, key):
+ if expected is None:
+ expected_bin = None
+ else:
+ expected_bin = binascii.unhexlify(expected)
+ actual_sha1 = self.module._test_key_to_sha1(key)
+ if expected_bin != actual_sha1:
+ actual_hex_sha1 = None
+ if actual_sha1 is not None:
+ actual_hex_sha1 = binascii.hexlify(actual_sha1)
+ self.fail('_key_to_sha1 returned:\n %s\n != %s'
+ % (actual_sha1, expected))
+
+ def test_simple(self):
+ self.assertKeyToSha1(_hex_form, ('sha1:' + _hex_form,))
+
+ def test_invalid_not_tuple(self):
+ self.assertKeyToSha1(None, _hex_form)
+ self.assertKeyToSha1(None, 'sha1:' + _hex_form)
+
+ def test_invalid_empty(self):
+ self.assertKeyToSha1(None, ())
+
+ def test_invalid_not_string(self):
+ self.assertKeyToSha1(None, (None,))
+ self.assertKeyToSha1(None, (list(_hex_form),))
+
+ def test_invalid_not_sha1(self):
+ self.assertKeyToSha1(None, (_hex_form,))
+ self.assertKeyToSha1(None, ('sha2:' + _hex_form,))
+
+ def test_invalid_not_hex(self):
+ self.assertKeyToSha1(None,
+ ('sha1:abcdefghijklmnopqrstuvwxyz12345678901234',))
+
+
+class Test_Sha1ToKey(TestBtreeSerializer):
+
+ def assertSha1ToKey(self, hex_sha1):
+ bin_sha1 = binascii.unhexlify(hex_sha1)
+ key = self.module._test_sha1_to_key(bin_sha1)
+ self.assertEqual(('sha1:' + hex_sha1,), key)
+
+ def test_simple(self):
+ self.assertSha1ToKey(_hex_form)
+
+
+_one_key_content = """type=leaf
+sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
+"""
+
+_multi_key_content = """type=leaf
+sha1:123456789012345678901234567890abcdefabcd\x00\x001 2 3 4
+sha1:abcd123456789012345678901234567890abcdef\x00\x005678 2345 3456 4567
+"""
+
class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
-
def assertInvalid(self, bytes):
"""Ensure that we get a proper error when trying to parse invalid bytes.
@@ -97,3 +157,25 @@
self.assertEqual(0, len(leaf))
self.assertEqual([], leaf.all_items())
self.assertEqual([], leaf.all_keys())
+ # It should allow any key to be queried
+ self.assertFalse(('key',) in leaf)
+
+ def test_one_key_leaf(self):
+ leaf = self.module._parse_into_chk(_one_key_content, 1, 0)
+ self.assertEqual(1, len(leaf))
+ sha_key = ('sha1:123456789012345678901234567890abcdefabcd',)
+ self.assertEqual([sha_key], leaf.all_keys())
+ self.assertEqual([(sha_key, ('1 2 3 4', ()))], leaf.all_items())
+ self.assertTrue(sha_key in leaf)
+
+ def test_many_key_leaf(self):
+ leaf = self.module._parse_into_chk(_multi_key_content, 1, 0)
+ self.assertEqual(2, len(leaf))
+ sha_key1 = ('sha1:123456789012345678901234567890abcdefabcd',)
+ sha_key2 = ('sha1:abcd123456789012345678901234567890abcdef',)
+ self.assertEqual([sha_key1, sha_key2], leaf.all_keys())
+ self.assertEqual([(sha_key1, ('1 2 3 4', ())),
+ (sha_key2, ('5678 2345 3456 4567', ()))
+ ], leaf.all_items())
+ self.assertTrue(sha_key1 in leaf)
+ self.assertTrue(sha_key2 in leaf)
More information about the bazaar-commits
mailing list