Rev 5369: Some direct tests of the hex and unhex functions. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
John Arbash Meinel
john at arbash-meinel.com
Tue Aug 3 23:10:37 BST 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf
------------------------------------------------------------
revno: 5369
revision-id: john at arbash-meinel.com-20100803221022-dz367g1mvwqi7cq4
parent: john at arbash-meinel.com-20100803212946-xurjxv9i4xz8luac
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 17:10:22 -0500
message:
Some direct tests of the hex and unhex functions.
As they say, untested code is broken code. And my code was broken.
(I didn't actually init the array, I had bugs in my init strings, etc.)
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx 2010-08-03 21:29:46 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx 2010-08-03 22:10:22 +0000
@@ -327,6 +327,8 @@
# the big win there is to cache across pages, and not just one page
# Though if we did cache in a page, we could certainly use a short int.
# And this goes from 40 bytes to 30 bytes.
+# One slightly ugly option would be to cache block offsets in a global.
+# However, that leads to thread-safety issues, etc.
ctypedef struct gc_chk_sha1_record:
unsigned long long block_offset
unsigned int block_length
@@ -336,39 +338,50 @@
cdef int _unhexbuf[256]
-cdef char *_hexbuf = '01234567890abcdef'
+cdef char *_hexbuf = '0123456789abcdef'
cdef _populate_unhexbuf():
- cdef unsigned char a
- for a from 0 <= a < 255:
- _unhexbuf[a] = -1
- for a in '0123456789':
- _unhexbuf[a] = a - c'0'
- for a in 'abcdef':
- _unhexbuf[a] = a - c'a' + 10
- for a in 'ABCDEF':
- _unhexbuf[a] = a - c'A' + 10
+ cdef int i
+ for i from 0 <= i < 256:
+ _unhexbuf[i] = -1
+ for i from 0 <= i < 10: # 0123456789 => map to the raw number
+ _unhexbuf[(i + c'0')] = i
+ for i from 10 <= i < 16: # abcdef => 10, 11, 12, 13, 14, 15
+ _unhexbuf[(i - 10 + c'a')] = i
+ for i from 10 <= i < 16: # ABCDEF => 10, 11, 12, 13, 14, 15
+ _unhexbuf[(i - 10 + c'A')] = i
+_populate_unhexbuf()
cdef int _unhexlify_sha1(char *as_hex, char *as_bin):
"""Take the hex sha1 in as_hex and make it binary in as_bin"""
cdef int top
cdef int bot
- cdef int i
+ cdef int i, j
cdef char *cur
- cur = as_hex
+ j = 0
for i from 0 <= i < 20:
- top = _unhexbuf[<unsigned char>(cur)]
- cur += 1
- bot = _unhexbuf[<unsigned char>(cur)]
- cur += 1
+ top = _unhexbuf[<unsigned char>(as_hex[j])]
+ j += 1
+ bot = _unhexbuf[<unsigned char>(as_hex[j])]
+ j += 1
if top == -1 or bot == -1:
return 0
- as_bin[i] = (top << 4) + bot;
+ as_bin[i] = <unsigned char>((top << 4) + bot);
return 1
+def _test_unhexlify(as_hex):
+ """For the test infrastructure, just thunks to _unhexlify_sha1"""
+ if len(as_hex) != 40 or not PyString_CheckExact(as_hex):
+ raise ValueError('not a 40-byte hex digest')
+ as_bin = PyString_FromStringAndSize(NULL, 20)
+ if _unhexlify_sha1(PyString_AS_STRING(as_hex), PyString_AS_STRING(as_bin)):
+ return as_bin
+ return None
+
+
cdef void _hexlify_sha1(char *as_bin, char *as_hex):
cdef int i, j
cdef char c
@@ -382,6 +395,15 @@
j += 1
+def _test_hexlify(as_bin):
+ """For test infrastructure, thunk to _hexlify_sha1"""
+ if len(as_bin) != 20 or not PyString_CheckExact(as_bin):
+ raise ValueError('not a 20-byte binary digest')
+ as_hex = PyString_FromStringAndSize(NULL, 40)
+ _hexlify_sha1(PyString_AS_STRING(as_bin), PyString_AS_STRING(as_hex))
+ return as_hex
+
+
cdef class GCCHKSHA1LeafNode:
"""Track all the entries for a given leaf node."""
=== modified file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py 2010-08-03 21:29:46 +0000
+++ b/bzrlib/tests/test__btree_serializer.py 2010-08-03 22:10:22 +0000
@@ -17,19 +17,67 @@
"""Direct tests of the btree serializer extension"""
+import binascii
+
from bzrlib import tests
from bzrlib.tests.test_btree_index import compiled_btreeparser_feature
-class TestGCCKHSHA1LeafNode(tests.TestCase):
+class TestBtreeSerializer(tests.TestCase):
_test_needs_features = [compiled_btreeparser_feature]
def setUp(self):
- super(TestGCCKHSHA1LeafNode, self).setUp()
+ super(TestBtreeSerializer, self).setUp()
self.module = compiled_btreeparser_feature.module
+class TestHexAndUnhex(TestBtreeSerializer):
+
+ def assertHexlify(self, as_binary):
+ self.assertEqual(binascii.hexlify(as_binary),
+ self.module._test_hexlify(as_binary))
+
+ def assertUnhexlify(self, as_hex):
+ ba_unhex = binascii.unhexlify(as_hex)
+ mod_unhex = self.module._test_unhexlify(as_hex)
+ if ba_unhex != mod_unhex:
+ if mod_unhex is None:
+ mod_hex = '<None>'
+ else:
+ mod_hex = binascii.hexlify(mod_unhex)
+ self.fail('_test_unhexlify returned a different answer'
+ ' from binascii:\n %s\n != %s'
+ % (binascii.hexlify(ba_unhex), mod_hex))
+
+ def assertFailUnhexlify(self, as_hex):
+ # Invalid hex content
+ self.assertIs(None, self.module._test_unhexlify(as_hex))
+
+ def test_to_hex(self):
+ raw_bytes = ''.join(map(chr, range(256)))
+ for i in range(0, 240, 20):
+ self.assertHexlify(raw_bytes[i:i+20])
+ self.assertHexlify(raw_bytes[240:]+raw_bytes[0:4])
+
+ def test_from_hex(self):
+ self.assertUnhexlify('0123456789abcdef0123456789abcdef01234567')
+ self.assertUnhexlify('123456789abcdef0123456789abcdef012345678')
+ self.assertUnhexlify('0123456789ABCDEF0123456789ABCDEF01234567')
+ self.assertUnhexlify('123456789ABCDEF0123456789ABCDEF012345678')
+ hex_chars = binascii.hexlify(''.join(map(chr, range(256))))
+ for i in range(0, 480, 40):
+ self.assertUnhexlify(hex_chars[i:i+40])
+ self.assertUnhexlify(hex_chars[480:]+hex_chars[0:8])
+
+ def test_from_invalid_hex(self):
+ self.assertFailUnhexlify('123456789012345678901234567890123456789X')
+ self.assertFailUnhexlify('12345678901234567890123456789012345678X9')
+
+
+class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
+
+
def assertInvalid(self, bytes):
"""Ensure that we get a proper error when trying to parse invalid bytes.
More information about the bazaar-commits
mailing list