Rev 5369: Some direct tests of the hex and unhex functions. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

John Arbash Meinel john at arbash-meinel.com
Tue Aug 3 23:10:37 BST 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-btree-chk-leaf

------------------------------------------------------------
revno: 5369
revision-id: john at arbash-meinel.com-20100803221022-dz367g1mvwqi7cq4
parent: john at arbash-meinel.com-20100803212946-xurjxv9i4xz8luac
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-btree-chk-leaf
timestamp: Tue 2010-08-03 17:10:22 -0500
message:
  Some direct tests of the hex and unhex functions.
  
  As they say, untested code is broken code. And my code was broken.
  (I didn't actually init the array, I had bugs in my init strings, etc.)
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2010-08-03 21:29:46 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2010-08-03 22:10:22 +0000
@@ -327,6 +327,8 @@
 #       the big win there is to cache across pages, and not just one page
 #       Though if we did cache in a page, we could certainly use a short int.
 #       And this goes from 40 bytes to 30 bytes.
+#       One slightly ugly option would be to cache block offsets in a global.
+#       However, that leads to thread-safety issues, etc.
 ctypedef struct gc_chk_sha1_record:
     unsigned long long block_offset
     unsigned int block_length
@@ -336,39 +338,50 @@
 
 
 cdef int _unhexbuf[256]
-cdef char *_hexbuf = '01234567890abcdef'
+cdef char *_hexbuf = '0123456789abcdef'
 
 cdef _populate_unhexbuf():
-    cdef unsigned char a
-    for a from 0 <= a < 255:
-        _unhexbuf[a] = -1
-    for a in '0123456789':
-        _unhexbuf[a] = a - c'0'
-    for a in 'abcdef':
-        _unhexbuf[a] = a - c'a' + 10
-    for a in 'ABCDEF':
-        _unhexbuf[a] = a - c'A' + 10
+    cdef int i
+    for i from 0 <= i < 256:
+        _unhexbuf[i] = -1
+    for i from 0 <= i < 10: # 0123456789 => map to the raw number
+        _unhexbuf[(i + c'0')] = i
+    for i from 10 <= i < 16: # abcdef => 10, 11, 12, 13, 14, 15
+        _unhexbuf[(i - 10 + c'a')] = i
+    for i from 10 <= i < 16: # ABCDEF => 10, 11, 12, 13, 14, 15
+        _unhexbuf[(i - 10 + c'A')] = i
+_populate_unhexbuf()
 
 
 cdef int _unhexlify_sha1(char *as_hex, char *as_bin):
     """Take the hex sha1 in as_hex and make it binary in as_bin"""
     cdef int top
     cdef int bot
-    cdef int i
+    cdef int i, j
     cdef char *cur
     
-    cur = as_hex
+    j = 0
     for i from 0 <= i < 20:
-        top = _unhexbuf[<unsigned char>(cur)]
-        cur += 1
-        bot = _unhexbuf[<unsigned char>(cur)]
-        cur += 1
+        top = _unhexbuf[<unsigned char>(as_hex[j])]
+        j += 1
+        bot = _unhexbuf[<unsigned char>(as_hex[j])]
+        j += 1
         if top == -1 or bot == -1:
             return 0
-        as_bin[i] = (top << 4) + bot;
+        as_bin[i] = <unsigned char>((top << 4) + bot);
     return 1
 
 
+def _test_unhexlify(as_hex):
+    """For the test infrastructure, just thunks to _unhexlify_sha1"""
+    if len(as_hex) != 40 or not PyString_CheckExact(as_hex):
+        raise ValueError('not a 40-byte hex digest')
+    as_bin = PyString_FromStringAndSize(NULL, 20)
+    if _unhexlify_sha1(PyString_AS_STRING(as_hex), PyString_AS_STRING(as_bin)):
+        return as_bin
+    return None
+
+
 cdef void _hexlify_sha1(char *as_bin, char *as_hex):
     cdef int i, j
     cdef char c
@@ -382,6 +395,15 @@
         j += 1
 
 
+def _test_hexlify(as_bin):
+    """For test infrastructure, thunk to _hexlify_sha1"""
+    if len(as_bin) != 20 or not PyString_CheckExact(as_bin):
+        raise ValueError('not a 20-byte binary digest')
+    as_hex = PyString_FromStringAndSize(NULL, 40)
+    _hexlify_sha1(PyString_AS_STRING(as_bin), PyString_AS_STRING(as_hex))
+    return as_hex
+
+
 cdef class GCCHKSHA1LeafNode:
     """Track all the entries for a given leaf node."""
 

=== modified file 'bzrlib/tests/test__btree_serializer.py'
--- a/bzrlib/tests/test__btree_serializer.py	2010-08-03 21:29:46 +0000
+++ b/bzrlib/tests/test__btree_serializer.py	2010-08-03 22:10:22 +0000
@@ -17,19 +17,67 @@
 
 """Direct tests of the btree serializer extension"""
 
+import binascii
+
 from bzrlib import tests
 
 from bzrlib.tests.test_btree_index import compiled_btreeparser_feature
 
 
-class TestGCCKHSHA1LeafNode(tests.TestCase):
+class TestBtreeSerializer(tests.TestCase):
 
     _test_needs_features = [compiled_btreeparser_feature]
 
     def setUp(self):
-        super(TestGCCKHSHA1LeafNode, self).setUp()
+        super(TestBtreeSerializer, self).setUp()
         self.module = compiled_btreeparser_feature.module
 
+class TestHexAndUnhex(TestBtreeSerializer):
+
+    def assertHexlify(self, as_binary):
+        self.assertEqual(binascii.hexlify(as_binary),
+                         self.module._test_hexlify(as_binary))
+
+    def assertUnhexlify(self, as_hex):
+        ba_unhex = binascii.unhexlify(as_hex)
+        mod_unhex = self.module._test_unhexlify(as_hex)
+        if ba_unhex != mod_unhex:
+            if mod_unhex is None:
+                mod_hex = '<None>'
+            else:
+                mod_hex = binascii.hexlify(mod_unhex)
+            self.fail('_test_unhexlify returned a different answer'
+                      ' from binascii:\n    %s\n != %s'
+                      % (binascii.hexlify(ba_unhex), mod_hex))
+
+    def assertFailUnhexlify(self, as_hex):
+        # Invalid hex content
+        self.assertIs(None, self.module._test_unhexlify(as_hex))
+
+    def test_to_hex(self):
+        raw_bytes = ''.join(map(chr, range(256)))
+        for i in range(0, 240, 20):
+            self.assertHexlify(raw_bytes[i:i+20])
+        self.assertHexlify(raw_bytes[240:]+raw_bytes[0:4])
+
+    def test_from_hex(self):
+        self.assertUnhexlify('0123456789abcdef0123456789abcdef01234567')
+        self.assertUnhexlify('123456789abcdef0123456789abcdef012345678')
+        self.assertUnhexlify('0123456789ABCDEF0123456789ABCDEF01234567')
+        self.assertUnhexlify('123456789ABCDEF0123456789ABCDEF012345678')
+        hex_chars = binascii.hexlify(''.join(map(chr, range(256))))
+        for i in range(0, 480, 40):
+            self.assertUnhexlify(hex_chars[i:i+40])
+        self.assertUnhexlify(hex_chars[480:]+hex_chars[0:8])
+
+    def test_from_invalid_hex(self):
+        self.assertFailUnhexlify('123456789012345678901234567890123456789X')
+        self.assertFailUnhexlify('12345678901234567890123456789012345678X9')
+
+
+class TestGCCKHSHA1LeafNode(TestBtreeSerializer):
+
+
     def assertInvalid(self, bytes):
         """Ensure that we get a proper error when trying to parse invalid bytes.
 



More information about the bazaar-commits mailing list