Rev 19: Remove the base32 implementation in favor of a *real* base32 implementation in http://bzr.arbash-meinel.com/plugins/pybloom

John Arbash Meinel john at arbash-meinel.com
Thu Apr 26 23:10:06 BST 2007


At http://bzr.arbash-meinel.com/plugins/pybloom

------------------------------------------------------------
revno: 19
revision-id: john at arbash-meinel.com-20070426221002-mmwu0z30xzc1b3ks
parent: john at arbash-meinel.com-20070426220655-cib91nwztwll1ld2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pybloom
timestamp: Thu 2007-04-26 17:10:02 -0500
message:
  Remove the base32 implementation in favor of a *real* base32 implementation
modified:
  pybloom.py                     pybloom.py-20061013005844-b3v176fajvw2xeip-1
  test_pybloom.py                test_pybloom.py-20061015235457-3ygcmfqjet8yfq0f-2
-------------- next part --------------
=== modified file 'pybloom.py'
--- a/pybloom.py	2007-03-27 16:33:05 +0000
+++ b/pybloom.py	2007-04-26 22:10:02 +0000
@@ -34,6 +34,7 @@
 """
 
 import array
+import base64
 import math
 import md5
 import sha
@@ -172,12 +173,12 @@
         with the source 8-bits per byte.
         """
         as_chr = [chr(x) for x in self._array]
-        return encode_base32(''.join(as_chr))
+        return base64.b32encode(''.join(as_chr))
 
     def array_to_base64(self):
         """Pack the internal array to base64 encoding"""
         as_chr = [chr(x) for x in self._array]
-        return ''.join(as_chr).encode('base64')
+        return base64.b64encode(''.join(as_chr))
 
     def custom_base_64(self):
         """An alternate version of base64 which avoids unsafe fs chars"""
@@ -189,64 +190,6 @@
         raise NotImplementedError(self.custom_base_64)
 
 
-def encode_base32(bytes):
-    """Encode a byte-string in base32. Which is like 64 only case insensitive.
-
-    :param bytes: A string of bytes to encode.
-    :return: A new string with base32 encoded chars.
-    """
-    vals = 'abcdefghijklmnopqrstuvwxyz012345'
-    out = []
-    # Decode 5 bytes at a time
-    for start in xrange(0, len(bytes), 5):
-        hunk = bytes[start:start+5]
-
-        # Always pad to a multiple of 5 bytes
-        # XXX: We should have a way to indicate that the decoded bytes should
-        # be thrown away. Base64 uses '=' for bytes to ignore.
-        if len(hunk) < 5:
-            hunk += '\x00\x00\x00\x00\x00'[len(hunk):]
-        # Take 5-bytes and decode them into 1-32bit int and 1 8-bit int
-        # We actually use little endian order, so we can grab the bits off the
-        # low end
-        # The bit mask is this:
-        #    '\xF8\x00\x00\x00\x00'
-        #    '\x07\xC0\x00\x00\x00'
-        #    '\x00\x3E\x00\x00\x00'
-        #    '\x00\x01\xF0\x00\x00'
-        #    '\x00\x00\x0F\x80\x00'
-        #    '\x00\x00\x00\x7C\x00'
-        #    '\x00\x00\x00\x03\xE0'
-        #    '\x00\x00\x00\x00\x1F'
-
-        # Doing it by 32-bit integer seems better
-        first_int, second_byte = struct.unpack('>ib', hunk)
-        bits = [((first_int >> 27) & 0x1F),
-                ((first_int >> 22) & 0x1F),
-                ((first_int >> 17) & 0x1F),
-                ((first_int >> 12) & 0x1F),
-                ((first_int >>  7) & 0x1F),
-                ((first_int >>  2) & 0x1F),
-                (((first_int & 0x03) << 3) | ((second_byte >> 5) & 0x07)),
-                (second_byte & 0x1F),
-               ]
-        # Alternatively, this can be done by extracting 5 8-bit ints
-        # and then using more combinations:
-        # first, second, third, fourth, fifth = struct.unpack('5B', hunk)
-        # bits = [
-        #    ((first  & 0xF8) >> 3),
-        #    ((first  & 0x07) << 2 | (second & 0xC0) >> 6),
-        #    ((second & 0x3E) >> 1),
-        #    ((second & 0x01) << 4 | (third  & 0xF0) >> 4),
-        #    ((third  & 0x0F) << 1 | (fourth & 0x80) >> 7),
-        #    ((fourth & 0x7C) >> 2),
-        #    ((fourth & 0x03) << 3 | (fifth  & 0xE0) >> 5),
-        #    ((fifth  & 0x1F)),
-        #    ]
-        out.extend(vals[b] for b in bits)
-    return ''.join(out)
-
-
 class BloomMD5(Bloom):
     """A bloom implementation that uses md5 as the crypto hash.
 

=== modified file 'test_pybloom.py'
--- a/test_pybloom.py	2007-04-26 22:06:55 +0000
+++ b/test_pybloom.py	2007-04-26 22:10:02 +0000
@@ -269,54 +269,3 @@
         self.failUnless('xyz' in bloom)
 
 
-class TestBase32(tests.TestCase):
-    """Test base32 representation"""
-
-    def assertEncodeEqual(self, encoded, binary):
-        """Assert encoding the given bytes generates the specific base32"""
-        val = pybloom.encode_base32(binary)
-        self.assertEqual(encoded, val,
-                         '%r did not encode properly %r != %r'
-                         % (binary, encoded, val))
-
-    def test_encode(self):
-        self.assertEncodeEqual('aaaaaaaa', '\x00\x00\x00\x00\x00')
-        self.assertEncodeEqual('baaaaaaa', '\x08\x00\x00\x00\x00')
-        self.assertEncodeEqual('caaaaaaa', '\x10\x00\x00\x00\x00')
-        self.assertEncodeEqual('daaaaaaa', '\x18\x00\x00\x00\x00')
-        self.assertEncodeEqual('eaaaaaaa', '\x20\x00\x00\x00\x00')
-        self.assertEncodeEqual('faaaaaaa', '\x28\x00\x00\x00\x00')
-        self.assertEncodeEqual('gaaaaaaa', '\x30\x00\x00\x00\x00')
-        self.assertEncodeEqual('haaaaaaa', '\x38\x00\x00\x00\x00')
-        self.assertEncodeEqual('iaaaaaaa', '\x40\x00\x00\x00\x00')
-        self.assertEncodeEqual('jaaaaaaa', '\x48\x00\x00\x00\x00')
-        self.assertEncodeEqual('kaaaaaaa', '\x50\x00\x00\x00\x00')
-        self.assertEncodeEqual('laaaaaaa', '\x58\x00\x00\x00\x00')
-        self.assertEncodeEqual('maaaaaaa', '\x60\x00\x00\x00\x00')
-        self.assertEncodeEqual('naaaaaaa', '\x68\x00\x00\x00\x00')
-        self.assertEncodeEqual('oaaaaaaa', '\x70\x00\x00\x00\x00')
-        self.assertEncodeEqual('paaaaaaa', '\x78\x00\x00\x00\x00')
-        self.assertEncodeEqual('qaaaaaaa', '\x80\x00\x00\x00\x00')
-        self.assertEncodeEqual('raaaaaaa', '\x88\x00\x00\x00\x00')
-        self.assertEncodeEqual('saaaaaaa', '\x90\x00\x00\x00\x00')
-        self.assertEncodeEqual('taaaaaaa', '\x98\x00\x00\x00\x00')
-        self.assertEncodeEqual('uaaaaaaa', '\xA0\x00\x00\x00\x00')
-        self.assertEncodeEqual('vaaaaaaa', '\xA8\x00\x00\x00\x00')
-        self.assertEncodeEqual('waaaaaaa', '\xB0\x00\x00\x00\x00')
-        self.assertEncodeEqual('xaaaaaaa', '\xB8\x00\x00\x00\x00')
-        self.assertEncodeEqual('yaaaaaaa', '\xC0\x00\x00\x00\x00')
-        self.assertEncodeEqual('zaaaaaaa', '\xC8\x00\x00\x00\x00')
-        self.assertEncodeEqual('0aaaaaaa', '\xD0\x00\x00\x00\x00')
-        self.assertEncodeEqual('1aaaaaaa', '\xD8\x00\x00\x00\x00')
-        self.assertEncodeEqual('2aaaaaaa', '\xE0\x00\x00\x00\x00')
-        self.assertEncodeEqual('3aaaaaaa', '\xE8\x00\x00\x00\x00')
-        self.assertEncodeEqual('4aaaaaaa', '\xF0\x00\x00\x00\x00')
-        self.assertEncodeEqual('5aaaaaaa', '\xF8\x00\x00\x00\x00')
-        self.assertEncodeEqual('a5aaaaaa', '\x07\xC0\x00\x00\x00')
-        self.assertEncodeEqual('aa5aaaaa', '\x00\x3E\x00\x00\x00')
-        self.assertEncodeEqual('aaa5aaaa', '\x00\x01\xF0\x00\x00')
-        self.assertEncodeEqual('aaaa5aaa', '\x00\x00\x0F\x80\x00')
-        self.assertEncodeEqual('aaaaa5aa', '\x00\x00\x00\x7C\x00')
-        self.assertEncodeEqual('aaaaaa5a', '\x00\x00\x00\x03\xE0')
-        self.assertEncodeEqual('aaaaaaa5', '\x00\x00\x00\x00\x1F')
-        self.assertEncodeEqual('55555555', '\xFF\xFF\xFF\xFF\xFF')



More information about the bazaar-commits mailing list