Rev 19: Remove the base32 implementation in favor of a *real* base32 implementation in http://bzr.arbash-meinel.com/plugins/pybloom
John Arbash Meinel
john at arbash-meinel.com
Thu Apr 26 23:10:06 BST 2007
At http://bzr.arbash-meinel.com/plugins/pybloom
------------------------------------------------------------
revno: 19
revision-id: john at arbash-meinel.com-20070426221002-mmwu0z30xzc1b3ks
parent: john at arbash-meinel.com-20070426220655-cib91nwztwll1ld2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: pybloom
timestamp: Thu 2007-04-26 17:10:02 -0500
message:
Remove the base32 implementation in favor of a *real* base32 implementation
modified:
pybloom.py pybloom.py-20061013005844-b3v176fajvw2xeip-1
test_pybloom.py test_pybloom.py-20061015235457-3ygcmfqjet8yfq0f-2
-------------- next part --------------
=== modified file 'pybloom.py'
--- a/pybloom.py 2007-03-27 16:33:05 +0000
+++ b/pybloom.py 2007-04-26 22:10:02 +0000
@@ -34,6 +34,7 @@
"""
import array
+import base64
import math
import md5
import sha
@@ -172,12 +173,12 @@
with the source 8-bits per byte.
"""
as_chr = [chr(x) for x in self._array]
- return encode_base32(''.join(as_chr))
+ return base64.b32encode(''.join(as_chr))
def array_to_base64(self):
"""Pack the internal array to base64 encoding"""
as_chr = [chr(x) for x in self._array]
- return ''.join(as_chr).encode('base64')
+ return base64.b64encode(''.join(as_chr))
def custom_base_64(self):
"""An alternate version of base64 which avoids unsafe fs chars"""
@@ -189,64 +190,6 @@
raise NotImplementedError(self.custom_base_64)
-def encode_base32(bytes):
- """Encode a byte-string in base32. Which is like 64 only case insensitive.
-
- :param bytes: A string of bytes to encode.
- :return: A new string with base32 encoded chars.
- """
- vals = 'abcdefghijklmnopqrstuvwxyz012345'
- out = []
- # Decode 5 bytes at a time
- for start in xrange(0, len(bytes), 5):
- hunk = bytes[start:start+5]
-
- # Always pad to a multiple of 5 bytes
- # XXX: We should have a way to indicate that the decoded bytes should
- # be thrown away. Base64 uses '=' for bytes to ignore.
- if len(hunk) < 5:
- hunk += '\x00\x00\x00\x00\x00'[len(hunk):]
- # Take 5-bytes and decode them into 1-32bit int and 1 8-bit int
- # We actually use little endian order, so we can grab the bits off the
- # low end
- # The bit mask is this:
- # '\xF8\x00\x00\x00\x00'
- # '\x07\xC0\x00\x00\x00'
- # '\x00\x3E\x00\x00\x00'
- # '\x00\x01\xF0\x00\x00'
- # '\x00\x00\x0F\x80\x00'
- # '\x00\x00\x00\x7C\x00'
- # '\x00\x00\x00\x03\xE0'
- # '\x00\x00\x00\x00\x1F'
-
- # Doing it by 32-bit integer seems better
- first_int, second_byte = struct.unpack('>ib', hunk)
- bits = [((first_int >> 27) & 0x1F),
- ((first_int >> 22) & 0x1F),
- ((first_int >> 17) & 0x1F),
- ((first_int >> 12) & 0x1F),
- ((first_int >> 7) & 0x1F),
- ((first_int >> 2) & 0x1F),
- (((first_int & 0x03) << 3) | ((second_byte >> 5) & 0x07)),
- (second_byte & 0x1F),
- ]
- # Alternatively, this can be done by extracting 5 8-bit ints
- # and then using more combinations:
- # first, second, third, fourth, fifth = struct.unpack('5B', hunk)
- # bits = [
- # ((first & 0xF8) >> 3),
- # ((first & 0x07) << 2 | (second & 0xC0) >> 6),
- # ((second & 0x3E) >> 1),
- # ((second & 0x01) << 4 | (third & 0xF0) >> 4),
- # ((third & 0x0F) << 1 | (fourth & 0x80) >> 7),
- # ((fourth & 0x7C) >> 2),
- # ((fourth & 0x03) << 3 | (fifth & 0xE0) >> 5),
- # ((fifth & 0x1F)),
- # ]
- out.extend(vals[b] for b in bits)
- return ''.join(out)
-
-
class BloomMD5(Bloom):
"""A bloom implementation that uses md5 as the crypto hash.
=== modified file 'test_pybloom.py'
--- a/test_pybloom.py 2007-04-26 22:06:55 +0000
+++ b/test_pybloom.py 2007-04-26 22:10:02 +0000
@@ -269,54 +269,3 @@
self.failUnless('xyz' in bloom)
-class TestBase32(tests.TestCase):
- """Test base32 representation"""
-
- def assertEncodeEqual(self, encoded, binary):
- """Assert encoding the given bytes generates the specific base32"""
- val = pybloom.encode_base32(binary)
- self.assertEqual(encoded, val,
- '%r did not encode properly %r != %r'
- % (binary, encoded, val))
-
- def test_encode(self):
- self.assertEncodeEqual('aaaaaaaa', '\x00\x00\x00\x00\x00')
- self.assertEncodeEqual('baaaaaaa', '\x08\x00\x00\x00\x00')
- self.assertEncodeEqual('caaaaaaa', '\x10\x00\x00\x00\x00')
- self.assertEncodeEqual('daaaaaaa', '\x18\x00\x00\x00\x00')
- self.assertEncodeEqual('eaaaaaaa', '\x20\x00\x00\x00\x00')
- self.assertEncodeEqual('faaaaaaa', '\x28\x00\x00\x00\x00')
- self.assertEncodeEqual('gaaaaaaa', '\x30\x00\x00\x00\x00')
- self.assertEncodeEqual('haaaaaaa', '\x38\x00\x00\x00\x00')
- self.assertEncodeEqual('iaaaaaaa', '\x40\x00\x00\x00\x00')
- self.assertEncodeEqual('jaaaaaaa', '\x48\x00\x00\x00\x00')
- self.assertEncodeEqual('kaaaaaaa', '\x50\x00\x00\x00\x00')
- self.assertEncodeEqual('laaaaaaa', '\x58\x00\x00\x00\x00')
- self.assertEncodeEqual('maaaaaaa', '\x60\x00\x00\x00\x00')
- self.assertEncodeEqual('naaaaaaa', '\x68\x00\x00\x00\x00')
- self.assertEncodeEqual('oaaaaaaa', '\x70\x00\x00\x00\x00')
- self.assertEncodeEqual('paaaaaaa', '\x78\x00\x00\x00\x00')
- self.assertEncodeEqual('qaaaaaaa', '\x80\x00\x00\x00\x00')
- self.assertEncodeEqual('raaaaaaa', '\x88\x00\x00\x00\x00')
- self.assertEncodeEqual('saaaaaaa', '\x90\x00\x00\x00\x00')
- self.assertEncodeEqual('taaaaaaa', '\x98\x00\x00\x00\x00')
- self.assertEncodeEqual('uaaaaaaa', '\xA0\x00\x00\x00\x00')
- self.assertEncodeEqual('vaaaaaaa', '\xA8\x00\x00\x00\x00')
- self.assertEncodeEqual('waaaaaaa', '\xB0\x00\x00\x00\x00')
- self.assertEncodeEqual('xaaaaaaa', '\xB8\x00\x00\x00\x00')
- self.assertEncodeEqual('yaaaaaaa', '\xC0\x00\x00\x00\x00')
- self.assertEncodeEqual('zaaaaaaa', '\xC8\x00\x00\x00\x00')
- self.assertEncodeEqual('0aaaaaaa', '\xD0\x00\x00\x00\x00')
- self.assertEncodeEqual('1aaaaaaa', '\xD8\x00\x00\x00\x00')
- self.assertEncodeEqual('2aaaaaaa', '\xE0\x00\x00\x00\x00')
- self.assertEncodeEqual('3aaaaaaa', '\xE8\x00\x00\x00\x00')
- self.assertEncodeEqual('4aaaaaaa', '\xF0\x00\x00\x00\x00')
- self.assertEncodeEqual('5aaaaaaa', '\xF8\x00\x00\x00\x00')
- self.assertEncodeEqual('a5aaaaaa', '\x07\xC0\x00\x00\x00')
- self.assertEncodeEqual('aa5aaaaa', '\x00\x3E\x00\x00\x00')
- self.assertEncodeEqual('aaa5aaaa', '\x00\x01\xF0\x00\x00')
- self.assertEncodeEqual('aaaa5aaa', '\x00\x00\x0F\x80\x00')
- self.assertEncodeEqual('aaaaa5aa', '\x00\x00\x00\x7C\x00')
- self.assertEncodeEqual('aaaaaa5a', '\x00\x00\x00\x03\xE0')
- self.assertEncodeEqual('aaaaaaa5', '\x00\x00\x00\x00\x1F')
- self.assertEncodeEqual('55555555', '\xFF\xFF\xFF\xFF\xFF')
More information about the bazaar-commits mailing list