Rev 40: Switch over to the Murmur hash in http://bzr.arbash-meinel.com/plugins/index2

John Arbash Meinel john at arbash-meinel.com
Wed Jul 16 00:13:53 BST 2008


At http://bzr.arbash-meinel.com/plugins/index2

------------------------------------------------------------
revno: 40
revision-id: john at arbash-meinel.com-20080715231347-brpla88u0zdr0v9g
parent: john at arbash-meinel.com-20080715173512-0lpiu07o098hkrzl
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Tue 2008-07-15 18:13:47 -0500
message:
  Switch over to the Murmur hash
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py	2008-07-15 17:35:12 +0000
+++ b/btree_index.py	2008-07-15 23:13:47 +0000
@@ -32,7 +32,7 @@
 from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
 from bzrlib.osutils import basename, dirname
 from bzrlib.plugins.index2 import errors, chunk_writer
-from bzrlib.plugins.pybloom.pybloom import BloomAP5
+from bzrlib.plugins.pybloom.pybloom import BloomMurmur, _murmur_hash
 from bzrlib.transport import get_transport
 
 
@@ -63,11 +63,11 @@
 dupes = [0]
 
 
-class NodeBloom(BloomAP5):
-    """Subclass of BloomAP5 that can be read from a file."""
+class NodeBloom(BloomMurmur):
+    """Subclass of BloomMurmur that can be read from a file."""
 
     def __init__(self, bytes):
-        # See BloomAP5._init_array - which we don't use as we have an array
+        # See BloomMurmur._init_array - which we don't use as we have an array
         # ready
         self._num_bytes = len(bytes)
         self._num_bits = self._num_bytes * 8
@@ -80,11 +80,11 @@
     def get_raw_offsets(keys):
         """Compute the raw internal offsets for these keys"""
         offsets = {}
-        hash_ap = BloomAP5._hash_ap
-        seeds = BloomAP5._predefined_seeds[:BloomAP5.num_offsets]
+        hash = _murmur_hash
+        seeds = BloomMurmur._predefined_seeds[:BloomMurmur.num_offsets]
         for key in keys:
             text = '\x00'.join(key)
-            offsets[key] = tuple([hash_ap(text, seed) for seed in seeds])
+            offsets[key] = tuple([int(hash(text, seed)) for seed in seeds])
         return offsets
 
     # @staticmethod
@@ -312,7 +312,7 @@
         # and must always be 1 (if there are any nodes in the tree).
         self.row_lengths = []
         if BTreeGraphIndex._default_use_blooms:
-            global_bloom = BloomAP5(_GLOBAL_BLOOM_START_SIZE)
+            global_bloom = BloomMurmur(_GLOBAL_BLOOM_START_SIZE)
         else:
             global_bloom = None
         def add_key(string_key, key_line):
@@ -345,7 +345,7 @@
                             str(rows[pos + 1].nodes) + "\n")
                         if create_bloom:
                             # new bloom for the new node
-                            internal_row.bloom = BloomAP5(_BLOOM_BUILD_SIZE)
+                            internal_row.bloom = BloomMurmur(_BLOOM_BUILD_SIZE)
                         else:
                             internal_row.bloom = None
                 # add a new leaf

=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py	2008-07-15 17:35:12 +0000
+++ b/tests/test_btree_index.py	2008-07-15 23:13:47 +0000
@@ -24,7 +24,7 @@
 from bzrlib import errors as bzrerrors, tests
 from bzrlib.plugins import index2
 from bzrlib.plugins.index2 import errors, btree_index
-from bzrlib.plugins.pybloom.pybloom import BloomAP5
+from bzrlib.plugins.pybloom.pybloom import BloomMurmur
 from bzrlib.tests import (
     TestCaseWithTransport,
     TestScenarioApplier,
@@ -216,8 +216,8 @@
         leaf2 = content[12288:]
         root_bytes = zlib.decompress(root)
         # Create a little bloom by hand
-        bloom = BloomAP5(256 * 8)
-        global_bloom = BloomAP5(4096 * 8)
+        bloom = BloomMurmur(256 * 8)
+        global_bloom = BloomMurmur(4096 * 8)
         # set a a bit to test
         for node in nodes:
             bloom.insert('\x00'.join(node[0]))
@@ -317,8 +317,8 @@
         leaf2 = content[12288:]
         root_bytes = zlib.decompress(root)
         # Create a little and big bloom by hand
-        bloom = BloomAP5(256 * 8)
-        global_bloom = BloomAP5(4096 * 8)
+        bloom = BloomMurmur(256 * 8)
+        global_bloom = BloomMurmur(4096 * 8)
         # set a a bit to test
         for node in nodes:
             bloom.insert('\x00'.join(node[0]))
@@ -1016,7 +1016,7 @@
 
     def test_InternalNode_bloom(self):
         # Create a little bloom so we can check it is parsed right
-        bloom = BloomAP5(256 * 8)
+        bloom = BloomMurmur(256 * 8)
         # set a a bit to test
         bloom.insert("foo-bar")
         # get bytes
@@ -1112,7 +1112,7 @@
 class TestNodeBloom(tests.TestCase):
 
     def test_init(self):
-        basic_bloom = BloomAP5(256 * 8)
+        basic_bloom = BloomMurmur(256 * 8)
         basic_bloom.insert('foo')
         basic_bloom.insert('bar')
 
@@ -1135,10 +1135,10 @@
     def test_contains_raw_offsets_mixed_sizes(self):
         raw_offsets = btree_index.NodeBloom.get_raw_offsets([('foo',), ('bar',), ('baz',)])
 
-        small_bloom = BloomAP5(16 * 8)
+        small_bloom = BloomMurmur(16 * 8)
         small_bloom.insert('foo')
         small_bloom.insert('bar')
-        big_bloom = BloomAP5(256 * 8)
+        big_bloom = BloomMurmur(256 * 8)
         big_bloom.insert('foo')
         big_bloom.insert('bar')
 



More information about the bazaar-commits mailing list