Rev 40: Switch over to the Murmur hash in http://bzr.arbash-meinel.com/plugins/index2
John Arbash Meinel
john at arbash-meinel.com
Wed Jul 16 00:13:53 BST 2008
At http://bzr.arbash-meinel.com/plugins/index2
------------------------------------------------------------
revno: 40
revision-id: john at arbash-meinel.com-20080715231347-brpla88u0zdr0v9g
parent: john at arbash-meinel.com-20080715173512-0lpiu07o098hkrzl
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Tue 2008-07-15 18:13:47 -0500
message:
Switch over to the Murmur hash
-------------- next part --------------
=== modified file 'btree_index.py'
--- a/btree_index.py 2008-07-15 17:35:12 +0000
+++ b/btree_index.py 2008-07-15 23:13:47 +0000
@@ -32,7 +32,7 @@
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
from bzrlib.osutils import basename, dirname
from bzrlib.plugins.index2 import errors, chunk_writer
-from bzrlib.plugins.pybloom.pybloom import BloomAP5
+from bzrlib.plugins.pybloom.pybloom import BloomMurmur, _murmur_hash
from bzrlib.transport import get_transport
@@ -63,11 +63,11 @@
dupes = [0]
-class NodeBloom(BloomAP5):
- """Subclass of BloomAP5 that can be read from a file."""
+class NodeBloom(BloomMurmur):
+ """Subclass of BloomMurmur that can be read from a file."""
def __init__(self, bytes):
- # See BloomAP5._init_array - which we don't use as we have an array
+ # See BloomMurmur._init_array - which we don't use as we have an array
# ready
self._num_bytes = len(bytes)
self._num_bits = self._num_bytes * 8
@@ -80,11 +80,11 @@
def get_raw_offsets(keys):
"""Compute the raw internal offsets for these keys"""
offsets = {}
- hash_ap = BloomAP5._hash_ap
- seeds = BloomAP5._predefined_seeds[:BloomAP5.num_offsets]
+ hash = _murmur_hash
+ seeds = BloomMurmur._predefined_seeds[:BloomMurmur.num_offsets]
for key in keys:
text = '\x00'.join(key)
- offsets[key] = tuple([hash_ap(text, seed) for seed in seeds])
+ offsets[key] = tuple([int(hash(text, seed)) for seed in seeds])
return offsets
# @staticmethod
@@ -312,7 +312,7 @@
# and must always be 1 (if there are any nodes in the tree).
self.row_lengths = []
if BTreeGraphIndex._default_use_blooms:
- global_bloom = BloomAP5(_GLOBAL_BLOOM_START_SIZE)
+ global_bloom = BloomMurmur(_GLOBAL_BLOOM_START_SIZE)
else:
global_bloom = None
def add_key(string_key, key_line):
@@ -345,7 +345,7 @@
str(rows[pos + 1].nodes) + "\n")
if create_bloom:
# new bloom for the new node
- internal_row.bloom = BloomAP5(_BLOOM_BUILD_SIZE)
+ internal_row.bloom = BloomMurmur(_BLOOM_BUILD_SIZE)
else:
internal_row.bloom = None
# add a new leaf
=== modified file 'tests/test_btree_index.py'
--- a/tests/test_btree_index.py 2008-07-15 17:35:12 +0000
+++ b/tests/test_btree_index.py 2008-07-15 23:13:47 +0000
@@ -24,7 +24,7 @@
from bzrlib import errors as bzrerrors, tests
from bzrlib.plugins import index2
from bzrlib.plugins.index2 import errors, btree_index
-from bzrlib.plugins.pybloom.pybloom import BloomAP5
+from bzrlib.plugins.pybloom.pybloom import BloomMurmur
from bzrlib.tests import (
TestCaseWithTransport,
TestScenarioApplier,
@@ -216,8 +216,8 @@
leaf2 = content[12288:]
root_bytes = zlib.decompress(root)
# Create a little bloom by hand
- bloom = BloomAP5(256 * 8)
- global_bloom = BloomAP5(4096 * 8)
+ bloom = BloomMurmur(256 * 8)
+ global_bloom = BloomMurmur(4096 * 8)
# set a a bit to test
for node in nodes:
bloom.insert('\x00'.join(node[0]))
@@ -317,8 +317,8 @@
leaf2 = content[12288:]
root_bytes = zlib.decompress(root)
# Create a little and big bloom by hand
- bloom = BloomAP5(256 * 8)
- global_bloom = BloomAP5(4096 * 8)
+ bloom = BloomMurmur(256 * 8)
+ global_bloom = BloomMurmur(4096 * 8)
# set a a bit to test
for node in nodes:
bloom.insert('\x00'.join(node[0]))
@@ -1016,7 +1016,7 @@
def test_InternalNode_bloom(self):
# Create a little bloom so we can check it is parsed right
- bloom = BloomAP5(256 * 8)
+ bloom = BloomMurmur(256 * 8)
# set a a bit to test
bloom.insert("foo-bar")
# get bytes
@@ -1112,7 +1112,7 @@
class TestNodeBloom(tests.TestCase):
def test_init(self):
- basic_bloom = BloomAP5(256 * 8)
+ basic_bloom = BloomMurmur(256 * 8)
basic_bloom.insert('foo')
basic_bloom.insert('bar')
@@ -1135,10 +1135,10 @@
def test_contains_raw_offsets_mixed_sizes(self):
raw_offsets = btree_index.NodeBloom.get_raw_offsets([('foo',), ('bar',), ('baz',)])
- small_bloom = BloomAP5(16 * 8)
+ small_bloom = BloomMurmur(16 * 8)
small_bloom.insert('foo')
small_bloom.insert('bar')
- big_bloom = BloomAP5(256 * 8)
+ big_bloom = BloomMurmur(256 * 8)
big_bloom.insert('foo')
big_bloom.insert('bar')
More information about the bazaar-commits
mailing list