Rev 3205: Get prefix scaling working. in http://people.ubuntu.com/~robertc/baz2.0/index.hashed
Robert Collins
robertc at robertcollins.net
Sun Jan 27 18:15:36 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/index.hashed
------------------------------------------------------------
revno: 3205
revision-id:robertc at robertcollins.net-20080127181529-ryodcvcrajph2fn0
parent: robertc at robertcollins.net-20080127070656-gc93wlc9wtml829x
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index.hashed
timestamp: Mon 2008-01-28 05:15:29 +1100
message:
Get prefix scaling working.
modified:
bzrlib/hashtable.py hashtable.py-20080127054910-pf6gcotuda8xo42y-1
bzrlib/tests/test_hashtable.py test_hashtable.py-20080127054910-pf6gcotuda8xo42y-2
=== modified file 'bzrlib/hashtable.py'
--- a/bzrlib/hashtable.py 2008-01-27 07:06:56 +0000
+++ b/bzrlib/hashtable.py 2008-01-27 18:15:29 +0000
@@ -23,10 +23,12 @@
'CompactHashTable',
]
+import sha
import struct
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
+from bzrlib import debug
from bzrlib import errors
from bzrlib import trace
from bzrlib.osutils import sha_string
@@ -73,7 +75,10 @@
try:
self._hash_values[self._key_hashes[key]] = value
except KeyError:
- key_hash = sha_string(key)
+ key_hash = sha.new(key).digest()
+ if key_hash in self._key_hashes:
+ raise errors.InternalBzrError(
+ 'Congrats, sha1 conflict on (%r->%r)' % (key, key_hash))
self._key_hashes[key] = key_hash
self._hash_values[key_hash] = value
@@ -87,11 +92,22 @@
"""
prefix_bytes = 1
# loop to determine how many bytes of hash
- hashes = self._hash_values.keys()
- end = len(hashes)
- pos = 0
+ hashes = sorted(self._hash_values.keys())
if not hashes:
return 0, 0, ""
+ end = len(hashes)
+ pos = 1
+ current_hash = hashes[0]
+ while pos < end:
+ if hashes[pos][:prefix_bytes] == current_hash[:prefix_bytes]:
+ if prefix_bytes == 20:
+ # infinte loop (and wtf ?!)
+ raise errors.InternalBzrError('unreachable in theory')
+ # eat up more bytes
+ prefix_bytes += 1
+ else:
+ # unique enough
+ pos += 1
# TODO: be more general. duh.
hash_rule = ">" + "B" * prefix_bytes
# now generate an array of the values.
@@ -104,14 +120,24 @@
"Not all values have the same length.")
if not value_bytes:
raise errors.InternalBzrError("zero-length values not supported.")
- hash_array = ['\xff' * value_bytes] * (prefix_bytes << 8)
+ hash_array = ['\xff' * value_bytes] * 2 ** (prefix_bytes * 8)
hash_powers = tuple(enumerate([2 ** (offset * 8) for offset in
reversed(range(prefix_bytes))]))
+ if 'hashtable' in debug.debug_flags:
+ indices = {}
for keyhash, value in self._hash_values.iteritems():
hash_prefix = keyhash[:prefix_bytes]
hash_index = 0
+ hash_bytes = struct.unpack(hash_rule, hash_prefix[:prefix_bytes])
for offset, size in hash_powers:
- hash_index += struct.unpack(hash_rule, hash_prefix[offset])[0] * size
+ hash_index += hash_bytes[offset] * size
hash_array[hash_index] = value
+ if 'hashtable' in debug.debug_flags:
+ print value ,hash_index, repr(hash_prefix)
+ indices.setdefault(hash_index, []).append(value)
+ if 'hashtable' in debug.debug_flags:
+ for values in indices.values():
+ if len(values) > 1:
+ print values
hash_bytes = ''.join(hash_array)
return prefix_bytes, value_bytes, hash_bytes
=== modified file 'bzrlib/tests/test_hashtable.py'
--- a/bzrlib/tests/test_hashtable.py 2008-01-27 07:06:56 +0000
+++ b/bzrlib/tests/test_hashtable.py 2008-01-27 18:15:29 +0000
@@ -57,6 +57,17 @@
table = CompactHashTable()
table['foo'] = 'b'
hash_array = ['\xff'] * 256
- hash_array[48] = 'b'
+ hash_array[11] = 'b'
hash_bytes = ''.join(hash_array)
self.assertEqual((1, 1, hash_bytes), table.serialize())
+
+ def test_serialize_16_bit_table(self):
+ table = CompactHashTable()
+ sample_data = [('p', 20843), ('u', 20966)]
+ hash_array = ['\xff'] * 65536
+ for key, offset in sample_data:
+ table[key] = key
+ hash_array[offset] = key
+ hash_bytes = ''.join(hash_array)
+ self.assertEqual((2, 1, hash_bytes), table.serialize())
+
More information about the bazaar-commits
mailing list