Rev 5540: (broken, experiment) Lowering the load factor did improve the peek-skip rate in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
John Arbash Meinel
john at arbash-meinel.com
Thu Dec 2 19:31:15 GMT 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
------------------------------------------------------------
revno: 5540
revision-id: john at arbash-meinel.com-20101202193104-hk2y015fy71azhm2
parent: john at arbash-meinel.com-20101202180208-76zf19m2cdib333d
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Thu 2010-12-02 13:31:04 -0600
message:
(broken, experiment) Lowering the load factor did improve the peek-skip rate
Changing initial hash to (rabin_val ^ (rabin_val >> 16)) is actually
quite a bit worse overall. Though it looks like it was 'unstable' and not giving
identical pack file output. >>24 seems to be better, but doesn't improve anything.
In both cases, it is clear the extra bits don't help peek skip rates, so it isn't
helpful.
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx 2010-12-02 18:02:08 +0000
+++ b/bzrlib/_delta_index_pyx.pyx 2010-12-02 19:31:04 +0000
@@ -170,7 +170,7 @@
# will overlap for any table size less than 0x10000, at *all* steps) I
# don't know how common collisions are, but it may be more-than-normal
# because RABIN isn't a super-strong hash.
- self._next_hash = rabin_val
+ self._next_hash = (rabin_val ^ (rabin_val >> 24))
@cython.profile(False)
cdef rabin_entry* next_with_dummy(self): # noexcept
@@ -189,7 +189,7 @@
while self._step < self._table_mask:
slot = self._table + (self._next_hash & self._table_mask)
self._step += 1
- self._next_hash = self._next_hash + self._step
+ self._next_hash += self._step
if slot.ptr == NULL:
# We found a blank entry, so this chain is done
if self.free_slot == NULL:
@@ -235,7 +235,7 @@
# will overlap for any table size less than 0x10000, at *all* steps) I
# don't know how common collisions are, but it may be more-than-normal
# because RABIN isn't a super-strong hash.
- self._next_hash = rabin_val
+ self._next_hash = (rabin_val ^ (rabin_val >> 24))
@cython.profile(False)
cdef inline rabin_entry* RabinEntrySearch_next_with_dummy(RabinEntrySearch self): # noexcept
@@ -254,7 +254,7 @@
while self._step < self._table_mask:
slot = self._table + (self._next_hash & self._table_mask)
self._step += 1
- self._next_hash = self._next_hash + self._step
+ self._next_hash += self._step
if slot.ptr == NULL:
# We found a blank entry, so this chain is done
if self.free_slot == NULL:
@@ -296,7 +296,7 @@
then there might be something, might not. Concretely, we return 0 if
there is null in the first slot, 1 otherwise.
"""
- return self._table[rabin_val & self._table_mask].ptr != NULL
+ return self._table[(rabin_val ^ (rabin_val >> 24)) & self._table_mask].ptr != NULL
cdef public unsigned int MIN_TABLE_SIZE
@@ -461,12 +461,12 @@
# TODO: We may want to shrink at this point if we find that we
# over-allocated. (Say we got 1MB of all 0's, then we end up with
# a single entry in the map, but reserved lots of space)
- if (self.table_mask != 0 and total_entries * 3 < table_size * 2):
+ if (self.table_mask != 0 and total_entries * 2 < table_size * 1):
# We have enough room already
return
# We need to resize, find out the new table size required
table_size = MIN_TABLE_SIZE
- while table_size < total_entries * 2 and table_size < 2**31:
+ while table_size < total_entries * 3 and table_size < 2**31:
table_size = table_size * 2
trace.mutter('resizing table %08x from %d to %d'
' for %d new_entries, %d total'
@@ -953,11 +953,11 @@
cdef _DeltaCreator creator
if not self.sources:
return None
- # t = _timer()
+ t = _timer()
creator = _DeltaCreator(self, content, max_delta_size, noisy)
val = creator.create()
- # global make_delta_total_time
- # make_delta_total_time += _timer() - t
+ global make_delta_total_time
+ make_delta_total_time += _timer() - t
return val
More information about the bazaar-commits
mailing list