Rev 5540: (broken, experiment) Lowering the load factor did improve the peek-skip rate in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

John Arbash Meinel john at arbash-meinel.com
Thu Dec 2 19:31:15 GMT 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

------------------------------------------------------------
revno: 5540
revision-id: john at arbash-meinel.com-20101202193104-hk2y015fy71azhm2
parent: john at arbash-meinel.com-20101202180208-76zf19m2cdib333d
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Thu 2010-12-02 13:31:04 -0600
message:
  (broken, experiment) Lowering the load factor did improve the peek-skip rate
  
  Changing initial hash to (rabin_val ^ (rabin_val >> 16)) is actually
  quite a bit worse overall. Though it looks like it was 'unstable' and not giving
  identical pack file output. >>24 seems to be better, but doesn't improve anything.
  In both cases, it is clear the extra bits don't help peek skip rates, so it isn't
  helpful.
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx	2010-12-02 18:02:08 +0000
+++ b/bzrlib/_delta_index_pyx.pyx	2010-12-02 19:31:04 +0000
@@ -170,7 +170,7 @@
         # will overlap for any table size less than 0x10000, at *all* steps) I
         # don't know how common collisions are, but it may be more-than-normal
         # because RABIN isn't a super-strong hash.
-        self._next_hash = rabin_val
+        self._next_hash = (rabin_val ^ (rabin_val >> 24))
 
     @cython.profile(False)
     cdef rabin_entry* next_with_dummy(self): # noexcept
@@ -189,7 +189,7 @@
         while self._step < self._table_mask:
             slot = self._table + (self._next_hash & self._table_mask)
             self._step += 1
-            self._next_hash = self._next_hash + self._step
+            self._next_hash += self._step
             if slot.ptr == NULL:
                 # We found a blank entry, so this chain is done
                 if self.free_slot == NULL:
@@ -235,7 +235,7 @@
     # will overlap for any table size less than 0x10000, at *all* steps) I
     # don't know how common collisions are, but it may be more-than-normal
     # because RABIN isn't a super-strong hash.
-    self._next_hash = rabin_val
+    self._next_hash = (rabin_val ^ (rabin_val >> 24))
 
 @cython.profile(False)
 cdef inline rabin_entry* RabinEntrySearch_next_with_dummy(RabinEntrySearch self): # noexcept
@@ -254,7 +254,7 @@
     while self._step < self._table_mask:
         slot = self._table + (self._next_hash & self._table_mask)
         self._step += 1
-        self._next_hash = self._next_hash + self._step
+        self._next_hash += self._step
         if slot.ptr == NULL:
             # We found a blank entry, so this chain is done
             if self.free_slot == NULL:
@@ -296,7 +296,7 @@
     then there might be something, might not. Concretely, we return 0 if
     there is null in the first slot, 1 otherwise.
     """
-    return self._table[rabin_val & self._table_mask].ptr != NULL
+    return self._table[(rabin_val ^ (rabin_val >> 24)) & self._table_mask].ptr != NULL
 
 
 cdef public unsigned int MIN_TABLE_SIZE
@@ -461,12 +461,12 @@
         # TODO: We may want to shrink at this point if we find that we
         #       over-allocated. (Say we got 1MB of all 0's, then we end up with
         #       a single entry in the map, but reserved lots of space)
-        if (self.table_mask != 0 and total_entries * 3 < table_size * 2):
+        if (self.table_mask != 0 and total_entries * 2 < table_size * 1):
             # We have enough room already
             return
         # We need to resize, find out the new table size required
         table_size = MIN_TABLE_SIZE
-        while table_size < total_entries * 2 and table_size < 2**31:
+        while table_size < total_entries * 3 and table_size < 2**31:
             table_size = table_size * 2
         trace.mutter('resizing table %08x from %d to %d'
                      ' for %d new_entries, %d total'
@@ -953,11 +953,11 @@
         cdef _DeltaCreator creator
         if not self.sources:
             return None
-        # t = _timer()
+        t = _timer()
         creator = _DeltaCreator(self, content, max_delta_size, noisy)
         val = creator.create()
-        # global make_delta_total_time
-        # make_delta_total_time += _timer() - t
+        global make_delta_total_time
+        make_delta_total_time += _timer() - t
         return val
 
 



More information about the bazaar-commits mailing list