Rev 5535: More instrumentation. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
John Arbash Meinel
john at arbash-meinel.com
Tue Nov 30 20:37:52 GMT 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
------------------------------------------------------------
revno: 5535
revision-id: john at arbash-meinel.com-20101130203740-twiaq89vtwrvyq1p
parent: john at arbash-meinel.com-20101123214953-9yhtoz01kxnr5cct
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Tue 2010-11-30 14:37:40 -0600
message:
More instrumentation.
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx 2010-11-23 21:49:53 +0000
+++ b/bzrlib/_delta_index_pyx.pyx 2010-11-30 20:37:40 +0000
@@ -57,6 +57,8 @@
unsigned char *top) nogil
+from bzrlib import trace
+
cdef int MAX_HASH_BUCKET_ENTRIES
MAX_HASH_BUCKET_ENTRIES = 64
cdef int MAX_DELTA_OP_SIZE
@@ -114,6 +116,9 @@
# 256 values here
unsigned char match_pre
+ # How many times has this entry been considered a success?
+ unsigned short match_count
+
cdef class RabinHashMap
@@ -150,10 +155,27 @@
self.free_slot = NULL
self._step = 0
+ # Consider, self._next_hash & self._table_mask never see the 'upper'
+ # bits of the rabin_val. Is there a decent way to 'mix that in', to
+ # help keep chains from overlapping too much? (ATM 0x10001 and 0x20001
+ # will overlap for any table size less than 0x10000, at *all* steps) I
+ # don't know how common collisions are, but it may be more-than-normal
+ # because RABIN isn't a super-strong hash.
self._next_hash = rabin_val
cdef rabin_entry* next_with_dummy(self) except? NULL:
cdef rabin_entry *slot
+ # This uses Quadratic Probing:
+ # http://en.wikipedia.org/wiki/Quadratic_probing
+ # with c1 = c2 = 1/2
+ # This leads to probe locations at:
+ h0 = hash(k1)
+ h1 = h0 + 1
+ h2 = h0 + 3 = h1 + 2
+ h3 = h0 + 6 = h2 + 3
+ h4 = h0 + 10 = h3 + 4
+ # Note that all of these are '& mask', but that is computed *after* the
+ # offset.
while self._step < self._table_mask:
slot = self._table + (self._next_hash & self._table_mask)
self._step += 1
@@ -179,17 +201,6 @@
cdef next(self):
cdef rabin_entry *slot
- # This uses Quadratic Probing:
- # http://en.wikipedia.org/wiki/Quadratic_probing
- # with c1 = c2 = 1/2
- # This leads to probe locations at:
- # h0 = hash(k1)
- # h1 = h0 + 1
- # h2 = h0 + 3 = h1 + 1 + 1
- # h3 = h0 + 6 = h2 + 1 + 2
- # h4 = h0 + 10 = h2 + 1 + 3
- # Note that all of these are '& mask', but that is computed *after* the
- # offset.
# Don't search forever, even if we have bogus entries in the table
while self._step < self._table_mask:
slot = self.next_with_dummy()
@@ -286,6 +297,7 @@
# that point.
cdef rabin_entry *slot, *new_slot, *old_table
cdef unsigned int i, new_offset, old_mask
+ cdef unsigned long total_matches, total_steps, total_items
cdef RabinEntrySearch search
old_table = self._table
@@ -294,10 +306,31 @@
self.table_mask = new_mask
if old_table == NULL:
# Nothing to do
- self._table = new_table
- self.table_mask = new_mask
return
+ # TODO: Consider this as a possible time to re-sort the data. A few
+ # thoughts:
+ # 1) We could walk the old table, and when we find a rabin_val,
+ # we could search for all matches of that val. And then insert
+ # those together into the new table. That saves some searching
+ # on the target table (walking items you just inserted), at a
+ # cost of searching the old table.
+ # 2) However, when searching the old table, you can temp-copy
+ # everything into the temp_entries mini-table and sort them
+ before inserting into the new table. That way, all lists
+ will tend towards sorted, even if they don't end up with >64
+ # entries.
+ # 3) You could only sort if you get more than X entries.
+ # 4) If we walk the source in order, then we will preserve the
+ # ordering when inserting into the new table, rather than
+ # randomizing the ordering every time we have to resize.
+ 5) It may help compact the chains (you might get less
+ 'alternating')
+ # 6) We still have to 'search' the target table, but we don't
+ # have to walk over items we already walked. With the current
+ code w/ 64 entries in a bucket, we walk 1, 2, 3, 4, 5...
+ # 2047 steps.
+ total_steps = total_matches = total_items = 0
try:
search = RabinEntrySearch(self)
for i from 0 <= i <= old_mask:
@@ -305,12 +338,22 @@
if (slot.ptr == NULL or slot.ptr == dummy_ptr):
# An empty slot
continue
+ total_items += 1
search.start(slot.rabin_val)
while search.free_slot == NULL:
+ # Note: It should be fine to use search.next() here,
+ # because there should be no dummy pointers in the new
+ # table. Might want to validate that, though.
search.next()
search.free_slot[0] = slot[0]
+ total_steps += search._step
+ total_matches += search.match_count
finally:
PyMem_Free(old_table)
+ trace.mutter(' took %d steps, %d matches,'
+ ' for %d items'
+ % (total_steps, total_matches,
+ total_items))
def reserve(self, new_entries):
"""Ensure that we have enough open slots for N items.
@@ -340,6 +383,11 @@
table_size = MIN_TABLE_SIZE
while table_size < total_entries * 2 and table_size < 2**31:
table_size = table_size * 2
+ if self._table != NULL:
+ trace.mutter('resizing table %08x from %d to %d'
+ ' for %d new_entries, %d total'
+ % (id(self), self.table_mask + 1, table_size,
+ new_entries, total_entries))
n_bytes = sizeof(rabin_entry) * table_size
new_table = <rabin_entry*>PyMem_Malloc(n_bytes)
if new_table == NULL:
@@ -392,7 +440,11 @@
weight -= 1
# 'pre' bytes is less valuable than 'post' bytes, because of how
# the matching works, so double-weight post bytes.
- weight += (search.entry.match_tail * 2) + search.entry.match_pre
+ # Having hits is very valuable, more so than a byte or two
+ # available
+ weight += ((search.entry.match_count * 10)
+ + (search.entry.match_tail * 2)
+ + search.entry.match_pre)
all_entries.append((weight, temp_idx))
temp_entries[temp_idx] = search.entry[0]
temp_idx += 1
@@ -451,6 +503,7 @@
entry.rabin_val = val
entry.match_pre = match_pre
entry.match_tail = match_tail
+ entry.match_count = 0
entry.ptr = ptr
entry.global_offset = global_offset
self.entry_count += 1
@@ -1023,6 +1076,8 @@
cdef Py_ssize_t match_size
cdef Py_ssize_t local_offset
+ if self.match_entry.match_count < 65534:
+ self.match_entry.match_count += 1
self.match_offset = self.match_entry.global_offset
data = self._expand_match(data)
self._finalize_pending_insert()
More information about the bazaar-commits
mailing list