Rev 5537: More profiling results. It appears that a vast majority of rabin lookups fail. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

John Arbash Meinel john at arbash-meinel.com
Wed Dec 1 22:27:05 GMT 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

------------------------------------------------------------
revno: 5537
revision-id: john at arbash-meinel.com-20101201222700-mpty01tewes4s1lu
parent: john at arbash-meinel.com-20101201191814-laegibyb63r913f3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Wed 2010-12-01 16:27:00 -0600
message:
  More profiling results. It appears that a vast majority of rabin lookups fail.
  
  Something like 90+% (384M vs 420M total lookups). So adding a 'peek()' function that does
  the most-trivial amount of work I can find to determine if this is worth pursuing further.
  Eliminates lots of .next() and .start() calls. Might want to turn it into an inline
  separate function, since it is pretty cheap. peek() seems to skip 217M of those calls.
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx	2010-12-01 19:18:14 +0000
+++ b/bzrlib/_delta_index_pyx.pyx	2010-12-01 22:27:00 +0000
@@ -89,6 +89,10 @@
 matches_checked = 0
 cdef int try_again_count
 try_again_count = 0
+cdef int nil_match
+nil_match = 0
+cdef int peek_skip
+peek_skip = 0
 cdef int try_again_matches_checked
 try_again_matches_checked = 0
 
@@ -98,9 +102,9 @@
     sys.stderr.write('spent %.3fs in add_delta_source\n' % (add_delta_total_time,))
     sys.stderr.write('spent %.3fs in make_delta_source\n' % (make_delta_total_time,))
     sys.stderr.write('spent %.3fs in _find_better_match\n' % (find_better_total_time,))
-    sys.stderr.write('%d try again, %d checks resulted in retry,'
+    sys.stderr.write('%d try again, %d nil match, %d peek skip, %d checks resulted in retry,'
                      ' %d total matches checked\n' % (
-        try_again_count, try_again_matches_checked, matches_checked))
+        try_again_count, nil_match, peek_skip, try_again_matches_checked, matches_checked))
 atexit.register(report_total_time)
 
 
@@ -153,6 +157,20 @@
         self._table_mask = rabin_hash_map.table_mask
 
     @cython.profile(False)
+    cdef int peek(self, unsigned int rabin_val): # noexcept
+        """Check and see if there might be anything at a given rabin_val.
+
+        The idea is that most lookups fail to find anything, and we want it to
+        be as efficient as possible to get to that state. If this returns 0,
+        then there is definitely nothing for that rabin_val. If it returns 1,
+        then there might be something, might not. Concretely, we return 0 if
+        there is null in the first slot, 1 otherwise.
+        """
+        cdef rabin_entry *slot
+        slot = self._table + (rabin_val & self._table_mask)
+        return (slot.ptr != NULL)
+
+    @cython.profile(False)
     cdef start(self, unsigned int rabin_val):
         self.rabin_val = rabin_val
         self.entry = NULL
@@ -445,9 +463,10 @@
             # the matching works, so double-weight post bytes.
             # Having hits is very valuable, more so than a byte or two
             # available
-            weight += ((search.entry.match_count * 10)
-                     + (search.entry.match_tail * 2)
-                     + search.entry.match_pre)
+            weight += ((search.entry.match_count * 20)
+                     + (search.entry.match_tail * 4)
+                     + search.entry.match_pre * 2
+                     + search.entry.global_offset & 0xF)
             all_entries.append((weight, temp_idx))
             temp_entries[temp_idx] = search.entry[0]
             temp_idx += 1
@@ -936,6 +955,7 @@
         self.out[self.out_pos] = <unsigned char>(i)
         self.out_pos += 1
 
+    @cython.profile(False)
     cdef int _find_better_match(self, const_data data, const_data data_end,
                                 unsigned int cur_rabin_val,
                                 RabinEntrySearch search) except -1:
@@ -949,27 +969,16 @@
         cdef SourceInfo ref_source
         cdef int comparisons
 
-        # TODO: Incorporate feedback. If _find_better_match picks a given entry
-        #       as the best match, that entry should be rated up, and moved
-        #       earlier in the matches-to-check heirarchy
-        # TODO: We have lots of time to spend during build phase if we can
-        #       improve the make_delta phase. Rough numbers are:
-        #           spent 0.676s in add_source
-        #           spent 1.689s in add_delta_source
-        #           spent 132.955s in make_delta
-        #       So about 100x more time spent in make_delta, then in add_delta
-        #       or add_source. add_delta_source is probably just because of the
-        #       shear count difference. (20x more deltas than raw sources,
-        #       etc.)
-        #       Anything we can do to make the 'good' matches found quicker
-        #       will result in a much more significant win. One simple step,
-        #       'move to front' anything that is chosen as a match... Sort more
-        #       often, etc.
-        #
+        # This is the function that gets triggered the most. Something like
+        # 420 calls while repacking bzr. (only one higher is search.next()
+        # having 500M calls.) Out of those 420M calls, 392M of those failed to
+        # match anything significant (try_again_count).
 
-        # search = RabinEntrySearch(self.index._hash_map)
         search.start(cur_rabin_val)
         search.next()
+        if search.entry == NULL:
+            global nil_match
+            nil_match += 1
         remaining_data_len = data_end - data
         comparisons = 0
         while search.entry != NULL:
@@ -1016,6 +1025,7 @@
             self.out[out_pos] = insert_byte
             self.insert_count = 0
 
+    @cython.profile(False)
     cdef _output_one(self, unsigned char c):
         """Insert one byte into the output stream."""
         if self.insert_count == 0:
@@ -1094,6 +1104,7 @@
                     (self.out + self.out_pos) - command_byte),
                 offset, size)
 
+    @cython.profile(False)
     cdef const_data _output_match(self, const_data data) except NULL:
         """We have a match that we like, insert it into the output stream"""
         cdef Py_ssize_t match_size
@@ -1157,10 +1168,14 @@
                 # one
                 # Pull in the next data character
                 RABIN_UPDATE(cur_rabin_val, data[-RABIN_WINDOW], data[0])
-                count = self._find_better_match(data, data_end, cur_rabin_val,
-                                                search)
-                global matches_checked
-                matches_checked += count
+                if search.peek(cur_rabin_val):
+                    count = self._find_better_match(data, data_end, cur_rabin_val,
+                                                    search)
+                    global matches_checked
+                    matches_checked += count
+                else:
+                    global peek_skip
+                    peek_skip += 1
             if self.match_size < 4:
                 # Our best match is only 4 extra bytes, just count this as an
                 # insert



More information about the bazaar-commits mailing list