Rev 5533: Adding some instrumentation to see where the time went. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

John Arbash Meinel john at arbash-meinel.com
Tue Nov 23 21:10:43 GMT 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem

------------------------------------------------------------
revno: 5533
revision-id: john at arbash-meinel.com-20101123211007-v5eu386vc5jv0vbj
parent: john at arbash-meinel.com-20101123185819-016sfnhqupjnat7p
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Tue 2010-11-23 15:10:07 -0600
message:
  Adding some instrumentation to see where the time went.
  
  Looks like nearly all of the time is spent in make_delta_source. I wonder
  if the new heuristic penalizes us there by creating more 'good but not
  great' matches.
  
  Basic timing:
  spent 0.676s in add_source
  spent 1.689s in add_delta_source
  spent 132.955s in make_delta_source
  
  Trying to profile inside make_delta_source (timing each
  _find_better_match call) doesn't work so well:
  spent 0.653s in add_source
  spent 1.789s in add_delta_source
  spent 1246.300s in make_delta_source
  spent 669.533s in _find_better_match
  
  That's a *lot* of overhead (roughly 10x, for just a couple of
  time.clock() calls per inner-loop iteration).
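  
  One way to cut that overhead would be to sample libc's clock() directly,
  so each measurement is a plain C call with no Python frame, attribute
  lookup, or float boxing. A minimal sketch of the idea (not code from
  this branch; c_find_better_time and _timed_example are hypothetical
  names):
  
      # Hypothetical low-overhead timing via libc clock(); a sketch of
      # the idea, not code from this commit.
      cdef extern from "time.h":
          ctypedef long clock_t
          clock_t clock() nogil
          int CLOCKS_PER_SEC
  
      cdef double c_find_better_time = 0.0
  
      cdef void _timed_example():
          cdef clock_t start
          global c_find_better_time
          start = clock()
          # ... the code being measured would run here ...
          c_find_better_time += <double>(clock() - start) / CLOCKS_PER_SEC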
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx	2010-11-23 18:58:19 +0000
+++ b/bzrlib/_delta_index_pyx.pyx	2010-11-23 21:10:07 +0000
@@ -70,12 +70,30 @@
 import time
 cdef object _timer
 _timer = time.clock
-cdef double total_time
-total_time = 0.0
+cdef double add_total_time
+add_total_time = 0.0
+cdef double add_delta_total_time
+add_delta_total_time = 0.0
+cdef double make_delta_total_time
+make_delta_total_time = 0.0
+cdef double find_better_total_time
+find_better_total_time = 0.0
+cdef int matches_checked
+matches_checked = 0
+cdef int try_again_count
+try_again_count = 0
+cdef int try_again_matches_checked
+try_again_matches_checked = 0
 
 import atexit, sys
 def report_total_time():
-    sys.stderr.write('spent %.3fs in _limit_hash_entries\n' % (total_time,))
+    sys.stderr.write('spent %.3fs in add_source\n' % (add_total_time,))
+    sys.stderr.write('spent %.3fs in add_delta_source\n' % (add_delta_total_time,))
+    sys.stderr.write('spent %.3fs in make_delta_source\n' % (make_delta_total_time,))
+    sys.stderr.write('spent %.3fs in _find_better_match\n' % (find_better_total_time,))
+    sys.stderr.write('%d try again, %d checks resulted in retry,'
+                     ' %d total matches checked\n' % (
+        try_again_count, try_again_matches_checked, matches_checked))
 atexit.register(report_total_time)
 
 
@@ -436,10 +454,7 @@
         entry.global_offset = global_offset
         self.entry_count += 1
         if search.match_count + 1 >= MAX_HASH_BUCKET_ENTRIES:
-            global total_time
-            c = _timer()
             self._limit_hash_entries(val, search)
-            total_time += <double>(_timer() - c)
         return entry
 
     def _py_add(self, val, global_offset, pre_bytes, post_bytes):
@@ -748,18 +763,22 @@
         """
         if not PyString_CheckExact(source):
             raise TypeError("source must be a str, not %s" % (type(source),))
+        t = _timer()
         start_offset = self.total_source_bytes + extra_offset
         new_info = SourceInfo(source, start_offset)
         self.sources.append(new_info)
         self._hash_map.reserve(len(source) / RABIN_WINDOW)
         self._compute_offsets(source, start_offset)
         self.total_source_bytes += len(source) + extra_offset
+        global add_total_time
+        add_total_time += _timer() - t
 
     def add_delta_source(self, delta_source, extra_offset=0):
         """Add more source content for this delta data."""
         if not PyString_CheckExact(delta_source):
             raise TypeError("delta_source must be a str, not %s"
                             % (type(delta_source),))
+        t = _timer()
         start_offset = self.total_source_bytes + extra_offset
         new_info = SourceInfo(delta_source, start_offset)
         self.sources.append(new_info)
@@ -767,14 +786,20 @@
         self._hash_map.reserve(len(delta_source) / RABIN_WINDOW)
         self._compute_offsets_from_delta(delta_source, start_offset)
         self.total_source_bytes += len(delta_source) + extra_offset
+        global add_delta_total_time
+        add_delta_total_time += _timer() - t
 
     def make_delta(self, content, max_delta_size=0, noisy=False):
         """Create a delta stream for the given content vs the current state."""
         cdef _DeltaCreator creator
         if not self.sources:
             return None
+        t = _timer()
         creator = _DeltaCreator(self, content, max_delta_size, noisy)
-        return creator.create()
+        val = creator.create()
+        global make_delta_total_time
+        make_delta_total_time += _timer() - t
+        return val
 
 
 cdef class _DeltaCreator:
@@ -834,8 +859,8 @@
         self.out[self.out_pos] = <unsigned char>(i)
         self.out_pos += 1
 
-    cdef _find_better_match(self, const_data data, const_data data_end,
-                            unsigned int cur_rabin_val):
+    cdef int _find_better_match(self, const_data data, const_data data_end,
+                                unsigned int cur_rabin_val) except -1:
         cdef rabin_entry *cur_entry
         cdef const_data cur
         cdef const_data ref
@@ -875,7 +900,8 @@
                 self.match_entry = cur_entry
                 if cur_match_size >= 4096:
                     # Stop searching for better matches
-                    return
+                    return search.match_count
+        return search.match_count
 
     cdef _finalize_pending_insert(self):
         cdef unsigned char insert_byte
@@ -1000,6 +1026,7 @@
         cdef int i
         cdef unsigned char c
         cdef Py_ssize_t target_len
+        cdef int count
 
         if PyString_GET_SIZE(self.content) < RABIN_WINDOW:
             # We don't have enough content to match, so don't try
@@ -1021,19 +1048,27 @@
 
         self.insert_count = RABIN_WINDOW
         self.match_size = 0
+        count = 0
         while data < data_end:
             if self.match_size < 4096:
                 # Our current match isn't "good enough", so look for a better
                 # one
                 # Pull in the next data character
                 RABIN_UPDATE(cur_rabin_val, data[-RABIN_WINDOW], data[0])
-                self._find_better_match(data, data_end, cur_rabin_val)
+                t = _timer()
+                count = self._find_better_match(data, data_end, cur_rabin_val)
+                global find_better_total_time, matches_checked
+                find_better_total_time += _timer() - t
+                matches_checked += (count - 1)
             if self.match_size < 4:
                 # Our best match is only 4 extra bytes, just count this as an
                 # insert
                 self._output_one(data[0])
                 data += 1
                 self.match_size = 0
+                global try_again_count, try_again_matches_checked
+                try_again_count += 1
+                try_again_matches_checked += (count - 1)
             else:
                 data = self._output_match(data)
                 if self.match_size < 4096:


