Rev 2894: add -Dhashcache, sha_file_by_name using raw os files rather than file objects (mbp) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Oct 8 06:10:02 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2894
revision-id: pqm at pqm.ubuntu.com-20071008050959-i785alc2ome5c1k5
parent: pqm at pqm.ubuntu.com-20071008022621-m0rzk7yfrfoszd38
parent: mbp at sourcefrog.net-20071008042419-p8mbk94fexwbadpl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-10-08 06:09:59 +0100
message:
  add -Dhashcache, sha_file_by_name using raw os files rather than file objects (mbp)
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/help_topics.py          help_topics.py-20060920210027-rnim90q9e0bwxvy4-1
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
    ------------------------------------------------------------
    revno: 2872.1.1.1.3
    merged: mbp at sourcefrog.net-20071008042419-p8mbk94fexwbadpl
    parent: mbp at sourcefrog.net-20070928075828-vp07yjxa2x29ckjq
    committer: Martin Pool <mbp at sourcefrog.net>
    branch nick: shafile
    timestamp: Mon 2007-10-08 14:24:19 +1000
    message:
      Fix up test_update_entry to work with -Dhashcache
    ------------------------------------------------------------
    revno: 2872.1.1.1.2
    merged: mbp at sourcefrog.net-20070928075828-vp07yjxa2x29ckjq
    parent: mbp at sourcefrog.net-20070928070905-ge0e8x20c21192j9
    committer: Martin Pool <mbp at sourcefrog.net>
    branch nick: 145511-dirstate
    timestamp: Fri 2007-09-28 17:58:28 +1000
    message:
      Do sha_file_by_name using raw os files rather than file objects; makes this routine about 12% faster
    ------------------------------------------------------------
    revno: 2872.1.1.1.1
    merged: mbp at sourcefrog.net-20070928070905-ge0e8x20c21192j9
    parent: mbp at sourcefrog.net-20070928065208-lvvvt05b0tov46h4
    committer: Martin Pool <mbp at sourcefrog.net>
    branch nick: 145511-dirstate
    timestamp: Fri 2007-09-28 17:09:05 +1000
    message:
      Add -Dhashcache option; clean up dirstate sha1 code
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-09-21 04:22:53 +0000
+++ b/bzrlib/dirstate.py	2007-09-28 07:09:05 +0000
@@ -212,6 +212,7 @@
 import zlib
 
 from bzrlib import (
+    debug,
     errors,
     inventory,
     lock,
@@ -340,6 +341,10 @@
         self._cutoff_time = None
         self._split_path_cache = {}
         self._bisect_page_size = DirState.BISECT_PAGE_SIZE
+        if 'hashcache' in debug.debug_flags:
+            self._sha1_file = self._sha1_file_and_mutter
+        else:
+            self._sha1_file = osutils.sha_file_by_name
 
     def __repr__(self):
         return "%s(%r)" % \
@@ -1128,7 +1133,7 @@
         # process this entry.
         link_or_sha1 = None
         if minikind == 'f':
-            link_or_sha1 = self._sha1_file(abspath, entry)
+            link_or_sha1 = self._sha1_file(abspath)
             executable = self._is_executable(stat_value.st_mode,
                                              saved_executable)
             if self._cutoff_time is None:
@@ -1182,13 +1187,11 @@
         """Return the os.lstat value for this path."""
         return os.lstat(abspath)
 
-    def _sha1_file(self, abspath, entry):
-        """Calculate the SHA1 of a file by reading the full text"""
-        f = file(abspath, 'rb', buffering=65000)
-        try:
-            return osutils.sha_file(f)
-        finally:
-            f.close()
+    def _sha1_file_and_mutter(self, abspath):
+        # when -Dhashcache is turned on, this is monkey-patched in to log
+        # file reads
+        trace.mutter("dirstate sha1 " + abspath)
+        return osutils.sha_file_by_name(abspath)
 
     def _is_executable(self, mode, old_executable):
         """Is this file executable?"""

=== modified file 'bzrlib/help_topics.py'
--- a/bzrlib/help_topics.py	2007-09-07 03:42:41 +0000
+++ b/bzrlib/help_topics.py	2007-09-28 07:09:05 +0000
@@ -265,6 +265,7 @@
                error.
 -Devil         Capture call sites that do expensive or badly-scaling
                operations.
+-Dhashcache    Log every time a working file is read to determine its hash.
 -Dhooks        Trace hook execution.
 -Dhpss         Trace smart protocol requests and responses.
 -Dindex        Trace major index operations.

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2007-10-04 05:09:58 +0000
+++ b/bzrlib/osutils.py	2007-10-08 05:09:59 +0000
@@ -590,6 +590,20 @@
     return s.hexdigest()
 
 
+def sha_file_by_name(fname):
+    """Calculate the SHA1 of a file by reading the full text"""
+    s = sha.new()
+    f = os.open(fname, os.O_RDONLY)
+    try:
+        while True:
+            b = os.read(f, 1<<16)
+            if not b:
+                return s.hexdigest()
+            s.update(b)
+    finally:
+        os.close(f)
+
+
 def sha_strings(strings, _factory=sha.new):
     """Return the sha-1 of concatenation of strings"""
     s = _factory()

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-07-21 18:27:19 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-10-08 04:24:19 +0000
@@ -1399,14 +1399,16 @@
         super(InstrumentedDirState, self).__init__(path)
         self._time_offset = 0
         self._log = []
+        # member is dynamically set in DirState.__init__ to turn on trace
+        self._sha1_file = self._sha1_file_and_log
 
     def _sha_cutoff_time(self):
         timestamp = super(InstrumentedDirState, self)._sha_cutoff_time()
         self._cutoff_time = timestamp + self._time_offset
 
-    def _sha1_file(self, abspath, entry):
+    def _sha1_file_and_log(self, abspath):
         self._log.append(('sha1', abspath))
-        return super(InstrumentedDirState, self)._sha1_file(abspath, entry)
+        return osutils.sha_file_by_name(abspath)
 
     def _read_link(self, abspath, old_link):
         self._log.append(('read_link', abspath, old_link))




More information about the bazaar-commits mailing list