Rev 3710: Fixes for the relocated code, and use _update_entry within the C accelerated code (another 8 percent saving), in http://people.ubuntu.com/~robertc/baz2.0/readdir

Robert Collins robertc at robertcollins.net
Sun Sep 14 23:50:26 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/readdir

------------------------------------------------------------
revno: 3710
revision-id: robertc at robertcollins.net-20080914225011-50w64zp8ykc1twi0
parent: robertc at robertcollins.net-20080914124043-lt97fxv205326rf1
committer: Robert Collins <robertc at robertcollins.net>
branch nick: process-entry-optimised
timestamp: Mon 2008-09-15 08:50:11 +1000
message:
  Fixes for the relocated code, and use _update_entry within the C accelerated code, another 8 percent saving.
modified:
  bzrlib/_dirstate_helpers_c.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/_dirstate_helpers_c.pyx'
--- a/bzrlib/_dirstate_helpers_c.pyx	2008-09-14 12:40:43 +0000
+++ b/bzrlib/_dirstate_helpers_c.pyx	2008-09-14 22:50:11 +0000
@@ -21,7 +21,7 @@
 
 import binascii
 
-from bzrlib import errors, osutils
+from bzrlib import cache_utf8, errors, osutils
 from bzrlib.dirstate import DirState, pack_stat
 from bzrlib.osutils import pathjoin
 
@@ -787,6 +787,18 @@
     :return: The sha1 hexdigest of the file (40 bytes) or link target of a
             symlink.
     """
+    return _update_entry(self, entry, abspath, stat_value)
+
+cdef _update_entry(self, entry, abspath, stat_value):
+    """Update the entry based on what is actually on disk.
+
+    :param entry: This is the dirblock entry for the file in question.
+    :param abspath: The path on disk for this file.
+    :param stat_value: (optional) if we already have done a stat on the
+        file, re-use it.
+    :return: The sha1 hexdigest of the file (40 bytes) or link target of a
+            symlink.
+    """
     # TODO - require pyrex 0.8, then use a pyd file to define access to the _st
     # mode of the compiled stat objects.
     cdef int minikind, saved_minikind
@@ -873,6 +885,7 @@
     cdef object last_target_parent
     cdef object include_unchanged
     cdef object use_filesystem_for_exec
+    cdef object utf8_decode
 
     def __init__(self, include_unchanged, use_filesystem_for_exec):
         self.old_dirname_to_file_id = {}
@@ -886,6 +899,7 @@
         self.last_target_parent = [None, None]
         self.include_unchanged = include_unchanged
         self.use_filesystem_for_exec = use_filesystem_for_exec
+        self.utf8_decode = cache_utf8._utf8_decode
 
     def _process_entry(self, entry, path_info, source_index, target_index, state):
         """Compare an entry and real disk to generate delta information.
@@ -901,25 +915,35 @@
                  the object 'uninteresting' if these match, but are
                  basically identical.
         """
+        cdef char target_minikind
         if source_index is None:
-            source_details = NULL_PARENT_DETAILS
+            source_details = DirState.NULL_PARENT_DETAILS
         else:
             source_details = entry[1][source_index]
         target_details = entry[1][target_index]
-        target_minikind = target_details[0]
-        if path_info is not None and target_minikind in 'fdlt':
+        target_minikind_str = target_details[0]
+        target_minikind = PyString_AsString(target_minikind_str)[0]
+        if (path_info is not None and (
+            target_minikind == c'f' or
+            target_minikind == c'd' or
+            target_minikind == c'l' or
+            target_minikind == c't')):
             if not (target_index == 0):
                 raise AssertionError()
-            link_or_sha1 = update_entry(state, entry,
-                abspath=path_info[4], stat_value=path_info[3])
+            link_or_sha1 = _update_entry(state, entry, path_info[4], path_info[3])
             # The entry may have been modified by update_entry
             target_details = entry[1][target_index]
-            target_minikind = target_details[0]
+            target_minikind_str = target_details[0]
+            target_minikind = PyString_AsString(target_minikind_str)[0]
         else:
             link_or_sha1 = None
         file_id = entry[0][2]
         source_minikind = source_details[0]
-        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
+        if (source_minikind in 'fdltr' and (
+            target_minikind == c'f' or
+            target_minikind == c'd' or
+            target_minikind == c'l' or
+            target_minikind == c't')):
             # claimed content in both: diff
             #   r    | fdlt   |      | add source to search, add id path move and perform
             #        |        |      | diff check on source-target
@@ -1049,26 +1073,30 @@
                 ):
                 if old_path is None:
                     old_path = path = pathjoin(old_dirname, old_basename)
-                    old_path_u = utf8_decode(old_path)[0]
+                    old_path_u = self.utf8_decode(old_path)[0]
                     path_u = old_path_u
                 else:
-                    old_path_u = utf8_decode(old_path)[0]
+                    old_path_u = self.utf8_decode(old_path)[0]
                     if old_path == path:
                         path_u = old_path_u
                     else:
-                        path_u = utf8_decode(path)[0]
-                source_kind = _minikind_to_kind[source_minikind]
+                        path_u = self.utf8_decode(path)[0]
+                source_kind = DirState._minikind_to_kind[source_minikind]
                 return (entry[0][2],
                        (old_path_u, path_u),
                        content_change,
                        (True, True),
                        (source_parent_id, target_parent_id),
-                       (utf8_decode(old_basename)[0], utf8_decode(entry[0][1])[0]),
+                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                        (source_kind, target_kind),
                        (source_exec, target_exec))
             else:
                 return self.uninteresting
-        elif source_minikind in 'a' and target_minikind in 'fdlt':
+        elif (source_minikind in 'a' and (
+            target_minikind == c'f' or
+            target_minikind == c'd' or
+            target_minikind == c'l' or
+            target_minikind == c't')):
             # looks like a new file
             path = pathjoin(entry[0][0], entry[0][1])
             # parent id is the entry for the path in the target tree
@@ -1088,24 +1116,24 @@
                 else:
                     target_exec = target_details[3]
                 return (entry[0][2],
-                       (None, utf8_decode(path)[0]),
+                       (None, self.utf8_decode(path)[0]),
                        True,
                        (False, True),
                        (None, parent_id),
-                       (None, utf8_decode(entry[0][1])[0]),
+                       (None, self.utf8_decode(entry[0][1])[0]),
                        (None, path_info[2]),
                        (None, target_exec))
             else:
                 # Its a missing file, report it as such.
                 return (entry[0][2],
-                       (None, utf8_decode(path)[0]),
+                       (None, self.utf8_decode(path)[0]),
                        False,
                        (False, True),
                        (None, parent_id),
-                       (None, utf8_decode(entry[0][1])[0]),
+                       (None, self.utf8_decode(entry[0][1])[0]),
                        (None, None),
                        (None, False))
-        elif source_minikind in 'fdlt' and target_minikind in 'a':
+        elif source_minikind in 'fdlt' and target_minikind == c'a':
             # unversioned, possibly, or possibly not deleted: we dont care.
             # if its still on disk, *and* theres no other entry at this
             # path [we dont know this in this routine at the moment -
@@ -1116,21 +1144,23 @@
             if parent_id == entry[0][2]:
                 parent_id = None
             return (entry[0][2],
-                   (utf8_decode(old_path)[0], None),
+                   (self.utf8_decode(old_path)[0], None),
                    True,
                    (True, False),
                    (parent_id, None),
-                   (utf8_decode(entry[0][1])[0], None),
-                   (_minikind_to_kind[source_minikind], None),
+                   (self.utf8_decode(entry[0][1])[0], None),
+                   (DirState._minikind_to_kind[source_minikind], None),
                    (source_details[3], None))
-        elif source_minikind in 'fdlt' and target_minikind in 'r':
+        elif source_minikind in 'fdlt' and target_minikind == c'r':
             # a rename; could be a true rename, or a rename inherited from
             # a renamed parent. TODO: handle this efficiently. Its not
             # common case to rename dirs though, so a correct but slow
             # implementation will do.
             if not osutils.is_inside_any(searched_specific_files, target_details[1]):
                 search_specific_files.add(target_details[1])
-        elif source_minikind in 'ra' and target_minikind in 'ra':
+        elif (source_minikind in 'ra' and (
+            target_minikind == c'r' or
+            target_minikind == c'a')):
             # neither of the selected trees contain this file,
             # so skip over it. This is not currently directly tested, but
             # is indirectly via test_too_much.TestCommands.test_conflicts.

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2008-09-14 12:40:43 +0000
+++ b/bzrlib/dirstate.py	2008-09-14 22:50:11 +0000
@@ -2784,7 +2784,7 @@
 
     __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
         "last_source_parent", "last_target_parent", "include_unchanged",
-        "use_filesystem_for_exec"]
+        "use_filesystem_for_exec", "utf8_decode"]
 
     def __init__(self, include_unchanged, use_filesystem_for_exec):
         self.old_dirname_to_file_id = {}
@@ -2798,6 +2798,7 @@
         self.last_target_parent = [None, None]
         self.include_unchanged = include_unchanged
         self.use_filesystem_for_exec = use_filesystem_for_exec
+        self.utf8_decode = cache_utf8._utf8_decode
 
     def _process_entry(self, entry, path_info, source_index, target_index, state):
         """Compare an entry and real disk to generate delta information.
@@ -2814,7 +2815,7 @@
                  basically identical.
         """
         if source_index is None:
-            source_details = NULL_PARENT_DETAILS
+            source_details = DirState.NULL_PARENT_DETAILS
         else:
             source_details = entry[1][source_index]
         target_details = entry[1][target_index]
@@ -2961,21 +2962,21 @@
                 ):
                 if old_path is None:
                     old_path = path = pathjoin(old_dirname, old_basename)
-                    old_path_u = utf8_decode(old_path)[0]
+                    old_path_u = self.utf8_decode(old_path)[0]
                     path_u = old_path_u
                 else:
-                    old_path_u = utf8_decode(old_path)[0]
+                    old_path_u = self.utf8_decode(old_path)[0]
                     if old_path == path:
                         path_u = old_path_u
                     else:
-                        path_u = utf8_decode(path)[0]
-                source_kind = _minikind_to_kind[source_minikind]
+                        path_u = self.utf8_decode(path)[0]
+                source_kind = DirState._minikind_to_kind[source_minikind]
                 return (entry[0][2],
                        (old_path_u, path_u),
                        content_change,
                        (True, True),
                        (source_parent_id, target_parent_id),
-                       (utf8_decode(old_basename)[0], utf8_decode(entry[0][1])[0]),
+                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                        (source_kind, target_kind),
                        (source_exec, target_exec))
             else:
@@ -3000,21 +3001,21 @@
                 else:
                     target_exec = target_details[3]
                 return (entry[0][2],
-                       (None, utf8_decode(path)[0]),
+                       (None, self.utf8_decode(path)[0]),
                        True,
                        (False, True),
                        (None, parent_id),
-                       (None, utf8_decode(entry[0][1])[0]),
+                       (None, self.utf8_decode(entry[0][1])[0]),
                        (None, path_info[2]),
                        (None, target_exec))
             else:
                 # Its a missing file, report it as such.
                 return (entry[0][2],
-                       (None, utf8_decode(path)[0]),
+                       (None, self.utf8_decode(path)[0]),
                        False,
                        (False, True),
                        (None, parent_id),
-                       (None, utf8_decode(entry[0][1])[0]),
+                       (None, self.utf8_decode(entry[0][1])[0]),
                        (None, None),
                        (None, False))
         elif source_minikind in 'fdlt' and target_minikind in 'a':
@@ -3028,12 +3029,12 @@
             if parent_id == entry[0][2]:
                 parent_id = None
             return (entry[0][2],
-                   (utf8_decode(old_path)[0], None),
+                   (self.utf8_decode(old_path)[0], None),
                    True,
                    (True, False),
                    (parent_id, None),
-                   (utf8_decode(entry[0][1])[0], None),
-                   (_minikind_to_kind[source_minikind], None),
+                   (self.utf8_decode(entry[0][1])[0], None),
+                   (DirState._minikind_to_kind[source_minikind], None),
                    (source_details[3], None))
         elif source_minikind in 'fdlt' and target_minikind in 'r':
             # a rename; could be a true rename, or a rename inherited from

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2008-09-14 11:59:29 +0000
+++ b/bzrlib/workingtree_4.py	2008-09-14 22:50:11 +0000
@@ -1974,7 +1974,6 @@
         # relocated path as one to search if its not searched already. If the
         # detail is not relocated, add the id.
         searched_specific_files = set()
-        NULL_PARENT_DETAILS = dirstate.DirState.NULL_PARENT_DETAILS
 
         # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
         #       keeping a cache of directories that we have seen.




More information about the bazaar-commits mailing list