Rev 3712: Fix up iter_changes with dirstate both C and python. in http://people.ubuntu.com/~robertc/baz2.0/readdir

Robert Collins robertc at robertcollins.net
Mon Sep 15 04:23:58 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/readdir

------------------------------------------------------------
revno: 3712
revision-id: robertc at robertcollins.net-20080915032353-s0i0zt6b4rob77vp
parent: robertc at robertcollins.net-20080915011842-qq74hk4ezvo2lcin
committer: Robert Collins <robertc at robertcollins.net>
branch nick: process-entry-optimised
timestamp: Mon 2008-09-15 13:23:53 +1000
message:
  Fix up iter_changes with dirstate both C and python.
modified:
  bzrlib/_dirstate_helpers_c.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/delta.py                delta.py-20050729221636-54cf14ef94783d0a
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/_dirstate_helpers_c.pyx'
--- a/bzrlib/_dirstate_helpers_c.pyx	2008-09-15 01:18:42 +0000
+++ b/bzrlib/_dirstate_helpers_c.pyx	2008-09-15 03:23:53 +0000
@@ -20,6 +20,7 @@
 """
 
 import binascii
+import os
 
 from bzrlib import cache_utf8, errors, osutils
 from bzrlib.dirstate import DirState, pack_stat
@@ -881,11 +882,37 @@
     return PyString_AsString(string)[0]
 
 
+cdef object _kind_absent
+cdef object _kind_file
+cdef object _kind_directory
+cdef object _kind_symlink
+cdef object _kind_relocated
+cdef object _kind_tree_reference
+_kind_absent = "absent"
+_kind_file = "file"
+_kind_directory = "directory"
+_kind_symlink = "symlink"
+_kind_relocated = "relocated"
+_kind_tree_reference = "tree-reference"
+
+
 cdef object _minikind_to_kind(char minikind):
     """Create a string kind for minikind."""
     cdef char _minikind[1]
+    if minikind == c'f':
+        return _kind_file
+    elif minikind == c'd':
+        return _kind_directory
+    elif minikind == c'a':
+        return _kind_absent
+    elif minikind == c'r':
+        return _kind_relocated
+    elif minikind == c'l':
+        return _kind_symlink
+    elif minikind == c't':
+        return _kind_tree_reference
     _minikind[0] = minikind
-    return DirState._minikind_to_kind[PyString_FromStringAndSize(_minikind, 1)]
+    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
 
 
 cdef int _versioned_minikind(char minikind):
@@ -906,8 +933,10 @@
     cdef object include_unchanged
     cdef object use_filesystem_for_exec
     cdef object utf8_decode
+    cdef readonly object searched_specific_files
+    cdef object search_specific_files
 
-    def __init__(self, include_unchanged, use_filesystem_for_exec):
+    def __init__(self, include_unchanged, use_filesystem_for_exec, search_specific_files):
         self.old_dirname_to_file_id = {}
         self.new_dirname_to_file_id = {}
         # Just a sentry, so that _process_entry can say that this
@@ -920,6 +949,12 @@
         self.include_unchanged = include_unchanged
         self.use_filesystem_for_exec = use_filesystem_for_exec
         self.utf8_decode = cache_utf8._utf8_decode
+        # for all search_indexs in each path at or under each element of
+        # search_specific_files, if the detail is relocated: add the id, and add the
+        # relocated path as one to search if its not searched already. If the
+        # detail is not relocated, add the id.
+        self.searched_specific_files = set()
+        self.search_specific_files = search_specific_files
 
     def _process_entry(self, entry, path_info, source_index, int target_index, state):
         """Compare an entry and real disk to generate delta information.
@@ -937,6 +972,9 @@
         """
         cdef char target_minikind
         cdef char source_minikind
+        cdef object file_id
+        cdef int content_change
+        file_id = None
         if source_index is None:
             source_details = DirState.NULL_PARENT_DETAILS
         else:
@@ -954,7 +992,6 @@
             link_or_sha1 = None
         # the rest of this function is 0.3 seconds on 50K paths, or
         # 0.000006 seconds per call.
-        file_id = entry[0][2]
         source_minikind = _minikind_from_string(source_details[0])
         if ((_versioned_minikind(source_minikind) or source_minikind == c'r')
             and _versioned_minikind(target_minikind)):
@@ -963,12 +1000,16 @@
             #        |        |      | diff check on source-target
             #   r    | fdlt   |  a   | dangling file that was present in the basis.
             #        |        |      | ???
-            if source_minikind == c'r':
+            if source_minikind != c'r':
+                old_dirname = entry[0][0]
+                old_basename = entry[0][1]
+                old_path = path = None
+            else:
                 # add the source to the search path to find any children it
                 # has.  TODO ? : only add if it is a container ?
-                if not osutils.is_inside_any(searched_specific_files,
+                if not osutils.is_inside_any(self.searched_specific_files,
                                              source_details[1]):
-                    search_specific_files.add(source_details[1])
+                    self.search_specific_files.add(source_details[1])
                 # generate the old path; this is needed for stating later
                 # as well.
                 old_path = source_details[1]
@@ -985,13 +1026,9 @@
                         "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
                 source_details = old_entry[1][source_index]
                 source_minikind = _minikind_from_string(source_details[0])
-            else:
-                old_dirname = entry[0][0]
-                old_basename = entry[0][1]
-                old_path = path = None
             if path_info is None:
                 # the file is missing on disk, show as removed.
-                content_change = True
+                content_change = 1
                 target_kind = None
                 target_exec = False
             else:
@@ -1000,16 +1037,17 @@
                 if target_kind == 'directory':
                     if path is None:
                         old_path = path = pathjoin(old_dirname, old_basename)
+                    file_id = entry[0][2]
                     self.new_dirname_to_file_id[path] = file_id
                     if source_minikind != c'd':
-                        content_change = True
+                        content_change = 1
                     else:
                         # directories have no fingerprint
-                        content_change = False
+                        content_change = 0
                     target_exec = False
                 elif target_kind == 'file':
                     if source_minikind != c'f':
-                        content_change = True
+                        content_change = 1
                     else:
                         # We could check the size, but we already have the
                         # sha1 hash.
@@ -1023,21 +1061,23 @@
                         target_exec = target_details[3]
                 elif target_kind == 'symlink':
                     if source_minikind != c'l':
-                        content_change = True
+                        content_change = 1
                     else:
                         content_change = (link_or_sha1 != source_details[1])
                     target_exec = False
                 elif target_kind == 'tree-reference':
                     if source_minikind != c't':
-                        content_change = True
+                        content_change = 1
                     else:
-                        content_change = False
+                        content_change = 0
                     target_exec = False
                 else:
                     raise Exception, "unknown kind %s" % path_info[2]
             if source_minikind == c'd':
                 if path is None:
                     old_path = path = pathjoin(old_dirname, old_basename)
+                if file_id is None:
+                    file_id = entry[0][2]
                 self.old_dirname_to_file_id[old_path] = file_id
             # parent id is the entry for the path in the target tree
             if old_dirname == self.last_source_parent[0]:
@@ -1166,8 +1206,8 @@
             # a renamed parent. TODO: handle this efficiently. Its not
             # common case to rename dirs though, so a correct but slow
             # implementation will do.
-            if not osutils.is_inside_any(searched_specific_files, target_details[1]):
-                search_specific_files.add(target_details[1])
+            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
+                self.search_specific_files.add(target_details[1])
         elif ((source_minikind == c'r' or source_minikind == c'a') and
               (target_minikind == c'r' or target_minikind == c'a')):
             # neither of the selected trees contain this file,

=== modified file 'bzrlib/delta.py'
--- a/bzrlib/delta.py	2008-04-24 07:22:53 +0000
+++ b/bzrlib/delta.py	2008-09-15 03:23:53 +0000
@@ -241,7 +241,7 @@
                                   (executable[0] != executable[1])))
         elif kind[0] != kind[1]:
             delta.kind_changed.append((path[1], file_id, kind[0], kind[1]))
-        elif content_change is True or executable[0] != executable[1]:
+        elif content_change or executable[0] != executable[1]:
             delta.modified.append((path[1], file_id, kind[1],
                                    content_change,
                                    (executable[0] != executable[1])))

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2008-09-14 22:50:11 +0000
+++ b/bzrlib/dirstate.py	2008-09-15 03:23:53 +0000
@@ -2784,9 +2784,11 @@
 
     __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
         "last_source_parent", "last_target_parent", "include_unchanged",
-        "use_filesystem_for_exec", "utf8_decode"]
+        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
+        "search_specific_files"]
 
-    def __init__(self, include_unchanged, use_filesystem_for_exec):
+    def __init__(self, include_unchanged, use_filesystem_for_exec,
+        search_specific_files):
         self.old_dirname_to_file_id = {}
         self.new_dirname_to_file_id = {}
         # Just a sentry, so that _process_entry can say that this
@@ -2799,6 +2801,12 @@
         self.include_unchanged = include_unchanged
         self.use_filesystem_for_exec = use_filesystem_for_exec
         self.utf8_decode = cache_utf8._utf8_decode
+        # for all search_indexs in each path at or under each element of
+        # search_specific_files, if the detail is relocated: add the id, and add the
+        # relocated path as one to search if its not searched already. If the
+        # detail is not relocated, add the id.
+        self.searched_specific_files = set()
+        self.search_specific_files = search_specific_files
 
     def _process_entry(self, entry, path_info, source_index, target_index, state):
         """Compare an entry and real disk to generate delta information.
@@ -2841,7 +2849,7 @@
             if source_minikind in 'r':
                 # add the source to the search path to find any children it
                 # has.  TODO ? : only add if it is a container ?
-                if not osutils.is_inside_any(searched_specific_files,
+                if not osutils.is_inside_any(self.searched_specific_files,
                                              source_details[1]):
                     search_specific_files.add(source_details[1])
                 # generate the old path; this is needed for stating later
@@ -3041,7 +3049,7 @@
             # a renamed parent. TODO: handle this efficiently. Its not
             # common case to rename dirs though, so a correct but slow
             # implementation will do.
-            if not osutils.is_inside_any(searched_specific_files, target_details[1]):
+            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
                 search_specific_files.add(target_details[1])
         elif source_minikind in 'ra' and target_minikind in 'ra':
             # neither of the selected trees contain this file,

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2008-09-10 07:42:59 +0000
+++ b/bzrlib/osutils.py	2008-09-15 03:23:53 +0000
@@ -1555,7 +1555,7 @@
             file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
         except ImportError:
             from bzrlib._readdir_py import (
-                _kind_from_mode as _file_kind_from_stat_mode
+                _kind_from_mode as file_kind_from_stat_mode
                 )
     return file_kind_from_stat_mode(mode)
 file_kind_from_stat_mode = file_kind_from_stat_mode_thunk

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2008-09-14 22:50:11 +0000
+++ b/bzrlib/workingtree_4.py	2008-09-15 03:23:53 +0000
@@ -1858,9 +1858,12 @@
                 % (self.source._revision_id, self.target, self.iter_changes))
         update_entry = dirstate.update_entry
         use_filesystem_for_exec = (sys.platform != 'win32')
-        process_entry = dirstate._process_entry(include_unchanged, use_filesystem_for_exec)
+        search_specific_files = set()
+        process_entry = dirstate._process_entry(include_unchanged,
+            use_filesystem_for_exec, search_specific_files)
         _process_entry = process_entry._process_entry
         uninteresting = process_entry.uninteresting
+        searched_specific_files = process_entry.searched_specific_files
         target_index = 0
         if self.source._revision_id == NULL_REVISION:
             source_index = None
@@ -1928,7 +1931,6 @@
             if not all_versioned:
                 raise errors.PathsNotVersionedError(specific_files)
         # -- remove redundancy in supplied specific_files to prevent over-scanning --
-        search_specific_files = set()
         for path in specific_files:
             other_specific_files = specific_files.difference(set([path]))
             if not osutils.is_inside_any(other_specific_files, path):
@@ -1969,12 +1971,6 @@
         #        |        |      | Dont diff, we will see an r,fd; pair when we reach
         #        |        |      | this id at the other path.
 
-        # for all search_indexs in each path at or under each element of
-        # search_specific_files, if the detail is relocated: add the id, and add the
-        # relocated path as one to search if its not searched already. If the
-        # detail is not relocated, add the id.
-        searched_specific_files = set()
-
         # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
         #       keeping a cache of directories that we have seen.
 




More information about the bazaar-commits mailing list