Rev 3712: Fix up iter_changes with dirstate both C and python. in http://people.ubuntu.com/~robertc/baz2.0/readdir
Robert Collins
robertc at robertcollins.net
Mon Sep 15 04:23:58 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/readdir
------------------------------------------------------------
revno: 3712
revision-id: robertc at robertcollins.net-20080915032353-s0i0zt6b4rob77vp
parent: robertc at robertcollins.net-20080915011842-qq74hk4ezvo2lcin
committer: Robert Collins <robertc at robertcollins.net>
branch nick: process-entry-optimised
timestamp: Mon 2008-09-15 13:23:53 +1000
message:
Fix up iter_changes with dirstate both C and python.
modified:
bzrlib/_dirstate_helpers_c.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
bzrlib/delta.py delta.py-20050729221636-54cf14ef94783d0a
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/_dirstate_helpers_c.pyx'
--- a/bzrlib/_dirstate_helpers_c.pyx 2008-09-15 01:18:42 +0000
+++ b/bzrlib/_dirstate_helpers_c.pyx 2008-09-15 03:23:53 +0000
@@ -20,6 +20,7 @@
"""
import binascii
+import os
from bzrlib import cache_utf8, errors, osutils
from bzrlib.dirstate import DirState, pack_stat
@@ -881,11 +882,37 @@
return PyString_AsString(string)[0]
+cdef object _kind_absent
+cdef object _kind_file
+cdef object _kind_directory
+cdef object _kind_symlink
+cdef object _kind_relocated
+cdef object _kind_tree_reference
+_kind_absent = "absent"
+_kind_file = "file"
+_kind_directory = "directory"
+_kind_symlink = "symlink"
+_kind_relocated = "relocated"
+_kind_tree_reference = "tree-reference"
+
+
cdef object _minikind_to_kind(char minikind):
"""Create a string kind for minikind."""
cdef char _minikind[1]
+ if minikind == c'f':
+ return _kind_file
+ elif minikind == c'd':
+ return _kind_directory
+ elif minikind == c'a':
+ return _kind_absent
+ elif minikind == c'r':
+ return _kind_relocated
+ elif minikind == c'l':
+ return _kind_symlink
+ elif minikind == c't':
+ return _kind_tree_reference
_minikind[0] = minikind
- return DirState._minikind_to_kind[PyString_FromStringAndSize(_minikind, 1)]
+ raise KeyError(PyString_FromStringAndSize(_minikind, 1))
cdef int _versioned_minikind(char minikind):
@@ -906,8 +933,10 @@
cdef object include_unchanged
cdef object use_filesystem_for_exec
cdef object utf8_decode
+ cdef readonly object searched_specific_files
+ cdef object search_specific_files
- def __init__(self, include_unchanged, use_filesystem_for_exec):
+ def __init__(self, include_unchanged, use_filesystem_for_exec, search_specific_files):
self.old_dirname_to_file_id = {}
self.new_dirname_to_file_id = {}
# Just a sentry, so that _process_entry can say that this
@@ -920,6 +949,12 @@
self.include_unchanged = include_unchanged
self.use_filesystem_for_exec = use_filesystem_for_exec
self.utf8_decode = cache_utf8._utf8_decode
+ # for all search_indexs in each path at or under each element of
+ # search_specific_files, if the detail is relocated: add the id, and add the
+ # relocated path as one to search if its not searched already. If the
+ # detail is not relocated, add the id.
+ self.searched_specific_files = set()
+ self.search_specific_files = search_specific_files
def _process_entry(self, entry, path_info, source_index, int target_index, state):
"""Compare an entry and real disk to generate delta information.
@@ -937,6 +972,9 @@
"""
cdef char target_minikind
cdef char source_minikind
+ cdef object file_id
+ cdef int content_change
+ file_id = None
if source_index is None:
source_details = DirState.NULL_PARENT_DETAILS
else:
@@ -954,7 +992,6 @@
link_or_sha1 = None
# the rest of this function is 0.3 seconds on 50K paths, or
# 0.000006 seconds per call.
- file_id = entry[0][2]
source_minikind = _minikind_from_string(source_details[0])
if ((_versioned_minikind(source_minikind) or source_minikind == c'r')
and _versioned_minikind(target_minikind)):
@@ -963,12 +1000,16 @@
# | | | diff check on source-target
# r | fdlt | a | dangling file that was present in the basis.
# | | | ???
- if source_minikind == c'r':
+ if source_minikind != c'r':
+ old_dirname = entry[0][0]
+ old_basename = entry[0][1]
+ old_path = path = None
+ else:
# add the source to the search path to find any children it
# has. TODO ? : only add if it is a container ?
- if not osutils.is_inside_any(searched_specific_files,
+ if not osutils.is_inside_any(self.searched_specific_files,
source_details[1]):
- search_specific_files.add(source_details[1])
+ self.search_specific_files.add(source_details[1])
# generate the old path; this is needed for stating later
# as well.
old_path = source_details[1]
@@ -985,13 +1026,9 @@
"entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
source_details = old_entry[1][source_index]
source_minikind = _minikind_from_string(source_details[0])
- else:
- old_dirname = entry[0][0]
- old_basename = entry[0][1]
- old_path = path = None
if path_info is None:
# the file is missing on disk, show as removed.
- content_change = True
+ content_change = 1
target_kind = None
target_exec = False
else:
@@ -1000,16 +1037,17 @@
if target_kind == 'directory':
if path is None:
old_path = path = pathjoin(old_dirname, old_basename)
+ file_id = entry[0][2]
self.new_dirname_to_file_id[path] = file_id
if source_minikind != c'd':
- content_change = True
+ content_change = 1
else:
# directories have no fingerprint
- content_change = False
+ content_change = 0
target_exec = False
elif target_kind == 'file':
if source_minikind != c'f':
- content_change = True
+ content_change = 1
else:
# We could check the size, but we already have the
# sha1 hash.
@@ -1023,21 +1061,23 @@
target_exec = target_details[3]
elif target_kind == 'symlink':
if source_minikind != c'l':
- content_change = True
+ content_change = 1
else:
content_change = (link_or_sha1 != source_details[1])
target_exec = False
elif target_kind == 'tree-reference':
if source_minikind != c't':
- content_change = True
+ content_change = 1
else:
- content_change = False
+ content_change = 0
target_exec = False
else:
raise Exception, "unknown kind %s" % path_info[2]
if source_minikind == c'd':
if path is None:
old_path = path = pathjoin(old_dirname, old_basename)
+ if file_id is None:
+ file_id = entry[0][2]
self.old_dirname_to_file_id[old_path] = file_id
# parent id is the entry for the path in the target tree
if old_dirname == self.last_source_parent[0]:
@@ -1166,8 +1206,8 @@
# a renamed parent. TODO: handle this efficiently. Its not
# common case to rename dirs though, so a correct but slow
# implementation will do.
- if not osutils.is_inside_any(searched_specific_files, target_details[1]):
- search_specific_files.add(target_details[1])
+ if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
+ self.search_specific_files.add(target_details[1])
elif ((source_minikind == c'r' or source_minikind == c'a') and
(target_minikind == c'r' or target_minikind == c'a')):
# neither of the selected trees contain this file,
=== modified file 'bzrlib/delta.py'
--- a/bzrlib/delta.py 2008-04-24 07:22:53 +0000
+++ b/bzrlib/delta.py 2008-09-15 03:23:53 +0000
@@ -241,7 +241,7 @@
(executable[0] != executable[1])))
elif kind[0] != kind[1]:
delta.kind_changed.append((path[1], file_id, kind[0], kind[1]))
- elif content_change is True or executable[0] != executable[1]:
+ elif content_change or executable[0] != executable[1]:
delta.modified.append((path[1], file_id, kind[1],
content_change,
(executable[0] != executable[1])))
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2008-09-14 22:50:11 +0000
+++ b/bzrlib/dirstate.py 2008-09-15 03:23:53 +0000
@@ -2784,9 +2784,11 @@
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
"last_source_parent", "last_target_parent", "include_unchanged",
- "use_filesystem_for_exec", "utf8_decode"]
+ "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
+ "search_specific_files"]
- def __init__(self, include_unchanged, use_filesystem_for_exec):
+ def __init__(self, include_unchanged, use_filesystem_for_exec,
+ search_specific_files):
self.old_dirname_to_file_id = {}
self.new_dirname_to_file_id = {}
# Just a sentry, so that _process_entry can say that this
@@ -2799,6 +2801,12 @@
self.include_unchanged = include_unchanged
self.use_filesystem_for_exec = use_filesystem_for_exec
self.utf8_decode = cache_utf8._utf8_decode
+ # for all search_indexs in each path at or under each element of
+ # search_specific_files, if the detail is relocated: add the id, and add the
+ # relocated path as one to search if its not searched already. If the
+ # detail is not relocated, add the id.
+ self.searched_specific_files = set()
+ self.search_specific_files = search_specific_files
def _process_entry(self, entry, path_info, source_index, target_index, state):
"""Compare an entry and real disk to generate delta information.
@@ -2841,7 +2849,7 @@
if source_minikind in 'r':
# add the source to the search path to find any children it
# has. TODO ? : only add if it is a container ?
- if not osutils.is_inside_any(searched_specific_files,
+ if not osutils.is_inside_any(self.searched_specific_files,
source_details[1]):
search_specific_files.add(source_details[1])
# generate the old path; this is needed for stating later
@@ -3041,7 +3049,7 @@
# a renamed parent. TODO: handle this efficiently. Its not
# common case to rename dirs though, so a correct but slow
# implementation will do.
- if not osutils.is_inside_any(searched_specific_files, target_details[1]):
+ if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
search_specific_files.add(target_details[1])
elif source_minikind in 'ra' and target_minikind in 'ra':
# neither of the selected trees contain this file,
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2008-09-10 07:42:59 +0000
+++ b/bzrlib/osutils.py 2008-09-15 03:23:53 +0000
@@ -1555,7 +1555,7 @@
file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
except ImportError:
from bzrlib._readdir_py import (
- _kind_from_mode as _file_kind_from_stat_mode
+ _kind_from_mode as file_kind_from_stat_mode
)
return file_kind_from_stat_mode(mode)
file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2008-09-14 22:50:11 +0000
+++ b/bzrlib/workingtree_4.py 2008-09-15 03:23:53 +0000
@@ -1858,9 +1858,12 @@
% (self.source._revision_id, self.target, self.iter_changes))
update_entry = dirstate.update_entry
use_filesystem_for_exec = (sys.platform != 'win32')
- process_entry = dirstate._process_entry(include_unchanged, use_filesystem_for_exec)
+ search_specific_files = set()
+ process_entry = dirstate._process_entry(include_unchanged,
+ use_filesystem_for_exec, search_specific_files)
_process_entry = process_entry._process_entry
uninteresting = process_entry.uninteresting
+ searched_specific_files = process_entry.searched_specific_files
target_index = 0
if self.source._revision_id == NULL_REVISION:
source_index = None
@@ -1928,7 +1931,6 @@
if not all_versioned:
raise errors.PathsNotVersionedError(specific_files)
# -- remove redundancy in supplied specific_files to prevent over-scanning --
- search_specific_files = set()
for path in specific_files:
other_specific_files = specific_files.difference(set([path]))
if not osutils.is_inside_any(other_specific_files, path):
@@ -1969,12 +1971,6 @@
# | | | Dont diff, we will see an r,fd; pair when we reach
# | | | this id at the other path.
- # for all search_indexs in each path at or under each element of
- # search_specific_files, if the detail is relocated: add the id, and add the
- # relocated path as one to search if its not searched already. If the
- # detail is not relocated, add the id.
- searched_specific_files = set()
-
# TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
# keeping a cache of directories that we have seen.
More information about the bazaar-commits
mailing list