Rev 2388: Implement _paths2ids using bisect recursive rather than loading in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

John Arbash Meinel john at arbash-meinel.com
Sun Feb 25 16:23:57 GMT 2007


At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

------------------------------------------------------------
revno: 2388
revision-id: john at arbash-meinel.com-20070225162253-8expr1vlo70wxp72
parent: john at arbash-meinel.com-20070225144550-3rcyjf6ui5oo0gaf
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Sun 2007-02-25 10:22:53 -0600
message:
  Implement _paths2ids using _bisect_recursive rather than loading
  the whole dataset from disk. Leave it disabled for now: it cannot benefit us
  until other code paths no longer require the whole dirstate in memory.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-25 14:45:50 +0000
+++ b/bzrlib/dirstate.py	2007-02-25 16:22:53 +0000
@@ -1300,8 +1300,9 @@
         """
         self._read_header_if_needed()
         if self._dirblock_state == DirState.NOT_IN_MEMORY:
-            # the _state_file pointer will be positioned at the start of the 
-            # dirblocks.
+            # move the _state_file pointer to after the header (in case bisect
+            # has been called in the mean time)
+            self._state_file.seek(self._end_of_header)
             text = self._state_file.read()
             # TODO: check the adler checksums. adler_measured = zlib.adler32(text)
 

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-23 05:12:06 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-25 16:22:53 +0000
@@ -612,6 +612,17 @@
         # -- paths is now a utf8 path set --
         # -- get the state object and prepare it.
         state = self.current_dirstate()
+        if False and (state._dirblock_state == dirstate.DirState.NOT_IN_MEMORY
+            and '' not in paths):
+            paths2ids = self._paths2ids_using_bisect
+        else:
+            paths2ids = self._paths2ids_in_memory
+        return paths2ids(paths, search_indexes,
+                         require_versioned=require_versioned)
+
+    def _paths2ids_in_memory(self, paths, search_indexes,
+                             require_versioned=True):
+        state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         def _entries_for_path(path):
             """Return a list with all the entries that match path for all ids.
@@ -703,6 +714,26 @@
                 block_index += 1
         return found_ids
 
+    def _paths2ids_using_bisect(self, paths, search_indexes,
+                                require_versioned=True):
+        state = self.current_dirstate()
+        found_ids = set()
+
+        split_paths = sorted(osutils.split(p) for p in paths)
+        found = state._bisect_recursive(split_paths)
+
+        if require_versioned:
+            found_dir_names = set(dir_name_id[:2] for dir_name_id in found)
+            for dir_name in split_paths:
+                if dir_name not in found_dir_names:
+                    raise errors.PathsNotVersionedError(paths)
+
+        for dir_name_id, trees_info in found.iteritems():
+            for index in search_indexes:
+                if trees_info[index][0] not in ('r', 'a'):
+                    found_ids.add(dir_name_id[2])
+        return found_ids
+
     def read_working_inventory(self):
         """Read the working inventory.
         



More information about the bazaar-commits mailing list