Rev 2353: Save a small fraction (2.5s => 2.4s) by using the cached encoders in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

John Arbash Meinel john at arbash-meinel.com
Thu Feb 22 00:39:49 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

------------------------------------------------------------
revno: 2353
revision-id: john at arbash-meinel.com-20070222003946-7huladldtgb9iz8v
parent: john at arbash-meinel.com-20070221224546-3frplqdi81632elw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Wed 2007-02-21 18:39:46 -0600
message:
  Save a small fraction (2.5s => 2.4s) by using the cached encoders
modified:
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-21 11:10:37 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-22 00:39:46 +0000
@@ -60,7 +60,7 @@
 import bzrlib.ui
 """)
 
-from bzrlib import symbol_versioning
+from bzrlib import cache_utf8, symbol_versioning
 from bzrlib.decorators import needs_read_lock, needs_write_lock
 from bzrlib.inventory import InventoryEntry, Inventory, ROOT_ID, entry_factory
 from bzrlib.lockable_files import LockableFiles, TransportLock
@@ -96,6 +96,14 @@
 from bzrlib.workingtree import WorkingTree, WorkingTree3, WorkingTreeFormat3
 
 
+def _utf8_encode(unicode_str, _encoder=cache_utf8._utf8_encode):
+    return _encoder(unicode_str)[0]
+
+
+def _utf8_decode(utf8_str, _decoder=cache_utf8._utf8_decode):
+    return _decoder(utf8_str)[0]
+
+
 class WorkingTree4(WorkingTree3):
     """This is the Format 4 working tree.
 
@@ -205,8 +213,9 @@
         result = set()
         state = self.current_dirstate()
         # TODO we want a paths_to_dirblocks helper I think
+        encode = _utf8_encode
         for path in paths:
-            dirname, basename = os.path.split(path.encode('utf8'))
+            dirname, basename = os.path.split(encode(path))
             _, _, _, path_is_versioned = state._get_block_entry_index(
                 dirname, basename, 0)
             if path_is_versioned:
@@ -232,7 +241,8 @@
         state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         root_key, current_entry = self._get_entry(path='')
-        current_id = root_key[2].decode('utf8')
+        decode = _utf8_decode
+        current_id = decode(root_key[2])
         assert current_entry[0][0] == 'directory'
         inv = Inventory(root_id=current_id)
         # we could do this straight out of the dirstate; it might be fast
@@ -249,8 +259,8 @@
                 if entry[0][0] in ('absent', 'relocated'):
                     # a parent tree only entry
                     continue
-                name = key[1].decode('utf8')
-                file_id = key[2].decode('utf8')
+                name = decode(key[1])
+                file_id = decode(key[2])
                 kind, link_or_sha1, size, executable, stat = entry[0]
                 inv_entry = entry_factory[kind](file_id, name, parent_id)
                 if kind == 'file':
@@ -279,10 +289,11 @@
         if file_id is None and path is None:
             raise errors.BzrError('must supply file_id or path')
         state = self.current_dirstate()
+        encode = _utf8_encode
         if file_id is not None:
-            file_id = file_id.encode('utf8')
+            file_id = encode(file_id)
         if path is not None:
-            path = path.encode('utf8')
+            path = encode(path)
         return state._get_entry(0, fileid_utf8=file_id, path_utf8=path)
 
     def get_file_sha1(self, file_id, path=None, stat_value=None):
@@ -292,7 +303,7 @@
         # TODO:
         # if row stat is valid, use cached sha1, else, get a new sha1.
         if path is None:
-            path = os.path.join(*key[0:2]).decode('utf8')
+            path = _utf8_decode(os.path.join(*key[0:2]))
         return self._hashcache.get_sha1(path, stat_value)
 
     def _get_inventory(self):
@@ -316,23 +327,23 @@
     @needs_read_lock
     def get_root_id(self):
         """Return the id of this trees root"""
-        return self._get_entry(path='')[0][2].decode('utf8')
+        return _utf8_decode(self._get_entry(path='')[0][2])
 
     def has_id(self, file_id):
         state = self.current_dirstate()
-        fileid_utf8 = file_id.encode('utf8')
         row, parents = self._get_entry(file_id=file_id)
         if row is None:
             return False
+        decode = _utf8_decode
         return osutils.lexists(pathjoin(
-                    self.basedir, row[0].decode('utf8'), row[1].decode('utf8')))
+                    self.basedir, decode(row[0]), decode(row[1])))
 
     @needs_read_lock
     def id2path(self, fileid):
         state = self.current_dirstate()
         fileid_utf8 = fileid.encode('utf8')
         key, tree_details = state._get_entry(0, fileid_utf8=fileid_utf8)
-        return os.path.join(*key[0:2]).decode('utf8')
+        return _utf8_decode(os.path.join(*key[0:2]))
 
     @needs_read_lock
     def __iter__(self):
@@ -346,9 +357,10 @@
             if tree_details[0][0] in ('absent', 'relocated'):
                 # not relevant to the working tree
                 continue
-            path = pathjoin(self.basedir, key[0].decode('utf8'), key[1].decode('utf8'))
+            decode = _utf8_decode
+            path = pathjoin(self.basedir, decode(key[0]), decode(key[1]))
             if osutils.lexists(path):
-                result.append(key[2].decode('utf8'))
+                result.append(decode(key[2]))
         return iter(result)
 
     @needs_read_lock
@@ -379,7 +391,9 @@
                                        DeprecationWarning)
 
         assert not isinstance(from_paths, basestring)
-        to_dir_utf8 = to_dir.encode('utf8')
+        encode = _utf8_encode
+        decode = _utf8_decode
+        to_dir_utf8 = encode(to_dir)
         to_entry_dirname, to_basename = os.path.split(to_dir_utf8)
         # check destination directory
         # get the details for it
@@ -406,7 +420,7 @@
             update_inventory = True
             inv = self.inventory
             to_dir_ie = inv[to_dir_id]
-            to_dir_id = to_entry[0][2].decode('utf8')
+            to_dir_id = decode(to_entry[0][2])
         else:
             update_inventory = False
 
@@ -414,13 +428,13 @@
         for from_rel in from_paths:
             # from_rel is 'pathinroot/foo/bar'
             from_dirname, from_tail = os.path.split(from_rel)
-            from_dirname = from_dirname.encode('utf8')
+            from_dirname = encode(from_dirname)
             from_entry = self._get_entry(path=from_rel)
             if from_entry == (None, None):
                 raise errors.BzrMoveFailedError(from_rel,to_dir,
                     errors.NotVersionedError(path=str(from_rel)))
 
-            from_id = from_entry[0][2].decode('utf8')
+            from_id = decode(from_entry[0][2])
             to_rel = pathjoin(to_dir, from_tail)
             item_to_entry = self._get_entry(path=to_rel)
             if item_to_entry != (None, None):
@@ -486,7 +500,7 @@
                         lambda: inv.rename(from_id, current_parent, from_tail))
                 # finally do the rename in the dirstate, which is a little
                 # tricky to rollback, but least likely to need it.
-                basename = from_tail.encode('utf8')
+                basename = encode(from_tail)
                 old_block_index, old_entry_index, dir_present, file_present = \
                     state._get_block_entry_index(from_dirname, basename, 0)
                 old_block = state._dirblocks[old_block_index][1]
@@ -504,7 +518,7 @@
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
                         id_index=state._get_id_index(),
-                        path_utf8=from_rel.encode('utf8')))
+                        path_utf8=encode(from_rel)))
                 # create new row in current block
                 state.update_minimal(to_key,
                         old_entry_details[0][0],
@@ -514,7 +528,7 @@
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
                         id_index=state._get_id_index(),
-                        path_utf8=to_rel.encode('utf8'))
+                        path_utf8=encode(to_rel))
                 added_entry_index, _ = state._find_entry_index(to_key, to_block[1])
                 new_entry = to_block[added_entry_index]
                 rollbacks.append(lambda:state._make_absent(new_entry))
@@ -548,7 +562,7 @@
         entry = self._get_entry(path=path)
         if entry == (None, None):
             return None
-        return entry[0][2].decode('utf8')
+        return _utf8_decode(entry[0][2])
 
     def read_working_inventory(self):
         """Read the working inventory.
@@ -680,8 +694,9 @@
         state = self.current_dirstate()
         state._read_dirblocks_if_needed()
         ids_to_unversion = set()
+        encode = _utf8_encode
         for fileid in file_ids:
-            ids_to_unversion.add(fileid.encode('utf8'))
+            ids_to_unversion.add(encode(fileid))
         paths_to_unversion = set()
         # sketch:
         # check if the root is to be unversioned, if so, assert for now.
@@ -878,7 +893,8 @@
         # This is identical now to the WorkingTree _generate_inventory except
         # for the tree index use.
         root_key, current_entry = self._dirstate._get_entry(parent_index, path_utf8='')
-        current_id = root_key[2].decode('utf8')
+        decode = _utf8_decode
+        current_id = decode(root_key[2])
         assert current_entry[parent_index][0] == 'directory'
         inv = Inventory(root_id=current_id, revision_id=self._revision_id)
         inv.root.revision = current_entry[parent_index][4]
@@ -896,8 +912,8 @@
                 if entry[parent_index][0] in ('absent', 'relocated'):
                     # not this tree
                     continue
-                name = key[1].decode('utf8')
-                file_id = key[2].decode('utf8')
+                name = decode(key[1])
+                file_id = decode(key[2])
                 kind, link_or_sha1, size, executable, revid = entry[parent_index]
                 inv_entry = entry_factory[kind](file_id, name, parent_id)
                 inv_entry.revision = revid
@@ -910,7 +926,7 @@
                 elif kind == 'symlink':
                     inv_entry.executable = False
                     inv_entry.text_size = size
-                    inv_entry.symlink_target = link_or_sha1.decode('utf8')
+                    inv_entry.symlink_target = decode(link_or_sha1)
                 else:
                     raise Exception, kind
                 inv.add(inv_entry)



More information about the bazaar-commits mailing list