Rev 2365: Inlining the conversion and using dirblock form instead of a list and then converting shaves another 50ms off. From 650ms => 600ms in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

John Arbash Meinel john at arbash-meinel.com
Thu Feb 22 17:48:51 GMT 2007


At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

------------------------------------------------------------
revno: 2365
revision-id: john at arbash-meinel.com-20070222174747-dlbg1982hqbjcjqu
parent: john at arbash-meinel.com-20070222165055-9n3m8v0qwjc5kh20
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Thu 2007-02-22 11:47:47 -0600
message:
  Inlining the conversion and building the dirblock form directly, instead of building a list and then converting it, shaves another 50ms off (from 650ms => 600ms).
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-22 16:48:42 +0000
+++ b/bzrlib/dirstate.py	2007-02-22 17:47:47 +0000
@@ -870,10 +870,75 @@
                     field_count - cur, expected_field_count, entry_size,
                     self._num_entries, fields)
 
-            fields_to_entry = self._get_fields_to_entry()
-            entries = [fields_to_entry(fields[pos:pos+entry_size])
-                       for pos in xrange(cur, field_count, entry_size)]
-            self._entries_to_current_state(entries)
+            if num_present_parents == 1:
+                # Bind external functions to local names
+                _mini_to_kind = DirState._minikind_to_kind
+                _int = int
+                # We access all fields in order, so we can just iterate over
+                # them. Grab a straight iterator over the fields. (We use an
+                # iterator because we don't want to do a lot of additions, nor
+                # do we want to do a lot of slicing.)
+                next = iter(fields).next
+                # Move the iterator to the current position
+                for x in xrange(cur):
+                    next()
+                # The two blocks here are deliberate: the root block and the
+                # contents-of-root block.
+                self._dirblocks = [('', []), ('', [])]
+                current_block = self._dirblocks[0][1]
+                current_dirname = ''
+                append_entry = current_block.append
+                for count in xrange(self._num_entries):
+                    dirname = next()
+                    name = next()
+                    file_id = next()
+                    if dirname != current_dirname:
+                        # new block - different dirname
+                        current_block = []
+                        current_dirname = dirname
+                        self._dirblocks.append((current_dirname, current_block))
+                        append_entry = current_block.append
+                    elif not dirname and name:
+                        # this is not a root entry for a tree (it has a basename)
+                        # TODO: jam 20070222 This is used to step from root
+                        #       block to contents of root block. We need a
+                        #       custom step, because they both have a path
+                        #       prefix of ''. However this branch is only
+                        #       needed for the first few rows, yet the check
+                        #       significantly impacts the parsing speed. We
+                        #       need to find a way to avoid this. We could
+                        #       either create an earlier loop which exits when
+                        #       this condition is met, or find a way to
+                        #       treat "root block" as different from
+                        #       "contents-of-root block".
+                        append_entry = self._dirblocks[-1][1].append
+                    # we know current_dirname == dirname, so re-use it to avoid
+                    # creating new strings
+                    entry = ((current_dirname, name, file_id),
+                             [(# Current Tree
+                                 _mini_to_kind[next()], # kind
+                                 next(),                # fingerprint
+                                 _int(next()),          # size
+                                 next() == 'y',         # executable
+                                 next(),                # packed_stat or revision_id
+                             ),
+                             ( # Parent 1
+                                 _mini_to_kind[next()], # kind
+                                 next(),                # fingerprint
+                                 _int(next()),          # size
+                                 next() == 'y',         # executable
+                                 next(),                # packed_stat or revision_id
+                             ),
+                             ])
+                    trailing = next()
+                    assert trailing == '\n'
+                    # append the entry to the current block
+                    append_entry(entry)
+            else:
+                fields_to_entry = self._get_fields_to_entry()
+                entries = [fields_to_entry(fields[pos:pos+entry_size])
+                           for pos in xrange(cur, field_count, entry_size)]
+                self._entries_to_current_state(entries)
             self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
     def _read_header(self):



More information about the bazaar-commits mailing list