Rev 5610: Very quick hack. If we just switch to using classes, and avoid a couple extra mallocs, in http://bazaar.launchpad.net/~jameinel/2.x-dirstate-speed

Thu Jan 13 22:19:15 UTC 2011

At http://bazaar.launchpad.net/~jameinel/2.x-dirstate-speed

------------------------------------------------------------
revno: 5610
revision-id: john at arbash-meinel.com-20110113221908-ena676gt1losslck
parent: pqm at pqm.ubuntu.com-20110113165202-44k299aa1yzyy6yk
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.x-dirstate-speed
timestamp: Thu 2011-01-13 16:19:08 -0600
message:
  Very quick hack. If we just switch to using classes, and avoid a couple extra mallocs,
  we can get down to 20ms for a 10k entry tree, down from 25ms.
  Avoiding string allocation is ~2ms for each string we can avoid per row.
  Even at 100k entries, this should only be 200/250ms out of total time.
  It is unlikely it will be a major factor in performance time.
-------------- next part --------------
=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'

--- a/bzrlib/_dirstate_helpers_pyx.pyx	2010-08-27 18:02:22 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx	2011-01-13 22:19:08 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2007-2010 Canonical Ltd
+# Copyright (C) 2007-2011 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -527,6 +527,7 @@
             _hi = _mid
     return _lo
 
+cdef class DirstateRow
 
 cdef class Reader:
     """Maintain the current location, and return fields as you parse them."""
@@ -594,8 +595,8 @@
                                  % (first,))
         return 0
 
-    cdef object _get_entry(self, int num_trees, void **p_current_dirname,
-                           int *new_block):
+    cdef DirstateRow _get_row(self, int num_trees, void **p_current_dirname,
+                              int *new_block):
         """Extract the next entry.
 
         This parses the next entry based on the current location in
@@ -609,24 +610,23 @@
             representing the directory name.
             We pass this in as a void * so that pyrex doesn't have to
             increment/decrement the PyObject reference counter for each
-            _get_entry call.
-            We use a pointer so that _get_entry can update it with the new
+            _get_row call.
+            We use a pointer so that _get_row can update it with the new
             value.
         :param new_block: This is to let the caller know that it needs to
             create a new directory block to store the next entry.
         """
-        cdef StaticTuple path_name_file_id_key
-        cdef StaticTuple tmp
+        cdef DirstateRow row
+        cdef StaticTuple key
+        cdef DirstateEntry entry
         cdef char *entry_size_cstr
-        cdef unsigned long int entry_size
+        cdef char *minikind_cstr
         cdef char* executable_cstr
         cdef int is_executable
         cdef char* dirname_cstr
         cdef char* trailing
         cdef int cur_size
         cdef int i
-        cdef object minikind
-        cdef object fingerprint
         cdef object info
 
         # Read the 'key' information (dirname, name, file_id)
@@ -663,13 +663,12 @@
         #                          self.get_next_str(),
         #                          self.get_next_str(),
         #                         )
-        tmp = StaticTuple_New(3)
-        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
-        cur_basename = self.get_next_str()
-        cur_file_id = self.get_next_str()
-        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
-        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
-        path_name_file_id_key = tmp
+        # XXX: file_ids should be interned?
+        row = DirstateRow()
+        # row.key = DirstateKey(cur_dirname, self.get_next_str(),
+        #     self.get_next_str())
+        row.key = StaticTuple(cur_dirname, self.get_next_str(),
+                              self.get_next_str())
 
         # Parse all of the per-tree information. current has the information in
         # the same location as parent trees. The only difference is that 'info'
@@ -684,38 +683,30 @@
         #       work with, and it will be rare that we have a file >2GB.
         #       Especially since this code is pretty much fixed at a max of
         #       4GB.
-        trees = []
         for i from 0 <= i < num_trees:
-            minikind = self.get_next_str()
-            fingerprint = self.get_next_str()
+            entry = DirstateEntry()
+            minikind_cstr = self.get_next(&cur_size)
+            assert cur_size == 1
+            entry.minikind = minikind_cstr[0]
+            # entry.fingerprint = self.get_next_str()
+            self.get_next(&cur_size)
             entry_size_cstr = self.get_next(&cur_size)
-            entry_size = strtoul(entry_size_cstr, NULL, 10)
+            entry.size = strtoul(entry_size_cstr, NULL, 10)
             executable_cstr = self.get_next(&cur_size)
-            is_executable = (executable_cstr[0] == c'y')
-            info = self.get_next_str()
-            # TODO: If we want to use StaticTuple_New here we need to be pretty
-            #       careful. We are relying on a bit of Pyrex
-            #       automatic-conversion from 'int' to PyInt, and that doesn't
-            #       play well with the StaticTuple_SET_ITEM macro.
-            #       Timing doesn't (yet) show a worthwile improvement in speed
-            #       versus complexity and maintainability.
-            # tmp = StaticTuple_New(5)
-            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
-            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
-            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
-            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
-            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
-            # PyList_Append(trees, tmp)
-            PyList_Append(trees, StaticTuple(
-                minikind,     # minikind
-                fingerprint,  # fingerprint
-                entry_size,   # size
-                is_executable,# executable
-                info,         # packed_stat or revision_id
-            ))
+            entry.executable = (executable_cstr[0] == c'y')
+            self.get_next(&cur_size)
+            # entry.info = self.get_next_str()
+            if i == 0:
+                row.current = entry
+            elif i == 1:
+                row.base = entry
+            else:
+                if i == 2:
+                    row.extra_trees = []
+                PyList_Append(row.extra_trees, entry)
 
         # The returned tuple is (key, [trees])
-        ret = (path_name_file_id_key, trees)
+        # ret = (path_name_file_id_key, trees)
         # Ignore the trailing newline, but assert that it does exist, this
         # ensures that we always finish parsing a line on an end-of-entry
         # marker.
@@ -724,14 +715,14 @@
             raise errors.DirstateCorrupt(self.state,
                 'Bad parse, we expected to end on \\n, not: %d %s: %s'
                 % (cur_size, safe_string_from_size(trailing, cur_size),
-                   ret))
-        return ret
+                   row))
+        return row
 
     def _parse_dirblocks(self):
         """Parse all dirblocks in the state file."""
         cdef int num_trees
         cdef object current_block
-        cdef object entry
+        cdef DirstateRow row
         cdef void * current_dirname
         cdef int new_block
         cdef int expected_entry_count
@@ -759,20 +750,20 @@
         #       so), and then truncate. That would give us a malloc + realloc,
         #       rather than lots of reallocs.
         while self.cur_cstr < self.end_cstr:
-            entry = self._get_entry(num_trees, &current_dirname, &new_block)
+            row = self._get_row(num_trees, &current_dirname, &new_block)
             if new_block:
                 # new block - different dirname
                 current_block = []
                 PyList_Append(dirblocks,
                               (<object>current_dirname, current_block))
-            PyList_Append(current_block, entry)
+            PyList_Append(current_block, row)
             entry_count = entry_count + 1
         if entry_count != expected_entry_count:
             raise errors.DirstateCorrupt(self.state,
                     'We read the wrong number of entries.'
                     ' We expected to read %s, but read %s'
                     % (expected_entry_count, entry_count))
-        self.state._split_root_dirblock_into_contents()
+        # self.state._split_root_dirblock_into_contents()
 
 
 def _read_dirblocks(state):
@@ -811,7 +802,6 @@
 _encode = binascii.b2a_base64
 
 
-from struct import pack
 cdef _pack_stat(stat_value):
     """return a string representing the stat value's key fields.
 
@@ -2002,3 +1992,43 @@
                 self.root_dir_info = self.root_dir_info[:2] + \
                     ('tree-reference',) + self.root_dir_info[3:]
         return dir_info
+
+
+
+# TODO: Remove this from gc, potentially via PyObject* instead of object
+cdef class DirstateKey:
+    cdef public object utf8_dirname
+    cdef public object utf8_basename
+    cdef public object utf8_fileid
+
+    def __init__(self, utf8_dirname, utf8_basename, utf8_fileid):
+        self.utf8_dirname = utf8_dirname
+        self.utf8_basename = utf8_basename
+        self.utf8_fileid = utf8_fileid
+
+cdef class DirstateEntry:
+    cdef public object fingerprint # eg sha hash, symlink target, subtree revision
+    cdef public object info # packed_stat or revision_id
+    cdef public unsigned long size # 64-bit always?
+    cdef public char minikind
+    cdef public bint executable
+
+
+cdef class DirstateRow:
+    cdef public StaticTuple key
+    # Offset 0
+    cdef public DirstateEntry current
+    # Offset 1
+    cdef public DirstateEntry base
+    # Rest of the trees, this could be a fixed size tuple, etc
+    cdef public list extra_trees
+
+    property trees:
+        def __get__(self):
+            cdef list trees
+            trees = [self.current]
+            if self.base is not None:
+                trees.append(self.base)
+            if self.extra_trees is not None:
+                trees.extend(self.extra_trees)
+            return trees