Rev 5365: id_index size drops to 230kB with StaticTuples. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-dirstate-index

John Arbash Meinel john at arbash-meinel.com
Mon Aug 2 23:08:04 BST 2010


At http://bazaar.launchpad.net/~jameinel/bzr/2.3-dirstate-index

------------------------------------------------------------
revno: 5365
revision-id: john at arbash-meinel.com-20100802220754-jvtkt7dtuwon2gdk
parent: john at arbash-meinel.com-20100802214707-rz6zvnyb0t3tlbe5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-dirstate-index
timestamp: Mon 2010-08-02 17:07:54 -0500
message:
  id_index size drops to 230kB with StaticTuples.
  Even better, though, is that using StaticTuple in the parser gives us
  an overall memory saving of about 20%. It isn't our biggest data use,
  but it is something, at least.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2010-08-02 21:47:07 +0000
+++ b/NEWS	2010-08-02 22:07:54 +0000
@@ -47,8 +47,9 @@
 Improvements
 ************
 
-* ``DirState`` internals use a little bit less memory.
-  (John Arbash Meinel)
+* ``DirState`` internals use a little bit less memory. For bzr.dev it
+  drops the memory from 1MB down to about 800kB, and replaces a few
+  thousand tuples and sets with StaticTuple.  (John Arbash Meinel)
 
 * When building new working trees, default to reading from the repository
   rather than the source tree unless explicitly requested. (via

=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'
--- a/bzrlib/_dirstate_helpers_pyx.pyx	2010-05-20 02:57:52 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx	2010-08-02 22:07:54 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2007, 2008, 2010 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -118,6 +118,13 @@
     # ??? memrchr is a GNU extension :(
     # void *memrchr(void *s, int c, size_t len)
 
+# cimport all of the definitions we will need to access
+from _static_tuple_c cimport StaticTuple,\
+    import_static_tuple_c, StaticTuple_New, \
+    StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact, \
+    StaticTuple_GET_SIZE
+
+import_static_tuple_c()
 
 cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
     # memrchr seems to be a GNU extension, so we have to implement it ourselves
@@ -650,7 +657,7 @@
         # Build up the key that will be used.
         # By using <object>(void *) Pyrex will automatically handle the
         # Py_INCREF that we need.
-        path_name_file_id_key = (<object>p_current_dirname[0],
+        path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
                                  self.get_next_str(),
                                  self.get_next_str(),
                                 )
@@ -677,7 +684,7 @@
             executable_cstr = self.get_next(&cur_size)
             is_executable = (executable_cstr[0] == c'y')
             info = self.get_next_str()
-            PyList_Append(trees, (
+            PyList_Append(trees, StaticTuple(
                 minikind,     # minikind
                 fingerprint,  # fingerprint
                 entry_size,   # size

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2010-08-02 21:43:15 +0000
+++ b/bzrlib/dirstate.py	2010-08-02 22:07:54 +0000
@@ -220,6 +220,7 @@
     inventory,
     lock,
     osutils,
+    static_tuple,
     trace,
     )
 
@@ -2166,8 +2167,9 @@
         # cause quadratic failure.
         # TODO: This should use StaticTuple
         file_id = entry_key[2]
+        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
         if file_id not in id_index:
-            id_index[file_id] = (entry_key,)
+            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
         else:
             entry_keys = id_index[file_id]
             if entry_key not in entry_keys:
@@ -2180,9 +2182,9 @@
         already present.
         """
         file_id = entry_key[2]
-        entry_keys = id_index[file_id]
-        idx = entry_keys.index(entry_key)
-        id_index[file_id] = entry_keys[:idx] + entry_keys[idx+1:]
+        entry_keys = list(id_index[file_id])
+        entry_keys.remove(entry_key)
+        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
 
     def _get_output_lines(self, lines):
         """Format lines for final output.



More information about the bazaar-commits mailing list