Rev 5365: id_index size drops to 230kB with StaticTuples, in http://bazaar.launchpad.net/~jameinel/bzr/2.3-dirstate-index
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 2 23:08:04 BST 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-dirstate-index
------------------------------------------------------------
revno: 5365
revision-id: john at arbash-meinel.com-20100802220754-jvtkt7dtuwon2gdk
parent: john at arbash-meinel.com-20100802214707-rz6zvnyb0t3tlbe5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-dirstate-index
timestamp: Mon 2010-08-02 17:07:54 -0500
message:
id_index size drops to 230kB with StaticTuples.
Even better, though, is that using StaticTuple in the parser gives us
an overall benefit of about 20%. It isn't our biggest data use, but
it is something, at least.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2010-08-02 21:47:07 +0000
+++ b/NEWS 2010-08-02 22:07:54 +0000
@@ -47,8 +47,9 @@
Improvements
************
-* ``DirState`` internals use a little bit less memory.
- (John Arbash Meinel)
+* ``DirState`` internals use a little bit less memory. For bzr.dev it
+ drops the memory from 1MB down to about 800kB, and replaces a few
+ thousand tuples and sets with StaticTuple. (John Arbash Meinel)
* When building new working trees, default to reading from the repository
rather than the source tree unless explicitly requested. (via
=== modified file 'bzrlib/_dirstate_helpers_pyx.pyx'
--- a/bzrlib/_dirstate_helpers_pyx.pyx 2010-05-20 02:57:52 +0000
+++ b/bzrlib/_dirstate_helpers_pyx.pyx 2010-08-02 22:07:54 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2007, 2008, 2010 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -118,6 +118,13 @@
# ??? memrchr is a GNU extension :(
# void *memrchr(void *s, int c, size_t len)
+# cimport all of the definitions we will need to access
+from _static_tuple_c cimport StaticTuple,\
+ import_static_tuple_c, StaticTuple_New, \
+ StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact, \
+ StaticTuple_GET_SIZE
+
+import_static_tuple_c()
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
# memrchr seems to be a GNU extension, so we have to implement it ourselves
@@ -650,7 +657,7 @@
# Build up the key that will be used.
# By using <object>(void *) Pyrex will automatically handle the
# Py_INCREF that we need.
- path_name_file_id_key = (<object>p_current_dirname[0],
+ path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
self.get_next_str(),
self.get_next_str(),
)
@@ -677,7 +684,7 @@
executable_cstr = self.get_next(&cur_size)
is_executable = (executable_cstr[0] == c'y')
info = self.get_next_str()
- PyList_Append(trees, (
+ PyList_Append(trees, StaticTuple(
minikind, # minikind
fingerprint, # fingerprint
entry_size, # size
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2010-08-02 21:43:15 +0000
+++ b/bzrlib/dirstate.py 2010-08-02 22:07:54 +0000
@@ -220,6 +220,7 @@
inventory,
lock,
osutils,
+ static_tuple,
trace,
)
@@ -2166,8 +2167,9 @@
# cause quadratic failure.
# TODO: This should use StaticTuple
file_id = entry_key[2]
+ entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
if file_id not in id_index:
- id_index[file_id] = (entry_key,)
+ id_index[file_id] = static_tuple.StaticTuple(entry_key,)
else:
entry_keys = id_index[file_id]
if entry_key not in entry_keys:
@@ -2180,9 +2182,9 @@
already present.
"""
file_id = entry_key[2]
- entry_keys = id_index[file_id]
- idx = entry_keys.index(entry_key)
- id_index[file_id] = entry_keys[:idx] + entry_keys[idx+1:]
+ entry_keys = list(id_index[file_id])
+ entry_keys.remove(entry_key)
+ id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
def _get_output_lines(self, lines):
"""Format lines for final output.
More information about the bazaar-commits mailing list