Rev 4709: Interning with a regular 'dict' is a tradeoff for bzr.dev of: in http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

John Arbash Meinel john at arbash-meinel.com
Tue Sep 29 16:37:04 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/2.1-memory-consumption

------------------------------------------------------------
revno: 4709
revision-id: john at arbash-meinel.com-20090929153659-arh13xituaerkzpm
parent: john at arbash-meinel.com-20090929152512-md6x01vbcb9rl2ul
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.1-memory-consumption
timestamp: Tue 2009-09-29 10:36:59 -0500
message:
  Interning with a regular 'dict' is a tradeoff for bzr.dev of:
  PeakWorking  110268KB   final  real    0m5.897s
  to
  PeakWorking  107380KB   final  real    0m6.100s
  
  So a small memory gain for a small performance loss.
  Caching the hash gives:
  PeakWorking  108700KB   final  real    0m6.193s
  
  somewhat surprisingly, absolutely no performance gain.
-------------- next part --------------
=== modified file 'bzrlib/_btree_serializer_pyx.pyx'
--- a/bzrlib/_btree_serializer_pyx.pyx	2009-09-28 19:14:08 +0000
+++ b/bzrlib/_btree_serializer_pyx.pyx	2009-09-29 15:36:59 +0000
@@ -179,7 +179,7 @@
             # PyTuple_SET_ITEM(key, loop_counter, key_element)
             Key_SET_ITEM(key, loop_counter, key_element)
         # return _keys_type_c.Key(*key)
-        return key
+        return key.intern()
 
     cdef int process_line(self) except -1:
         """Process a line in the bytes."""

=== modified file 'bzrlib/_keys_type_py.py'
--- a/bzrlib/_keys_type_py.py	2009-09-29 15:25:12 +0000
+++ b/bzrlib/_keys_type_py.py	2009-09-29 15:36:59 +0000
@@ -61,6 +61,9 @@
     def as_tuple(self):
         return self._tuple
 
+    def intern(self):
+        return _intern.setdefault(self, self)
+
 
 
 def Keys(width, *args):

=== modified file 'bzrlib/tests/test__keys_type.py'
--- a/bzrlib/tests/test__keys_type.py	2009-09-29 15:25:12 +0000
+++ b/bzrlib/tests/test__keys_type.py	2009-09-29 15:36:59 +0000
@@ -357,3 +357,21 @@
         self.assertIsNot(key, key2)
 
         refcount = sys.getrefcount(key)
+        self.assertEqual(2, refcount)
+
+        # TODO: Eventually the C version will diverge from the python version
+        #       here. Namely, it will follow the String route, and interning()
+        #       a string will not make it immortal. Instead it will remove
+        #       itself from the intern dict when all other references are
+        #       removed.
+        #       Further, the intern dict may become a custom type rather than a
+        #       pure 'dict'.
+        key3 = key.intern()
+        self.assertIs(key, key3)
+        self.assertTrue(key in self.module._intern)
+        self.assertEqual(key, self.module._intern[key])
+        del key3
+        self.assertEqual(4, sys.getrefcount(key))
+        key2 = key2.intern()
+        self.assertEqual(5, sys.getrefcount(key))
+        self.assertIs(key, key2)



More information about the bazaar-commits mailing list