Rev 5367: (spiv) Reduce the memory consumption of InventoryEntry instances. (Andrew in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Aug 5 08:02:34 BST 2010


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5367 [merge]
revision-id: pqm at pqm.ubuntu.com-20100805070232-ezo69a4k078j1xmp
parent: pqm at pqm.ubuntu.com-20100805050215-tbk9eoxr7x6z3nsl
parent: andrew.bennetts at canonical.com-20100805042103-pdc01ltvca2nu7bl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2010-08-05 08:02:32 +0100
message:
  (spiv) Reduce the memory consumption of InventoryEntry instances. (Andrew
   Bennetts)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/bundle/bundle_data.py   read_changeset.py-20050619171944-c0d95aa685537640
  bzrlib/bzrdir.py               bzrdir.py-20060131065624-156dfea39c4387cb
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/tests/test_bundle.py    test.py-20050630184834-092aa401ab9f039c
  bzrlib/tests/test_inv.py       testinv.py-20050722220913-1dc326138d1a5892
  bzrlib/tests/test_inventory_delta.py test_journalled_inv.-20080103012121-ny2w9slze5jgty8i-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS	2010-07-29 11:17:57 +0000
+++ b/NEWS	2010-08-03 13:31:06 +0000
@@ -68,6 +68,10 @@
 Improvements
 ************
 
+* Inventory entries now consume less memory (on 32-bit Ubuntu file entries
+  have dropped from 68 bytes to 40, and directory entries from 120 bytes
+  to 48).  (Andrew Bennetts)
+
 * When building new working trees, default to reading from the repository
   rather than the source tree unless explicitly requested. (via
   ``--files-from`` and ``--hardlink`` for ``bzr branch`` and
@@ -81,6 +85,15 @@
 API Changes
 ***********
 
+* InventoryEntry instances now raise AttributeError if you try to assign
+  to attributes that are irrelevant to that kind of entry.  e.g. setting
+  ``symlink_target`` on an InventoryFile will fail.  It is still okay to
+  read those attributes on any kind of InventoryEntry.  The complete list
+  of affected attributes is: ``executable``, ``text_id``, ``text_sha1``,
+  ``text_size`` (only valid for kind == file); ``symlink_target`` (only
+  valid for kind == link); and ``reference_revision`` (only valid for kind
+  == tree-reference).  (Andrew Bennetts)
+
 Internals
 *********
 

=== modified file 'bzrlib/bundle/bundle_data.py'
--- a/bzrlib/bundle/bundle_data.py	2010-06-16 12:47:51 +0000
+++ b/bzrlib/bundle/bundle_data.py	2010-08-05 04:21:03 +0000
@@ -715,12 +715,11 @@
                 ie.symlink_target = self.get_symlink_target(file_id)
             ie.revision = revision_id
 
-            if kind in ('directory', 'symlink'):
-                ie.text_size, ie.text_sha1 = None, None
-            else:
+            if kind == 'file':
                 ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
-            if (ie.text_size is None) and (kind == 'file'):
-                raise BzrError('Got a text_size of None for file_id %r' % file_id)
+                if ie.text_size is None:
+                    raise BzrError(
+                        'Got a text_size of None for file_id %r' % file_id)
             inv.add(ie)
 
         sorted_entries = self.sorted_path_id()

=== modified file 'bzrlib/bzrdir.py'
--- a/bzrlib/bzrdir.py	2010-06-28 03:12:48 +0000
+++ b/bzrlib/bzrdir.py	2010-08-05 04:21:03 +0000
@@ -2939,7 +2939,6 @@
         previous_entries = dict((head, parent_candiate_entries[head]) for head
             in heads)
         self.snapshot_ie(previous_entries, ie, w, rev_id)
-        del ie.text_id
 
     def get_parent_map(self, revision_ids):
         """See graph.StackedParentsProvider.get_parent_map"""

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2010-07-18 14:52:26 +0000
+++ b/bzrlib/inventory.py	2010-08-03 13:31:06 +0000
@@ -35,13 +35,11 @@
 import re
 import tarfile
 
-import bzrlib
 from bzrlib import (
     chk_map,
     errors,
     generate_ids,
     osutils,
-    symbol_versioning,
     )
 """)
 
@@ -49,7 +47,6 @@
     BzrCheckError,
     BzrError,
     )
-from bzrlib.symbol_versioning import deprecated_in, deprecated_method
 from bzrlib.trace import mutter
 from bzrlib.static_tuple import StaticTuple
 
@@ -131,7 +128,23 @@
     RENAMED = 'renamed'
     MODIFIED_AND_RENAMED = 'modified and renamed'
 
-    __slots__ = []
+    __slots__ = ['file_id', 'revision', 'parent_id', 'name']
+
+    # Attributes that all InventoryEntry instances are expected to have, but
+    # that don't vary for all kinds of entry.  (e.g. symlink_target is only
+    # relevant to InventoryLink, so there's no reason to make every
+    # InventoryFile instance allocate space to hold a value for it.)
+    # Attributes that only vary for files: executable, text_sha1, text_size,
+    # text_id
+    executable = False
+    text_sha1 = None
+    text_size = None
+    text_id = None
+    # Attributes that only vary for symlinks: symlink_target
+    symlink_target = None
+    # Attributes that only vary for tree-references: reference_revision
+    reference_revision = None
+
 
     def detect_changes(self, old_entry):
         """Return a (text_modified, meta_modified) from this to old_entry.
@@ -187,7 +200,7 @@
         """
         return False
 
-    def __init__(self, file_id, name, parent_id, text_id=None):
+    def __init__(self, file_id, name, parent_id):
         """Create an InventoryEntry
 
         The filename must be a single component, relative to the
@@ -204,16 +217,10 @@
         """
         if '/' in name or '\\' in name:
             raise errors.InvalidEntryName(name=name)
-        self.executable = False
+        self.file_id = file_id
         self.revision = None
-        self.text_sha1 = None
-        self.text_size = None
-        self.file_id = file_id
         self.name = name
-        self.text_id = text_id
         self.parent_id = parent_id
-        self.symlink_target = None
-        self.reference_revision = None
 
     def kind_character(self):
         """Return a short kind indicator useful for appending to names."""
@@ -379,16 +386,12 @@
 class InventoryDirectory(InventoryEntry):
     """A directory in an inventory."""
 
-    __slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
-                 'text_id', 'parent_id', 'children', 'executable',
-                 'revision', 'symlink_target', 'reference_revision']
+    __slots__ = ['children']
+
+    kind = 'directory'
 
     def _check(self, checker, rev_id):
         """See InventoryEntry._check"""
-        if (self.text_sha1 is not None or self.text_size is not None or
-            self.text_id is not None):
-            checker._report_items.append('directory {%s} has text in revision {%s}'
-                                % (self.file_id, rev_id))
         # In non rich root repositories we do not expect a file graph for the
         # root.
         if self.name == '' and not checker.rich_roots:
@@ -410,7 +413,6 @@
     def __init__(self, file_id, name, parent_id):
         super(InventoryDirectory, self).__init__(file_id, name, parent_id)
         self.children = {}
-        self.kind = 'directory'
 
     def kind_character(self):
         """See InventoryEntry.kind_character."""
@@ -433,9 +435,16 @@
 class InventoryFile(InventoryEntry):
     """A file in an inventory."""
 
-    __slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
-                 'text_id', 'parent_id', 'children', 'executable',
-                 'revision', 'symlink_target', 'reference_revision']
+    __slots__ = ['text_sha1', 'text_size', 'text_id', 'executable']
+
+    kind = 'file'
+
+    def __init__(self, file_id, name, parent_id):
+        super(InventoryFile, self).__init__(file_id, name, parent_id)
+        self.text_sha1 = None
+        self.text_size = None
+        self.text_id = None
+        self.executable = False
 
     def _check(self, checker, tree_revision_id):
         """See InventoryEntry._check"""
@@ -484,10 +493,6 @@
         """See InventoryEntry.has_text."""
         return True
 
-    def __init__(self, file_id, name, parent_id):
-        super(InventoryFile, self).__init__(file_id, name, parent_id)
-        self.kind = 'file'
-
     def kind_character(self):
         """See InventoryEntry.kind_character."""
         return ''
@@ -546,16 +551,16 @@
 class InventoryLink(InventoryEntry):
     """A file in an inventory."""
 
-    __slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
-                 'text_id', 'parent_id', 'children', 'executable',
-                 'revision', 'symlink_target', 'reference_revision']
+    __slots__ = ['symlink_target']
+
+    kind = 'symlink'
+
+    def __init__(self, file_id, name, parent_id):
+        super(InventoryLink, self).__init__(file_id, name, parent_id)
+        self.symlink_target = None
 
     def _check(self, checker, tree_revision_id):
         """See InventoryEntry._check"""
-        if self.text_sha1 is not None or self.text_size is not None or self.text_id is not None:
-            checker._report_items.append(
-               'symlink {%s} has text in revision {%s}'
-                    % (self.file_id, tree_revision_id))
         if self.symlink_target is None:
             checker._report_items.append(
                 'symlink {%s} has no target in revision {%s}'
@@ -599,10 +604,6 @@
         differ = DiffSymlink(old_tree, new_tree, output_to)
         return differ.diff_symlink(old_target, new_target)
 
-    def __init__(self, file_id, name, parent_id):
-        super(InventoryLink, self).__init__(file_id, name, parent_id)
-        self.kind = 'symlink'
-
     def kind_character(self):
         """See InventoryEntry.kind_character."""
         return ''
@@ -640,6 +641,8 @@
 
 class TreeReference(InventoryEntry):
 
+    __slots__ = ['reference_revision']
+
     kind = 'tree-reference'
 
     def __init__(self, file_id, name, parent_id, revision=None,
@@ -2192,17 +2195,13 @@
 class CHKInventoryDirectory(InventoryDirectory):
     """A directory in an inventory."""
 
-    __slots__ = ['text_sha1', 'text_size', 'file_id', 'name', 'kind',
-                 'text_id', 'parent_id', '_children', 'executable',
-                 'revision', 'symlink_target', 'reference_revision',
-                 '_chk_inventory']
+    __slots__ = ['_children', '_chk_inventory']
 
     def __init__(self, file_id, name, parent_id, chk_inventory):
         # Don't call InventoryDirectory.__init__ - it isn't right for this
         # class.
         InventoryEntry.__init__(self, file_id, name, parent_id)
         self._children = None
-        self.kind = 'directory'
         self._chk_inventory = chk_inventory
 
     @property

=== modified file 'bzrlib/tests/test_bundle.py'
--- a/bzrlib/tests/test_bundle.py	2010-02-23 07:43:11 +0000
+++ b/bzrlib/tests/test_bundle.py	2010-08-05 04:21:03 +0000
@@ -114,12 +114,12 @@
             ie = InventoryDirectory(file_id, name, parent_id)
         elif kind == 'file':
             ie = InventoryFile(file_id, name, parent_id)
+            ie.text_sha1 = text_sha_1
+            ie.text_size = text_size
         elif kind == 'symlink':
             ie = InventoryLink(file_id, name, parent_id)
         else:
             raise errors.BzrError('unknown kind %r' % kind)
-        ie.text_sha1 = text_sha_1
-        ie.text_size = text_size
         return ie
 
     def add_dir(self, file_id, path):

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2009-09-24 19:26:45 +0000
+++ b/bzrlib/tests/test_inv.py	2010-08-03 13:31:06 +0000
@@ -18,7 +18,6 @@
 from bzrlib import (
     chk_map,
     groupcompress,
-    bzrdir,
     errors,
     inventory,
     osutils,
@@ -607,12 +606,7 @@
 
     def test_dir_detect_changes(self):
         left = inventory.InventoryDirectory('123', 'hello.c', ROOT_ID)
-        left.text_sha1 = 123
-        left.executable = True
-        left.symlink_target='foo'
         right = inventory.InventoryDirectory('123', 'hello.c', ROOT_ID)
-        right.text_sha1 = 321
-        right.symlink_target='bar'
         self.assertEqual((False, False), left.detect_changes(right))
         self.assertEqual((False, False), right.detect_changes(left))
 
@@ -632,11 +626,8 @@
 
     def test_symlink_detect_changes(self):
         left = inventory.InventoryLink('123', 'hello.c', ROOT_ID)
-        left.text_sha1 = 123
-        left.executable = True
         left.symlink_target='foo'
         right = inventory.InventoryLink('123', 'hello.c', ROOT_ID)
-        right.text_sha1 = 321
         right.symlink_target='foo'
         self.assertEqual((False, False), left.detect_changes(right))
         self.assertEqual((False, False), right.detect_changes(left))

=== modified file 'bzrlib/tests/test_inventory_delta.py'
--- a/bzrlib/tests/test_inventory_delta.py	2009-08-13 00:20:29 +0000
+++ b/bzrlib/tests/test_inventory_delta.py	2010-08-03 13:31:06 +0000
@@ -387,9 +387,10 @@
         root = new_inv.make_entry('directory', '', None, 'my-rich-root-id')
         root.revision = 'changed'
         new_inv.add(root)
-        non_root = new_inv.make_entry('directory', 'foo', root.file_id, 'id')
+        class StrangeInventoryEntry(inventory.InventoryEntry):
+            kind = 'strange'
+        non_root = StrangeInventoryEntry('id', 'foo', root.file_id)
         non_root.revision = 'changed'
-        non_root.kind = 'strangelove'
         new_inv.add(non_root)
         delta = new_inv._make_delta(old_inv)
         serializer = inventory_delta.InventoryDeltaSerializer(
@@ -398,7 +399,7 @@
         # This test aims to prove that it errors more than how it errors.
         err = self.assertRaises(KeyError,
             serializer.delta_to_lines, NULL_REVISION, 'entry-version', delta)
-        self.assertEqual(('strangelove',), err.args)
+        self.assertEqual(('strange',), err.args)
 
     def test_tree_reference_disabled(self):
         old_inv = Inventory(None)

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2010-07-21 09:58:42 +0000
+++ b/bzrlib/workingtree_4.py	2010-08-03 13:31:06 +0000
@@ -1738,8 +1738,6 @@
                 elif kind == 'directory':
                     parent_ies[(dirname + '/' + name).strip('/')] = inv_entry
                 elif kind == 'symlink':
-                    inv_entry.executable = False
-                    inv_entry.text_size = None
                     inv_entry.symlink_target = utf8_decode(fingerprint)[0]
                 elif kind == 'tree-reference':
                     inv_entry.reference_revision = fingerprint or None




More information about the bazaar-commits mailing list