Rev 4442: (jam) Tweak chk_map.InternalNode._iter_nodes to optimize common cases. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm@pqm.ubuntu.com
Mon Jun 15 16:47:50 BST 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4442
revision-id: pqm@pqm.ubuntu.com-20090615154745-ma7p8rkmyegrzodf
parent: pqm@pqm.ubuntu.com-20090615120359-3a9650kr76j1jyf2
parent: john@arbash-meinel.com-20090615144927-odc6zdsutx7on3v1
committer: Canonical.com Patch Queue Manager <pqm@pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-06-15 16:47:45 +0100
message:
(jam) Tweak chk_map.InternalNode._iter_nodes to optimize common cases.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
bzrlib/workingtree.py workingtree.py-20050511021032-29b6ec0a681e02e3
------------------------------------------------------------
revno: 4413.4.4
revision-id: john@arbash-meinel.com-20090615144927-odc6zdsutx7on3v1
parent: john@arbash-meinel.com-20090605195715-q2gcpaypbixwk4wg
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: 1.16-chkmap-updates
timestamp: Mon 2009-06-15 09:49:27 -0500
message:
Fix some type() == tuple to be 'type() is tuple' or '.__class__ is tuple'
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
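The `type(x) == tuple` to `type(x) is tuple` change can be illustrated with a small standalone sketch (not part of the patch): for exact-type tests, identity comparison is a plain pointer check that skips Python's rich-comparison dispatch, and `x.__class__ is tuple` additionally avoids the `type()` call.

```python
# Standalone illustration (not from the patch) of exact-type checks.
# All three forms agree for plain tuples; unlike isinstance(), they
# deliberately reject tuple subclasses.
node = ('sha1:abcdef',)

assert type(node) == tuple        # equality: dispatches rich comparison
assert type(node) is tuple        # identity: a plain pointer comparison
assert node.__class__ is tuple    # identity, and avoids the type() call

class TupleSubclass(tuple):
    pass

sub = TupleSubclass()
assert isinstance(sub, tuple)     # isinstance() accepts subclasses
assert type(sub) is not tuple     # the exact-type checks do not
assert sub.__class__ is not tuple
```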
------------------------------------------------------------
revno: 4413.4.3
revision-id: john@arbash-meinel.com-20090605195715-q2gcpaypbixwk4wg
parent: john@arbash-meinel.com-20090605180340-3dqyamsptwix22m0
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: 1.16-chkmap-updates
timestamp: Fri 2009-06-05 14:57:15 -0500
message:
Move the boolean check to the first part of the if statement
modified:
bzrlib/workingtree.py workingtree.py-20050511021032-29b6ec0a681e02e3
------------------------------------------------------------
revno: 4413.4.2
revision-id: john@arbash-meinel.com-20090605180340-3dqyamsptwix22m0
parent: john@arbash-meinel.com-20090605150139-osvqaqcv0bk55cuc
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: 1.16-chkmap-updates
timestamp: Fri 2009-06-05 13:03:40 -0500
message:
Rewrite the shortcuts.
Keep the single-entry key_filter shortcut, because it can be heavily tuned
to avoid building any intermediate structures.
Do a bit more optimizing in the multi-key cases: if all of the keys are
full-width, then we can still do optimized self._items[search_key] lookups.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
------------------------------------------------------------
revno: 4413.4.1
revision-id: john@arbash-meinel.com-20090605150139-osvqaqcv0bk55cuc
parent: pqm@pqm.ubuntu.com-20090605081039-abvojdsxjbg5i4ff
committer: John Arbash Meinel <john@arbash-meinel.com>
branch nick: 1.16-chkmap-updates
timestamp: Fri 2009-06-05 10:01:39 -0500
message:
Add a shortcut for the case when we are searching for a single full-width key.
This saves approx 14.0s => 12.5s for 'initial commit' of a MySQL tree.
Mostly it is about improving 'InternalNode.map()' performance for a very large 'apply_delta'.
Needs further testing to ensure it handles edge cases.
Also, the generic code path could probably be tweaked a bit as well:
split the lookups into full-width matches versus sub-matches,
and then combine keys whose search prefixes are identical, etc.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
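The full-width-key shortcut described above can be sketched in isolation (a minimal sketch with invented names, not the patch's actual code): when a search prefix is exactly as wide as the node's child prefixes, at most one child can match, so a single dict lookup replaces a scan over every child.

```python
# Minimal sketch (invented names, not the patch's code) of the
# full-width shortcut. `items` maps fixed-width prefixes to children.
def iter_matching_children(items, node_width, search_prefix):
    if len(search_prefix) == node_width:
        # Full-width prefix: at most one child can match, so a single
        # dict lookup replaces iterating over every child.
        try:
            yield search_prefix, items[search_prefix]
        except KeyError:
            pass  # no child matches; nothing to yield
        return
    # Generic path: compare the prefix against every child prefix.
    for prefix, child in items.items():
        if prefix.startswith(search_prefix):
            yield prefix, child

items = {'fo': 'leaf-fo', 'fa': 'leaf-fa'}
print(list(iter_matching_children(items, 2, 'fo')))  # [('fo', 'leaf-fo')]
print(sorted(iter_matching_children(items, 2, 'f')))  # both children match
print(list(iter_matching_children(items, 2, 'ba')))   # []
```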
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-05-13 21:59:57 +0000
+++ b/bzrlib/chk_map.py 2009-06-15 14:49:27 +0000
@@ -121,7 +121,7 @@
def _ensure_root(self):
"""Ensure that the root node is an object not a key."""
- if type(self._root_node) == tuple:
+ if type(self._root_node) is tuple:
# Demand-load the root
self._root_node = self._get_node(self._root_node)
@@ -135,7 +135,7 @@
:param node: A tuple key or node object.
:return: A node object.
"""
- if type(node) == tuple:
+ if type(node) is tuple:
bytes = self._read_bytes(node)
return _deserialise(bytes, node,
search_key_func=self._search_key_func)
@@ -465,7 +465,7 @@
def _node_key(self, node):
"""Get the key for a node whether it's a tuple or node."""
- if type(node) == tuple:
+ if type(node) is tuple:
return node
else:
return node._key
@@ -491,7 +491,7 @@
:return: The key of the root node.
"""
- if type(self._root_node) == tuple:
+ if type(self._root_node) is tuple:
# Already saved.
return self._root_node
keys = list(self._root_node.serialise(self._store))
@@ -955,34 +955,99 @@
# prefix is the key in self._items to use, key_filter is the key_filter
# entries that would match this node
keys = {}
+ shortcut = False
if key_filter is None:
+ # yielding all nodes, yield whatever we have, and queue up a read
+ # for whatever we are missing
+ shortcut = True
for prefix, node in self._items.iteritems():
- if type(node) == tuple:
+ if node.__class__ is tuple:
keys[node] = (prefix, None)
else:
yield node, None
- else:
- # XXX defaultdict ?
+ elif len(key_filter) == 1:
+ # Technically, this path could also be handled by the first check
+ # in 'self._node_width' in length_filters. However, we can handle
+ # this case without spending any time building up the
+ # prefix_to_keys, etc state.
+
+ # This is a bit ugly, but TIMEIT showed it to be by far the fastest
+ # 0.626us list(key_filter)[0]
+ # is a func() for list(), 2 mallocs, and a getitem
+ # 0.489us [k for k in key_filter][0]
+ # still has the mallocs, avoids the func() call
+ # 0.350us iter(key_filter).next()
+ # has a func() call, and mallocs an iterator
+ # 0.125us for key in key_filter: pass
+ # no func() overhead, might malloc an iterator
+ # 0.105us for key in key_filter: break
+ # no func() overhead, might malloc an iterator, probably
+ # avoids checking an 'else' clause as part of the for
+ for key in key_filter:
+ break
+ search_prefix = self._search_prefix_filter(key)
+ if len(search_prefix) == self._node_width:
+ # This item will match exactly, so just do a dict lookup, and
+ # see what we can return
+ shortcut = True
+ try:
+ node = self._items[search_prefix]
+ except KeyError:
+ # A given key can only match 1 child node, if it isn't
+ # there, then we can just return nothing
+ return
+ if node.__class__ is tuple:
+ keys[node] = (search_prefix, [key])
+ else:
+ # This is loaded, and the only thing that can match,
+ # return
+ yield node, [key]
+ return
+ if not shortcut:
+ # First, convert all keys into a list of search prefixes
+ # Aggregate common prefixes, and track the keys they come from
prefix_to_keys = {}
length_filters = {}
for key in key_filter:
- search_key = self._search_prefix_filter(key)
+ search_prefix = self._search_prefix_filter(key)
length_filter = length_filters.setdefault(
- len(search_key), set())
- length_filter.add(search_key)
- prefix_to_keys.setdefault(search_key, []).append(key)
- length_filters = length_filters.items()
- for prefix, node in self._items.iteritems():
- node_key_filter = []
- for length, length_filter in length_filters:
- sub_prefix = prefix[:length]
- if sub_prefix in length_filter:
- node_key_filter.extend(prefix_to_keys[sub_prefix])
- if node_key_filter: # this key matched something, yield it
- if type(node) == tuple:
- keys[node] = (prefix, node_key_filter)
+ len(search_prefix), set())
+ length_filter.add(search_prefix)
+ prefix_to_keys.setdefault(search_prefix, []).append(key)
+
+ if (self._node_width in length_filters
+ and len(length_filters) == 1):
+ # all of the search prefixes match exactly _node_width. This
+ # means that everything is an exact match, and we can do a
+ # lookup into self._items, rather than iterating over the items
+ # dict.
+ search_prefixes = length_filters[self._node_width]
+ for search_prefix in search_prefixes:
+ try:
+ node = self._items[search_prefix]
+ except KeyError:
+ # We can ignore this one
+ continue
+ node_key_filter = prefix_to_keys[search_prefix]
+ if node.__class__ is tuple:
+ keys[node] = (search_prefix, node_key_filter)
else:
yield node, node_key_filter
+ else:
+ # The slow way. We walk every item in self._items, and check to
+ # see if there are any matches
+ length_filters = length_filters.items()
+ for prefix, node in self._items.iteritems():
+ node_key_filter = []
+ for length, length_filter in length_filters:
+ sub_prefix = prefix[:length]
+ if sub_prefix in length_filter:
+ node_key_filter.extend(prefix_to_keys[sub_prefix])
+ if node_key_filter: # this key matched something, yield it
+ if node.__class__ is tuple:
+ keys[node] = (prefix, node_key_filter)
+ else:
+ yield node, node_key_filter
if keys:
# Look in the page cache for some more bytes
found_keys = set()
@@ -1117,7 +1182,7 @@
:return: An iterable of the keys inserted by this operation.
"""
for node in self._items.itervalues():
- if type(node) == tuple:
+ if type(node) is tuple:
# Never deserialised.
continue
if node._key is not None:
@@ -1134,7 +1199,7 @@
lines.append('%s\n' % (self._search_prefix,))
prefix_len = len(self._search_prefix)
for prefix, node in sorted(self._items.items()):
- if type(node) == tuple:
+ if type(node) is tuple:
key = node[0]
else:
key = node._key[0]
@@ -1179,7 +1244,7 @@
raise AssertionError("unserialised nodes have no refs.")
refs = []
for value in self._items.itervalues():
- if type(value) == tuple:
+ if type(value) is tuple:
refs.append(value)
else:
refs.append(value.key())
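The TIMEIT comment in the hunk above compares ways of binding the first element of a set. A standalone sketch of the idioms involved (absolute timings vary by machine and Python version):

```python
# The `for ... break` idiom from the patch: bind the first element of
# any iterable without the overhead of list(), iter(), or next().
import timeit

key_filter = {('foo',)}

def via_list(s):
    return list(s)[0]     # func call, extra allocation, then getitem

def via_next(s):
    return next(iter(s))  # func call plus an iterator allocation

def via_for(s):
    for key in s:         # no explicit function-call overhead
        break
    return key

assert via_list(key_filter) == via_next(key_filter) == via_for(key_filter)

for fn in (via_list, via_next, via_for):
    t = timeit.timeit(lambda: fn(key_filter), number=100_000)
    print('%-8s %.4fs' % (fn.__name__, t))
```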
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 2009-05-13 21:59:57 +0000
+++ b/bzrlib/tests/test_chk_map.py 2009-06-05 18:03:40 +0000
@@ -1560,13 +1560,66 @@
child.map(None, ("baz",), "val")
node.add_node("b", child)
- key_filter = (('foo',), ('fob',), ('bar',), ('baz',))
+ # Note that 'ram' doesn't match anything, so it should be freely
+ # ignored
+ key_filter = (('foo',), ('fob',), ('bar',), ('baz',), ('ram',))
for child, node_key_filter in node._iter_nodes(None,
key_filter=key_filter):
- # each child could matches two key filters, so make sure they were
+ # each child could match two key filters, so make sure they were
# both included.
self.assertEqual(2, len(node_key_filter))
+ def make_fo_fa_node(self):
+ node = InternalNode('f')
+ child = LeafNode()
+ child.set_maximum_size(100)
+ child.map(None, ("foo",), "val")
+ child.map(None, ("fob",), "val")
+ node.add_node('fo', child)
+ child = LeafNode()
+ child.set_maximum_size(100)
+ child.map(None, ("far",), "val")
+ child.map(None, ("faz",), "val")
+ node.add_node("fa", child)
+ return node
+
+ def test__iter_nodes_single_entry(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(1, len(nodes))
+ self.assertEqual(key_filter, nodes[0][1])
+
+ def test__iter_nodes_single_entry_misses(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('bar',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(0, len(nodes))
+
+ def test__iter_nodes_mixed_key_width(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('b',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(1, len(nodes))
+ matches = key_filter[:]
+ matches.remove(('b',))
+ self.assertEqual(sorted(matches), sorted(nodes[0][1]))
+
+ def test__iter_nodes_match_all(self):
+ node = self.make_fo_fa_node()
+ key_filter = [('foo', 'bar'), ('foo',), ('fo',), ('f',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(2, len(nodes))
+
+ def test__iter_nodes_fixed_widths_and_misses(self):
+ node = self.make_fo_fa_node()
+ # foo and faa should both match one child, baz should miss
+ key_filter = [('foo',), ('faa',), ('baz',)]
+ nodes = list(node._iter_nodes(None, key_filter=key_filter))
+ self.assertEqual(2, len(nodes))
+ for node, matches in nodes:
+ self.assertEqual(1, len(matches))
+
def test_iteritems_empty_new(self):
node = InternalNode()
self.assertEqual([], sorted(node.iteritems(None)))
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2009-06-10 03:56:49 +0000
+++ b/bzrlib/workingtree.py 2009-06-15 15:47:45 +0000
@@ -451,7 +451,7 @@
path = self.id2path(file_id)
file_obj = self.get_file_byname(path, filtered=False)
stat_value = _fstat(file_obj.fileno())
- if self.supports_content_filtering() and filtered:
+ if filtered and self.supports_content_filtering():
filters = self._content_filter_stack(path)
file_obj = filtered_input_file(file_obj, filters)
return (file_obj, stat_value)
@@ -462,7 +462,7 @@
def get_file_byname(self, filename, filtered=True):
path = self.abspath(filename)
f = file(path, 'rb')
- if self.supports_content_filtering() and filtered:
+ if filtered and self.supports_content_filtering():
filters = self._content_filter_stack(filename)
return filtered_input_file(f, filters)
else:
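The operand swap in both workingtree.py hunks exploits `and`'s short-circuit evaluation: testing the cheap `filtered` flag first skips the `supports_content_filtering()` method call whenever filtering is off. A small sketch with an invented stand-in for the tree method:

```python
# Sketch (invented stand-in) of the short-circuit win in the patch:
# with `filtered and supports_content_filtering()`, the method call
# is skipped entirely whenever filtered is False.
calls = []

def supports_content_filtering():
    calls.append('checked')
    return True

filtered = False

# Old operand order: the method runs even though filtered is False.
if supports_content_filtering() and filtered:
    pass
assert calls == ['checked']

# New operand order: `and` short-circuits, so the method never runs.
if filtered and supports_content_filtering():
    pass
assert calls == ['checked']  # still only the one call from above
```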