Rev 3819: Don't track state for an infrequent edge case. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

John Arbash Meinel john at arbash-meinel.com
Thu Feb 12 21:08:08 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

------------------------------------------------------------
revno: 3819
revision-id: john at arbash-meinel.com-20090212210741-1azx1cuyvrdl2lfy
parent: john at arbash-meinel.com-20090212205535-qcqdw8xdicm5es0i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Thu 2009-02-12 15:07:41 -0600
message:
  Don't track state for an infrequent edge case.
  
  Almost never will all search keys be identical. So rather than always tracking
  the state, add a function which can check. It is more expensive,
  but 99.9% of the time we never need to evaluate it.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-02-12 20:55:35 +0000
+++ b/bzrlib/chk_map.py	2009-02-12 21:07:41 +0000
@@ -455,8 +455,6 @@
         self._items = {}
         # The common search prefix
         self._search_prefix = None
-        # Do all of the keys in this leaf share an identical search_key?
-        self._all_search_keys_identical = None
 
     def __repr__(self):
         items_str = sorted(self._items)
@@ -658,20 +656,18 @@
         search_key = self._search_key(key)
         if self._search_prefix is None:
             self._search_prefix = search_key
-            self._all_search_keys_identical = True
         else:
-            old_search_prefix = self._search_prefix
             self._search_prefix = self.common_prefix(
                 self._search_prefix, search_key)
-            if (self._all_search_keys_identical
-                and (old_search_prefix != self._search_prefix
-                     or self._search_prefix != search_key)):
-                self._all_search_keys_identical = False
         if (self._len > 1
             and self._maximum_size
-            and self._current_size() > self._maximum_size
-            and not self._all_search_keys_identical):
-            return True
+            and self._current_size() > self._maximum_size):
+            # Check to see if all of the search_keys for this node are
+            # identical. We allow the node to grow under that circumstance
+            # (we could track this as common state, but it is infrequent)
+            if (search_key != self._search_prefix
+                or not self._are_search_keys_identical()):
+                return True
         return False
 
     def _split(self, store):
@@ -759,12 +755,24 @@
         """
         search_keys = [self._search_key(key) for key in self._items]
         self._search_prefix = self.common_prefix_for_keys(search_keys)
-        if len(set(search_keys)) == 1:
-            self._all_search_keys_identical = True
-        else:
-            self._all_search_keys_identical = False
         return self._search_prefix
 
+    def _are_search_keys_identical(self):
+        """Check to see if the search keys for all entries are the same.
+
+        When using a hash as the search_key it is possible for non-identical
+        keys to collide. If that happens enough, we may try overflow a
+        LeafNode, but as all are collisions, we must not split.
+        """
+        common_search_key = None
+        for key in self._items:
+            search_key = self._search_key(key)
+            if common_search_key is None:
+                common_search_key = search_key
+            elif search_key != common_search_key:
+                return False
+        return True
+
     def _compute_serialised_prefix(self):
         """Determine the common prefix for serialised keys in this node.
 



More information about the bazaar-commits mailing list