Rev 3819: Don't track state for an infrequent edge case. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
John Arbash Meinel
john at arbash-meinel.com
Thu Feb 12 21:08:08 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
------------------------------------------------------------
revno: 3819
revision-id: john at arbash-meinel.com-20090212210741-1azx1cuyvrdl2lfy
parent: john at arbash-meinel.com-20090212205535-qcqdw8xdicm5es0i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Thu 2009-02-12 15:07:41 -0600
message:
Don't track state for an infrequent edge case.
Almost never will all search keys be identical. So rather than always tracking
the state, add a function which can check. It is more expensive,
but 99.9% of the time we never need to evaluate it.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-02-12 20:55:35 +0000
+++ b/bzrlib/chk_map.py 2009-02-12 21:07:41 +0000
@@ -455,8 +455,6 @@
self._items = {}
# The common search prefix
self._search_prefix = None
- # Do all of the keys in this leaf share an identical search_key?
- self._all_search_keys_identical = None
def __repr__(self):
items_str = sorted(self._items)
@@ -658,20 +656,18 @@
search_key = self._search_key(key)
if self._search_prefix is None:
self._search_prefix = search_key
- self._all_search_keys_identical = True
else:
- old_search_prefix = self._search_prefix
self._search_prefix = self.common_prefix(
self._search_prefix, search_key)
- if (self._all_search_keys_identical
- and (old_search_prefix != self._search_prefix
- or self._search_prefix != search_key)):
- self._all_search_keys_identical = False
if (self._len > 1
and self._maximum_size
- and self._current_size() > self._maximum_size
- and not self._all_search_keys_identical):
- return True
+ and self._current_size() > self._maximum_size):
+ # Check to see if all of the search_keys for this node are
+ # identical. We allow the node to grow under that circumstance
+ # (we could track this as common state, but it is infrequent)
+ if (search_key != self._search_prefix
+ or not self._are_search_keys_identical()):
+ return True
return False
def _split(self, store):
@@ -759,12 +755,24 @@
"""
search_keys = [self._search_key(key) for key in self._items]
self._search_prefix = self.common_prefix_for_keys(search_keys)
- if len(set(search_keys)) == 1:
- self._all_search_keys_identical = True
- else:
- self._all_search_keys_identical = False
return self._search_prefix
+ def _are_search_keys_identical(self):
+ """Check to see if the search keys for all entries are the same.
+
+ When using a hash as the search_key it is possible for non-identical
+ keys to collide. If that happens enough, we may try overflow a
+ LeafNode, but as all are collisions, we must not split.
+ """
+ common_search_key = None
+ for key in self._items:
+ search_key = self._search_key(key)
+ if common_search_key is None:
+ common_search_key = search_key
+ elif search_key != common_search_key:
+ return False
+ return True
+
def _compute_serialised_prefix(self):
"""Determine the common prefix for serialised keys in this node.
More information about the bazaar-commits
mailing list