Rev 3818: Handle collisions. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

John Arbash Meinel john at arbash-meinel.com
Thu Feb 12 20:56:02 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

------------------------------------------------------------
revno: 3818
revision-id: john at arbash-meinel.com-20090212205535-qcqdw8xdicm5es0i
parent: john at arbash-meinel.com-20090212202555-eb721ebcbm3arun1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Thu 2009-02-12 14:55:35 -0600
message:
  Handle collisions.
  
  When using a hash trie, it is possible to have all keys hash to the
  same value, even though that would no-longer fit in the desired
  LeafNode maximum size.
  If this happens, we want to go ahead and just keep growing the
  LeafNode. (The alternative causes an infinite recursion as we
  try to put the keys in another node.)
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-01-21 23:04:50 +0000
+++ b/bzrlib/chk_map.py	2009-02-12 20:55:35 +0000
@@ -455,6 +455,8 @@
         self._items = {}
         # The common search prefix
         self._search_prefix = None
+        # Do all of the keys in this leaf share an identical search_key?
+        self._all_search_keys_identical = None
 
     def __repr__(self):
         items_str = sorted(self._items)
@@ -656,12 +658,19 @@
         search_key = self._search_key(key)
         if self._search_prefix is None:
             self._search_prefix = search_key
+            self._all_search_keys_identical = True
         else:
+            old_search_prefix = self._search_prefix
             self._search_prefix = self.common_prefix(
                 self._search_prefix, search_key)
+            if (self._all_search_keys_identical
+                and (old_search_prefix != self._search_prefix
+                     or self._search_prefix != search_key)):
+                self._all_search_keys_identical = False
         if (self._len > 1
             and self._maximum_size
-            and self._current_size() > self._maximum_size):
+            and self._current_size() > self._maximum_size
+            and not self._all_search_keys_identical):
             return True
         return False
 
@@ -750,6 +759,10 @@
         """
         search_keys = [self._search_key(key) for key in self._items]
         self._search_prefix = self.common_prefix_for_keys(search_keys)
+        if len(set(search_keys)) == 1:
+            self._all_search_keys_identical = True
+        else:
+            self._all_search_keys_identical = False
         return self._search_prefix
 
     def _compute_serialised_prefix(self):

=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2009-01-21 20:14:26 +0000
+++ b/bzrlib/tests/test_chk_map.py	2009-02-12 20:55:35 +0000
@@ -1003,6 +1003,10 @@
             chkmap._dump_tree(include_keys=True))
 
 
+def _search_key_single(key):
+    """A search key function that maps all nodes to the same value"""
+    return 'value'
+
 def _test_search_key(key):
     return 'test:' + '\x00'.join(key)
 
@@ -1128,6 +1132,21 @@
                              "      ('1',) 'foo'\n"
                              , chkmap._dump_tree())
 
+    def test_search_key_collisions(self):
+        chkmap = chk_map.CHKMap(self.get_chk_bytes(), None,
+                                search_key_func=_search_key_single)
+        # The node will want to expand, but it cannot, because it knows that
+        # all the keys must map to this node
+        chkmap._root_node.set_maximum_size(20)
+        chkmap.map(('1',), 'foo')
+        chkmap.map(('2',), 'bar')
+        chkmap.map(('3',), 'baz')
+        self.assertEqualDiff("'' LeafNode\n"
+                             "      ('1',) 'foo'\n"
+                             "      ('2',) 'bar'\n"
+                             "      ('3',) 'baz'\n"
+                             , chkmap._dump_tree())
+
 
 class TestSearchKeyFuncs(tests.TestCase):
 



More information about the bazaar-commits mailing list