Rev 3816: Start parameterizing CHKInventory and CHKSerializer so that we can have different repository formats which use different hash keys, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

John Arbash Meinel john at arbash-meinel.com
Wed Jan 21 23:04:59 GMT 2009


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key

------------------------------------------------------------
revno: 3816
revision-id: john at arbash-meinel.com-20090121230450-rcv4y4r3wsee87r8
parent: john at arbash-meinel.com-20090121221958-73e6ejetze235lpn
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Wed 2009-01-21 17:04:50 -0600
message:
  Start parameterizing CHKInventory and CHKSerializer so that we can
  have different repository formats which use different hash keys.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-01-21 22:19:58 +0000
+++ b/bzrlib/chk_map.py	2009-01-21 23:04:50 +0000
@@ -46,7 +46,10 @@
 
 from bzrlib import versionedfile
 """)
-from bzrlib import lru_cache
+from bzrlib import (
+    lru_cache,
+    registry,
+    )
 
 # approx 2MB
 # If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
@@ -76,6 +79,12 @@
     return bytes.replace('\n', '_')
 
 
+search_key_registry = registry.Registry()
+search_key_registry.register('plain', _search_key_plain)
+search_key_registry.register('hash-16-way', _search_key_16)
+search_key_registry.register('hash-255-way', _search_key_255)
+
+
 class CHKMap(object):
     """A persistent map from string to string backed by a CHK store."""
 

=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py	2008-11-14 01:28:40 +0000
+++ b/bzrlib/chk_serializer.py	2009-01-21 23:04:50 +0000
@@ -46,9 +46,11 @@
         else:
             return xml6.Serializer_v6._unpack_entry(self, elt)
 
-    def __init__(self, node_size, parent_id_basename_index):
+    def __init__(self, node_size, parent_id_basename_index,
+                 search_key_name):
         self.maximum_size = node_size
         self.parent_id_basename_index = parent_id_basename_index
+        self.search_key_name = search_key_name
 
 
 class CHKSerializer(xml5.Serializer_v5):
@@ -58,12 +60,16 @@
     revision_format_num = None
     support_altered_by_hack = False
 
-    def __init__(self, node_size, parent_id_basename_index):
+    def __init__(self, node_size, parent_id_basename_index,
+                 search_key_name):
         self.maximum_size = node_size
         self.parent_id_basename_index = parent_id_basename_index
-
-
-chk_serializer_subtree = CHKSerializerSubtree(4096, False)
-chk_serializer = CHKSerializer(4096, False)
-chk_serializer_subtree_parent_id = CHKSerializerSubtree(4096, True)
-chk_serializer_parent_id = CHKSerializer(4096, True)
+        self.search_key_name = search_key_name
+
+
+chk_serializer_subtree = CHKSerializerSubtree(4096, False, 'plain')
+chk_serializer = CHKSerializer(4096, False, 'plain')
+chk_serializer_subtree_parent_id = CHKSerializerSubtree(4096, True, 'plain')
+chk_serializer_parent_id = CHKSerializer(4096, True, 'plain')
+chk_serializer_16_parent_id = CHKSerializer(4096, True, 'hash-16-way')
+chk_serializer_255_parent_id = CHKSerializer(4096, True, 'hash-255-way')

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2008-12-19 23:07:32 +0000
+++ b/bzrlib/inventory.py	2009-01-21 23:04:50 +0000
@@ -1362,9 +1362,10 @@
     to reuse.
     """
 
-    def __init__(self):
+    def __init__(self, search_key_name):
         CommonInventory.__init__(self)
         self._entry_cache = {}
+        self._search_key_name = search_key_name
 
     def _entry_to_bytes(self, entry):
         """Serialise entry as a single bytestring.
@@ -1447,15 +1448,18 @@
         :param new_revision_id: The revision id of the resulting CHKInventory.
         :return: The new CHKInventory.
         """
-        result = CHKInventory()
+        result = CHKInventory(self._search_key_name)
+        search_key_func = chk_map.search_key_registry.get(self._search_key_name)
         result.revision_id = new_revision_id
         result.id_to_entry = chk_map.CHKMap(
             self.id_to_entry._store,
-            self.id_to_entry._root_node)
+            self.id_to_entry._root_node,
+            search_key_func=search_key_func)
         if self.parent_id_basename_to_file_id is not None:
             result.parent_id_basename_to_file_id = chk_map.CHKMap(
                 self.parent_id_basename_to_file_id._store,
-                self.parent_id_basename_to_file_id._root_node)
+                self.parent_id_basename_to_file_id._root_node,
+                search_key_func=search_key_func)
             parent_id_basename_delta = []
         else:
             result.parent_id_basename_to_file_id = None
@@ -1509,20 +1513,32 @@
             for.
         :return: A CHKInventory
         """
-        result = CHKInventory()
         lines = bytes.splitlines()
         if lines[0] != 'chkinventory:':
             raise ValueError("not a serialised CHKInventory: %r" % bytes)
-        result.revision_id = lines[1][13:]
-        result.root_id = lines[2][9:]
-        if lines[3].startswith('parent_id_basename_to_file_id:'):
+        revision_id = lines[1][13:]
+        root_id = lines[2][9:]
+        if lines[3].startswith('search_key_name:'):
+            search_key_name = lines[3][17:]
             next = 4
-            result.parent_id_basename_to_file_id = chk_map.CHKMap(
-                chk_store, (lines[3][31:],))
         else:
+            search_key_name = 'plain'
             next = 3
+        result = CHKInventory(search_key_name)
+        result.revision_id = revision_id
+        result.root_id = root_id
+        search_key_func = chk_map.search_key_registry.get(
+                            result._search_key_name)
+        if lines[next].startswith('parent_id_basename_to_file_id:'):
+            result.parent_id_basename_to_file_id = chk_map.CHKMap(
+                chk_store, (lines[next][31:],),
+                search_key_func=search_key_func)
+            next += 1
+        else:
             result.parent_id_basename_to_file_id = None
-        result.id_to_entry = chk_map.CHKMap(chk_store, (lines[next][13:],))
+
+        result.id_to_entry = chk_map.CHKMap(chk_store, (lines[next][13:],),
+                                            search_key_func=search_key_func)
         if (result.revision_id,) != expected_revision_id:
             raise ValueError("Mismatched revision id and expected: %r, %r" %
                 (result.revision_id, expected_revision_id))
@@ -1530,7 +1546,7 @@
 
     @classmethod
     def from_inventory(klass, chk_store, inventory, maximum_size=0,
-        parent_id_basename_index=False):
+        parent_id_basename_index=False, search_key_name='plain'):
         """Create a CHKInventory from an existing inventory.
 
         The content of inventory is copied into the chk_store, and a
@@ -1541,15 +1557,18 @@
         :param maximum_size: The CHKMap node size limit.
         :param parent_id_basename_index: If True create and use a
             parent_id,basename->file_id index.
+        :param search_key_name: The identifier for the search key function
         """
-        result = CHKInventory()
+        result = CHKInventory(search_key_name)
         result.revision_id = inventory.revision_id
         result.root_id = inventory.root.file_id
-        result.id_to_entry = chk_map.CHKMap(chk_store, None)
+        search_key_func = chk_map.search_key_registry.get(search_key_name)
+        result.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
         result.id_to_entry._root_node.set_maximum_size(maximum_size)
         file_id_delta = []
         if parent_id_basename_index:
-            result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store, None)
+            result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
+                None, search_key_func)
             result.parent_id_basename_to_file_id._root_node.set_maximum_size(
                 maximum_size)
             result.parent_id_basename_to_file_id._root_node._key_width = 2
@@ -1745,6 +1764,8 @@
         lines = ["chkinventory:\n"]
         lines.append("revision_id: %s\n" % self.revision_id)
         lines.append("root_id: %s\n" % self.root_id)
+        if self._search_key_name != 'plain':
+            lines.append('search_key_name: %s\n' % (self._search_key_name,))
         if self.parent_id_basename_to_file_id is not None:
             lines.append('parent_id_basename_to_file_id: %s\n' %
                 self.parent_id_basename_to_file_id.key())

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-01-12 18:44:55 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-01-21 23:04:50 +0000
@@ -2169,7 +2169,8 @@
         serializer = self._format._serializer
         result = CHKInventory.from_inventory(self.chk_bytes, inv,
             maximum_size=serializer.maximum_size,
-            parent_id_basename_index=serializer.parent_id_basename_index)
+            parent_id_basename_index=serializer.parent_id_basename_index,
+            search_key_name=serializer.search_key_name)
         inv_lines = result.to_lines()
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2008-12-03 22:53:37 +0000
+++ b/bzrlib/tests/test_inv.py	2009-01-21 23:04:50 +0000
@@ -198,6 +198,7 @@
         self.assertEqual(inv.root.parent_id, new_inv.root.parent_id)
         self.assertEqual(inv.root.name, new_inv.root.name)
         self.assertEqual("rootrev", new_inv.root.revision)
+        self.assertEqual('plain', new_inv._search_key_name)
 
     def test_deserialise_wrong_revid(self):
         inv = Inventory()
@@ -215,13 +216,53 @@
         inv.root.revision = "bar"
         chk_bytes = self.get_chk_bytes()
         chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
-        self.assertEqual([
-            'chkinventory:\n',
-            'revision_id: foo\n',
-            'root_id: TREE_ROOT\n',
-            'id_to_entry: sha1:36219af8518a9bed1e52db58e99131db2a00b329\n',
-            ],
-            chk_inv.to_lines())
+        lines = chk_inv.to_lines()
+        self.assertEqual([
+            'chkinventory:\n',
+            'revision_id: foo\n',
+            'root_id: TREE_ROOT\n',
+            'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+            ], lines)
+        chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+        self.assertEqual('plain', chk_inv._search_key_name)
+
+    def test_captures_parent_id_basename_index(self):
+        inv = Inventory()
+        inv.revision_id = "foo"
+        inv.root.revision = "bar"
+        chk_bytes = self.get_chk_bytes()
+        chk_inv = CHKInventory.from_inventory(chk_bytes, inv,
+                    parent_id_basename_index=True)
+        lines = chk_inv.to_lines()
+        self.assertEqual([
+            'chkinventory:\n',
+            'revision_id: foo\n',
+            'root_id: TREE_ROOT\n',
+            'parent_id_basename_to_file_id: sha1:46f33678d1c8cfd9b6d00dc658b6c8a9ac7bb0f0\n',
+            'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+            ], lines)
+        chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+        self.assertEqual('plain', chk_inv._search_key_name)
+
+    def test_captures_search_key_name(self):
+        inv = Inventory()
+        inv.revision_id = "foo"
+        inv.root.revision = "bar"
+        chk_bytes = self.get_chk_bytes()
+        chk_inv = CHKInventory.from_inventory(chk_bytes, inv,
+                                              parent_id_basename_index=True,
+                                              search_key_name='hash-16-way')
+        lines = chk_inv.to_lines()
+        self.assertEqual([
+            'chkinventory:\n',
+            'revision_id: foo\n',
+            'root_id: TREE_ROOT\n',
+            'search_key_name: hash-16-way\n',
+            'parent_id_basename_to_file_id: sha1:46f33678d1c8cfd9b6d00dc658b6c8a9ac7bb0f0\n',
+            'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+            ], lines)
+        chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+        self.assertEqual('hash-16-way', chk_inv._search_key_name)
 
     def test_directory_children_on_demand(self):
         inv = Inventory()



More information about the bazaar-commits mailing list