Rev 3816: Start parameterizing CHKInventory and CHKSerializer so that we can have different repository formats which use different hash keys. In http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
John Arbash Meinel
john at arbash-meinel.com
Wed Jan 21 23:04:59 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hash_search_key
------------------------------------------------------------
revno: 3816
revision-id: john at arbash-meinel.com-20090121230450-rcv4y4r3wsee87r8
parent: john at arbash-meinel.com-20090121221958-73e6ejetze235lpn
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hash_search_key
timestamp: Wed 2009-01-21 17:04:50 -0600
message:
Start parameterizing CHKInventory and CHKSerializer so that we can
have different repository formats which use different hash keys.
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2009-01-21 22:19:58 +0000
+++ b/bzrlib/chk_map.py 2009-01-21 23:04:50 +0000
@@ -46,7 +46,10 @@
from bzrlib import versionedfile
""")
-from bzrlib import lru_cache
+from bzrlib import (
+ lru_cache,
+ registry,
+ )
# approx 2MB
# If each line is 50 bytes, and you have 255 internal pages, with 255-way fan
@@ -76,6 +79,12 @@
return bytes.replace('\n', '_')
+search_key_registry = registry.Registry()
+search_key_registry.register('plain', _search_key_plain)
+search_key_registry.register('hash-16-way', _search_key_16)
+search_key_registry.register('hash-255-way', _search_key_255)
+
+
class CHKMap(object):
"""A persistent map from string to string backed by a CHK store."""
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py 2008-11-14 01:28:40 +0000
+++ b/bzrlib/chk_serializer.py 2009-01-21 23:04:50 +0000
@@ -46,9 +46,11 @@
else:
return xml6.Serializer_v6._unpack_entry(self, elt)
- def __init__(self, node_size, parent_id_basename_index):
+ def __init__(self, node_size, parent_id_basename_index,
+ search_key_name):
self.maximum_size = node_size
self.parent_id_basename_index = parent_id_basename_index
+ self.search_key_name = search_key_name
class CHKSerializer(xml5.Serializer_v5):
@@ -58,12 +60,16 @@
revision_format_num = None
support_altered_by_hack = False
- def __init__(self, node_size, parent_id_basename_index):
+ def __init__(self, node_size, parent_id_basename_index,
+ search_key_name):
self.maximum_size = node_size
self.parent_id_basename_index = parent_id_basename_index
-
-
-chk_serializer_subtree = CHKSerializerSubtree(4096, False)
-chk_serializer = CHKSerializer(4096, False)
-chk_serializer_subtree_parent_id = CHKSerializerSubtree(4096, True)
-chk_serializer_parent_id = CHKSerializer(4096, True)
+ self.search_key_name = search_key_name
+
+
+chk_serializer_subtree = CHKSerializerSubtree(4096, False, 'plain')
+chk_serializer = CHKSerializer(4096, False, 'plain')
+chk_serializer_subtree_parent_id = CHKSerializerSubtree(4096, True, 'plain')
+chk_serializer_parent_id = CHKSerializer(4096, True, 'plain')
+chk_serializer_16_parent_id = CHKSerializer(4096, True, 'hash-16-way')
+chk_serializer_255_parent_id = CHKSerializer(4096, True, 'hash-255-way')
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2008-12-19 23:07:32 +0000
+++ b/bzrlib/inventory.py 2009-01-21 23:04:50 +0000
@@ -1362,9 +1362,10 @@
to reuse.
"""
- def __init__(self):
+ def __init__(self, search_key_name):
CommonInventory.__init__(self)
self._entry_cache = {}
+ self._search_key_name = search_key_name
def _entry_to_bytes(self, entry):
"""Serialise entry as a single bytestring.
@@ -1447,15 +1448,18 @@
:param new_revision_id: The revision id of the resulting CHKInventory.
:return: The new CHKInventory.
"""
- result = CHKInventory()
+ result = CHKInventory(self._search_key_name)
+ search_key_func = chk_map.search_key_registry.get(self._search_key_name)
result.revision_id = new_revision_id
result.id_to_entry = chk_map.CHKMap(
self.id_to_entry._store,
- self.id_to_entry._root_node)
+ self.id_to_entry._root_node,
+ search_key_func=search_key_func)
if self.parent_id_basename_to_file_id is not None:
result.parent_id_basename_to_file_id = chk_map.CHKMap(
self.parent_id_basename_to_file_id._store,
- self.parent_id_basename_to_file_id._root_node)
+ self.parent_id_basename_to_file_id._root_node,
+ search_key_func=search_key_func)
parent_id_basename_delta = []
else:
result.parent_id_basename_to_file_id = None
@@ -1509,20 +1513,32 @@
for.
:return: A CHKInventory
"""
- result = CHKInventory()
lines = bytes.splitlines()
if lines[0] != 'chkinventory:':
raise ValueError("not a serialised CHKInventory: %r" % bytes)
- result.revision_id = lines[1][13:]
- result.root_id = lines[2][9:]
- if lines[3].startswith('parent_id_basename_to_file_id:'):
+ revision_id = lines[1][13:]
+ root_id = lines[2][9:]
+ if lines[3].startswith('search_key_name:'):
+ search_key_name = lines[3][17:]
next = 4
- result.parent_id_basename_to_file_id = chk_map.CHKMap(
- chk_store, (lines[3][31:],))
else:
+ search_key_name = 'plain'
next = 3
+ result = CHKInventory(search_key_name)
+ result.revision_id = revision_id
+ result.root_id = root_id
+ search_key_func = chk_map.search_key_registry.get(
+ result._search_key_name)
+ if lines[next].startswith('parent_id_basename_to_file_id:'):
+ result.parent_id_basename_to_file_id = chk_map.CHKMap(
+ chk_store, (lines[next][31:],),
+ search_key_func=search_key_func)
+ next += 1
+ else:
result.parent_id_basename_to_file_id = None
- result.id_to_entry = chk_map.CHKMap(chk_store, (lines[next][13:],))
+
+ result.id_to_entry = chk_map.CHKMap(chk_store, (lines[next][13:],),
+ search_key_func=search_key_func)
if (result.revision_id,) != expected_revision_id:
raise ValueError("Mismatched revision id and expected: %r, %r" %
(result.revision_id, expected_revision_id))
@@ -1530,7 +1546,7 @@
@classmethod
def from_inventory(klass, chk_store, inventory, maximum_size=0,
- parent_id_basename_index=False):
+ parent_id_basename_index=False, search_key_name='plain'):
"""Create a CHKInventory from an existing inventory.
The content of inventory is copied into the chk_store, and a
@@ -1541,15 +1557,18 @@
:param maximum_size: The CHKMap node size limit.
:param parent_id_basename_index: If True create and use a
parent_id,basename->file_id index.
+ :param search_key_name: The identifier for the search key function
"""
- result = CHKInventory()
+ result = CHKInventory(search_key_name)
result.revision_id = inventory.revision_id
result.root_id = inventory.root.file_id
- result.id_to_entry = chk_map.CHKMap(chk_store, None)
+ search_key_func = chk_map.search_key_registry.get(search_key_name)
+ result.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
result.id_to_entry._root_node.set_maximum_size(maximum_size)
file_id_delta = []
if parent_id_basename_index:
- result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store, None)
+ result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
+ None, search_key_func)
result.parent_id_basename_to_file_id._root_node.set_maximum_size(
maximum_size)
result.parent_id_basename_to_file_id._root_node._key_width = 2
@@ -1745,6 +1764,8 @@
lines = ["chkinventory:\n"]
lines.append("revision_id: %s\n" % self.revision_id)
lines.append("root_id: %s\n" % self.root_id)
+ if self._search_key_name != 'plain':
+ lines.append('search_key_name: %s\n' % (self._search_key_name,))
if self.parent_id_basename_to_file_id is not None:
lines.append('parent_id_basename_to_file_id: %s\n' %
self.parent_id_basename_to_file_id.key())
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-01-12 18:44:55 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-01-21 23:04:50 +0000
@@ -2169,7 +2169,8 @@
serializer = self._format._serializer
result = CHKInventory.from_inventory(self.chk_bytes, inv,
maximum_size=serializer.maximum_size,
- parent_id_basename_index=serializer.parent_id_basename_index)
+ parent_id_basename_index=serializer.parent_id_basename_index,
+ search_key_name=serializer.search_key_name)
inv_lines = result.to_lines()
return self._inventory_add_lines(revision_id, parents,
inv_lines, check_content=False)
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2008-12-03 22:53:37 +0000
+++ b/bzrlib/tests/test_inv.py 2009-01-21 23:04:50 +0000
@@ -198,6 +198,7 @@
self.assertEqual(inv.root.parent_id, new_inv.root.parent_id)
self.assertEqual(inv.root.name, new_inv.root.name)
self.assertEqual("rootrev", new_inv.root.revision)
+ self.assertEqual('plain', new_inv._search_key_name)
def test_deserialise_wrong_revid(self):
inv = Inventory()
@@ -215,13 +216,53 @@
inv.root.revision = "bar"
chk_bytes = self.get_chk_bytes()
chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
- self.assertEqual([
- 'chkinventory:\n',
- 'revision_id: foo\n',
- 'root_id: TREE_ROOT\n',
- 'id_to_entry: sha1:36219af8518a9bed1e52db58e99131db2a00b329\n',
- ],
- chk_inv.to_lines())
+ lines = chk_inv.to_lines()
+ self.assertEqual([
+ 'chkinventory:\n',
+ 'revision_id: foo\n',
+ 'root_id: TREE_ROOT\n',
+ 'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+ ], lines)
+ chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+ self.assertEqual('plain', chk_inv._search_key_name)
+
+ def test_captures_parent_id_basename_index(self):
+ inv = Inventory()
+ inv.revision_id = "foo"
+ inv.root.revision = "bar"
+ chk_bytes = self.get_chk_bytes()
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv,
+ parent_id_basename_index=True)
+ lines = chk_inv.to_lines()
+ self.assertEqual([
+ 'chkinventory:\n',
+ 'revision_id: foo\n',
+ 'root_id: TREE_ROOT\n',
+ 'parent_id_basename_to_file_id: sha1:46f33678d1c8cfd9b6d00dc658b6c8a9ac7bb0f0\n',
+ 'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+ ], lines)
+ chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+ self.assertEqual('plain', chk_inv._search_key_name)
+
+ def test_captures_search_key_name(self):
+ inv = Inventory()
+ inv.revision_id = "foo"
+ inv.root.revision = "bar"
+ chk_bytes = self.get_chk_bytes()
+ chk_inv = CHKInventory.from_inventory(chk_bytes, inv,
+ parent_id_basename_index=True,
+ search_key_name='hash-16-way')
+ lines = chk_inv.to_lines()
+ self.assertEqual([
+ 'chkinventory:\n',
+ 'revision_id: foo\n',
+ 'root_id: TREE_ROOT\n',
+ 'search_key_name: hash-16-way\n',
+ 'parent_id_basename_to_file_id: sha1:46f33678d1c8cfd9b6d00dc658b6c8a9ac7bb0f0\n',
+ 'id_to_entry: sha1:c9d15ff2621b8774506f702ff4ffd5f4af885a51\n',
+ ], lines)
+ chk_inv = CHKInventory.deserialise(chk_bytes, ''.join(lines), ('foo',))
+ self.assertEqual('hash-16-way', chk_inv._search_key_name)
def test_directory_children_on_demand(self):
inv = Inventory()
More information about the bazaar-commits mailing list