Rev 3744: New chk_map module for use in tree based inventory storage. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Wed Oct 1 04:46:37 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3744
revision-id: robertc at robertcollins.net-20081001034628-rbqronwrku3h5qlf
parent: robertc at robertcollins.net-20080930015214-c9tzc5qwv156kjtt
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2008-10-01 13:46:28 +1000
message:
New chk_map module for use in tree based inventory storage.
added:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/tests/test_chk_map.py test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/tests/__init__.py selftest.py-20050531073622-8d0e3c8845c97a64
=== modified file 'NEWS'
--- a/NEWS 2008-09-29 04:21:34 +0000
+++ b/NEWS 2008-10-01 03:46:28 +0000
@@ -117,6 +117,9 @@
* New method ``RevisionSpec.as_tree`` for representing the revision
specifier as a revision tree object. (Lukáš Lalinský)
+ * New module ``chk_map`` providing a persistent CHK store backed map.
+ (Robert Collins)
+
* New win32utils.get_local_appdata_location() provides access to a local
directory for storing data. (Mark Hammond)
=== added file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/chk_map.py 2008-10-01 03:46:28 +0000
@@ -0,0 +1,201 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Persistent maps from string->string using CHK stores."""
+
+import osutils
+
+
+class CHKMap(object):
+ """A persistent map from string to string backed by a CHK store."""
+
+ def __init__(self, store, root_key):
+ """Create a CHKMap object.
+
+ :param store: The store the CHKMap is stored in.
+ :param root_key: The root key of the map. None to create an empty
+ CHKMap.
+ """
+ self._store = store
+ if root_key is None:
+ self._root_node = RootNode()
+ else:
+ self._root_node = root_key
+
+ def apply_delta(self, delta):
+ """Apply a delta to the map.
+
+ :param delta: An iterable of old_key, new_key, new_value tuples.
+ If new_key is not None, then new_key->new_value is inserted
+ into the map; if old_key is not None, then the old mapping
+ of old_key is removed.
+ """
+ for old, new, value in delta:
+ if old is not None and old != new:
+ # unmap
+ self._unmap(old)
+ for old, new, value in delta:
+ if new is not None:
+ # map
+ self._map(new, value)
+ return self._save()
+
+ def _ensure_root(self):
+ """Ensure that the root node is an object not a key."""
+ if type(self._root_node) == tuple:
+ # Demand-load the root
+ bytes = self._read_bytes(self._root_node)
+ root_key = self._root_node
+ self._root_node = RootNode()
+ self._root_node.deserialise(bytes, root_key)
+
+ def _read_bytes(self, key):
+ stream = self._store.get_record_stream([key], 'unordered', True)
+ return stream.next().get_bytes_as('fulltext')
+
+ @classmethod
+ def from_dict(klass, store, initial_value):
+ """Create a CHKMap in store with initial_value as the content.
+
+ :param store: The store to record initial_value in, a VersionedFiles
+ object with 1-tuple keys supporting CHK key generation.
+ :param initial_value: A dict to store in store. Its keys and values
+ must be bytestrings.
+ :return: The root chk of te resulting CHKMap.
+ """
+ result = CHKMap(store, None)
+ for key, value in initial_value.items():
+ result._map(key, value)
+ return result._save()
+
+ def iteritems(self):
+ """Iterate over the entire CHKMap's contents."""
+ self._ensure_root()
+ for name, key, in self._root_node._nodes.iteritems():
+ bytes = self._read_bytes(key)
+ yield name, ValueNode.deserialise(bytes).value
+
+ def _map(self, key, value):
+ """Map key to value."""
+ self._ensure_root()
+ # Store the value
+ bytes = ValueNode(value).serialise()
+ sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes))
+ # And link into the root
+ self._root_node.add_child(key, ("sha1:" + sha1,))
+
+ def _unmap(self, key):
+ """remove key from the map."""
+ self._ensure_root()
+ self._root_node.remove_child(key)
+
+ def _save(self):
+ """Save the map completely.
+
+ :return: The key of the root node.
+ """
+ if type(self._root_node) == tuple:
+ # Already saved.
+ return self._root_node
+ # TODO: flush root_nodes children?
+ bytes = self._root_node.serialise()
+ sha1, _, _ = self._store.add_lines((None,), (),
+ osutils.split_lines(bytes))
+ result = ("sha1:" + sha1,)
+ self._root_node._key = result
+ return result
+
+
+class RootNode(object):
+ """A root node in a CHKMap."""
+
+ def __init__(self):
+ self._nodes = {}
+
+ def add_child(self, name, child):
+ """Add a child to the node.
+
+ If the node changes, it's key is reset.
+
+ :param name: The name of the child. A bytestring.
+ :param child: The child, a key tuple for the childs value.
+ """
+ self._nodes[name] = child
+ self._key = None
+
+ def deserialise(self, bytes, key):
+ """Set the nodes value to that contained in bytes.
+
+ :param bytes: The bytes of the node.
+ :param key: The key that the serialised node has.
+ """
+ lines = bytes.splitlines()
+ nodes = {}
+ if lines[0] != 'chkroot:':
+ raise ValueError("not a serialised root node: %r" % bytes)
+ for line in lines[1:]:
+ name, value = line.split('\x00')
+ nodes[name] = (value,)
+ self._nodes = nodes
+ self._key = key
+
+ def remove_child(self, name):
+ """Remove name from the node.
+
+ If the node changes, it's key is reset.
+
+ :param name: The name to remove from the node.
+ """
+ del self._nodes[name]
+ self._key = None
+
+ def serialise(self):
+ """Flatten the node to a bytestring.
+
+ :return: A bytestring.
+ """
+ lines = ["chkroot:\n"]
+ for name, child in sorted(self._nodes.items()):
+ lines.append("%s\x00%s\n" % (name, child[0]))
+ return "".join(lines)
+
+
+class ValueNode(object):
+ """A value in a CHKMap."""
+
+ def __init__(self, value):
+ """Create a ValueNode.
+
+ :param value: The value of this node, must be a bytestring.
+ """
+ self.value = value
+
+ @classmethod
+ def deserialise(klass, bytes):
+ """Get a ValueNode from a serialised bytestring.
+
+ :param bytes: The bytes returned by an earlier serialisation.
+ """
+ if not bytes.startswith("chkvalue:\n"):
+ raise ValueError("not a chkvalue %r" % bytes)
+ return ValueNode(bytes[10:])
+
+ def serialise(self):
+ """Flatten the value to a bytestring.
+
+ :return: A bytestring.
+ """
+ return "chkvalue:\n" + self.value
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py 2008-09-25 06:41:42 +0000
+++ b/bzrlib/tests/__init__.py 2008-10-01 03:46:28 +0000
@@ -2771,6 +2771,7 @@
'bzrlib.tests.test_bundle',
'bzrlib.tests.test_bzrdir',
'bzrlib.tests.test_cache_utf8',
+ 'bzrlib.tests.test_chk_map',
'bzrlib.tests.test_chunk_writer',
'bzrlib.tests.test_commands',
'bzrlib.tests.test_commit',
=== added file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py 1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_chk_map.py 2008-10-01 03:46:28 +0000
@@ -0,0 +1,162 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Tests for maps built on a CHK versionedfiles facility."""
+
+from bzrlib.chk_map import CHKMap, RootNode, ValueNode
+from bzrlib.tests import TestCaseWithTransport
+
+
+class TestDumbMap(TestCaseWithTransport):
+
+ def get_chk_bytes(self):
+ # The eassiest way to get a CHK store is a development3 repository and
+ # then work with the chk_bytes attribute directly.
+ repo = self.make_repository(".", format="development3")
+ repo.lock_write()
+ self.addCleanup(repo.unlock)
+ repo.start_write_group()
+ self.addCleanup(repo.abort_write_group)
+ return repo.chk_bytes
+
+ def read_bytes(self, chk_bytes, key):
+ stream = chk_bytes.get_record_stream([key], 'unordered', True)
+ return stream.next().get_bytes_as("fulltext")
+
+ def assertHasABMap(self, chk_bytes):
+ root_key = ('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',)
+ self.assertEqual(
+ "chkroot:\na\x00sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b\n",
+ self.read_bytes(chk_bytes, root_key))
+ self.assertEqual(
+ "chkvalue:\nb",
+ self.read_bytes(chk_bytes,
+ ("sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b",)))
+
+ def assertHasEmptyMap(self, chk_bytes):
+ root_key = ('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',)
+ self.assertEqual("chkroot:\n", self.read_bytes(chk_bytes, root_key))
+
+ def test_from_dict_empty(self):
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes, {})
+ self.assertEqual(('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',),
+ root_key)
+ self.assertHasEmptyMap(chk_bytes)
+
+ def test_from_dict_ab(self):
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
+ self.assertEqual(('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',),
+ root_key)
+ self.assertHasABMap(chk_bytes)
+
+ def test_apply_empty_ab(self):
+ # applying a delta (None, "a", "b") to an empty chkmap generates the
+ # same map as from_dict_ab.
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes, {})
+ chkmap = CHKMap(chk_bytes, root_key)
+ new_root = chkmap.apply_delta([(None, "a", "b")])
+ self.assertEqual(('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',),
+ new_root)
+ self.assertHasABMap(chk_bytes)
+ # The update should have left us with an in memory root node, with an
+ # updated key.
+ self.assertEqual(new_root, chkmap._root_node._key)
+
+ def test_apply_ab_empty(self):
+ # applying a delta ("a", None, None) to an empty chkmap generates the
+ # same map as from_dict_ab.
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
+ chkmap = CHKMap(chk_bytes, root_key)
+ new_root = chkmap.apply_delta([("a", None, None)])
+ self.assertEqual(('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',),
+ new_root)
+ self.assertHasEmptyMap(chk_bytes)
+ # The update should have left us with an in memory root node, with an
+ # updated key.
+ self.assertEqual(new_root, chkmap._root_node._key)
+
+ def test_iteritems_empty(self):
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes, {})
+ chkmap = CHKMap(chk_bytes, root_key)
+ self.assertEqual([], list(chkmap.iteritems()))
+
+ def test_iteritems_two_items(self):
+ chk_bytes = self.get_chk_bytes()
+ root_key = CHKMap.from_dict(chk_bytes,
+ {"a":"content here", "b":"more content"})
+ chkmap = CHKMap(chk_bytes, root_key)
+ self.assertEqual([("a", "content here"), ("b", "more content")],
+ sorted(list(chkmap.iteritems())))
+
+
+class TestRootNode(TestCaseWithTransport):
+
+ def test_serialise_empty(self):
+ node = RootNode()
+ bytes = node.serialise()
+ self.assertEqual("chkroot:\n", bytes)
+
+ def test_add_child_resets_key(self):
+ node = RootNode()
+ node._key = ("something",)
+ node.add_child("c", ("sha1:1234",))
+ self.assertEqual(None, node._key)
+
+ def test_remove_child_removes_child(self):
+ node = RootNode()
+ node.add_child("a", ("sha1:4321",))
+ node.add_child("c", ("sha1:1234",))
+ node._key = ("something",)
+ node.remove_child("a")
+ self.assertEqual({"c":("sha1:1234",)}, node._nodes)
+
+ def test_remove_child_resets_key(self):
+ node = RootNode()
+ node.add_child("c", ("sha1:1234",))
+ node._key = ("something",)
+ node.remove_child("c")
+ self.assertEqual(None, node._key)
+
+ def test_deserialise(self):
+ # deserialising from a bytestring & key sets the nodes and the known
+ # key.
+ node = RootNode()
+ node.deserialise("chkroot:\nc\x00sha1:1234\n", ("foo",))
+ self.assertEqual({"c": ("sha1:1234",)}, node._nodes)
+ self.assertEqual(("foo",), node._key)
+
+ def test_serialise_with_child(self):
+ node = RootNode()
+ node.add_child("c", ("sha1:1234",))
+ bytes = node.serialise()
+ self.assertEqual("chkroot:\nc\x00sha1:1234\n", bytes)
+
+
+class TestValueNode(TestCaseWithTransport):
+
+ def test_deserialise(self):
+ node = ValueNode.deserialise("chkvalue:\nfoo bar baz\n")
+ self.assertEqual("foo bar baz\n", node.value)
+
+ def test_serialise(self):
+ node = ValueNode("b")
+ bytes = node.serialise()
+ self.assertEqual("chkvalue:\nb", bytes)
More information about the bazaar-commits
mailing list