Rev 3744: New chk_map module for use in tree based inventory storage. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Wed Oct 1 04:46:37 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 3744
revision-id: robertc at robertcollins.net-20081001034628-rbqronwrku3h5qlf
parent: robertc at robertcollins.net-20080930015214-c9tzc5qwv156kjtt
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2008-10-01 13:46:28 +1000
message:
  New chk_map module for use in tree based inventory storage.
added:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/tests/test_chk_map.py   test_chk_map.py-20081001014447-ue6kkuhofvdecvxa-2
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
=== modified file 'NEWS'
--- a/NEWS	2008-09-29 04:21:34 +0000
+++ b/NEWS	2008-10-01 03:46:28 +0000
@@ -117,6 +117,9 @@
     * New method ``RevisionSpec.as_tree`` for representing the revision
       specifier as a revision tree object. (Lukáš Lalinský)
 
+    * New module ``chk_map`` providing a persistent CHK store backed map.
+      (Robert Collins)
+
     * New win32utils.get_local_appdata_location() provides access to a local
       directory for storing data.  (Mark Hammond)
 

=== added file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/chk_map.py	2008-10-01 03:46:28 +0000
@@ -0,0 +1,201 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Persistent maps from string->string using CHK stores."""
+
+import osutils
+
+
+class CHKMap(object):
+    """A persistent map from string to string backed by a CHK store."""
+
+    def __init__(self, store, root_key):
+        """Create a CHKMap object.
+
+        :param store: The store the CHKMap is stored in.
+        :param root_key: The root key of the map. None to create an empty
+            CHKMap.
+        """
+        self._store = store
+        if root_key is None:
+            self._root_node = RootNode()
+        else:
+            self._root_node = root_key
+
+    def apply_delta(self, delta):
+        """Apply a delta to the map.
+
+        :param delta: An iterable of old_key, new_key, new_value tuples.
+            If new_key is not None, then new_key->new_value is inserted
+            into the map; if old_key is not None, then the old mapping
+            of old_key is removed.
+        """
+        for old, new, value in delta:
+            if old is not None and old != new:
+                # unmap
+                self._unmap(old)
+        for old, new, value in delta:
+            if new is not None:
+                # map
+                self._map(new, value)
+        return self._save()
+
+    def _ensure_root(self):
+        """Ensure that the root node is an object not a key."""
+        if type(self._root_node) == tuple:
+            # Demand-load the root
+            bytes = self._read_bytes(self._root_node)
+            root_key = self._root_node
+            self._root_node = RootNode()
+            self._root_node.deserialise(bytes, root_key)
+
+    def _read_bytes(self, key):
+        stream = self._store.get_record_stream([key], 'unordered', True)
+        return stream.next().get_bytes_as('fulltext')
+
+    @classmethod
+    def from_dict(klass, store, initial_value):
+        """Create a CHKMap in store with initial_value as the content.
+        
+        :param store: The store to record initial_value in, a VersionedFiles
+            object with 1-tuple keys supporting CHK key generation.
+        :param initial_value: A dict to store in store. Its keys and values
+            must be bytestrings.
+        :return: The root chk of te resulting CHKMap.
+        """
+        result = CHKMap(store, None)
+        for key, value in initial_value.items():
+            result._map(key, value)
+        return result._save()
+
+    def iteritems(self):
+        """Iterate over the entire CHKMap's contents."""
+        self._ensure_root()
+        for name, key, in self._root_node._nodes.iteritems():
+            bytes = self._read_bytes(key)
+            yield name, ValueNode.deserialise(bytes).value
+
+    def _map(self, key, value):
+        """Map key to value."""
+        self._ensure_root()
+        # Store the value
+        bytes = ValueNode(value).serialise()
+        sha1, _, _ = self._store.add_lines((None,), (), osutils.split_lines(bytes))
+        # And link into the root
+        self._root_node.add_child(key, ("sha1:" + sha1,))
+
+    def _unmap(self, key):
+        """remove key from the map."""
+        self._ensure_root()
+        self._root_node.remove_child(key)
+
+    def _save(self):
+        """Save the map completely.
+
+        :return: The key of the root node.
+        """
+        if type(self._root_node) == tuple:
+            # Already saved.
+            return self._root_node
+        # TODO: flush root_nodes children?
+        bytes = self._root_node.serialise()
+        sha1, _, _ = self._store.add_lines((None,), (),
+            osutils.split_lines(bytes))
+        result = ("sha1:" + sha1,)
+        self._root_node._key = result
+        return result
+
+
+class RootNode(object):
+    """A root node in a CHKMap."""
+
+    def __init__(self):
+        self._nodes = {}
+
+    def add_child(self, name, child):
+        """Add a child to the node.
+
+        If the node changes, it's key is reset.
+
+        :param name: The name of the child. A bytestring.
+        :param child: The child, a key tuple for the childs value.
+        """
+        self._nodes[name] = child
+        self._key = None
+
+    def deserialise(self, bytes, key):
+        """Set the nodes value to that contained in bytes.
+        
+        :param bytes: The bytes of the node.
+        :param key: The key that the serialised node has.
+        """
+        lines = bytes.splitlines()
+        nodes = {}
+        if lines[0] != 'chkroot:':
+            raise ValueError("not a serialised root node: %r" % bytes)
+        for line in lines[1:]:
+            name, value = line.split('\x00')
+            nodes[name] = (value,)
+        self._nodes = nodes
+        self._key = key
+
+    def remove_child(self, name):
+        """Remove name from the node.
+
+        If the node changes, it's key is reset.
+
+        :param name: The name to remove from the node.
+        """
+        del self._nodes[name]
+        self._key = None
+
+    def serialise(self):
+        """Flatten the node to a bytestring.
+
+        :return: A bytestring.
+        """
+        lines = ["chkroot:\n"]
+        for name, child in sorted(self._nodes.items()):
+            lines.append("%s\x00%s\n" % (name, child[0]))
+        return "".join(lines)
+
+
+class ValueNode(object):
+    """A value in a CHKMap."""
+
+    def __init__(self, value):
+        """Create a ValueNode.
+
+        :param value: The value of this node, must be a bytestring.
+        """
+        self.value = value
+
+    @classmethod
+    def deserialise(klass, bytes):
+        """Get a ValueNode from a serialised bytestring.
+        
+        :param bytes: The bytes returned by an earlier serialisation.
+        """
+        if not bytes.startswith("chkvalue:\n"):
+            raise ValueError("not a chkvalue %r" % bytes)
+        return ValueNode(bytes[10:])
+
+    def serialise(self):
+        """Flatten the value to a bytestring.
+
+        :return: A bytestring.
+        """
+        return "chkvalue:\n" + self.value

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2008-09-25 06:41:42 +0000
+++ b/bzrlib/tests/__init__.py	2008-10-01 03:46:28 +0000
@@ -2771,6 +2771,7 @@
                    'bzrlib.tests.test_bundle',
                    'bzrlib.tests.test_bzrdir',
                    'bzrlib.tests.test_cache_utf8',
+                   'bzrlib.tests.test_chk_map',
                    'bzrlib.tests.test_chunk_writer',
                    'bzrlib.tests.test_commands',
                    'bzrlib.tests.test_commit',

=== added file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_chk_map.py	2008-10-01 03:46:28 +0000
@@ -0,0 +1,162 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for maps built on a CHK versionedfiles facility."""
+
+from bzrlib.chk_map import CHKMap, RootNode, ValueNode
+from bzrlib.tests import TestCaseWithTransport
+
+
+class TestDumbMap(TestCaseWithTransport):
+
+    def get_chk_bytes(self):
+        # The eassiest way to get a CHK store is a development3 repository and
+        # then work with the chk_bytes attribute directly.
+        repo = self.make_repository(".", format="development3")
+        repo.lock_write()
+        self.addCleanup(repo.unlock)
+        repo.start_write_group()
+        self.addCleanup(repo.abort_write_group)
+        return repo.chk_bytes
+
+    def read_bytes(self, chk_bytes, key):
+        stream = chk_bytes.get_record_stream([key], 'unordered', True)
+        return stream.next().get_bytes_as("fulltext")
+
+    def assertHasABMap(self, chk_bytes):
+        root_key = ('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',)
+        self.assertEqual(
+            "chkroot:\na\x00sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b\n",
+            self.read_bytes(chk_bytes, root_key))
+        self.assertEqual(
+            "chkvalue:\nb",
+            self.read_bytes(chk_bytes,
+                ("sha1:cb29f32e561a1b7f862c38ccfd6bc7c7d892f04b",)))
+
+    def assertHasEmptyMap(self, chk_bytes):
+        root_key = ('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',)
+        self.assertEqual("chkroot:\n", self.read_bytes(chk_bytes, root_key))
+
+    def test_from_dict_empty(self):
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes, {})
+        self.assertEqual(('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',),
+            root_key)
+        self.assertHasEmptyMap(chk_bytes)
+
+    def test_from_dict_ab(self):
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
+        self.assertEqual(('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',),
+            root_key)
+        self.assertHasABMap(chk_bytes)
+
+    def test_apply_empty_ab(self):
+        # applying a delta (None, "a", "b") to an empty chkmap generates the
+        # same map as from_dict_ab.
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes, {})
+        chkmap = CHKMap(chk_bytes, root_key)
+        new_root = chkmap.apply_delta([(None, "a", "b")])
+        self.assertEqual(('sha1:5c464bbd8fecba1aa2574c6d2eb26813d622ce17',),
+            new_root)
+        self.assertHasABMap(chk_bytes)
+        # The update should have left us with an in memory root node, with an
+        # updated key.
+        self.assertEqual(new_root, chkmap._root_node._key)
+
+    def test_apply_ab_empty(self):
+        # applying a delta ("a", None, None) to an empty chkmap generates the
+        # same map as from_dict_ab.
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes, {"a":"b"})
+        chkmap = CHKMap(chk_bytes, root_key)
+        new_root = chkmap.apply_delta([("a", None, None)])
+        self.assertEqual(('sha1:572d8da882e1ebf0f50f1e2da2d7a9cadadf4db5',),
+            new_root)
+        self.assertHasEmptyMap(chk_bytes)
+        # The update should have left us with an in memory root node, with an
+        # updated key.
+        self.assertEqual(new_root, chkmap._root_node._key)
+
+    def test_iteritems_empty(self):
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes, {})
+        chkmap = CHKMap(chk_bytes, root_key)
+        self.assertEqual([], list(chkmap.iteritems()))
+
+    def test_iteritems_two_items(self):
+        chk_bytes = self.get_chk_bytes()
+        root_key = CHKMap.from_dict(chk_bytes,
+            {"a":"content here", "b":"more content"})
+        chkmap = CHKMap(chk_bytes, root_key)
+        self.assertEqual([("a", "content here"), ("b", "more content")],
+            sorted(list(chkmap.iteritems())))
+
+
+class TestRootNode(TestCaseWithTransport):
+
+    def test_serialise_empty(self):
+        node = RootNode()
+        bytes = node.serialise()
+        self.assertEqual("chkroot:\n", bytes)
+
+    def test_add_child_resets_key(self):
+        node = RootNode()
+        node._key = ("something",)
+        node.add_child("c", ("sha1:1234",))
+        self.assertEqual(None, node._key)
+
+    def test_remove_child_removes_child(self):
+        node = RootNode()
+        node.add_child("a", ("sha1:4321",))
+        node.add_child("c", ("sha1:1234",))
+        node._key = ("something",)
+        node.remove_child("a")
+        self.assertEqual({"c":("sha1:1234",)}, node._nodes)
+
+    def test_remove_child_resets_key(self):
+        node = RootNode()
+        node.add_child("c", ("sha1:1234",))
+        node._key = ("something",)
+        node.remove_child("c")
+        self.assertEqual(None, node._key)
+
+    def test_deserialise(self):
+        # deserialising from a bytestring & key sets the nodes and the known
+        # key.
+        node = RootNode()
+        node.deserialise("chkroot:\nc\x00sha1:1234\n", ("foo",))
+        self.assertEqual({"c": ("sha1:1234",)}, node._nodes)
+        self.assertEqual(("foo",), node._key)
+
+    def test_serialise_with_child(self):
+        node = RootNode()
+        node.add_child("c", ("sha1:1234",))
+        bytes = node.serialise()
+        self.assertEqual("chkroot:\nc\x00sha1:1234\n", bytes)
+
+
+class TestValueNode(TestCaseWithTransport):
+
+    def test_deserialise(self):
+        node = ValueNode.deserialise("chkvalue:\nfoo bar baz\n")
+        self.assertEqual("foo bar baz\n", node.value)
+
+    def test_serialise(self):
+        node = ValueNode("b")
+        bytes = node.serialise()
+        self.assertEqual("chkvalue:\nb", bytes)




More information about the bazaar-commits mailing list