Rev 3762: CHKInventory migrated to new CHKMap code. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Wed Nov 12 04:17:11 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 3762
revision-id: robertc at robertcollins.net-20081112041701-gm15awvwc5c9rbml
parent: robertc at robertcollins.net-20081112031934-3wj5819th95gyiwg
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2008-11-12 15:17:01 +1100
message:
  CHKInventory migrated to new CHKMap code.
modified:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/per_repository_chk/test_supported.py test_supported.py-20080925063728-k65ry0n2rhta6t34-1
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-11-12 03:19:34 +0000
+++ b/bzrlib/chk_map.py	2008-11-12 04:17:01 +0000
@@ -264,19 +264,6 @@
     def key(self):
         return self._key
 
-    def _key_count(self, parent_width, cut_width):
-        """Return the number of keys in/under this node between two widths.
-
-        :param parent_width: The start offset in keys to consider.
-        :param cut_width: The width to stop considering at.
-        """
-        # This assumes the keys are unique up to parent_width.
-        serialised_keys = set()
-        for key in self._items:
-            serialised_key = '\x00'.join(key)
-            serialised_keys.add(serialised_key[parent_width:cut_width])
-        return len(serialised_keys)
-
     def map(self, store, key, value):
         """Map key to value."""
         if key in self._items:
@@ -326,6 +313,10 @@
         """Return the serialised key for key in this node."""
         return '\x00'.join(key)
 
+    def refs(self):
+        """Return the references to other CHK's held by this node."""
+        return []
+
     def unique_serialised_prefix(self):
         """Return the unique key prefix for this node.
 
@@ -537,23 +528,17 @@
         for key, node in self._items.items():
             pass
 
-    def _key_count(self, parent_width, cut_width):
-        """Return the number of keys in/under this node between two widths.
-
-        :param parent_width: The start offset in keys to consider.
-        :param cut_width: The width to stop considering at.
-        """
-        if cut_width > self._node_width:
-            raise NotImplementedError(self._key_count)
-        # This assumes the keys are unique up to parent_width.
-        serialised_keys = set()
-        for serialised_key in self._items:
-            serialised_keys.add(serialised_key[parent_width:cut_width])
-        return len(serialised_keys)
-
-    def _prelude_size(self):
-        """Return the size of the node prelude."""
-        return 15
+    def refs(self):
+        """Return the references to other CHK's held by this node."""
+        if self._key is None:
+            raise AssertionError("unserialised nodes have no refs.")
+        refs = []
+        for value in self._items.itervalues():
+            if type(value) == tuple:
+                refs.append(value)
+            else:
+                refs.append(value.key())
+        return refs
 
     def unique_serialised_prefix(self):
         """Return the unique key prefix for this node.
@@ -601,90 +586,6 @@
         return self
 
 
-class RootNode(Node):
-    """A root node in a CHKMap."""
-
-    def __init__(self):
-        Node.__init__(self)
-        self._nodes = {}
-        self._size = 12
-        self.prefix_width = 0
-
-    def add_child(self, name, child):
-        """Add a child to the node.
-
-        If the node changes, it's key is reset.
-
-        :param name: The name of the child. A bytestring.
-        :param child: The child, a key tuple for the childs value.
-        """
-        if self._maximum_size and self._current_size() >= self._maximum_size:
-            return False
-        if name in self._nodes:
-            self.remove_child(name)
-        self._nodes[name] = child
-        self._len += 1
-        self._key = None
-        self._size += len(name) + len(child[0]) + 2
-        return True
-
-    def _current_size(self):
-        """Answer the current serialised size of this node."""
-        return (self._size + len(str(self._maximum_size)) + len(str(self._len))
-            + len(str(self.prefix_width)))
-
-    def deserialise(self, bytes, key):
-        """Set the nodes value to that contained in bytes.
-        
-        :param bytes: The bytes of the node.
-        :param key: The key that the serialised node has.
-        """
-        lines = bytes.splitlines()
-        nodes = {}
-        if lines[0] != 'chkroot:':
-            raise ValueError("not a serialised root node: %r" % bytes)
-        maximum_size = int(lines[1])
-        prefix_width = int(lines[2])
-        length = int(lines[3])
-        for line in lines[4:]:
-            name, value = line.split('\x00')
-            nodes[name] = (value,)
-        self._nodes = nodes
-        self._len = length
-        self._maximum_size = maximum_size
-        self._key = key
-        self.prefix_width = prefix_width
-
-    def refs(self):
-        """Get the CHK key references this node holds."""
-        return self._nodes.values()
-
-    def remove_child(self, name):
-        """Remove name from the node.
-
-        If the node changes, it's key is reset.
-
-        :param name: The name to remove from the node.
-        """
-        node = self._nodes.pop(name)
-        self._size -= 2 + len(name) + len(node[0])
-        self._len -= 1
-        self._key = None
-
-    def serialise(self):
-        """Flatten the node to a bytestring.
-
-        :return: A bytestring.
-        """
-        lines = ["chkroot:\n"]
-        lines.append("%d\n" % self._maximum_size)
-        lines.append("%d\n" % self.prefix_width)
-        lines.append("%d\n" % self._len)
-        for name, child in sorted(self._nodes.items()):
-            lines.append("%s\x00%s\n" % (name, child[0]))
-        return "".join(lines)
-
-
 def _deserialise(bytes, key):
     """Helper for repositorydetails - convert bytes to a node."""
     if bytes.startswith("chkleaf:\n"):

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-11-11 04:28:43 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-11-12 04:17:01 +0000
@@ -807,15 +807,10 @@
         new_refs = set()
         def accumlate_refs(lines):
             # XXX: move to a generic location
-            kind = lines[0]
-            if kind == 'chkroot:\n':
-                node = chk_map.RootNode()
-                node.deserialise(''.join(lines),  None)
-                new_refs.update(node.refs())
-            elif kind == 'chkvalue:\n':
-                pass
-            else:
-                raise ValueError("unknown chk node %r" % lines[0])
+            # Yay mismatch:
+            bytes = ''.join(lines)
+            node = chk_map._deserialise(bytes, ("unknown",))
+            new_refs.update(node.refs())
         self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
             self.new_pack.chk_index, output_lines=accumlate_refs)
         return new_refs
@@ -2188,24 +2183,51 @@
             uninteresting_map = uninteresting_inv.id_to_entry
             uninteresting_map._ensure_root()
             uninteresting_chk_refs = set(uninteresting_map._root_node.refs())
+            pending_nodes = set([uninteresting_map._root_node])
+            while pending_nodes:
+                nodes = pending_nodes
+                pending_nodes = set()
+                for node in nodes:
+                    uninteresting_chk_refs.add(node.key())
+                    if not isinstance(node, chk_map.InternalNode):
+                        continue
+                    for subnode in node._iter_nodes(uninteresting_map._store):
+                        pending_nodes.add(subnode)
+        # XXX: This currently only avoids traversing top level unchanged trees.
+        # What we need is parallel traversal with uninteresting-only trees
+        # pruned, interesting-only trees included, common trees with identical
+        # pointers pruned, common trees with different pointers examined
+        # further.
         for idx, inv in enumerate(self.iter_inventories(revision_ids)):
             pb.update('fetch', idx, len(revision_ids))
             inv_chk_map = inv.id_to_entry
             inv_chk_map._ensure_root()
-            candidate_names = {}
-            for name, ref in inv_chk_map._root_node._nodes.iteritems():
-                if ref in uninteresting_chk_refs:
-                    continue
-                candidate_names[name] = ref
-            for name, bytes in inv_chk_map.iteritems(candidate_names):
-                entry = inv._bytes_to_entry(bytes)
-                if entry.name == '' and not rich_root:
-                    continue
-                if entry.revision == inv.revision_id:
-                    # add it to uninteresting_chk_refs to
-                    # avoid processing twice it if we see it again later.
-                    uninteresting_chk_refs.add(candidate_names[name])
-                    yield ("file", entry.file_id, [entry.revision])
+            pending_nodes = set([inv_chk_map._root_node])
+            while pending_nodes:
+                nodes = pending_nodes
+                pending_nodes = set()
+                for node in nodes:
+                    # Do not examine this node again
+                    uninteresting_chk_refs.add(node.key())
+                    if not isinstance(node, chk_map.InternalNode):
+                        # Leaf node: pull out its contents:
+                        for name, bytes in node.iteritems():
+                            entry = inv._bytes_to_entry(bytes)
+                            if entry.name == '' and not rich_root:
+                                continue
+                            if entry.revision == inv.revision_id:
+                                yield ("file", entry.file_id, [entry.revision])
+                        continue
+                    # Recurse deeper
+                    # Two-pass; api fixup needed to allow exclusion
+                    wanted_keys = set()
+                    for key, value in node._items.iteritems:
+                        if key in uninteresting_chk_refs:
+                            continue
+                        wanted_keys.add((key,))
+                    for subnode in node._iter_nodes(inv_chk_map._store,
+                        key_filter=wanted_keys):
+                        pending_nodes.add(subnode)
         
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
         """Find the file ids and versions affected by revisions.
@@ -2803,9 +2825,9 @@
 class RepositoryFormatPackDevelopment3(RepositoryFormatPack):
     """A no-subtrees development repository.
 
-    This format should be retained until the second release after bzr 1.7.
+    This format should be retained until the second release after bzr 1.11.
 
-    This is pack-1.6.1 with B+Tree indices and a chk index.
+    This is pack-1.9 with CHKMap based inventories.
     """
 
     repository_class = CHKInventoryRepository
@@ -2828,7 +2850,7 @@
 
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
-        return "Bazaar development format 3 (needs bzr.dev from before 1.8)\n"
+        return "Bazaar development format 3 (needs bzr.dev from before 1.10)\n"
 
     def get_format_description(self):
         """See RepositoryFormat.get_format_description()."""
@@ -2842,9 +2864,9 @@
 class RepositoryFormatPackDevelopment3Subtree(RepositoryFormatPack):
     """A subtrees development repository.
 
-    This format should be retained until the second release after bzr 1.7.
+    This format should be retained until the second release after bzr 1.11.
 
-    1.6.1-subtree[as it might have been] with B+Tree indices and a chk index.
+    1.9-subtree[as it might have been] with CHKMap based inventories.
     """
 
     repository_class = CHKInventoryRepository
@@ -2879,7 +2901,7 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ("Bazaar development format 3 with subtree support "
-            "(needs bzr.dev from before 1.8)\n")
+            "(needs bzr.dev from before 1.10)\n")
 
     def get_format_description(self):
         """See RepositoryFormat.get_format_description()."""

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-11-11 04:28:43 +0000
+++ b/bzrlib/repository.py	2008-11-12 04:17:01 +0000
@@ -2503,14 +2503,15 @@
     'bzrlib.repofmt.pack_repo',
     'RepositoryFormatPackDevelopment2Subtree',
     )
+# 1.9->1.110 go below here
 format_registry.register_lazy(
-    "Bazaar development format 3 (needs bzr.dev from before 1.8)\n",
+    "Bazaar development format 3 (needs bzr.dev from before 1.10)\n",
     'bzrlib.repofmt.pack_repo',
     'RepositoryFormatPackDevelopment3',
     )
 format_registry.register_lazy(
     ("Bazaar development format 3 with subtree support "
-        "(needs bzr.dev from before 1.8)\n"),
+        "(needs bzr.dev from before 1.10)\n"),
     'bzrlib.repofmt.pack_repo',
     'RepositoryFormatPackDevelopment3Subtree',
     )

=== modified file 'bzrlib/tests/per_repository_chk/test_supported.py'
--- a/bzrlib/tests/per_repository_chk/test_supported.py	2008-11-05 06:31:00 +0000
+++ b/bzrlib/tests/per_repository_chk/test_supported.py	2008-11-12 04:17:01 +0000
@@ -65,15 +65,18 @@
             repo.unlock()
 
     def test_pack_preserves_chk_bytes_store(self):
-        expected_set = set([('sha1:062ef095245d8617d7671e50a71b529d13d93022',),
-            ('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',)])
+        expected_set = set([('sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779',),
+            ('sha1:af554bebcd35f8573896f3f6314bc46dd832e01c',)])
         repo = self.make_repository('.')
         repo.lock_write()
         try:
             repo.start_write_group()
             try:
+                # Internal node pointing at a leaf.
                 repo.chk_bytes.add_lines((None,), None,
-                    ["chkroot:\n", "0\n", "0\n"], random_id=True)
+                    ["chknode:\n", "0\n", "1\n", "1\n",
+                     "foo\x00sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779\n"],
+                     random_id=True)
             except:
                 repo.abort_write_group()
                 raise
@@ -81,8 +84,9 @@
                 repo.commit_write_group()
             repo.start_write_group()
             try:
-                repo.chk_bytes.add_lines((None,), None, ["chkvalue:\n"],
-                    random_id=True)
+                # Leaf in a separate pack.
+                repo.chk_bytes.add_lines((None,), None,
+                    ["chkleaf:\n", "0\n", "1\n", "0\n"], random_id=True)
             except:
                 repo.abort_write_group()
                 raise

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2008-11-11 00:03:17 +0000
+++ b/bzrlib/tests/test_repository.py	2008-11-12 04:17:01 +0000
@@ -683,8 +683,7 @@
         repo.add_inventory(revid, inv, [])
         self.assertEqual(set([(revid,)]), repo.inventories.keys())
         self.assertEqual(
-            set([('sha1:a12ffb3c1810005e8ed9388d29602e1e9e8e06ac',),
-                 ('sha1:fd7f4d1237b9ae0021bd8c52e99a930430444c79',)]),
+            set([('sha1:daaf97ebc784b6b6ef029d1710cc812d0ff200ff',)]),
             repo.chk_bytes.keys())
 
 




More information about the bazaar-commits mailing list