Rev 3762: CHKInventory migrated to new CHKMap code. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Wed Nov 12 04:17:11 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 3762
revision-id: robertc at robertcollins.net-20081112041701-gm15awvwc5c9rbml
parent: robertc at robertcollins.net-20081112031934-3wj5819th95gyiwg
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2008-11-12 15:17:01 +1100
message:
CHKInventory migrated to new CHKMap code.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/per_repository_chk/test_supported.py test_supported.py-20080925063728-k65ry0n2rhta6t34-1
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2008-11-12 03:19:34 +0000
+++ b/bzrlib/chk_map.py 2008-11-12 04:17:01 +0000
@@ -264,19 +264,6 @@
def key(self):
return self._key
- def _key_count(self, parent_width, cut_width):
- """Return the number of keys in/under this node between two widths.
-
- :param parent_width: The start offset in keys to consider.
- :param cut_width: The width to stop considering at.
- """
- # This assumes the keys are unique up to parent_width.
- serialised_keys = set()
- for key in self._items:
- serialised_key = '\x00'.join(key)
- serialised_keys.add(serialised_key[parent_width:cut_width])
- return len(serialised_keys)
-
def map(self, store, key, value):
"""Map key to value."""
if key in self._items:
@@ -326,6 +313,10 @@
"""Return the serialised key for key in this node."""
return '\x00'.join(key)
+ def refs(self):
+ """Return the references to other CHK's held by this node."""
+ return []
+
def unique_serialised_prefix(self):
"""Return the unique key prefix for this node.
@@ -537,23 +528,17 @@
for key, node in self._items.items():
pass
- def _key_count(self, parent_width, cut_width):
- """Return the number of keys in/under this node between two widths.
-
- :param parent_width: The start offset in keys to consider.
- :param cut_width: The width to stop considering at.
- """
- if cut_width > self._node_width:
- raise NotImplementedError(self._key_count)
- # This assumes the keys are unique up to parent_width.
- serialised_keys = set()
- for serialised_key in self._items:
- serialised_keys.add(serialised_key[parent_width:cut_width])
- return len(serialised_keys)
-
- def _prelude_size(self):
- """Return the size of the node prelude."""
- return 15
+ def refs(self):
+ """Return the references to other CHK's held by this node."""
+ if self._key is None:
+ raise AssertionError("unserialised nodes have no refs.")
+ refs = []
+ for value in self._items.itervalues():
+ if type(value) == tuple:
+ refs.append(value)
+ else:
+ refs.append(value.key())
+ return refs
def unique_serialised_prefix(self):
"""Return the unique key prefix for this node.
@@ -601,90 +586,6 @@
return self
-class RootNode(Node):
- """A root node in a CHKMap."""
-
- def __init__(self):
- Node.__init__(self)
- self._nodes = {}
- self._size = 12
- self.prefix_width = 0
-
- def add_child(self, name, child):
- """Add a child to the node.
-
- If the node changes, it's key is reset.
-
- :param name: The name of the child. A bytestring.
- :param child: The child, a key tuple for the childs value.
- """
- if self._maximum_size and self._current_size() >= self._maximum_size:
- return False
- if name in self._nodes:
- self.remove_child(name)
- self._nodes[name] = child
- self._len += 1
- self._key = None
- self._size += len(name) + len(child[0]) + 2
- return True
-
- def _current_size(self):
- """Answer the current serialised size of this node."""
- return (self._size + len(str(self._maximum_size)) + len(str(self._len))
- + len(str(self.prefix_width)))
-
- def deserialise(self, bytes, key):
- """Set the nodes value to that contained in bytes.
-
- :param bytes: The bytes of the node.
- :param key: The key that the serialised node has.
- """
- lines = bytes.splitlines()
- nodes = {}
- if lines[0] != 'chkroot:':
- raise ValueError("not a serialised root node: %r" % bytes)
- maximum_size = int(lines[1])
- prefix_width = int(lines[2])
- length = int(lines[3])
- for line in lines[4:]:
- name, value = line.split('\x00')
- nodes[name] = (value,)
- self._nodes = nodes
- self._len = length
- self._maximum_size = maximum_size
- self._key = key
- self.prefix_width = prefix_width
-
- def refs(self):
- """Get the CHK key references this node holds."""
- return self._nodes.values()
-
- def remove_child(self, name):
- """Remove name from the node.
-
- If the node changes, it's key is reset.
-
- :param name: The name to remove from the node.
- """
- node = self._nodes.pop(name)
- self._size -= 2 + len(name) + len(node[0])
- self._len -= 1
- self._key = None
-
- def serialise(self):
- """Flatten the node to a bytestring.
-
- :return: A bytestring.
- """
- lines = ["chkroot:\n"]
- lines.append("%d\n" % self._maximum_size)
- lines.append("%d\n" % self.prefix_width)
- lines.append("%d\n" % self._len)
- for name, child in sorted(self._nodes.items()):
- lines.append("%s\x00%s\n" % (name, child[0]))
- return "".join(lines)
-
-
def _deserialise(bytes, key):
"""Helper for repositorydetails - convert bytes to a node."""
if bytes.startswith("chkleaf:\n"):
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-11-11 04:28:43 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-11-12 04:17:01 +0000
@@ -807,15 +807,10 @@
new_refs = set()
def accumlate_refs(lines):
# XXX: move to a generic location
- kind = lines[0]
- if kind == 'chkroot:\n':
- node = chk_map.RootNode()
- node.deserialise(''.join(lines), None)
- new_refs.update(node.refs())
- elif kind == 'chkvalue:\n':
- pass
- else:
- raise ValueError("unknown chk node %r" % lines[0])
+ # Yay mismatch:
+ bytes = ''.join(lines)
+ node = chk_map._deserialise(bytes, ("unknown",))
+ new_refs.update(node.refs())
self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
self.new_pack.chk_index, output_lines=accumlate_refs)
return new_refs
@@ -2188,24 +2183,51 @@
uninteresting_map = uninteresting_inv.id_to_entry
uninteresting_map._ensure_root()
uninteresting_chk_refs = set(uninteresting_map._root_node.refs())
+ pending_nodes = set([uninteresting_map._root_node])
+ while pending_nodes:
+ nodes = pending_nodes
+ pending_nodes = set()
+ for node in nodes:
+ uninteresting_chk_refs.add(node.key())
+ if not isinstance(node, chk_map.InternalNode):
+ continue
+ for subnode in node._iter_nodes(uninteresting_map._store):
+ pending_nodes.add(subnode)
+ # XXX: This currently only avoids traversing top level unchanged trees.
+ # What we need is parallel traversal with uninteresting-only trees
+ # pruned, interesting-only trees included, common trees with identical
+ # pointers pruned, common trees with different pointers examined
+ # further.
for idx, inv in enumerate(self.iter_inventories(revision_ids)):
pb.update('fetch', idx, len(revision_ids))
inv_chk_map = inv.id_to_entry
inv_chk_map._ensure_root()
- candidate_names = {}
- for name, ref in inv_chk_map._root_node._nodes.iteritems():
- if ref in uninteresting_chk_refs:
- continue
- candidate_names[name] = ref
- for name, bytes in inv_chk_map.iteritems(candidate_names):
- entry = inv._bytes_to_entry(bytes)
- if entry.name == '' and not rich_root:
- continue
- if entry.revision == inv.revision_id:
- # add it to uninteresting_chk_refs to
- # avoid processing twice it if we see it again later.
- uninteresting_chk_refs.add(candidate_names[name])
- yield ("file", entry.file_id, [entry.revision])
+ pending_nodes = set([inv_chk_map._root_node])
+ while pending_nodes:
+ nodes = pending_nodes
+ pending_nodes = set()
+ for node in nodes:
+ # Do not examine this node again
+ uninteresting_chk_refs.add(node.key())
+ if not isinstance(node, chk_map.InternalNode):
+ # Leaf node: pull out its contents:
+ for name, bytes in node.iteritems():
+ entry = inv._bytes_to_entry(bytes)
+ if entry.name == '' and not rich_root:
+ continue
+ if entry.revision == inv.revision_id:
+ yield ("file", entry.file_id, [entry.revision])
+ continue
+ # Recurse deeper
+ # Two-pass; api fixup needed to allow exclusion
+ wanted_keys = set()
+ for key, value in node._items.iteritems:
+ if key in uninteresting_chk_refs:
+ continue
+ wanted_keys.add((key,))
+ for subnode in node._iter_nodes(inv_chk_map._store,
+ key_filter=wanted_keys):
+ pending_nodes.add(subnode)
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
"""Find the file ids and versions affected by revisions.
@@ -2803,9 +2825,9 @@
class RepositoryFormatPackDevelopment3(RepositoryFormatPack):
"""A no-subtrees development repository.
- This format should be retained until the second release after bzr 1.7.
+ This format should be retained until the second release after bzr 1.11.
- This is pack-1.6.1 with B+Tree indices and a chk index.
+ This is pack-1.9 with CHKMap based inventories.
"""
repository_class = CHKInventoryRepository
@@ -2828,7 +2850,7 @@
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
- return "Bazaar development format 3 (needs bzr.dev from before 1.8)\n"
+ return "Bazaar development format 3 (needs bzr.dev from before 1.10)\n"
def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
@@ -2842,9 +2864,9 @@
class RepositoryFormatPackDevelopment3Subtree(RepositoryFormatPack):
"""A subtrees development repository.
- This format should be retained until the second release after bzr 1.7.
+ This format should be retained until the second release after bzr 1.11.
- 1.6.1-subtree[as it might have been] with B+Tree indices and a chk index.
+ 1.9-subtree[as it might have been] with CHKMap based inventories.
"""
repository_class = CHKInventoryRepository
@@ -2879,7 +2901,7 @@
def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
return ("Bazaar development format 3 with subtree support "
- "(needs bzr.dev from before 1.8)\n")
+ "(needs bzr.dev from before 1.10)\n")
def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-11-11 04:28:43 +0000
+++ b/bzrlib/repository.py 2008-11-12 04:17:01 +0000
@@ -2503,14 +2503,15 @@
'bzrlib.repofmt.pack_repo',
'RepositoryFormatPackDevelopment2Subtree',
)
+# 1.9->1.110 go below here
format_registry.register_lazy(
- "Bazaar development format 3 (needs bzr.dev from before 1.8)\n",
+ "Bazaar development format 3 (needs bzr.dev from before 1.10)\n",
'bzrlib.repofmt.pack_repo',
'RepositoryFormatPackDevelopment3',
)
format_registry.register_lazy(
("Bazaar development format 3 with subtree support "
- "(needs bzr.dev from before 1.8)\n"),
+ "(needs bzr.dev from before 1.10)\n"),
'bzrlib.repofmt.pack_repo',
'RepositoryFormatPackDevelopment3Subtree',
)
=== modified file 'bzrlib/tests/per_repository_chk/test_supported.py'
--- a/bzrlib/tests/per_repository_chk/test_supported.py 2008-11-05 06:31:00 +0000
+++ b/bzrlib/tests/per_repository_chk/test_supported.py 2008-11-12 04:17:01 +0000
@@ -65,15 +65,18 @@
repo.unlock()
def test_pack_preserves_chk_bytes_store(self):
- expected_set = set([('sha1:062ef095245d8617d7671e50a71b529d13d93022',),
- ('sha1:d0826cf765ff45bd602f602ecb0efbe375ea3b50',)])
+ expected_set = set([('sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779',),
+ ('sha1:af554bebcd35f8573896f3f6314bc46dd832e01c',)])
repo = self.make_repository('.')
repo.lock_write()
try:
repo.start_write_group()
try:
+ # Internal node pointing at a leaf.
repo.chk_bytes.add_lines((None,), None,
- ["chkroot:\n", "0\n", "0\n"], random_id=True)
+ ["chknode:\n", "0\n", "1\n", "1\n",
+ "foo\x00sha1:4e6482a3a5cb2d61699971ac77befe11a0ec5779\n"],
+ random_id=True)
except:
repo.abort_write_group()
raise
@@ -81,8 +84,9 @@
repo.commit_write_group()
repo.start_write_group()
try:
- repo.chk_bytes.add_lines((None,), None, ["chkvalue:\n"],
- random_id=True)
+ # Leaf in a separate pack.
+ repo.chk_bytes.add_lines((None,), None,
+ ["chkleaf:\n", "0\n", "1\n", "0\n"], random_id=True)
except:
repo.abort_write_group()
raise
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2008-11-11 00:03:17 +0000
+++ b/bzrlib/tests/test_repository.py 2008-11-12 04:17:01 +0000
@@ -683,8 +683,7 @@
repo.add_inventory(revid, inv, [])
self.assertEqual(set([(revid,)]), repo.inventories.keys())
self.assertEqual(
- set([('sha1:a12ffb3c1810005e8ed9388d29602e1e9e8e06ac',),
- ('sha1:fd7f4d1237b9ae0021bd8c52e99a930430444c79',)]),
+ set([('sha1:daaf97ebc784b6b6ef029d1710cc812d0ff200ff',)]),
repo.chk_bytes.keys())
More information about the bazaar-commits
mailing list