Rev 33: Experiment with multi-parents and multi-child base selection algorithms. in http://bzr.arbash-meinel.com/plugins/xdelta_test
John Arbash Meinel
john at arbash-meinel.com
Fri Jun 22 18:45:11 BST 2007
At http://bzr.arbash-meinel.com/plugins/xdelta_test
------------------------------------------------------------
revno: 33
revision-id: john at arbash-meinel.com-20070622174504-abk2qp1s9wqkb40a
parent: john at arbash-meinel.com-20070622154902-5rvjc20mrrt373zg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: xdelta_test
timestamp: Fri 2007-06-22 12:45:04 -0500
message:
Experiment with multi-parents and multi-child base selection algorithms.
They seem to genuinely shrink the delta texts.
modified:
base_selection_algorithms.py base_selection_algor-20070528163119-4mg41krgj6fz5xen-1
bench_important_algorithms/bench_all_texts.py bench_all_texts.py-20070528175343-sl9y3m4xrylwr5n9-3
compression_algorithms.py compression_algorith-20070528163119-4mg41krgj6fz5xen-2
-------------- next part --------------
=== modified file 'base_selection_algorithms.py'
--- a/base_selection_algorithms.py 2007-05-29 14:32:09 +0000
+++ b/base_selection_algorithms.py 2007-06-22 17:45:04 +0000
@@ -33,6 +33,18 @@
base_selection_algorithms.append(lh_parent)
+def multi_parent(ancestry_graph, offsets, texts):
+ base = {}
+ for version_id, parents in ancestry_graph.iteritems():
+ if not parents:
+ base[version_id] = None
+ else:
+ base[version_id] = tuple(parents)
+ return base
+
+base_selection_algorithms.append(multi_parent)
+
+
def rh_parent(ancestry_graph, offsets, texts):
base = {}
for version_id, parents in ancestry_graph.iteritems():
@@ -107,6 +119,37 @@
base_selection_algorithms.append(lh_child_child)
+def multi_child(ancestry_graph, offsets, texts):
+ """Find a close compression relative.
+
+ We delta against all children.
+
+ :param ancestry_graph: dict of (revision_id => [parents])
+ :param offsets: dict of (revision_id => offset_in_list)
+ :param texts: List of full texts
+ """
+ children = {}
+ for version_id, parents in ancestry_graph.iteritems():
+ if not parents:
+ continue
+ offset = offsets[version_id]
+ for p in parents:
+ children.setdefault(p, []).append((offset, version_id))
+
+ base = {}
+ for version_id in ancestry_graph:
+ if version_id in children:
+ # reverse=True has been shown to be slightly (1%?) smaller than
+ # reverse=False.
+ base_version = tuple(x[1] for x in sorted(children[version_id], reverse=True))
+ else:
+ base_version = None
+ base[version_id] = base_version
+ return base
+
+base_selection_algorithms.append(multi_child)
+
+
# This algorithm seems to be "illegal" in that it doesn't give a proper
# fulltext base to start from. (ie if you only had the compressed output, you
# could not generate any texts.)
@@ -200,4 +243,8 @@
base_selection_algorithms.append(by_size_base_longer)
best_base_algorithms = [lh_parent, lh_child_child, linear_base_after,
- by_size_base_longer]
+ by_size_base_longer,
+ # Not all code paths can use these yet
+ #multi_parent,
+ #multi_child,
+ ]
=== modified file 'bench_important_algorithms/bench_all_texts.py'
--- a/bench_important_algorithms/bench_all_texts.py 2007-06-22 15:49:02 +0000
+++ b/bench_important_algorithms/bench_all_texts.py 2007-06-22 17:45:04 +0000
@@ -46,8 +46,14 @@
if base is None:
out = self.compress_algorithm.compress_one(text)
else:
- base_offset = offsets[base]
- base_text = texts[base_offset]
+ # This will be highly inefficient for lots of algorithms,
+ # since it will create a full delta, but it *is* what xdelta
+ # wants. Maybe we should just skip everything else.
+ if isinstance(base, tuple):
+ base_text = ''.join(texts[offsets[b]] for b in base)
+ else:
+ base_offset = offsets[base]
+ base_text = texts[base_offset]
out = self.compress_algorithm.delta(base_text, text)
compressed_texts.append(out)
return compressed_texts
@@ -67,8 +73,14 @@
out = self.compress_algorithm.decompress_one(c_text,
len(texts[offset]))
else:
- base_offset = offsets[base]
- base_text = texts[base_offset]
+ # This will be highly inefficient for lots of algorithms,
+ # since it will create a full delta, but it *is* what xdelta
+ # wants. Maybe we should just skip everything else.
+ if isinstance(base, tuple):
+ base_text = ''.join(texts[offsets[b]] for b in base)
+ else:
+ base_offset = offsets[base]
+ base_text = texts[base_offset]
out = self.compress_algorithm.apply_deltas(base_text,
[c_text])
decompressed_texts.append(out)
=== modified file 'compression_algorithms.py'
--- a/compression_algorithms.py 2007-06-22 15:49:02 +0000
+++ b/compression_algorithms.py 2007-06-22 17:45:04 +0000
@@ -364,5 +364,6 @@
#'xd3-NOCOMPRESS',
'xd3-NOCOMPRESS+zlib',
#'bdiff-one',
- 'bdiff-one+zlib', 'bdiff-multi+zlib',
+ 'bdiff-one+zlib',
+ 'bdiff-multi+zlib',
]]
More information about the bazaar-commits
mailing list