Rev 4112: Add a groupcompress sort order. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files
Robert Collins
robertc at robertcollins.net
Wed Mar 11 07:59:23 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/versioned_files
------------------------------------------------------------
revno: 4112
revision-id: robertc at robertcollins.net-20090311075906-lc0mj10glf1eur1f
parent: pqm at pqm.ubuntu.com-20090311062514-cuxe92b6jnavadru
committer: Robert Collins <robertc at robertcollins.net>
branch nick: VersionedFiles.add_api
timestamp: Wed 2009-03-11 18:59:06 +1100
message:
Add a groupcompress sort order.
=== modified file 'NEWS'
--- a/NEWS 2009-03-10 09:55:11 +0000
+++ b/NEWS 2009-03-11 07:59:06 +0000
@@ -174,6 +174,10 @@
* MutableTree.commit now favours the "authors" argument, with the old
"author" argument being deprecated.
+ * New sort order for ``get_record_stream`` ``groupcompress`` which
+ sorts optimally for use with groupcompress compressors. (John Arbash
+ Meinel, Robert Collins)
+
* Remove deprecated EmptyTree. (Martin Pool)
* ``Repository.fetch`` now accepts an optional ``fetch_spec``
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2009-03-09 02:31:23 +0000
+++ b/bzrlib/knit.py 2009-03-11 07:59:06 +0000
@@ -103,6 +103,7 @@
ConstantMapper,
ContentFactory,
ChunkedContentFactory,
+ sort_groupcompress,
VersionedFile,
VersionedFiles,
)
@@ -1316,7 +1317,7 @@
if not keys:
return
if not self._index.has_graph:
- # Cannot topological order when no graph has been stored.
+ # Cannot sort when no graph has been stored.
ordering = 'unordered'
remaining_keys = keys
@@ -1378,9 +1379,12 @@
needed_from_fallback.add(key)
# Double index lookups here : need a unified api ?
global_map, parent_maps = self._get_parent_map_with_sources(keys)
- if ordering == 'topological':
- # Global topological sort
- present_keys = tsort.topo_sort(global_map)
+ if ordering in ('topological', 'groupcompress'):
+ if ordering == 'topological':
+ # Global topological sort
+ present_keys = tsort.topo_sort(global_map)
+ else:
+ present_keys = sort_groupcompress(global_map)
# Now group by source:
source_keys = []
current_source = None
@@ -1396,7 +1400,7 @@
else:
if ordering != 'unordered':
raise AssertionError('valid values for ordering are:'
- ' "unordered" or "topological" not: %r'
+ ' "unordered", "groupcompress" or "topological" not: %r'
% (ordering,))
# Just group by source; remote sources first.
present_keys = []
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2009-03-07 06:58:17 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2009-03-11 07:59:06 +0000
@@ -1615,6 +1615,26 @@
}
return keys, sort_order
+ def get_keys_and_groupcompress_sort_order(self):
+ """Get diamond test keys list, and their groupcompress sort ordering."""
+ if self.key_length == 1:
+ keys = [('merged',), ('left',), ('right',), ('base',)]
+ sort_order = {('merged',):0, ('left',):1, ('right',):1, ('base',):2}
+ else:
+ keys = [
+ ('FileA', 'merged'), ('FileA', 'left'), ('FileA', 'right'),
+ ('FileA', 'base'),
+ ('FileB', 'merged'), ('FileB', 'left'), ('FileB', 'right'),
+ ('FileB', 'base'),
+ ]
+ sort_order = {
+ ('FileA', 'merged'):0, ('FileA', 'left'):1, ('FileA', 'right'):1,
+ ('FileA', 'base'):2,
+ ('FileB', 'merged'):3, ('FileB', 'left'):4, ('FileB', 'right'):4,
+ ('FileB', 'base'):5,
+ }
+ return keys, sort_order
+
def test_get_record_stream_interface_ordered(self):
"""each item in a stream has to provide a regular interface."""
files = self.get_versionedfiles()
@@ -1648,6 +1668,17 @@
self.assertStreamOrder(sort_order, seen, keys)
+ def test_get_record_stream_interface_groupcompress(self):
+ """each item in a stream has to provide a regular interface."""
+ files = self.get_versionedfiles()
+ self.get_diamond_files(files)
+ keys, sort_order = self.get_keys_and_groupcompress_sort_order()
+ parent_map = files.get_parent_map(keys)
+ entries = files.get_record_stream(keys, 'groupcompress', False)
+ seen = []
+ self.capture_stream(files, entries, seen.append, parent_map)
+ self.assertStreamOrder(sort_order, seen, keys)
+
def assertStreamOrder(self, sort_order, seen, keys):
self.assertEqual(len(set(seen)), len(keys))
if self.key_length == 1:
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2009-03-02 03:38:07 +0000
+++ b/bzrlib/versionedfile.py 2009-03-11 07:59:06 +0000
@@ -1555,3 +1555,31 @@
record_content = record.get_bytes_as('fulltext')
return "fulltext\n%s%s%s" % (
_length_prefix(record_meta), record_meta, record_content)
+
+
+def sort_groupcompress(parent_map):
+ """Sort and group the keys in parent_map into groupcompress order.
+
+ groupcompress is defined (currently) as reverse-topological order, grouped
+ by the key prefix.
+
+ :return: A sorted-list of keys
+ """
+ # gc-optimal ordering is approximately reverse topological,
+ # properly grouped by file-id.
+ per_prefix_map = {}
+ for item in parent_map.iteritems():
+ key = item[0]
+ if isinstance(key, str) or len(key) == 1:
+ prefix = ''
+ else:
+ prefix = key[0]
+ try:
+ per_prefix_map[prefix].append(item)
+ except KeyError:
+ per_prefix_map[prefix] = [item]
+
+ present_keys = []
+ for prefix in sorted(per_prefix_map):
+ present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
+ return present_keys
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2009-02-23 15:29:35 +0000
+++ b/bzrlib/weave.py 2009-03-11 07:59:06 +0000
@@ -99,6 +99,7 @@
AbsentContentFactory,
adapter_registry,
ContentFactory,
+ sort_groupcompress,
VersionedFile,
)
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
@@ -321,6 +322,11 @@
new_versions = tsort.topo_sort(parents)
new_versions.extend(set(versions).difference(set(parents)))
versions = new_versions
+ elif ordering == 'groupcompress':
+ parents = self.get_parent_map(versions)
+ new_versions = sort_groupcompress(parents)
+ new_versions.extend(set(versions).difference(set(parents)))
+ versions = new_versions
for version in versions:
if version in self:
yield WeaveContentFactory(version, self)
More information about the bazaar-commits
mailing list