Rev 4112: Add a groupcompress sort order. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files

Robert Collins robertc at robertcollins.net
Wed Mar 11 07:59:23 GMT 2009


At http://people.ubuntu.com/~robertc/baz2.0/versioned_files

------------------------------------------------------------
revno: 4112
revision-id: robertc at robertcollins.net-20090311075906-lc0mj10glf1eur1f
parent: pqm at pqm.ubuntu.com-20090311062514-cuxe92b6jnavadru
committer: Robert Collins <robertc at robertcollins.net>
branch nick: VersionedFiles.add_api
timestamp: Wed 2009-03-11 18:59:06 +1100
message:
  Add a groupcompress sort order.
=== modified file 'NEWS'
--- a/NEWS	2009-03-10 09:55:11 +0000
+++ b/NEWS	2009-03-11 07:59:06 +0000
@@ -174,6 +174,10 @@
     * MutableTree.commit now favours the "authors" argument, with the old
       "author" argument being deprecated.
 
+    * New sort order for ``get_record_stream`` ``groupcompress`` which
+      sorts optimally for use with groupcompress compressors. (John Arbash
+      Meinel, Robert Collins)
+
     * Remove deprecated EmptyTree.  (Martin Pool)
 
     * ``Repository.fetch`` now accepts an optional ``fetch_spec``

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2009-03-09 02:31:23 +0000
+++ b/bzrlib/knit.py	2009-03-11 07:59:06 +0000
@@ -103,6 +103,7 @@
     ConstantMapper,
     ContentFactory,
     ChunkedContentFactory,
+    sort_groupcompress,
     VersionedFile,
     VersionedFiles,
     )
@@ -1316,7 +1317,7 @@
         if not keys:
             return
         if not self._index.has_graph:
-            # Cannot topological order when no graph has been stored.
+            # Cannot sort when no graph has been stored.
             ordering = 'unordered'
 
         remaining_keys = keys
@@ -1378,9 +1379,12 @@
                     needed_from_fallback.add(key)
         # Double index lookups here : need a unified api ?
         global_map, parent_maps = self._get_parent_map_with_sources(keys)
-        if ordering == 'topological':
-            # Global topological sort
-            present_keys = tsort.topo_sort(global_map)
+        if ordering in ('topological', 'groupcompress'):
+            if ordering == 'topological':
+                # Global topological sort
+                present_keys = tsort.topo_sort(global_map)
+            else:
+                present_keys = sort_groupcompress(global_map)
             # Now group by source:
             source_keys = []
             current_source = None
@@ -1396,7 +1400,7 @@
         else:
             if ordering != 'unordered':
                 raise AssertionError('valid values for ordering are:'
-                    ' "unordered" or "topological" not: %r'
+                    ' "unordered", "groupcompress" or "topological" not: %r'
                     % (ordering,))
             # Just group by source; remote sources first.
             present_keys = []

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2009-03-07 06:58:17 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2009-03-11 07:59:06 +0000
@@ -1615,6 +1615,26 @@
                 }
         return keys, sort_order
 
+    def get_keys_and_groupcompress_sort_order(self):
+        """Get diamond test keys list, and their groupcompress sort ordering."""
+        if self.key_length == 1:
+            keys = [('merged',), ('left',), ('right',), ('base',)]
+            sort_order = {('merged',):0, ('left',):1, ('right',):1, ('base',):2}
+        else:
+            keys = [
+                ('FileA', 'merged'), ('FileA', 'left'), ('FileA', 'right'),
+                ('FileA', 'base'),
+                ('FileB', 'merged'), ('FileB', 'left'), ('FileB', 'right'),
+                ('FileB', 'base'),
+                ]
+            sort_order = {
+                ('FileA', 'merged'):0, ('FileA', 'left'):1, ('FileA', 'right'):1,
+                ('FileA', 'base'):2,
+                ('FileB', 'merged'):3, ('FileB', 'left'):4, ('FileB', 'right'):4,
+                ('FileB', 'base'):5,
+                }
+        return keys, sort_order
+
     def test_get_record_stream_interface_ordered(self):
         """each item in a stream has to provide a regular interface."""
         files = self.get_versionedfiles()
@@ -1648,6 +1668,17 @@
 
         self.assertStreamOrder(sort_order, seen, keys)
 
+    def test_get_record_stream_interface_groupcompress(self):
+        """each item in a stream has to provide a regular interface."""
+        files = self.get_versionedfiles()
+        self.get_diamond_files(files)
+        keys, sort_order = self.get_keys_and_groupcompress_sort_order()
+        parent_map = files.get_parent_map(keys)
+        entries = files.get_record_stream(keys, 'groupcompress', False)
+        seen = []
+        self.capture_stream(files, entries, seen.append, parent_map)
+        self.assertStreamOrder(sort_order, seen, keys)
+
     def assertStreamOrder(self, sort_order, seen, keys):
         self.assertEqual(len(set(seen)), len(keys))
         if self.key_length == 1:

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2009-03-02 03:38:07 +0000
+++ b/bzrlib/versionedfile.py	2009-03-11 07:59:06 +0000
@@ -1555,3 +1555,31 @@
     record_content = record.get_bytes_as('fulltext')
     return "fulltext\n%s%s%s" % (
         _length_prefix(record_meta), record_meta, record_content)
+
+
+def sort_groupcompress(parent_map):
+    """Sort and group the keys in parent_map into groupcompress order.
+
+    groupcompress is defined (currently) as reverse-topological order, grouped
+    by the key prefix.
+
+    :return: A sorted-list of keys
+    """
+    # gc-optimal ordering is approximately reverse topological,
+    # properly grouped by file-id.
+    per_prefix_map = {}
+    for item in parent_map.iteritems():
+        key = item[0]
+        if isinstance(key, str) or len(key) == 1:
+            prefix = ''
+        else:
+            prefix = key[0]
+        try:
+            per_prefix_map[prefix].append(item)
+        except KeyError:
+            per_prefix_map[prefix] = [item]
+
+    present_keys = []
+    for prefix in sorted(per_prefix_map):
+        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
+    return present_keys

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2009-02-23 15:29:35 +0000
+++ b/bzrlib/weave.py	2009-03-11 07:59:06 +0000
@@ -99,6 +99,7 @@
     AbsentContentFactory,
     adapter_registry,
     ContentFactory,
+    sort_groupcompress,
     VersionedFile,
     )
 from bzrlib.weavefile import _read_weave_v5, write_weave_v5
@@ -321,6 +322,11 @@
             new_versions = tsort.topo_sort(parents)
             new_versions.extend(set(versions).difference(set(parents)))
             versions = new_versions
+        elif ordering == 'groupcompress':
+            parents = self.get_parent_map(versions)
+            new_versions = sort_groupcompress(parents)
+            new_versions.extend(set(versions).difference(set(parents)))
+            versions = new_versions
         for version in versions:
             if version in self:
                 yield WeaveContentFactory(version, self)




More information about the bazaar-commits mailing list