Rev 4119: (robertc) Add a groupcompress sort order for get_record_stream. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Mar 12 00:16:59 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4119
revision-id: pqm at pqm.ubuntu.com-20090312001649-6tvc2mmeyw992st3
parent: pqm at pqm.ubuntu.com-20090311233307-n6zmhg1y074svja6
parent: robertc at robertcollins.net-20090311075906-lc0mj10glf1eur1f
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2009-03-12 00:16:49 +0000
message:
  (robertc) Add a groupcompress sort order for get_record_stream.
  	(Robert Collins, John Arbash Meinel)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 4111.1.1
    revision-id: robertc at robertcollins.net-20090311075906-lc0mj10glf1eur1f
    parent: pqm at pqm.ubuntu.com-20090311062514-cuxe92b6jnavadru
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: VersionedFiles.add_api
    timestamp: Wed 2009-03-11 18:59:06 +1100
    message:
      Add a groupcompress sort order.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
      bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
      bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
=== modified file 'NEWS'
--- a/NEWS	2009-03-11 11:01:02 +0000
+++ b/NEWS	2009-03-12 00:16:49 +0000
@@ -184,6 +184,10 @@
     * MutableTree.commit now favours the "authors" argument, with the old
       "author" argument being deprecated.
 
+    * New sort order ``groupcompress`` for ``get_record_stream``, which
+      sorts optimally for use with groupcompress compressors. (John Arbash
+      Meinel, Robert Collins)
+
     * Remove deprecated EmptyTree.  (Martin Pool)
 
     * ``Repository.fetch`` now accepts an optional ``fetch_spec``

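A minimal usage sketch of the new ordering (not part of the patch; ``vf``
stands for any VersionedFiles implementation and ``keys`` for the keys to
fetch -- the call shape matches the test added further down):

    stream = vf.get_record_stream(keys, 'groupcompress', False)
    for record in stream:
        if record.storage_kind == 'absent':
            continue
        text = record.get_bytes_as('fulltext')
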
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2009-03-09 02:31:23 +0000
+++ b/bzrlib/knit.py	2009-03-11 07:59:06 +0000
@@ -103,6 +103,7 @@
     ConstantMapper,
     ContentFactory,
     ChunkedContentFactory,
+    sort_groupcompress,
     VersionedFile,
     VersionedFiles,
     )
@@ -1316,7 +1317,7 @@
         if not keys:
             return
         if not self._index.has_graph:
-            # Cannot topological order when no graph has been stored.
+            # Cannot sort when no graph has been stored.
             ordering = 'unordered'
 
         remaining_keys = keys
@@ -1378,9 +1379,12 @@
                     needed_from_fallback.add(key)
         # Double index lookups here : need a unified api ?
         global_map, parent_maps = self._get_parent_map_with_sources(keys)
-        if ordering == 'topological':
-            # Global topological sort
-            present_keys = tsort.topo_sort(global_map)
+        if ordering in ('topological', 'groupcompress'):
+            if ordering == 'topological':
+                # Global topological sort
+                present_keys = tsort.topo_sort(global_map)
+            else:
+                present_keys = sort_groupcompress(global_map)
             # Now group by source:
             source_keys = []
             current_source = None
@@ -1396,7 +1400,7 @@
         else:
             if ordering != 'unordered':
                 raise AssertionError('valid values for ordering are:'
-                    ' "unordered" or "topological" not: %r'
+                    ' "unordered", "groupcompress" or "topological" not: %r'
                     % (ordering,))
             # Just group by source; remote sources first.
             present_keys = []

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2009-03-07 06:58:17 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2009-03-11 07:59:06 +0000
@@ -1615,6 +1615,26 @@
                 }
         return keys, sort_order
 
+    def get_keys_and_groupcompress_sort_order(self):
+        """Get diamond test keys list, and their groupcompress sort ordering."""
+        if self.key_length == 1:
+            keys = [('merged',), ('left',), ('right',), ('base',)]
+            sort_order = {('merged',):0, ('left',):1, ('right',):1, ('base',):2}
+        else:
+            keys = [
+                ('FileA', 'merged'), ('FileA', 'left'), ('FileA', 'right'),
+                ('FileA', 'base'),
+                ('FileB', 'merged'), ('FileB', 'left'), ('FileB', 'right'),
+                ('FileB', 'base'),
+                ]
+            sort_order = {
+                ('FileA', 'merged'):0, ('FileA', 'left'):1, ('FileA', 'right'):1,
+                ('FileA', 'base'):2,
+                ('FileB', 'merged'):3, ('FileB', 'left'):4, ('FileB', 'right'):4,
+                ('FileB', 'base'):5,
+                }
+        return keys, sort_order
+
     def test_get_record_stream_interface_ordered(self):
         """each item in a stream has to provide a regular interface."""
         files = self.get_versionedfiles()
@@ -1648,6 +1668,17 @@
 
         self.assertStreamOrder(sort_order, seen, keys)
 
+    def test_get_record_stream_interface_groupcompress(self):
+        """each item in a stream has to provide a regular interface."""
+        files = self.get_versionedfiles()
+        self.get_diamond_files(files)
+        keys, sort_order = self.get_keys_and_groupcompress_sort_order()
+        parent_map = files.get_parent_map(keys)
+        entries = files.get_record_stream(keys, 'groupcompress', False)
+        seen = []
+        self.capture_stream(files, entries, seen.append, parent_map)
+        self.assertStreamOrder(sort_order, seen, keys)
+
     def assertStreamOrder(self, sort_order, seen, keys):
         self.assertEqual(len(set(seen)), len(keys))
         if self.key_length == 1:

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2009-03-02 03:38:07 +0000
+++ b/bzrlib/versionedfile.py	2009-03-11 07:59:06 +0000
@@ -1555,3 +1555,31 @@
     record_content = record.get_bytes_as('fulltext')
     return "fulltext\n%s%s%s" % (
         _length_prefix(record_meta), record_meta, record_content)
+
+
+def sort_groupcompress(parent_map):
+    """Sort and group the keys in parent_map into groupcompress order.
+
+    groupcompress is defined (currently) as reverse-topological order, grouped
+    by the key prefix.
+
+    :return: A sorted-list of keys
+    """
+    # gc-optimal ordering is approximately reverse topological,
+    # properly grouped by file-id.
+    per_prefix_map = {}
+    for item in parent_map.iteritems():
+        key = item[0]
+        if isinstance(key, str) or len(key) == 1:
+            prefix = ''
+        else:
+            prefix = key[0]
+        try:
+            per_prefix_map[prefix].append(item)
+        except KeyError:
+            per_prefix_map[prefix] = [item]
+
+    present_keys = []
+    for prefix in sorted(per_prefix_map):
+        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
+    return present_keys

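A quick illustration of ``sort_groupcompress`` on the diamond graph used by
the new tests (illustrative data only; ``left`` and ``right`` both descend
from ``base``, and ``merged`` has both as parents):

    parent_map = {
        ('FileA', 'base'): (),
        ('FileA', 'left'): (('FileA', 'base'),),
        ('FileA', 'right'): (('FileA', 'base'),),
        ('FileA', 'merged'): (('FileA', 'left'), ('FileA', 'right')),
    }
    keys = sort_groupcompress(parent_map)
    # Reverse-topological within the 'FileA' prefix: ('FileA', 'merged')
    # comes first, 'left'/'right' follow in either order, ('FileA', 'base')
    # comes last.
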
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2009-03-11 05:26:14 +0000
+++ b/bzrlib/weave.py	2009-03-12 00:16:49 +0000
@@ -99,6 +99,7 @@
     AbsentContentFactory,
     adapter_registry,
     ContentFactory,
+    sort_groupcompress,
     VersionedFile,
     )
 from bzrlib.weavefile import _read_weave_v5, write_weave_v5
@@ -321,6 +322,11 @@
             new_versions = tsort.topo_sort(parents)
             new_versions.extend(set(versions).difference(set(parents)))
             versions = new_versions
+        elif ordering == 'groupcompress':
+            parents = self.get_parent_map(versions)
+            new_versions = sort_groupcompress(parents)
+            new_versions.extend(set(versions).difference(set(parents)))
+            versions = new_versions
         for version in versions:
             if version in self:
                 yield WeaveContentFactory(version, self)

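For the weave path above, the same leftover handling applies as for the
'topological' branch: versions that were requested but are absent from the
parent map are appended after the sorted ones. A small sketch with
illustrative data (weave version ids are plain strings, so they all fall
into the empty prefix):

    parents = {'base': (), 'mod': ('base',)}
    versions = ['mod', 'base', 'not-stored']
    ordered = sort_groupcompress(parents)                    # ['mod', 'base']
    ordered.extend(set(versions).difference(set(parents)))   # keeps 'not-stored'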


