Rev 3380: VF.get_sha1s needed changing to be stackable. in http://people.ubuntu.com/~robertc/baz2.0/shallow-branch

Robert Collins robertc at robertcollins.net
Thu Jun 19 14:14:29 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/shallow-branch

------------------------------------------------------------
revno: 3380
revision-id: robertc at robertcollins.net-20080619131424-gbltev5ud5u82gbt
parent: robertc at robertcollins.net-20080619064333-apziisq1g280m70v
committer: Robert Collins <robertc at robertcollins.net>
branch nick: stacking-knits
timestamp: Thu 2008-06-19 23:14:24 +1000
message:
  VF.get_sha1s needed changing to be stackable.
modified:
  bzrlib/bundle/serializer/v4.py v10.py-20070611062757-5ggj7k18s9dej0fr-1
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/repository_implementations/test_check_reconcile.py test_broken.py-20070928125406-62236394w0jpbpd6-2
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
=== modified file 'bzrlib/bundle/serializer/v4.py'
--- a/bzrlib/bundle/serializer/v4.py	2008-06-11 04:20:16 +0000
+++ b/bzrlib/bundle/serializer/v4.py	2008-06-19 13:14:24 +0000
@@ -356,7 +356,8 @@
         mpdiffs = vf.make_mpdiffs(ordered_keys)
         sha1s = vf.get_sha1s(ordered_keys)
         parent_map = vf.get_parent_map(ordered_keys)
-        for mpdiff, item_key, sha1, in zip(mpdiffs, ordered_keys, sha1s):
+        for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
+            sha1 = sha1s[item_key]
             parents = [key[-1] for key in parent_map[item_key]]
             text = ''.join(mpdiff.to_patch())
             # Infer file id records as appropriate.

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2008-06-11 04:20:16 +0000
+++ b/bzrlib/inventory.py	2008-06-19 13:14:24 +0000
@@ -472,9 +472,9 @@
 
     def _check(self, checker, tree_revision_id, tree):
         """See InventoryEntry._check"""
-        t = (self.file_id, self.revision)
-        if t in checker.checked_texts:
-            prev_sha = checker.checked_texts[t]
+        key = (self.file_id, self.revision)
+        if key in checker.checked_texts:
+            prev_sha = checker.checked_texts[key]
             if prev_sha != self.text_sha1:
                 raise BzrCheckError(
                     'mismatched sha1 on {%s} in {%s} (%s != %s) %r' %
@@ -489,11 +489,9 @@
         # We can't check the length, because Weave doesn't store that
         # information, and the whole point of looking at the weave's
         # sha1sum is that we don't have to extract the text.
-        if (self.text_sha1 !=
-            tree._repository.texts.get_sha1s([(self.file_id, self.revision)])[0]):
-            raise BzrCheckError('text {%s} version {%s} wrong sha1' 
-                                % (self.file_id, self.revision))
-        checker.checked_texts[t] = self.text_sha1
+        if (self.text_sha1 != tree._repository.texts.get_sha1s([key])[key]):
+            raise BzrCheckError('text {%s} version {%s} wrong sha1' % key)
+        checker.checked_texts[key] = self.text_sha1
 
     def copy(self):
         other = InventoryFile(self.file_id, self.name, self.parent_id)

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-06-19 06:43:33 +0000
+++ b/bzrlib/knit.py	2008-06-19 13:14:24 +0000
@@ -1036,7 +1036,7 @@
             missing.difference_update(set(new_result))
         return result
 
-    def _get_record_map(self, keys):
+    def _get_record_map(self, keys, allow_missing=False):
         """Produce a dictionary of knit records.
         
         :return: {key:(record, record_details, digest, next)}
@@ -1049,8 +1049,12 @@
             next
                 build-parent of the version, i.e. the leftmost ancestor.
                 Will be None if the record is not a delta.
+        :param keys: The keys to build a map for
+        :param allow_missing: If some records are missing, rather than 
+            error, just return the data that could be generated.
         """
-        position_map = self._get_components_positions(keys)
+        position_map = self._get_components_positions(keys,
+            noraise=allow_missing)
         # key = component_id, r = record_details, i_m = index_memo, n = next
         records = [(key, i_m) for key, (r, i_m, n)
                              in position_map.iteritems()]
@@ -1142,9 +1146,22 @@
 
     def get_sha1s(self, keys):
         """See VersionedFiles.get_sha1s()."""
-        record_map = self._get_record_map(keys)
-        # record entry 2 is the 'digest'.
-        return [record_map[key][2] for key in keys]
+        missing = set(keys)
+        record_map = self._get_record_map(missing, allow_missing=True)
+        result = {}
+        for key, details in record_map.iteritems():
+            if key not in missing:
+                continue
+            # record entry 2 is the 'digest'.
+            result[key] = details[2]
+        missing.difference_update(set(result))
+        for source in self._fallback_vfs:
+            if not missing:
+                break
+            new_result = source.get_sha1s(missing)
+            result.update(new_result)
+            missing.difference_update(set(new_result))
+        return result
 
     def insert_record_stream(self, stream):
         """Insert a record stream into this container.

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-06-11 07:22:00 +0000
+++ b/bzrlib/repository.py	2008-06-19 13:14:24 +0000
@@ -555,7 +555,8 @@
                 rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                         rev.parent_ids)
         else:
-            rev.inventory_sha1 = self.inventories.get_sha1s([(revision_id,)])[0]
+            key = (revision_id,)
+            rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
         self._add_revision(rev)
 
     def _add_revision(self, revision):

=== modified file 'bzrlib/tests/repository_implementations/test_check_reconcile.py'
--- a/bzrlib/tests/repository_implementations/test_check_reconcile.py	2008-06-11 04:20:16 +0000
+++ b/bzrlib/tests/repository_implementations/test_check_reconcile.py	2008-06-19 13:14:24 +0000
@@ -150,7 +150,7 @@
         :returns: A dict of `{version: hash}`.
         """
         keys = [('a-file-id', version) for version in versions]
-        return dict(zip(versions, repo.texts.get_sha1s(keys)))
+        return repo.texts.get_sha1s(keys)
 
     def test_reconcile_behaviour(self):
         """Populate a repository and reconcile it, verifying the state before

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2008-06-19 06:43:33 +0000
+++ b/bzrlib/tests/test_knit.py	2008-06-19 13:14:24 +0000
@@ -1427,7 +1427,27 @@
         pass
 
     def test_get_sha1s(self):
-        pass
+        # sha1's in the test knit are answered without asking the basis
+        basis, test = self.get_basis_and_test_knit()
+        key = ('foo',)
+        key_basis = ('bar',)
+        key_missing = ('missing',)
+        test.add_lines(key, (), ['foo\n'])
+        key_sha1sum = sha.new('foo\n').hexdigest()
+        sha1s = test.get_sha1s([key])
+        self.assertEqual({key: key_sha1sum}, sha1s)
+        self.assertEqual([], basis.calls)
+        # But texts that are not in the test knit are looked for in the basis
+        # directly (rather than via text reconstruction) so that remote servers
+        # etc don't have to answer with full content.
+        basis.add_lines(key_basis, (), ['foo\n', 'bar\n'])
+        basis_sha1sum = sha.new('foo\nbar\n').hexdigest()
+        basis.calls = []
+        sha1s = test.get_sha1s([key, key_missing, key_basis])
+        self.assertEqual({key: key_sha1sum,
+            key_basis: basis_sha1sum}, sha1s)
+        self.assertEqual([("get_sha1s", set([key_basis, key_missing]))],
+            basis.calls)
 
     def test_insert_record_stream(self):
         pass

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-06-11 07:22:00 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-06-19 13:14:24 +0000
@@ -501,7 +501,7 @@
         for version in multiparent.topo_iter(vf):
             mpdiff = vf.make_mpdiffs([version])[0]
             new_vf.add_mpdiffs([(version, vf.get_parent_map([version])[version],
-                                 vf.get_sha1s([version])[0], mpdiff)])
+                                 vf.get_sha1s([version])[version], mpdiff)])
             self.assertEqualDiff(vf.get_text(version),
                                  new_vf.get_text(version))
 
@@ -840,10 +840,12 @@
         vf.add_lines('b', ['a'], ['a\n'])
         # a file differing only in last newline.
         vf.add_lines('c', [], ['a'])
-        self.assertEqual(['3f786850e387550fdab836ed7e6dc881de23001b',
-                          '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
-                          '3f786850e387550fdab836ed7e6dc881de23001b'],
-                          vf.get_sha1s(['a', 'c', 'b']))
+        self.assertEqual({
+            'a': '3f786850e387550fdab836ed7e6dc881de23001b',
+            'c': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
+            'b': '3f786850e387550fdab836ed7e6dc881de23001b',
+            },
+            vf.get_sha1s(['a', 'c', 'b']))
         
 
 class TestWeave(TestCaseWithMemoryTransport, VersionedFileTestMixIn):
@@ -1562,7 +1564,8 @@
         for factory in entries:
             on_seen(factory.key)
             self.assertValidStorageKind(factory.storage_kind)
-            self.assertEqual(f.get_sha1s([factory.key])[0], factory.sha1)
+            self.assertEqual(f.get_sha1s([factory.key])[factory.key],
+                factory.sha1)
             self.assertEqual(parents[factory.key], factory.parents)
             self.assertIsInstance(factory.get_bytes_as(factory.storage_kind),
                 str)
@@ -1627,7 +1630,8 @@
         for factory in entries:
             seen.append(factory.key)
             self.assertValidStorageKind(factory.storage_kind)
-            self.assertSubset([factory.sha1], [None, files.get_sha1s([factory.key])[0]])
+            self.assertSubset([factory.sha1],
+                [None, files.get_sha1s([factory.key])[factory.key]])
             self.assertEqual(parent_map[factory.key], factory.parents)
             # self.assertEqual(files.get_text(factory.key),
             self.assertIsInstance(factory.get_bytes_as('fulltext'), str)
@@ -1671,7 +1675,8 @@
         for factory in entries:
             seen.add(factory.key)
             self.assertValidStorageKind(factory.storage_kind)
-            self.assertEqual(files.get_sha1s([factory.key])[0], factory.sha1)
+            self.assertEqual(files.get_sha1s([factory.key])[factory.key],
+                factory.sha1)
             self.assertEqual(parent_map[factory.key], factory.parents)
             # currently no stream emits mpdiff
             self.assertRaises(errors.UnavailableRepresentation,
@@ -1710,7 +1715,8 @@
                 self.assertEqual(None, factory.parents)
             else:
                 self.assertValidStorageKind(factory.storage_kind)
-                self.assertEqual(files.get_sha1s([factory.key])[0], factory.sha1)
+                self.assertEqual(files.get_sha1s([factory.key])[factory.key],
+                    factory.sha1)
                 self.assertEqual(parents[factory.key], factory.parents)
                 self.assertIsInstance(factory.get_bytes_as(factory.storage_kind),
                     str)
@@ -1798,13 +1804,13 @@
                 ('FileA', 'base'), ('FileB', 'origin'), ('FileA', 'left'),
                 ('FileA', 'merged'), ('FileB', 'right'),
                 ]
-        self.assertEqual([
-            '51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44',
-            '00e364d235126be43292ab09cb4686cf703ddc17',
-            'a8478686da38e370e32e42e8a0c220e33ee9132f',
-            'ed8bce375198ea62444dc71952b22cfc2b09226d',
-            '9ef09dfa9d86780bdec9219a22560c6ece8e0ef1',
-            ],
+        self.assertEqual({
+            keys[0]: '51c64a6f4fc375daf0d24aafbabe4d91b6f4bb44',
+            keys[1]: '00e364d235126be43292ab09cb4686cf703ddc17',
+            keys[2]: 'a8478686da38e370e32e42e8a0c220e33ee9132f',
+            keys[3]: 'ed8bce375198ea62444dc71952b22cfc2b09226d',
+            keys[4]: '9ef09dfa9d86780bdec9219a22560c6ece8e0ef1',
+            },
             files.get_sha1s(keys))
         
     def test_insert_record_stream_empty(self):
@@ -2132,7 +2138,7 @@
             mpdiff = files.make_mpdiffs([key])[0]
             parents = files.get_parent_map([key])[key] or []
             target.add_mpdiffs(
-                [(key, parents, files.get_sha1s([key])[0], mpdiff)])
+                [(key, parents, files.get_sha1s([key])[key], mpdiff)])
             self.assertEqualDiff(
                 files.get_record_stream([key], 'unordered',
                     True).next().get_bytes_as('fulltext'),

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-06-19 06:43:33 +0000
+++ b/bzrlib/versionedfile.py	2008-06-19 13:14:24 +0000
@@ -373,19 +373,11 @@
                     parent_ids, lines, vf_parents,
                     left_matching_blocks=left_matching_blocks)
             vf_parents[version] = version_text
-        for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
-             zip(records, self.get_sha1s(versions)):
-            if expected_sha1 != sha1:
+        sha1s = self.get_sha1s(versions)
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            if expected_sha1 != sha1s[version]:
                 raise errors.VersionedFileInvalidChecksum(version)
 
-    def get_sha1s(self, version_ids):
-        """Get the stored sha1 sums for the given revisions.
-
-        :param version_ids: The names of the versions to lookup
-        :return: a list of sha1s in order according to the version_ids
-        """
-        raise NotImplementedError(self.get_sha1s)
-
     def get_text(self, version_id):
         """Return version contents as a text string.
 
@@ -553,6 +545,10 @@
         return self._backing_vf.get_record_stream(keys, sort_order,
             include_delta_closure)
 
+    def get_sha1s(self, keys):
+        self.calls.append(("get_sha1s", copy(keys)))
+        return self._backing_vf.get_sha1s(keys)
+
 
 class KeyMapper(object):
     """KeyMappers map between keys and underlying paritioned storage."""
@@ -823,7 +819,9 @@
         """Get the sha1's of the texts for the given keys.
 
         :param keys: The names of the keys to lookup
-        :return: a list of sha1s matching keys.
+        :return: a dict from key to sha1 digest. Keys of texts which are not
+            present in the store are not present in the returned
+            dictionary.
         """
         raise NotImplementedError(self.get_sha1s)
 
@@ -1055,9 +1053,9 @@
         sha1s = {}
         for prefix,suffixes, vf in self._iter_keys_vf(keys):
             vf_sha1s = vf.get_sha1s(suffixes)
-            for suffix, sha1 in zip(suffixes, vf.get_sha1s(suffixes)):
+            for suffix, sha1 in vf_sha1s.iteritems():
                 sha1s[prefix + (suffix,)] = sha1
-        return [sha1s[key] for key in keys]
+        return sha1s
 
     def insert_record_stream(self, stream):
         """Insert a record stream into this container.

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2008-06-11 04:20:16 +0000
+++ b/bzrlib/weave.py	2008-06-19 13:14:24 +0000
@@ -110,7 +110,7 @@
     def __init__(self, version, weave):
         """Create a WeaveContentFactory for version from weave."""
         ContentFactory.__init__(self)
-        self.sha1 = weave.get_sha1s([version])[0]
+        self.sha1 = weave.get_sha1s([version])[version]
         self.key = (version,)
         parents = weave.get_parent_map([version])[version]
         self.parents = tuple((parent,) for parent in parents)
@@ -765,7 +765,10 @@
 
     def get_sha1s(self, version_ids):
         """See VersionedFile.get_sha1s()."""
-        return [self._sha1s[self._lookup(v)] for v in version_ids]
+        result = {}
+        for v in version_ids:
+            result[v] = self._sha1s[self._lookup(v)]
+        return result
 
     def num_versions(self):
         """How many versions are in this weave?"""




More information about the bazaar-commits mailing list