Rev 3366: Back out transaction cache removal. in http://people.ubuntu.com/~robertc/baz2.0/reinstate-transaction-cache

Thu Apr 17 04:32:18 BST 2008

At http://people.ubuntu.com/~robertc/baz2.0/reinstate-transaction-cache

------------------------------------------------------------
revno: 3366
revision-id: robertc at robertcollins.net-20080417033213-ve179c1ud7brur7c
parent: pqm at pqm.ubuntu.com-20080413194429-a5e4pft9sffa2ycu
committer: Robert Collins <robertc at robertcollins.net>
branch nick: reinstate-transaction-cache
timestamp: Thu 2008-04-17 13:32:13 +1000
message:
  Back out transaction cache removal.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/store/versioned/__init__.py weavestore.py-20050907094258-88262e0434babab9
  bzrlib/tests/test_fetch.py     testfetch.py-20050825090644-f73e07e7dfb1765a
  bzrlib/tests/test_store.py     teststore.py-20050826022702-f6caadb647395769
=== modified file 'NEWS'

--- a/NEWS	2008-04-13 17:41:14 +0000
+++ b/NEWS	2008-04-17 03:32:13 +0000
@@ -244,13 +244,6 @@
       ``get_parent_map`` which can be used to instantiate a Graph on a
       VersionedFile. (Robert Collins)
 
-    * ``VersionedFileStore`` no longer uses the transaction parameter given
-      to most methods; amongst other things this means that the
-      get_weave_or_empty method no longer guarantees errors on a missing weave
-      in a readonly transaction, and no longer caches versioned file instances
-      which reduces memory pressure (but requires more careful management by
-      callers to preserve performance). (Robert Collins)
-
   TESTING:
 
     * New -Dselftest_debug flag disables clearing of the debug flags during

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2008-04-09 20:13:46 +0000
+++ b/bzrlib/fetch.py	2008-04-17 03:32:13 +0000
@@ -167,12 +167,13 @@
                 if knit_kind == "file":
                     self._fetch_weave_text(file_id, revisions)
                 elif knit_kind == "inventory":
-                    # Before we process the inventory we generate the root
-                    # texts (if necessary) so that the inventories references
-                    # will be valid.
+                    # XXX:
+                    # Once we've processed all the files, then we generate the root
+                    # texts (if necessary), then we process the inventory.  It's a
+                    # bit distasteful to have knit_kind == "inventory" mean this,
+                    # perhaps it should happen on the first non-"file" knit, in case
+                    # it's not always inventory?
                     self._generate_root_texts(revs)
-                    # NB: This currently reopens the inventory weave in source;
-                    # using a full get_data_stream instead would avoid this.
                     self._fetch_inventory_weave(revs, pb)
                 elif knit_kind == "signatures":
                     # Nothing to do here; this will be taken care of when

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-04-10 06:19:49 +0000
+++ b/bzrlib/repository.py	2008-04-17 03:32:13 +0000
@@ -1278,18 +1278,16 @@
                 setdefault(file_id, set()).add(revision_id)
         return result
 
-    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
+    def fileids_altered_by_revision_ids(self, revision_ids):
         """Find the file ids and versions affected by revisions.
 
         :param revisions: an iterable containing revision ids.
-        :param _inv_weave: The inventory weave from this repository or None.
-            If None, the inventory weave will be opened automatically.
         :return: a dictionary mapping altered file-ids to an iterable of
         revision_ids. Each altered file-ids has the exact revision_ids that
         altered it listed explicitly.
         """
         selected_revision_ids = set(revision_ids)
-        w = _inv_weave or self.get_inventory_weave()
+        w = self.get_inventory_weave()
         pb = ui.ui_factory.nested_progress_bar()
         try:
             return self._find_file_ids_from_xml_inventory_lines(
@@ -1451,7 +1449,7 @@
         inv_w = self.get_inventory_weave()
 
         # file ids that changed
-        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
+        file_ids = self.fileids_altered_by_revision_ids(revision_ids)
         count = 0
         num_file_ids = len(file_ids)
         for file_id, altered_versions in file_ids.iteritems():

=== modified file 'bzrlib/store/versioned/__init__.py'
--- a/bzrlib/store/versioned/__init__.py	2008-04-08 00:57:07 +0000
+++ b/bzrlib/store/versioned/__init__.py	2008-04-17 03:32:13 +0000
@@ -57,6 +57,21 @@
         # Used for passing get_scope to versioned file constructors;
         self.get_scope = None
 
+    def _clear_cache_id(self, file_id, transaction):
+        """WARNING may lead to inconsistent object references for file_id.
+
+        Remove file_id from the transaction map. 
+
+        NOT in the transaction api because theres no reliable way to clear
+        callers. So its here for very specialised use rather than having an
+        'api' that isn't.
+        """
+        weave = transaction.map.find_weave(file_id)
+        if weave is not None:
+            mutter("old data in transaction in %s for %s", self, file_id)
+            # FIXME abstraction violation - transaction now has stale data.
+            transaction.map.remove_object(weave)
+
     def filename(self, file_id):
         """Return the path relative to the transport root."""
         return self._relpath(file_id)
@@ -95,6 +110,7 @@
         filename = self.filename(file_id)
         for suffix in suffixes:
             self._transport.delete(filename + suffix)
+        self._clear_cache_id(file_id, transaction)
 
     def _get(self, file_id):
         return self._transport.get(self.filename(file_id))
@@ -116,11 +132,17 @@
         file_id. This is used to reduce duplicate filename calculations when
         using 'get_weave_or_empty'. FOR INTERNAL USE ONLY.
         """
+        weave = transaction.map.find_weave(file_id)
+        if weave is not None:
+            #mutter("cache hit in %s for %s", self, file_id)
+            return weave
         if _filename is None:
             _filename = self.filename(file_id)
         if transaction.writeable():
             w = self._versionedfile_class(_filename, self._transport, self._file_mode,
                 get_scope=self.get_scope, **self._versionedfile_kwargs)
+            transaction.map.add_weave(file_id, w)
+            transaction.register_dirty(w)
         else:
             w = self._versionedfile_class(_filename,
                                           self._transport,
@@ -129,6 +151,8 @@
                                           access_mode='r',
                                           get_scope=self.get_scope,
                                           **self._versionedfile_kwargs)
+            transaction.map.add_weave(file_id, w)
+            transaction.register_clean(w, precious=self._precious)
         return w
 
     def _make_new_versionedfile(self, file_id, transaction,
@@ -173,6 +197,9 @@
         except errors.NoSuchFile:
             weave = self._make_new_versionedfile(file_id, transaction,
                 known_missing=True, _filename=_filename)
+            transaction.map.add_weave(file_id, weave)
+            # has to be dirty - its able to mutate on its own.
+            transaction.register_dirty(weave)
             return weave
 
     def _put_weave(self, file_id, weave, transaction):
@@ -182,6 +209,7 @@
 
     def copy(self, source, result_id, transaction):
         """Copy the source versioned file to result_id in this store."""
+        self._clear_cache_id(result_id, transaction)
         source.copy_to(self.filename(result_id), self._transport)
  
     def copy_all_ids(self, store_from, pb=None, from_transaction=None,

=== modified file 'bzrlib/tests/test_fetch.py'
--- a/bzrlib/tests/test_fetch.py	2008-04-08 00:57:07 +0000
+++ b/bzrlib/tests/test_fetch.py	2008-04-17 03:32:13 +0000
@@ -285,13 +285,9 @@
         # unfortunately this log entry is branch format specific. We could 
         # factor out the 'what files does this format use' to a method on the 
         # repository, which would let us to this generically. RBC 20060419
-        # RBC 20080408: Or perhaps we can assert that no files are fully read
-        # twice?
         self.assertEqual(1, self._count_log_matches('/ce/id.kndx', http_logs))
         self.assertEqual(1, self._count_log_matches('/ce/id.knit', http_logs))
-        # XXX: This *should* be '1', but more intrusive fetch changes are
-        # needed to drop this back to 1.
-        self.assertEqual(2, self._count_log_matches('inventory.kndx', http_logs))
+        self.assertEqual(1, self._count_log_matches('inventory.kndx', http_logs))
         # this r-h check test will prevent regressions, but it currently already 
         # passes, before the patch to cache-rh is applied :[
         self.assertTrue(1 >= self._count_log_matches('revision-history',
@@ -315,8 +311,7 @@
         self.log('\n'.join(http_logs))
         self.assertEqual(1, self._count_log_matches('branch-format', http_logs))
         self.assertEqual(1, self._count_log_matches('branch/format', http_logs))
-        self.assertEqual(1, self._count_log_matches('repository/format',
-            http_logs))
+        self.assertEqual(1, self._count_log_matches('repository/format', http_logs))
         self.assertTrue(1 >= self._count_log_matches('revision-history',
                                                      http_logs))
         self.assertTrue(1 >= self._count_log_matches('last-revision',

=== modified file 'bzrlib/tests/test_store.py'
--- a/bzrlib/tests/test_store.py	2008-04-08 00:57:07 +0000
+++ b/bzrlib/tests/test_store.py	2008-04-17 03:32:13 +0000
@@ -426,6 +426,13 @@
         self._transaction = None
         self.assertRaises(errors.OutSideTransaction, vf.add_lines, 'b', [], [])
 
+    def test_get_weave_or_empty_readonly_fails(self):
+        self._transaction = transactions.ReadOnlyTransaction()
+        vf = self.assertRaises(errors.ReadOnlyError,
+                               self.vfstore.get_weave_or_empty,
+                               'id',
+                               self._transaction)
+
     def test_get_weave_readonly_cant_write(self):
         self._transaction = transactions.WriteTransaction()
         vf = self.vfstore.get_weave_or_empty('id', self._transaction)