Rev 3327: * ``VersionedFile.clear_cache`` and ``enable_cache`` are deprecated. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files

Robert Collins robertc at robertcollins.net
Tue Apr 8 22:41:33 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/versioned_files

------------------------------------------------------------
revno: 3327
revision-id: robertc at robertcollins.net-20080408214115-qiz6rr5kyt5mxsyi
parent: robertc at robertcollins.net-20080408041308-lfbh0gmvwfhc53yx
committer: Robert Collins <robertc at robertcollins.net>
branch nick: api-cleanup
timestamp: Wed 2008-04-09 07:41:15 +1000
message:
   * ``VersionedFile.clear_cache`` and ``enable_cache`` are deprecated.
     These methods added significant complexity to the ``VersionedFile``
     implementation, but were only used to optimise fetches from knits,
     which can be done from outside the knit layer or via a caching
     decorator. As knits are not the default format, the complexity is
     no longer worth carrying. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/weaverepo.py    presplitout.py-20070125045333-wfav3tsh73oxu3zk-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'NEWS'
--- a/NEWS	2008-04-08 04:13:08 +0000
+++ b/NEWS	2008-04-08 21:41:15 +0000
@@ -145,6 +145,13 @@
       file level merge to identical state on two branches is rare enough, and
       not expensive enough to special case. (Robert Collins)
 
+    * ``VersionedFile.clear_cache`` and ``enable_cache`` are deprecated.
+      These methods added significant complexity to the ``VersionedFile``
+      implementation, but were only used to optimise fetches from knits,
+      which can be done from outside the knit layer or via a caching
+      decorator. As knits are not the default format, the complexity is
+      no longer worth carrying. (Robert Collins)
+
     * ``VersionedFile.create_empty`` is removed. This method presupposed a
       sensible mapping to a transport for individual files, but pack backed
       versioned files have no such mapping. (Robert Collins)

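To illustrate the caching-decorator approach the entry mentions, here is
a minimal sketch (hypothetical class, not part of this commit) that
memoises ``get_lines`` results for any ``VersionedFile``-like object::

    class CachingVersionedFileDecorator(object):
        """Memoise get_lines() results of a wrapped versioned file."""

        def __init__(self, versioned_file):
            self._vf = versioned_file
            self._lines_cache = {}

        def get_lines(self, version_id):
            """Return the lines for version_id, caching the first read."""
            try:
                return self._lines_cache[version_id]
            except KeyError:
                lines = self._vf.get_lines(version_id)
                self._lines_cache[version_id] = lines
                return lines

        def clear(self):
            """Drop all cached line data."""
            self._lines_cache.clear()

        def __getattr__(self, name):
            # Delegate everything else to the wrapped versioned file.
            return getattr(self._vf, name)

Because the cache lives outside the versioned file, a caller that needs
it (such as fetch) can build and discard it freely, without the
``VersionedFile`` implementations carrying any cache state of their own.
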
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-04-08 04:13:08 +0000
+++ b/bzrlib/knit.py	2008-04-08 21:41:15 +0000
@@ -623,19 +623,9 @@
         for (version_id, options, parents, size), access_memo in zip(
             records, positions):
             index_entries.append((version_id, options, access_memo, parents))
-            if self._data._do_cache:
-                self._data._cache[version_id] = data[offset:offset+size]
             offset += size
         self._index.add_versions(index_entries)
 
-    def enable_cache(self):
-        """Start caching data for this knit"""
-        self._data.enable_cache()
-
-    def clear_cache(self):
-        """Clear the data cache only."""
-        self._data.clear_cache()
-
     def copy_to(self, name, transport):
         """See VersionedFile.copy_to()."""
         # copy the current index to a temp index to avoid racing with local
@@ -2379,21 +2369,6 @@
         """
         self._access = access
         self._checked = False
-        # TODO: jam 20060713 conceptually, this could spill to disk
-        #       if the cached size gets larger than a certain amount
-        #       but it complicates the model a bit, so for now just use
-        #       a simple dictionary
-        self._cache = {}
-        self._do_cache = False
-
-    def enable_cache(self):
-        """Enable caching of reads."""
-        self._do_cache = True
-
-    def clear_cache(self):
-        """Clear the record cache."""
-        self._do_cache = False
-        self._cache = {}
 
     def _open_file(self):
         return self._access.open_file()
@@ -2499,29 +2474,15 @@
         # uses readv so nice and fast we hope.
         if len(records):
             # grab the disk data needed.
-            if self._cache:
-                # Don't check _cache if it is empty
-                needed_offsets = [index_memo for version_id, index_memo
-                                              in records
-                                              if version_id not in self._cache]
-            else:
-                needed_offsets = [index_memo for version_id, index_memo
-                                               in records]
-
+            needed_offsets = [index_memo for version_id, index_memo
+                                           in records]
             raw_records = self._access.get_raw_records(needed_offsets)
 
         for version_id, index_memo in records:
-            if version_id in self._cache:
-                # This data has already been validated
-                data = self._cache[version_id]
-            else:
-                data = raw_records.next()
-                if self._do_cache:
-                    self._cache[version_id] = data
-
-                # validate the header
-                df, rec = self._parse_record_header(version_id, data)
-                df.close()
+            data = raw_records.next()
+            # validate the header
+            df, rec = self._parse_record_header(version_id, data)
+            df.close()
             yield version_id, data
 
     def read_records_iter(self, records):
@@ -2537,24 +2498,7 @@
         if not records:
             return
 
-        if self._cache:
-            # Skip records we have alread seen
-            yielded_records = set()
-            needed_records = set()
-            for record in records:
-                if record[0] in self._cache:
-                    if record[0] in yielded_records:
-                        continue
-                    yielded_records.add(record[0])
-                    data = self._cache[record[0]]
-                    content, digest = self._parse_record(record[0], data)
-                    yield (record[0], content, digest)
-                else:
-                    needed_records.add(record)
-            needed_records = sorted(needed_records, key=operator.itemgetter(1))
-        else:
-            needed_records = sorted(set(records), key=operator.itemgetter(1))
-
+        needed_records = sorted(set(records), key=operator.itemgetter(1))
         if not needed_records:
             return
 
@@ -2566,8 +2510,6 @@
         for (version_id, index_memo), data in \
                 izip(iter(needed_records), raw_data):
             content, digest = self._parse_record(version_id, data)
-            if self._do_cache:
-                self._cache[version_id] = data
             yield version_id, content, digest
 
     def read_records(self, records):

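With the in-knit cache removed, ``read_records_iter_raw`` above always
issues the readv through ``self._access.get_raw_records()``. A caller
that wants caching can layer it over the access object instead; a
minimal sketch (hypothetical class, assuming the index memos passed in
are hashable tuples, as they are for the accesses used here)::

    class CachingKnitAccess(object):
        """Cache raw record data above the knit layer, keyed by memo."""

        def __init__(self, access):
            self._access = access
            self._raw_cache = {}

        def get_raw_records(self, memos):
            """Yield the raw data for memos, in the order requested."""
            missing = [m for m in memos if m not in self._raw_cache]
            if missing:
                raw = self._access.get_raw_records(missing)
                for memo, data in zip(missing, raw):
                    self._raw_cache[memo] = data
            for memo in memos:
                yield self._raw_cache[memo]
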
=== modified file 'bzrlib/repofmt/weaverepo.py'
--- a/bzrlib/repofmt/weaverepo.py	2008-04-04 00:06:58 +0000
+++ b/bzrlib/repofmt/weaverepo.py	2008-04-08 21:41:15 +0000
@@ -632,7 +632,6 @@
         result = versionedfile.add_lines(
             self._new_revision_id, parents, new_lines,
             nostore_sha=nostore_sha)[0:2]
-        versionedfile.clear_cache()
         return result
 
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2008-04-08 00:57:07 +0000
+++ b/bzrlib/repository.py	2008-04-08 21:41:15 +0000
@@ -1452,7 +1452,6 @@
         # processed?
         self.lock_read()
         inv_w = self.get_inventory_weave()
-        inv_w.enable_cache()
 
         # file ids that changed
         file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2008-04-08 02:31:32 +0000
+++ b/bzrlib/tests/test_knit.py	2008-04-08 21:41:15 +0000
@@ -1204,7 +1204,6 @@
         """Store in knit with parents"""
         k = self.make_test_knit(annotate=False)
         self.add_stock_one_and_one_a(k)
-        k.clear_cache()
         self.assertEqualDiff(''.join(k.get_lines('text-1a')), TEXT_1A)
 
     def test_add_delta_knit_graph_index(self):
@@ -1214,7 +1213,6 @@
             deltas=True)
         k = self.make_test_knit(annotate=True, index=knit_index)
         self.add_stock_one_and_one_a(k)
-        k.clear_cache()
         self.assertEqualDiff(''.join(k.get_lines('text-1a')), TEXT_1A)
         # check the index had the right data added.
         self.assertEqual(set([
@@ -1397,7 +1395,6 @@
         # add texts with no required ordering
         k1.add_lines('base', [], ['text\n'])
         k1.add_lines('base2', [], ['text2\n'])
-        k1.clear_cache()
         # clear the logged activity, but preserve the list instance in case of
         # clones pointing at it.
         del instrumented_t._activity[:]
@@ -2031,87 +2028,6 @@
         self.failIf(WeaveToKnit.is_compatible(k, k))
 
 
-class TestKnitCaching(KnitTests):
-    
-    def create_knit(self):
-        k = self.make_test_knit(True)
-        k.add_lines('text-1', [], split_lines(TEXT_1))
-        k.add_lines('text-2', [], split_lines(TEXT_2))
-        return k
-
-    def test_no_caching(self):
-        k = self.create_knit()
-        # Nothing should be cached without setting 'enable_cache'
-        self.assertEqual({}, k._data._cache)
-
-    def test_cache_data_read_raw(self):
-        k = self.create_knit()
-
-        # Now cache and read
-        k.enable_cache()
-
-        def read_one_raw(version):
-            pos_map = k._get_components_positions([version])
-            method, index_memo, next = pos_map[version]
-            lst = list(k._data.read_records_iter_raw([(version, index_memo)]))
-            self.assertEqual(1, len(lst))
-            return lst[0]
-
-        val = read_one_raw('text-1')
-        self.assertEqual({'text-1':val[1]}, k._data._cache)
-
-        k.clear_cache()
-        # After clear, new reads are not cached
-        self.assertEqual({}, k._data._cache)
-
-        val2 = read_one_raw('text-1')
-        self.assertEqual(val, val2)
-        self.assertEqual({}, k._data._cache)
-
-    def test_cache_data_read(self):
-        k = self.create_knit()
-
-        def read_one(version):
-            pos_map = k._get_components_positions([version])
-            method, index_memo, next = pos_map[version]
-            lst = list(k._data.read_records_iter([(version, index_memo)]))
-            self.assertEqual(1, len(lst))
-            return lst[0]
-
-        # Now cache and read
-        k.enable_cache()
-
-        val = read_one('text-2')
-        self.assertEqual(['text-2'], k._data._cache.keys())
-        self.assertEqual('text-2', val[0])
-        content, digest = k._data._parse_record('text-2',
-                                                k._data._cache['text-2'])
-        self.assertEqual(content, val[1])
-        self.assertEqual(digest, val[2])
-
-        k.clear_cache()
-        self.assertEqual({}, k._data._cache)
-
-        val2 = read_one('text-2')
-        self.assertEqual(val, val2)
-        self.assertEqual({}, k._data._cache)
-
-    def test_cache_read(self):
-        k = self.create_knit()
-        k.enable_cache()
-
-        text = k.get_text('text-1')
-        self.assertEqual(TEXT_1, text)
-        self.assertEqual(['text-1'], k._data._cache.keys())
-
-        k.clear_cache()
-        self.assertEqual({}, k._data._cache)
-
-        text = k.get_text('text-1')
-        self.assertEqual(TEXT_1, text)
-        self.assertEqual({}, k._data._cache)
-
-
 class TestKnitIndex(KnitTests):
 
     def test_add_versions_dictionary_compresses(self):

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-04-08 04:13:08 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-04-08 21:41:15 +0000
@@ -360,15 +360,6 @@
         self.assertRaises(errors.OutSideTransaction, f.add_lines_with_ghosts, '', [], [])
         self.assertRaises(errors.OutSideTransaction, f.join, '')
         
-    def test_clear_cache(self):
-        f = self.get_file()
-        # on a new file it should not error
-        f.clear_cache()
-        # and after adding content, doing a clear_cache and a get should work.
-        f.add_lines('0', [], ['a'])
-        f.clear_cache()
-        self.assertEqual(['a'], f.get_lines('0'))
-
     def test_clone_text(self):
         f = self.get_file()
         f.add_lines('r0', [], ['a\n', 'b\n'])

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-04-08 04:13:08 +0000
+++ b/bzrlib/versionedfile.py	2008-04-08 21:41:15 +0000
@@ -157,6 +157,7 @@
             if '\n' in line[:-1]:
                 raise errors.BzrBadParameterContainsNewline("lines")
 
+    @deprecated_method(one_four)
     def enable_cache(self):
         """Tell this versioned file that it should cache any data it reads.
         
@@ -164,6 +165,7 @@
         """
         pass
     
+    @deprecated_method(one_four)
     def clear_cache(self):
         """Remove any data cached in the versioned file object.
 

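``deprecated_method`` and ``one_four`` come from
``bzrlib.symbol_versioning``. As a rough sketch of the mechanism (the
template value shown is illustrative, not the real constant)::

    import warnings

    def deprecated_method(deprecation_template):
        """Decorate a method to warn when it is called."""
        def decorator(method):
            def wrapper(self, *args, **kwargs):
                warnings.warn(deprecation_template % method.__name__,
                              DeprecationWarning, stacklevel=2)
                return method(self, *args, **kwargs)
            wrapper.__name__ = method.__name__
            wrapper.__doc__ = method.__doc__
            return wrapper
        return decorator

    # Illustrative template in the style of the real constants:
    one_four = '%s was deprecated in version 1.4.'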


