Rev 3027: (John Arbash Meinel) Change pushing from a pack-repo to a knit-repo in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Nov 26 20:18:55 GMT 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 3027
revision-id:pqm at pqm.ubuntu.com-20071126201850-7bugi709x0jjfqpg
parent: pqm at pqm.ubuntu.com-20071126151329-pnmm49obwettpwcd
parent: john at arbash-meinel.com-20071126193511-okj1lcjnltehhq8d
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-11-26 20:18:50 +0000
message:
  (John Arbash Meinel) Change pushing from a pack-repo to a knit-repo
  to not rebuild the full history of affected knits.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/lru_cache.py            lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
  bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
  bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
  bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
    ------------------------------------------------------------
    revno: 2998.2.3
    revision-id:john at arbash-meinel.com-20071126193511-okj1lcjnltehhq8d
    parent: john at arbash-meinel.com-20071117001455-abe9e9k8rb9pu239
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: push_packs_to_knits
    timestamp: Mon 2007-11-26 13:35:11 -0600
    message:
      Respond to Aaron's requests
    modified:
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
    ------------------------------------------------------------
    revno: 2998.2.2
    revision-id:john at arbash-meinel.com-20071117001455-abe9e9k8rb9pu239
    parent: john at arbash-meinel.com-20071116235317-uymqhilped1rloqy
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: push_packs_to_knits
    timestamp: Fri 2007-11-16 18:14:55 -0600
    message:
      implement a faster path for copying from packs back to knits.
      Also include more interrepo tests for knit<=>knit and pack<=>knit.
    modified:
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/tests/interrepository_implementations/__init__.py __init__.py-20060220054744-baf49a1f88f17b1a
      bzrlib/tests/interrepository_implementations/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
    ------------------------------------------------------------
    revno: 2998.2.1
    revision-id:john at arbash-meinel.com-20071116235317-uymqhilped1rloqy
    parent: pqm at pqm.ubuntu.com-20071115144759-zx0nd44rgp38riwr
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: push_packs_to_knits
    timestamp: Fri 2007-11-16 17:53:17 -0600
    message:
      Implement LRUCache.get() which acts like dict.get()
      so that we can return a default if the key isn't present.
    modified:
      bzrlib/lru_cache.py            lru_cache.py-20070119165515-tlw203kuwh0id5gv-1
      bzrlib/tests/test_lru_cache.py test_lru_cache.py-20070119165535-hph6rk4h9rzy4180-1
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-11-09 17:50:31 +0000
+++ b/bzrlib/knit.py	2007-11-26 19:35:11 +0000
@@ -74,6 +74,7 @@
 lazy_import(globals(), """
 from bzrlib import (
     annotate,
+    lru_cache,
     pack,
     trace,
     )
@@ -2224,6 +2225,43 @@
         except AttributeError:
             return False
 
+    def _copy_texts(self, pb, msg, version_ids, ignore_missing=False):
+        """Copy texts to the target by extracting and adding them one by one.
+
+        see join() for the parameter definitions.
+        """
+        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
+        graph = self.source.get_graph(version_ids)
+        order = topo_sort(graph.items())
+
+        def size_of_content(content):
+            return sum(len(line) for line in content.text())
+        # Cache at most 10MB of parent texts
+        parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
+                                              compute_size=size_of_content)
+        # TODO: jam 20071116 It would be nice to have a streaming interface to
+        #       get multiple texts from a source. The source could be smarter
+        #       about how it handled intermediate stages.
+        #       get_line_list() or make_mpdiffs() seem like a possibility, but
+        #       at the moment they extract all full texts into memory, which
+        #       causes us to store more than our 3x fulltext goal.
+        #       Repository.iter_files_bytes() may be another possibility
+        to_process = [version for version in order
+                               if version not in self.target]
+        total = len(to_process)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            for index, version in enumerate(to_process):
+                pb.update('Converting versioned data', index, total)
+                sha1, num_bytes, parent_text = self.target.add_lines(version,
+                    self.source.get_parents(version),
+                    self.source.get_lines(version),
+                    parent_texts=parent_cache)
+                parent_cache[version] = parent_text
+        finally:
+            pb.finished()
+        return total
+
     def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
         """See InterVersionedFile.join."""
         assert isinstance(self.source, KnitVersionedFile)
@@ -2236,11 +2274,9 @@
         elif self.source.factory.annotated:
             converter = self._anno_to_plain_converter
         else:
-            # We're converting from a plain to an annotated knit. This requires
-            # building the annotations from scratch. The generic join code
-            # handles this implicitly so we delegate to it.
-            return super(InterKnit, self).join(pb, msg, version_ids,
-                ignore_missing)
+            # We're converting from a plain to an annotated knit. Copy them
+            # across by full texts.
+            return self._copy_texts(pb, msg, version_ids, ignore_missing)
 
         version_ids = self._get_source_version_ids(version_ids, ignore_missing)
         if not version_ids:

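A minimal illustrative sketch of the caching pattern _copy_texts relies on
above: an LRUSizeCache keyed by version id and sized by the byte length of
the cached text, so only the most recently used ~10MB of parent texts stay in
memory while versions are copied one at a time. The version ids and line
lists below are placeholder data, not real knit contents.

    from bzrlib import lru_cache

    def size_of_lines(lines):
        # Mirrors size_of_content() above: total bytes across all lines.
        return sum(len(line) for line in lines)

    # Cache at most 10MB of parent texts, evicting least-recently-used ones.
    parent_cache = lru_cache.LRUSizeCache(max_size=10*1024*1024,
                                          compute_size=size_of_lines)
    parent_cache['rev-1'] = ['line one\n', 'line two\n']
    parent_cache['rev-2'] = ['line one\n', 'line two changed\n']
    # A recently copied text can be handed back to add_lines() as a parent
    # text; anything already evicted is simply re-extracted from the source.
    assert parent_cache.get('rev-1') == ['line one\n', 'line two\n']
    assert parent_cache.get('rev-unknown') is None
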
=== modified file 'bzrlib/lru_cache.py'
--- a/bzrlib/lru_cache.py	2007-11-14 21:07:54 +0000
+++ b/bzrlib/lru_cache.py	2007-11-16 23:53:17 +0000
@@ -69,6 +69,11 @@
             # Trigger the cleanup
             self.cleanup()
 
+    def get(self, key, default=None):
+        if key in self._cache:
+            return self[key]
+        return default
+
     def cleanup(self):
         """Clear the cache until it shrinks to the requested size.
 

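A short usage sketch of the new LRUCache.get() helper, with placeholder keys
and values: like dict.get(), it returns the cached value when the key is
present and a caller-supplied default (None unless given) otherwise, instead
of raising KeyError.

    from bzrlib import lru_cache

    cache = lru_cache.LRUCache(max_cache=5)
    cache.add('knit-a', 'text of knit-a')
    print cache.get('knit-a')                # -> 'text of knit-a'
    print cache.get('knit-b')                # -> None, no KeyError raised
    print cache.get('knit-b', 'fallback')    # -> 'fallback'
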
=== modified file 'bzrlib/tests/interrepository_implementations/__init__.py'
--- a/bzrlib/tests/interrepository_implementations/__init__.py	2007-10-30 17:39:11 +0000
+++ b/bzrlib/tests/interrepository_implementations/__init__.py	2007-11-17 00:14:55 +0000
@@ -26,9 +26,10 @@
 """
 
 from bzrlib.repository import (
+                               InterKnitRepo,
+                               InterKnit1and2,
+                               InterModel1and2,
                                InterRepository,
-                               InterModel1and2,
-                               InterKnit1and2,
                                )
 from bzrlib.tests import (
                           adapt_modules,
@@ -75,7 +76,7 @@
     @staticmethod
     def default_test_list():
         """Generate the default list of interrepo permutations to test."""
-        from bzrlib.repofmt import knitrepo, weaverepo
+        from bzrlib.repofmt import knitrepo, pack_repo, weaverepo
         result = []
         # test the default InterRepository between format 6 and the current 
         # default format.
@@ -100,6 +101,21 @@
         result.append((InterKnit1and2,
                        knitrepo.RepositoryFormatKnit1(),
                        knitrepo.RepositoryFormatKnit3()))
+        result.append((InterKnitRepo,
+                       knitrepo.RepositoryFormatKnit1(),
+                       knitrepo.RepositoryFormatKnit1()))
+        result.append((InterKnitRepo,
+                       knitrepo.RepositoryFormatKnit1(),
+                       pack_repo.RepositoryFormatKnitPack1()))
+        result.append((InterKnitRepo,
+                       pack_repo.RepositoryFormatKnitPack1(),
+                       knitrepo.RepositoryFormatKnit1()))
+        result.append((InterKnitRepo,
+                       knitrepo.RepositoryFormatKnit3(),
+                       pack_repo.RepositoryFormatKnitPack3()))
+        result.append((InterKnitRepo,
+                       pack_repo.RepositoryFormatKnitPack3(),
+                       knitrepo.RepositoryFormatKnit3()))
         return result
 
 

=== modified file 'bzrlib/tests/interrepository_implementations/test_interrepository.py'
--- a/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-11-21 23:36:32 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_interrepository.py	2007-11-26 20:18:50 +0000
@@ -382,6 +382,10 @@
         to_repo.fetch(from_tree.branch.repository, tree_rev)
         # to_repo should have a file_graph for from_tree.path2id('subtree') and
         # revid tree_rev.
-        file_vf = to_repo.weave_store.get_weave(
-            from_tree.path2id('subtree'), to_repo.get_transaction())
-        self.assertEqual([tree_rev], file_vf.get_ancestry([tree_rev]))
+        to_repo.lock_read()
+        try:
+            file_vf = to_repo.weave_store.get_weave(
+                from_tree.path2id('subtree'), to_repo.get_transaction())
+            self.assertEqual([tree_rev], file_vf.get_ancestry([tree_rev]))
+        finally:
+            to_repo.unlock()

=== modified file 'bzrlib/tests/test_lru_cache.py'
--- a/bzrlib/tests/test_lru_cache.py	2007-11-14 21:07:54 +0000
+++ b/bzrlib/tests/test_lru_cache.py	2007-11-16 23:53:17 +0000
@@ -203,6 +203,16 @@
         self.assertEqual([1, 4, 5, 3, 2], list(cache._queue))
         self.assertEqual({1:1, 2:1, 3:1, 4:1, 5:1}, cache._refcount)
 
+    def test_get(self):
+        cache = lru_cache.LRUCache(max_cache=5)
+
+        cache.add(1, 10)
+        cache.add(2, 20)
+        self.assertEqual(20, cache.get(2))
+        self.assertIs(None, cache.get(3))
+        obj = object()
+        self.assertIs(obj, cache.get(3, obj))
+
 
 class TestLRUSizeCache(tests.TestCase):
 



