Rev 802: Remember where revids have already been found (in file:///data/jelmer/bzr-svn/0.4/)

Jelmer Vernooij jelmer at samba.org
Thu Dec 6 15:54:08 GMT 2007


At file:///data/jelmer/bzr-svn/0.4/

------------------------------------------------------------
revno: 802
revision-id:jelmer at samba.org-20071206155408-23oe0l7zjw17tlqe
parent: jelmer at samba.org-20071201172644-hm2xjcjda0df69lc
committer: Jelmer Vernooij <jelmer at samba.org>
branch nick: 0.4
timestamp: Thu 2007-12-06 16:54:08 +0100
message:
  Remember where revids have already been found.
modified:
  NEWS                           news-20061231030336-h9fhq245ie0de8bs-1
  __init__.py                    __init__.py-20051008155114-eae558e6cf149e1d
  repository.py                  repository.py-20060306123302-1f8c5069b3fe0265
  revids.py                      revids.py-20070416220458-36vfa0730cchevp1-1
  tests/test_revids.py           test_revids.py-20070516230044-d7x872cqi7xb4eow-1
=== modified file 'NEWS'
--- a/NEWS	2007-12-01 17:09:12 +0000
+++ b/NEWS	2007-12-06 15:54:08 +0000
@@ -1,5 +1,11 @@
 bzr-svn 0.4.6	UNRELEASED
 
+  PERFORMANCE
+  
+   * Remember where revids have already been detected. Makes incremental
+     push a lot faster, in particular in Subversion repositories 
+	 with a lot of branches.
+
 bzr-svn 0.4.5	2007-12-01
 
   IMPROVEMENTS

=== modified file '__init__.py'
--- a/__init__.py	2007-12-01 17:26:44 +0000
+++ b/__init__.py	2007-12-06 15:54:08 +0000
@@ -136,6 +136,7 @@
     InterRepository.register_optimiser(fetch.InterFromSvnRepository)
     InterRepository.register_optimiser(commit.InterToSvnRepository)
 
+
 def get_scheme(schemename):
     """Parse scheme identifier and return a branching scheme."""
     from scheme import BranchingScheme

=== modified file 'repository.py'
--- a/repository.py	2007-11-20 19:33:02 +0000
+++ b/repository.py	2007-12-06 15:54:08 +0000
@@ -288,7 +288,6 @@
         self.dir_cache = {}
         self.pool = Pool()
         self.get_config().add_location(self.base)
-        self._revids_seen = {}
         cache_dir = self.create_cache_dir()
         cachedir_transport = get_transport(cache_dir)
         cache_file = os.path.join(cache_dir, 'cache-v%d' % MAPPING_VERSION)
@@ -761,13 +760,13 @@
             if scheme is None:
                 scheme = self.get_scheme()
             last_revnum = self.transport.get_latest_revnum()
-            if (self._revids_seen.has_key(str(scheme)) and 
-                last_revnum <= self._revids_seen[str(scheme)]):
+            if (last_revnum <= self.revmap.last_revnum_checked(str(scheme))):
                 # All revision ids in this repository for the current 
                 # scheme have already been discovered. No need to 
                 # check again.
                 raise e
             found = False
+            # TODO: Start at self.revmap.last_revnum_checked(str(scheme))
             for (branch, revno, _) in self.find_branches(scheme, last_revnum):
                 # Look at their bzr:revision-id-vX
                 revids = []
@@ -788,14 +787,11 @@
                         found = True
                     self.revmap.insert_revid(entry_revid, branch, 0, revno, 
                             str(scheme), entry_revno)
-
-                if found:
-                    break
                 
+            # We've added all the revision ids for this scheme in the repository,
+            # so no need to check again unless new revisions got added
+            self.revmap.set_last_revnum_checked(str(scheme), last_revnum)
             if not found:
-                # We've added all the revision ids for this scheme in the repository,
-                # so no need to check again unless new revisions got added
-                self._revids_seen[str(scheme)] = last_revnum
                 raise e
             (branch_path, min_revnum, max_revnum, scheme) = self.revmap.lookup_revid(revid)
             assert isinstance(branch_path, str)

=== modified file 'revids.py'
--- a/revids.py	2007-09-07 11:49:21 +0000
+++ b/revids.py	2007-12-06 15:54:08 +0000
@@ -92,8 +92,20 @@
         create index if not exists lookup_branch_revnum on revmap (max_revnum, min_revnum, path, scheme);
         create table if not exists revno_cache (revid text unique, dist_to_origin integer);
         create index if not exists revid on revno_cache (revid);
+        create table if not exists revids_seen (scheme text, max_revnum int);
+        create unique index if not exists scheme on revids_seen (scheme);
         """)
         self.cachedb.commit()
+
+    def set_last_revnum_checked(self, scheme, revnum):
+        self.cachedb.execute("replace into revids_seen (scheme, max_revnum) VALUES (?, ?)", (scheme, revnum))
+
+    def last_revnum_checked(self, scheme):
+        ret = self.cachedb.execute(
+            "select max_revnum from revids_seen where scheme = ?", (scheme,)).fetchone()
+        if ret is None:
+            return 0
+        return int(ret[0])
     
     def lookup_revid(self, revid):
         ret = self.cachedb.execute(

=== modified file 'tests/test_revids.py'
--- a/tests/test_revids.py	2007-07-21 17:07:01 +0000
+++ b/tests/test_revids.py	2007-12-06 15:54:08 +0000
@@ -27,6 +27,12 @@
     def test_create(self):
         revidmap = RevidMap()
 
+    def test_lookup_revids_seen(self):
+        revidmap = RevidMap()
+        self.assertEquals(0, revidmap.last_revnum_checked("trunk"))
+        revidmap.set_last_revnum_checked("trunk", 45)
+        self.assertEquals(45, revidmap.last_revnum_checked("trunk"))
+
     def test_lookup_revid_nonexistant(self):
         revidmap = RevidMap()
         self.assertRaises(NoSuchRevision, lambda: revidmap.lookup_revid("bla"))




More information about the bazaar-commits mailing list