Rev 106: Iron out some bugs. in http://bzr.arbash-meinel.com/branches/bzr/history_db/trunk

John Arbash Meinel john at arbash-meinel.com
Wed Apr 21 23:23:47 BST 2010


At http://bzr.arbash-meinel.com/branches/bzr/history_db/trunk

------------------------------------------------------------
revno: 106
revision-id: john at arbash-meinel.com-20100421222335-1a127vu1g36jh1za
parent: john at arbash-meinel.com-20100421210738-121p9mv2tokku564
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: trunk
timestamp: Wed 2010-04-21 17:23:35 -0500
message:
  Iron out some bugs.
  
  It now auto-imports on demand for any of dotted=>rev, rev=>dotted, and iter_merge.
  It can be a bit slow, but not terrible. And it means it is always correct
  and the next log -n0 should be faster.
  It does slow down log -n1 -r-10..-1 slightly, 700ms=>750ms or so. I'm not
  sure if that really matters.
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py	2010-04-21 21:07:38 +0000
+++ b/__init__.py	2010-04-21 22:23:35 +0000
@@ -386,7 +386,7 @@
     query = getattr(a_branch, '_history_db_querier', _singleton)
     if query is not _singleton:
         if query is not None:
-            query._db_conn.close()
+            query.close()
         del a_branch._history_db_querier
     return _orig_clear_cached_state(a_branch)
 
@@ -400,9 +400,7 @@
     """
     t0 = time.clock()
     query = _get_querier(self)
-    if query is not None:
-        query.ensure_branch_tip()
-    if query is None or query._branch_tip_db_id is None:
+    if query is None:
         # TODO: Consider other cases where we may want to fall back, like
         #       special arguments, etc that we don't handle well yet.
         trace.mutter('history_db falling back to original'
@@ -456,9 +454,7 @@
         return revno
     t0 = time.clock()
     query = _get_querier(self)
-    if query is not None:
-        query.ensure_branch_tip()
-    if query is None or query._branch_tip_db_id is None:
+    if query is None:
         trace.mutter('history_db falling back to original'
                      'revision_id => dotted_revno')
         return _orig_do_rev_id_to_dotted(self, revision_id)
@@ -473,13 +469,14 @@
     if revision_id not in revision_id_map:
         trace.mutter('history_db failed to find a mapping for {%s},'
                      'falling back' % (revision_id,))
-        return _orig_do_rev_id_to_dotted(self, revno)
+        return _orig_do_rev_id_to_dotted(self, revision_id)
     return revision_id_map[revision_id]
 
 
 def _history_db_dotted_revno_to_revision_id(self, revno):
     """See Branch._do_dotted_revno_to_revision_id."""
     # revno should be a dotted revno, aka either 1-part or 3-part tuple
+    import pdb; pdb.set_trace()
     t0 = time.clock()
     query = _get_querier(self)
     if query is None:
@@ -503,6 +500,7 @@
 
 def _history_db_post_change_branch_tip_hook(params):
     """Run when the tip of a branch changes revision_id."""
+    import pdb; pdb.set_trace()
     t0 = time.clock()
     import pprint
     # TODO: This requires a round-trip to the remote server to find out whether

=== modified file 'history_db.py'
--- a/history_db.py	2010-04-21 21:07:38 +0000
+++ b/history_db.py	2010-04-21 22:23:35 +0000
@@ -367,44 +367,49 @@
         children = {}
         parent_map = {}
         known = {}
-        while needed:
-            rev_id = needed.pop()
-            if rev_id in known:
-                # We may add particular parents multiple times, just ignore
-                # them once they've been found
-                continue
-            res = self._cursor.execute("SELECT gdfo"
-                                       "  FROM revision WHERE revision_id = ?",
-                                       (rev_id,)).fetchone()
-            if res is not None:
-                known[rev_id] = res[0]
-                continue
-            # We don't have this entry recorded yet, add the parents to the
-            # search
-            pmap = self._branch.repository.get_parent_map([rev_id])
-            parent_map.update(pmap)
-            parent_ids = pmap.get(rev_id, None)
-            if parent_ids is None or parent_ids == NULL_PARENTS:
-                # We can insert this rev directly, because we know its gdfo,
-                # as it has no parents.
-                parent_map[rev_id] = ()
-                self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"
-                                     " VALUES (?, ?)", (rev_id, 1))
-                # Wrap around to populate known quickly
-                needed.append(rev_id)
-                if parent_ids is None:
-                    # This is a ghost, add it to the table
-                    self._cursor.execute("INSERT INTO ghost (db_id)"
-                                         " SELECT db_id FROM revision"
-                                         "  WHERE revision_id = ?",
-                                         (rev_id,))
-                continue
-            for parent_id in pmap[rev_id]:
-                if parent_id not in known:
-                    if parent_id not in all_needed:
-                        needed.append(parent_id)
-                        all_needed.add(parent_id)
-                children.setdefault(parent_id, []).append(rev_id)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            while needed:
+                pb.update('Finding ancestry', len(all_needed), len(all_needed))
+                rev_id = needed.pop()
+                if rev_id in known:
+                    # We may add particular parents multiple times, just ignore
+                    # them once they've been found
+                    continue
+                res = self._cursor.execute(
+                    "SELECT gdfo FROM revision WHERE revision_id = ?",
+                    [rev_id]).fetchone()
+                if res is not None:
+                    known[rev_id] = res[0]
+                    continue
+                # We don't have this entry recorded yet, add the parents to the
+                # search
+                pmap = self._branch.repository.get_parent_map([rev_id])
+                parent_map.update(pmap)
+                parent_ids = pmap.get(rev_id, None)
+                if parent_ids is None or parent_ids == NULL_PARENTS:
+                    # We can insert this rev directly, because we know its
+                    # gdfo, as it has no parents.
+                    parent_map[rev_id] = ()
+                    self._cursor.execute("INSERT INTO revision (revision_id, gdfo)"
+                                         " VALUES (?, ?)", (rev_id, 1))
+                    # Wrap around to populate known quickly
+                    needed.append(rev_id)
+                    if parent_ids is None:
+                        # This is a ghost, add it to the table
+                        self._cursor.execute("INSERT INTO ghost (db_id)"
+                                             " SELECT db_id FROM revision"
+                                             "  WHERE revision_id = ?",
+                                             (rev_id,))
+                    continue
+                for parent_id in pmap[rev_id]:
+                    if parent_id not in known:
+                        if parent_id not in all_needed:
+                            needed.append(parent_id)
+                            all_needed.add(parent_id)
+                    children.setdefault(parent_id, []).append(rev_id)
+        finally:
+            pb.finished()
         return known, parent_map, children
 
     def _compute_gdfo_and_insert(self, known, children, parent_map):
@@ -1186,34 +1191,63 @@
         self._branch = a_branch
         self._branch_tip_rev_id = a_branch.last_revision()
         self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+        self._tip_is_imported = False
         self._stats = defaultdict(lambda: 0)
 
+    def _get_cursor(self):
+        if self._cursor is not None:
+            return self._cursor
+        db_conn = dbapi2.connect(self._db_path)
+        self._db_conn = db_conn
+        self._cursor = self._db_conn.cursor()
+        return self._cursor
+
     def ensure_branch_tip(self):
         """Ensure that the branch tip has been imported.
 
         This will run Importer if it has not.
         """
-        if self._branch_tip_db_id is not None:
-            # It has been imported
-            return
+        if self._branch_tip_db_id is not None and self._tip_is_imported:
+            return
+        if self._branch_tip_db_id is None:
+            # This revision has not been seen by the DB, so we know it isn't
+            # imported
+            self._import_tip()
+            return
+        if self._is_imported_db_id(self._branch_tip_db_id):
+            # This revision was seen, and imported
+            self._tip_is_imported = True
+            return
+        self._import_tip()
+
+    def _import_tip(self):
         if self._cursor is not None:
-            self._db_conn.close()
-            self._cursor = None
+            self.close()
+        t = time.time()
         importer = Importer(self._db_path, self._branch,
                             tip_revision_id=self._branch_tip_rev_id,
                             incremental=True)
         importer.do_import()
+        tdelta = time.time() - t
+        trace.note('imported %d nodes on-the-fly in %.3fs'
+                   % (importer._stats.get('total_nodes_inserted', 0), tdelta))
         self._db_conn = importer._db_conn
         self._cursor = importer._cursor
         self._branch_tip_db_id = self._get_db_id(self._branch_tip_rev_id)
-
-    def _get_cursor(self):
-        if self._cursor is not None:
-            return self._cursor
-        db_conn = dbapi2.connect(self._db_path)
-        self._db_conn = db_conn
-        self._cursor = self._db_conn.cursor()
-        return self._cursor
+        self._tip_is_imported = True
+
+    def _is_imported_db_id(self, tip_db_id):
+        res = self._get_cursor().execute(
+            "SELECT count(*) FROM dotted_revno"
+            " WHERE tip_revision = ?"
+            "   AND tip_revision = merged_revision",
+            (tip_db_id,)).fetchone()
+        return res[0] > 0
+
+    def close(self):
+        self._db_conn.close()
+        self._db_conn = None
+        self._cursor = None
 
     def _get_db_id(self, revision_id):
         try:
@@ -1356,8 +1390,9 @@
 
     def get_dotted_revno_range_multi(self, revision_ids):
         """Determine the dotted revno, using the range info, etc."""
+        self.ensure_branch_tip()
+        t = time.time()
         cursor = self._get_cursor()
-        t = time.time()
         tip_db_id = self._branch_tip_db_id
         if tip_db_id is None:
             raise TipNotImported(self._branch, self._branch_tip_rev_id)
@@ -1406,8 +1441,9 @@
 
     def get_revision_ids(self, revnos):
         """Map from a dotted-revno back into a revision_id."""
+        self.ensure_branch_tip()
         t = time.time()
-        tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+        tip_db_id = self._branch_tip_db_id
         # TODO: If tip_db_id is None, maybe we want to raise an exception here?
         #       To indicate that the branch has not been imported yet
         revno_strs = set(['.'.join(map(str, revno)) for revno in revnos])
@@ -1426,7 +1462,7 @@
                     "SELECT revision_id, revno"
                     "  FROM dotted_revno, revision"
                     " WHERE merged_revision = revision.db_id"
-                    "   tip_revision = ?"
+                    "   AND tip_revision = ?"
                     "   AND revno IN (%s)", len(revno_strs)),
                     [tip_db_id] + list(revno_strs)).fetchall()
                 next_db_id = self._get_lh_parent_db_id(tip_db_id)
@@ -1730,10 +1766,11 @@
         stop is *always* exclusive. You can simulate the rest by careful
         selection of stop.
         """
+        self.ensure_branch_tip()
         t = time.time()
-        tip_db_id = self._get_db_id(self._branch_tip_rev_id)
+        tip_db_id = self._branch_tip_db_id
         if tip_db_id is None:
-            raise ValueError('tip not imported')
+            raise TipNotImported(self._branch, self._branch_tip_rev_id)
         if start_revision_id is not None:
             start_db_id = self._get_db_id(start_revision_id)
         else:

=== modified file 'test_hooks.py'
--- a/test_hooks.py	2010-04-21 21:07:38 +0000
+++ b/test_hooks.py	2010-04-21 22:23:35 +0000
@@ -93,3 +93,10 @@
         b.get_config().set_user_option('history_db_path', history_db_path)
         self.assertEqual((2,),
                 history_db._history_db_revision_id_to_dotted_revno(b, 'B'))
+
+    def test_dotted_to_rev_not_imported(self):
+        history_db_path = self.get_history_db_path()
+        b, merge_sorted = self.make_simple_history_branch()
+        b.get_config().set_user_option('history_db_path', history_db_path)
+        self.assertEqual('B',
+                history_db._history_db_dotted_revno_to_revision_id(b, (2,)))



More information about the bazaar-commits mailing list