Rev 107: Remove the dead code. Numbers from 'emacs': in http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering

Fri Apr 16 21:58:20 BST 2010

At http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering

------------------------------------------------------------
revno: 107
revision-id: john at arbash-meinel.com-20100416205756-1jlpjufjle2jt3hz
parent: john at arbash-meinel.com-20100416204005-wqi8zd5joa5nmqnm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: tip_numbering
timestamp: Fri 2010-04-16 15:57:56 -0500
message:
  Remove the dead code. Numbers from 'emacs':
  
  Stats:
  {'_insert_node_calls': 107836,
   'gdfo hit': 23,
   'gdfo miss': 77113,
   'is interesting': 77044,
   'nodes_expanded': 48,
   'not interesting imported': 9,
   'not interesting is mainline': 35,
   'not interesting known imported': 3111,
   'pushed': 184880,
   'ranges_inserted': 999,
   'revs_in_ranges': 99885,
   'step mainline': 218216,
   'step mainline added': 242003,
   'step mainline cache missed': 218032,
   'step mainline cached': 184,
   'step mainline unknown': 218216,
   'total_nodes_inserted': 184880}
-------------- next part --------------
=== modified file 'history_db.py'

--- a/history_db.py	2010-04-16 20:40:05 +0000
+++ b/history_db.py	2010-04-16 20:57:56 +0000
@@ -639,29 +639,6 @@
             n.revno = (revno,)
             n._base_revno = revno
 
-    def DONT_get_initial_search_tips(self):
-        """Grab the right-hand parents of all the interesting mainline.
-
-        We know we already searched all of the left-hand parents, so just grab
-        the right-hand parents.
-        """
-        # TODO: Split this into a loop, since sqlite has a maximum number of
-        #       parameters.
-        res = _get_result_for_many(self._cursor,
-            "SELECT parent, gdfo FROM parent, revision"
-            " WHERE parent.parent = revision.db_id"
-            "   AND parent_idx != 0"
-            "   AND child IN (%s)",
-            self._mainline_db_ids)
-        self._search_tips = set([r[0] for r in res])
-        self._stats['num_search_tips'] += len(self._search_tips)
-        self._known_gdfo.update(res)
-        # We know that we will eventually need at least 1 step of the mainline
-        # (it gives us the basis for numbering everything). We do it now,
-        # because it increases the 'cheap' filtering we can do right away.
-        self._stats['step mainline initial'] += 1
-        self._step_mainline()
-
     def _imported_tip_revno(self, tip_db_id):
         res = self._cursor.execute(
             "SELECT revno FROM dotted_revno"
@@ -672,112 +649,6 @@
             return None
         return int(res[0])
 
-    def DONT_split_search_tips_by_gdfo(self, unknown):
-        """For these search tips, mark ones 'interesting' based on gdfo.
-        
-        All search tips are ancestors of _mainline_db_ids. So if their gdfo
-        indicates that they could not be merged into already imported
-        revisions, then we know automatically that they are
-        new-and-interesting. Further, if they are present in
-        _imported_dotted_revno, then we know they are not interesting, and
-        we will stop searching them.
-
-        Otherwise, we don't know for sure which category they fall into, so
-        we return them for further processing.
-
-        :return: still_unknown - search tips that aren't known to be
-            interesting, and aren't known to be in the ancestry of already
-            imported revisions.
-        """
-        still_unknown = []
-        min_gdfo = None
-        for db_id in unknown:
-            if (db_id in self._imported_dotted_revno
-                or db_id == self._imported_mainline_id):
-                # This should be removed as a search tip, we know it isn't
-                # interesting, it is an ancestor of an imported revision
-                self._stats['split already imported'] += 1
-                self._search_tips.remove(db_id)
-                continue
-            gdfo = self._known_gdfo[db_id]
-            if gdfo >= self._imported_gdfo:
-                self._stats['split gdfo'] += 1
-                self._interesting_ancestor_ids.add(db_id)
-            else:
-                still_unknown.append(db_id)
-        return still_unknown
-
-    def DONT_split_interesting_using_children(self, unknown_search_tips):
-        """Find children of these search tips.
-
-        For each search tip, we find all of its known children. We then filter
-        the children by:
-            a) child is ignored if child in _interesting_ancestor_ids
-            b) child is ignored if gdfo(child) > _imported_gdfo
-                or (gdfo(child) == _imported_gdfo and child !=
-                _imported_mainline_id)
-               The reason for the extra check is because for the ancestry
-               left-to-be-searched, with tip at _imported_mainline_id, *only*
-               _imported_mainline_id is allowed to have gdfo = _imported_gdfo.
-        for each search tip, if there are no interesting children, then this
-        search tip is definitely interesting (there is no path for it to be
-        merged into a previous mainline entry.)
-
-        :return: still_unknown
-            still_unknown are the search tips that are still have children that
-            could be possibly merged.
-        """
-        interesting = self._interesting_ancestor_ids
-        parent_child_res = self._cursor.execute(_add_n_params(
-            "SELECT parent, child FROM parent"
-            " WHERE parent IN (%s)",
-            len(unknown_search_tips)), unknown_search_tips).fetchall()
-        parent_to_children = {}
-        already_imported = set()
-        for parent, child in parent_child_res:
-            if (child in self._imported_dotted_revno
-                or child == self._imported_mainline_id):
-                # This child is already imported, so obviously the parent is,
-                # too.
-                self._stats['split child imported'] += 1
-                already_imported.add(parent)
-                already_imported.add(child)
-            parent_to_children.setdefault(parent, []).append(child)
-        self._search_tips.difference_update(already_imported)
-        possibly_merged_children = set(
-            [c for p, c in parent_child_res
-                if c not in interesting and p not in already_imported])
-        known_gdfo = self._known_gdfo
-        unknown_gdfos = [c for c in possibly_merged_children
-                            if c not in known_gdfo]
-        # TODO: Is it more wasteful to join this table early, or is it better
-        #       because we avoid having to pass N parameters back in?
-        # TODO: Would sorting the db ids help? They are the primary key for the
-        #       table, so we could potentially fetch in a better order.
-        if unknown_gdfos:
-            res = self._cursor.execute(_add_n_params(
-                "SELECT db_id, gdfo FROM revision WHERE db_id IN (%s)",
-                len(unknown_gdfos)), unknown_gdfos)
-            known_gdfo.update(res)
-        min_gdfo = self._imported_gdfo
-        # Remove all of the children who have gdfo >= min. We already handled
-        # the == min case in the first loop.
-        possibly_merged_children.difference_update(
-            [c for c in possibly_merged_children if known_gdfo[c] >= min_gdfo])
-        still_unknown = []
-        for parent in unknown_search_tips:
-            if parent in already_imported:
-                self._stats['split parent imported'] += 1
-                continue
-            for c in parent_to_children[parent]:
-                if c in possibly_merged_children:
-                    still_unknown.append(parent)
-                    break
-            else: # All children could not be possibly merged
-                self._stats['split children interesting'] += 1
-                interesting.add(parent)
-        return still_unknown
-
     def _step_mainline(self):
         """Move the mainline pointer by one, updating the data."""
         # TODO: We could probably use a gdfo hint to determine if we want to
@@ -848,70 +719,6 @@
         self._stats['num_search_tips'] += len(self._search_tips)
         self._known_gdfo.update(res)
 
-    def DONT_ensure_lh_parent_info(self):
-        """LH parents of interesting_ancestor_ids is either present or pending.
-
-        Either the data should be in _imported_dotted_revno, or the lh parent
-        should be in interesting_ancestor_ids (meaning we will number it).
-        """
-        # TODO: I think we can remove ghosts from _interesting_ancestor_ids
-        #       here, and then the regular _push_node will handle ghosts trying
-        #       to be added, and we won't have to separately probe for that.
-        missing_parent_ids = set()
-        for db_id in self._interesting_ancestor_ids:
-            parent_ids = self._get_parents(db_id)
-            if not parent_ids: # no parents, nothing to add
-                continue
-            lh_parent = parent_ids[0]
-            if lh_parent in self._interesting_ancestor_ids:
-                continue
-            if lh_parent in self._imported_dotted_revno:
-                continue
-            missing_parent_ids.add(lh_parent)
-        missing_parent_ids.difference_update(self._ghosts)
-        while missing_parent_ids:
-            self._stats['step mainline ensure LH'] += 1
-            self._step_mainline()
-            # We expect missing_parent_ids to be small, but
-            # self._imported_dotted_revno to be large, so we use .difference
-            # rather than .difference_update
-            missing_parent_ids = missing_parent_ids.difference(
-                                    self._imported_dotted_revno)
-
-    def DONT_find_interesting_ancestry(self):
-        self._find_needed_mainline()
-        self._get_initial_search_tips()
-        while self._search_tips:
-            # We don't know whether these search tips are known interesting, or
-            # known uninteresting
-            unknown = list(self._search_tips)
-            while unknown:
-                unknown = self._split_search_tips_by_gdfo(unknown)
-                if not unknown:
-                    break
-                unknown = self._split_interesting_using_children(unknown)
-                if not unknown:
-                    break
-                # The current search tips are the 'newest' possible tips right
-                # now. If we can't classify them as definitely being
-                # interesting, then we need to step the mainline until we can.
-                # This means that the current search tips have children that
-                # could be merged into an earlier mainline, walk the mainline
-                # to see if we can resolve that.
-                # Note that relying strictly on gdfo is a bit of a waste here,
-                # because you may have a rev with 10 children before it lands
-                # in mainline, but all 11 revs will be in the dotted_revno
-                # cache for that mainline.
-                self._stats['step mainline unknown'] += 1
-                self._step_mainline()
-            # All search_tips are known to either be interesting or
-            # uninteresting. Walk any search tips that remain.
-            self._step_search_tips()
-        # We're now sure we have all of the now-interesting revisions. To
-        # number them, we need their left-hand parents to be in
-        # _imported_dotted_revno
-        self._ensure_lh_parent_info()
-
     def _update_info_from_dotted_revno(self, dotted_info):
         """Update info like 'child_seen' from the dotted_revno info."""
         self._imported_dotted_revno.update(dotted_info)