Rev 107: Remove the dead code. Numbers from 'emacs': in http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering
John Arbash Meinel
john at arbash-meinel.com
Fri Apr 16 21:58:20 BST 2010
At http://bzr.arbash-meinel.com/branches/bzr/history_db/tip_numbering
------------------------------------------------------------
revno: 107
revision-id: john at arbash-meinel.com-20100416205756-1jlpjufjle2jt3hz
parent: john at arbash-meinel.com-20100416204005-wqi8zd5joa5nmqnm
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: tip_numbering
timestamp: Fri 2010-04-16 15:57:56 -0500
message:
Remove the dead code. Numbers from 'emacs':
Stats:
{'_insert_node_calls': 107836,
'gdfo hit': 23,
'gdfo miss': 77113,
'is interesting': 77044,
'nodes_expanded': 48,
'not interesting imported': 9,
'not interesting is mainline': 35,
'not interesting known imported': 3111,
'pushed': 184880,
'ranges_inserted': 999,
'revs_in_ranges': 99885,
'step mainline': 218216,
'step mainline added': 242003,
'step mainline cache missed': 218032,
'step mainline cached': 184,
'step mainline unknown': 218216,
'total_nodes_inserted': 184880}
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-16 20:40:05 +0000
+++ b/history_db.py 2010-04-16 20:57:56 +0000
@@ -639,29 +639,6 @@
n.revno = (revno,)
n._base_revno = revno
- def DONT_get_initial_search_tips(self):
- """Grab the right-hand parents of all the interesting mainline.
-
- We know we already searched all of the left-hand parents, so just grab
- the right-hand parents.
- """
- # TODO: Split this into a loop, since sqlite has a maximum number of
- # parameters.
- res = _get_result_for_many(self._cursor,
- "SELECT parent, gdfo FROM parent, revision"
- " WHERE parent.parent = revision.db_id"
- " AND parent_idx != 0"
- " AND child IN (%s)",
- self._mainline_db_ids)
- self._search_tips = set([r[0] for r in res])
- self._stats['num_search_tips'] += len(self._search_tips)
- self._known_gdfo.update(res)
- # We know that we will eventually need at least 1 step of the mainline
- # (it gives us the basis for numbering everything). We do it now,
- # because it increases the 'cheap' filtering we can do right away.
- self._stats['step mainline initial'] += 1
- self._step_mainline()
-
def _imported_tip_revno(self, tip_db_id):
res = self._cursor.execute(
"SELECT revno FROM dotted_revno"
@@ -672,112 +649,6 @@
return None
return int(res[0])
- def DONT_split_search_tips_by_gdfo(self, unknown):
- """For these search tips, mark ones 'interesting' based on gdfo.
-
- All search tips are ancestors of _mainline_db_ids. So if their gdfo
- indicates that they could not be merged into already imported
- revisions, then we know automatically that they are
- new-and-interesting. Further, if they are present in
- _imported_dotted_revno, then we know they are not interesting, and
- we will stop searching them.
-
- Otherwise, we don't know for sure which category they fall into, so
- we return them for further processing.
-
- :return: still_unknown - search tips that aren't known to be
- interesting, and aren't known to be in the ancestry of already
- imported revisions.
- """
- still_unknown = []
- min_gdfo = None
- for db_id in unknown:
- if (db_id in self._imported_dotted_revno
- or db_id == self._imported_mainline_id):
- # This should be removed as a search tip, we know it isn't
- # interesting, it is an ancestor of an imported revision
- self._stats['split already imported'] += 1
- self._search_tips.remove(db_id)
- continue
- gdfo = self._known_gdfo[db_id]
- if gdfo >= self._imported_gdfo:
- self._stats['split gdfo'] += 1
- self._interesting_ancestor_ids.add(db_id)
- else:
- still_unknown.append(db_id)
- return still_unknown
-
- def DONT_split_interesting_using_children(self, unknown_search_tips):
- """Find children of these search tips.
-
- For each search tip, we find all of its known children. We then filter
- the children by:
- a) child is ignored if child in _interesting_ancestor_ids
- b) child is ignored if gdfo(child) > _imported_gdfo
- or (gdfo(child) == _imported_gdfo and child !=
- _imported_mainline_id)
- The reason for the extra check is because for the ancestry
- left-to-be-searched, with tip at _imported_mainline_id, *only*
- _imported_mainline_id is allowed to have gdfo = _imported_gdfo.
- for each search tip, if there are no interesting children, then this
- search tip is definitely interesting (there is no path for it to be
- merged into a previous mainline entry.)
-
- :return: still_unknown
- still_unknown are the search tips that are still have children that
- could be possibly merged.
- """
- interesting = self._interesting_ancestor_ids
- parent_child_res = self._cursor.execute(_add_n_params(
- "SELECT parent, child FROM parent"
- " WHERE parent IN (%s)",
- len(unknown_search_tips)), unknown_search_tips).fetchall()
- parent_to_children = {}
- already_imported = set()
- for parent, child in parent_child_res:
- if (child in self._imported_dotted_revno
- or child == self._imported_mainline_id):
- # This child is already imported, so obviously the parent is,
- # too.
- self._stats['split child imported'] += 1
- already_imported.add(parent)
- already_imported.add(child)
- parent_to_children.setdefault(parent, []).append(child)
- self._search_tips.difference_update(already_imported)
- possibly_merged_children = set(
- [c for p, c in parent_child_res
- if c not in interesting and p not in already_imported])
- known_gdfo = self._known_gdfo
- unknown_gdfos = [c for c in possibly_merged_children
- if c not in known_gdfo]
- # TODO: Is it more wasteful to join this table early, or is it better
- # because we avoid having to pass N parameters back in?
- # TODO: Would sorting the db ids help? They are the primary key for the
- # table, so we could potentially fetch in a better order.
- if unknown_gdfos:
- res = self._cursor.execute(_add_n_params(
- "SELECT db_id, gdfo FROM revision WHERE db_id IN (%s)",
- len(unknown_gdfos)), unknown_gdfos)
- known_gdfo.update(res)
- min_gdfo = self._imported_gdfo
- # Remove all of the children who have gdfo >= min. We already handled
- # the == min case in the first loop.
- possibly_merged_children.difference_update(
- [c for c in possibly_merged_children if known_gdfo[c] >= min_gdfo])
- still_unknown = []
- for parent in unknown_search_tips:
- if parent in already_imported:
- self._stats['split parent imported'] += 1
- continue
- for c in parent_to_children[parent]:
- if c in possibly_merged_children:
- still_unknown.append(parent)
- break
- else: # All children could not be possibly merged
- self._stats['split children interesting'] += 1
- interesting.add(parent)
- return still_unknown
-
def _step_mainline(self):
"""Move the mainline pointer by one, updating the data."""
# TODO: We could probably use a gdfo hint to determine if we want to
@@ -848,70 +719,6 @@
self._stats['num_search_tips'] += len(self._search_tips)
self._known_gdfo.update(res)
- def DONT_ensure_lh_parent_info(self):
- """LH parents of interesting_ancestor_ids is either present or pending.
-
- Either the data should be in _imported_dotted_revno, or the lh parent
- should be in interesting_ancestor_ids (meaning we will number it).
- """
- # TODO: I think we can remove ghosts from _interesting_ancestor_ids
- # here, and then the regular _push_node will handle ghosts trying
- # to be added, and we won't have to separately probe for that.
- missing_parent_ids = set()
- for db_id in self._interesting_ancestor_ids:
- parent_ids = self._get_parents(db_id)
- if not parent_ids: # no parents, nothing to add
- continue
- lh_parent = parent_ids[0]
- if lh_parent in self._interesting_ancestor_ids:
- continue
- if lh_parent in self._imported_dotted_revno:
- continue
- missing_parent_ids.add(lh_parent)
- missing_parent_ids.difference_update(self._ghosts)
- while missing_parent_ids:
- self._stats['step mainline ensure LH'] += 1
- self._step_mainline()
- # We expect missing_parent_ids to be small, but
- # self._imported_dotted_revno to be large, so we use .difference
- # rather than .difference_update
- missing_parent_ids = missing_parent_ids.difference(
- self._imported_dotted_revno)
-
- def DONT_find_interesting_ancestry(self):
- self._find_needed_mainline()
- self._get_initial_search_tips()
- while self._search_tips:
- # We don't know whether these search tips are known interesting, or
- # known uninteresting
- unknown = list(self._search_tips)
- while unknown:
- unknown = self._split_search_tips_by_gdfo(unknown)
- if not unknown:
- break
- unknown = self._split_interesting_using_children(unknown)
- if not unknown:
- break
- # The current search tips are the 'newest' possible tips right
- # now. If we can't classify them as definitely being
- # interesting, then we need to step the mainline until we can.
- # This means that the current search tips have children that
- # could be merged into an earlier mainline, walk the mainline
- # to see if we can resolve that.
- # Note that relying strictly on gdfo is a bit of a waste here,
- # because you may have a rev with 10 children before it lands
- # in mainline, but all 11 revs will be in the dotted_revno
- # cache for that mainline.
- self._stats['step mainline unknown'] += 1
- self._step_mainline()
- # All search_tips are known to either be interesting or
- # uninteresting. Walk any search tips that remain.
- self._step_search_tips()
- # We're now sure we have all of the now-interesting revisions. To
- # number them, we need their left-hand parents to be in
- # _imported_dotted_revno
- self._ensure_lh_parent_info()
-
def _update_info_from_dotted_revno(self, dotted_info):
"""Update info like 'child_seen' from the dotted_revno info."""
self._imported_dotted_revno.update(dotted_info)
More information about the bazaar-commits
mailing list