[MERGE] Speed up "log <file>" with packs-0.92 repository
Robert Collins
robertc at robertcollins.net
Sun Dec 2 19:57:57 GMT 2007
bb:resubmit
I want to understand some of the changes..
On Sun, 2007-12-02 at 18:56 +0100, Lukáš Lalinský wrote:
> === modified file 'bzrlib/knit.py'
> --- bzrlib/knit.py 2007-11-28 05:35:06 +0000
> +++ bzrlib/knit.py 2007-12-02 17:46:13 +0000
> @@ -1549,7 +1549,7 @@
> raise KnitCorrupt(self, "Cannot do delta compression
> without "
> "parent tracking.")
>
> - def _get_entries(self, keys, check_present=False):
> + def _get_entries_and_check(self, keys):
> """Get the entries for keys.
>
> :param keys: An iterable of index keys, - 1-tuples.
> @@ -1565,14 +1565,27 @@
> for node in self._graph_index.iter_entries(keys):
> yield node[0], node[1], node[2], ()
> found_keys.add(node[1])
> - if check_present:
> - missing_keys = keys.difference(found_keys)
> - if missing_keys:
> - raise RevisionNotPresent(missing_keys.pop(), self)
> + missing_keys = keys.difference(found_keys)
> + if missing_keys:
> + raise RevisionNotPresent(missing_keys.pop(), self)
> +
> + def _get_entries(self, keys):
> + """Get the entries for keys.
> +
> + :param keys: An iterable of index keys, - 1-tuples.
> + """
> + keys = set(keys)
> + if self._parents:
> + for node in self._graph_index.iter_entries(keys):
> + yield node
> + else:
> + # adapt parentless index to the rest of the code.
> + for node in self._graph_index.iter_entries(keys):
> + yield node[0], node[1], node[2], ()
This split of _get_entries into two functions seems to add duplicate
code; whats the reason for it ? (If its the cost of maintaining
found_keys, then consider structuring it as two nested generators, or a
helper 'check_present' function.
e.g. nodes = self._check_present(self._get_entries(version_ids),
version_ids)
> def _parentless_ancestry(self, versions):
> """Honour the get_ancestry API for parentless knit
> indices."""
> @@ -1675,7 +1688,7 @@
> the underlying implementation.
> """
> if self._parents:
> - all_nodes =
> set(self._get_entries(self._version_ids_to_keys(version_ids)))
> + all_nodes =
> list(self._get_entries(self._version_ids_to_keys(version_ids)))
> all_parents = set()
> present_parents = set()
> for node in all_nodes:
^^
I'm guessing you observe a noticable speed difference here?
> def get_view_revisions(mainline_revs, rev_nos, branch, direction,
> - include_merges=True):
> + rev_graph, include_merges=True):
> """Produce an iterator of revisions to show
> :return: an iterator of (revision_id, revno, merge_depth)
> (if there is no revno for a revision, None is supplied)
This function is public; you should:
- add rev_graph as an optional parameter (rev_graph=None) after
include_merges
- issue a deprecation warning if it is not supplied, and generate it on
demand that way.
- record this change in NEWS
Other than that this looks good.
-Rob
--
GPG key available at: <http://www.robertcollins.net/keys.txt>.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: This is a digitally signed message part
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20071203/239293e2/attachment-0001.pgp
More information about the bazaar
mailing list