Rev 2988: (robertc) change VersionedFile.iter_lines_added_or_present_in_versions to return the version_id of the text each line is present in. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Nov 14 00:01:29 GMT 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2988
revision-id: pqm at pqm.ubuntu.com-20071114000124-2rkxwrcwid2wgcqn
parent: pqm at pqm.ubuntu.com-20071113231602-qy0rskctbikrs59x
parent: robertc at robertcollins.net-20071113213937-zguxb7m0j6ofy2au
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2007-11-14 00:01:24 +0000
message:
(robertc) change VersionedFile.iter_lines_added_or_present_in_versions to return the version_id of the text each line is present in. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/tests/test_knit.py test_knit.py-20051212171302-95d4c00dd5f11f2b
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
bzrlib/weave.py knit.py-20050627021749-759c29984154256b
------------------------------------------------------------
revno: 2975.3.2
merged: robertc at robertcollins.net-20071113213937-zguxb7m0j6ofy2au
parent: robertc at robertcollins.net-20071109175031-agaiy6530rvbprmb
committer: Robert Collins <robertc at robertcollins.net>
branch nick: iter-lines
timestamp: Wed 2007-11-14 08:39:37 +1100
message:
Review feedback - document the API change and improve readability in pack's _do_copy_nodes.
------------------------------------------------------------
revno: 2975.3.1
merged: robertc at robertcollins.net-20071109175031-agaiy6530rvbprmb
parent: pqm at pqm.ubuntu.com-20071107140948-l3p8njdhgwstdkri
committer: Robert Collins <robertc at robertcollins.net>
branch nick: iter-lines
timestamp: Sat 2007-11-10 04:50:31 +1100
message:
Change (without backwards compatibility) the
iter_lines_added_or_present_in_versions VersionedFile API to yield the
text version that each line is being returned from. This is useful for
reconcile in determining what inventories reference what texts.
(Robert Collins)
=== modified file 'NEWS'
--- a/NEWS 2007-11-13 22:26:57 +0000
+++ b/NEWS 2007-11-14 00:01:24 +0000
@@ -89,6 +89,12 @@
* ``osutils.backup_file`` is deprecated. Actually it's not used in bzrlib
during very long time. (Alexander Belchenko)
+ * The return value of
+ ``VersionedFile.iter_lines_added_or_present_in_versions`` has been
+ changed. Previously it was an iterator of lines, now it is an iterator of
+ (line, version_id) tuples. This change has been made to aid reconcile and
+ fetch operations. (Robert Collins)
+
INTERNALS:
* Added ``ContainerSerialiser`` and ``ContainerPushParser`` to
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-10-26 08:56:09 +0000
+++ b/bzrlib/knit.py 2007-11-09 17:50:31 +0000
@@ -1117,8 +1117,11 @@
line_iterator = self.factory.get_fulltext_content(data)
else:
line_iterator = self.factory.get_linedelta_content(data)
+ # XXX: It might be more efficient to yield (version_id,
+ # line_iterator) in the future. However for now, this is a simpler
+ # change to integrate into the rest of the codebase. RBC 20071110
for line in line_iterator:
- yield line
+ yield line, version_id
pb.update('Walking content.', total, total)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2007-11-07 13:10:37 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2007-11-13 21:39:37 +0000
@@ -708,7 +708,7 @@
"""Copy knit nodes between packs.
:param output_lines: Return lines present in the copied data as
- an iterator.
+ an iterator of line,version_id.
"""
pb = ui.ui_factory.nested_progress_bar()
try:
@@ -754,18 +754,19 @@
for (names, read_func), (_1, _2, (key, eol_flag, references)) in \
izip(reader.iter_records(), pack_readv_requests):
raw_data = read_func(None)
+ version_id = key[-1]
if output_lines:
# read the entire thing
- content, _ = knit_data._parse_record(key[-1], raw_data)
+ content, _ = knit_data._parse_record(version_id, raw_data)
if len(references[-1]) == 0:
line_iterator = factory.get_fulltext_content(content)
else:
line_iterator = factory.get_linedelta_content(content)
for line in line_iterator:
- yield line
+ yield line, version_id
else:
# check the header only
- df, _ = knit_data._parse_record_header(key[-1], raw_data)
+ df, _ = knit_data._parse_record_header(version_id, raw_data)
df.close()
pos, size = writer.add_bytes_record(raw_data, names)
write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2007-10-31 22:33:53 +0000
+++ b/bzrlib/repository.py 2007-11-09 17:50:31 +0000
@@ -1061,7 +1061,7 @@
This performs the translation of xml lines to revision ids.
- :param line_iterator: An iterator of lines
+ :param line_iterator: An iterator of lines, origin_version_id
:param revision_ids: The revision ids to filter for. This should be a
set or other type which supports efficient __contains__ lookups, as
the revision id from each parsed line will be looked up in the
@@ -1091,7 +1091,7 @@
search = self._file_ids_altered_regex.search
unescape = _unescape_xml
setdefault = result.setdefault
- for line in line_iterator:
+ for line, version_id in line_iterator:
match = search(line)
if match is None:
continue
@@ -1115,6 +1115,8 @@
unescape_revid_cache[revision_id] = unescaped
revision_id = unescaped
+ # once data is all ensured-consistent; then this is
+ # if revision_id == version_id
if revision_id in revision_ids:
try:
file_id = unescape_fileid_cache[file_id]
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/test_knit.py 2007-11-09 17:50:31 +0000
@@ -1406,7 +1406,7 @@
self.assertEqual(
[('readv', 'id.knit', [(0, 87), (87, 89)], False, None)],
instrumented_t._activity)
- self.assertEqual(['text\n', 'text2\n'], results)
+ self.assertEqual([('text\n', 'base'), ('text2\n', 'base2')], results)
def test_create_empty_annotated(self):
k1 = self.make_test_knit(True)
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2007-11-09 17:50:31 +0000
@@ -560,18 +560,14 @@
['base\n', 'lancestor\n', 'otherchild\n'])
def iter_with_versions(versions, expected):
# now we need to see what lines are returned, and how often.
- lines = {'base\n':0,
- 'lancestor\n':0,
- 'rancestor\n':0,
- 'child\n':0,
- 'otherchild\n':0,
- }
+ lines = {}
progress = InstrumentedProgress()
# iterate over the lines
- for line in vf.iter_lines_added_or_present_in_versions(versions,
+ for line in vf.iter_lines_added_or_present_in_versions(versions,
pb=progress):
+ lines.setdefault(line, 0)
lines[line] += 1
- if []!= progress.updates:
+ if []!= progress.updates:
self.assertEqual(expected, progress.updates)
return lines
lines = iter_with_versions(['child', 'otherchild'],
@@ -579,8 +575,8 @@
('Walking content.', 1, 2),
('Walking content.', 2, 2)])
# we must see child and otherchild
- self.assertTrue(lines['child\n'] > 0)
- self.assertTrue(lines['otherchild\n'] > 0)
+ self.assertTrue(lines[('child\n', 'child')] > 0)
+ self.assertTrue(lines[('otherchild\n', 'otherchild')] > 0)
# we dont care if we got more than that.
# test all lines
@@ -591,11 +587,11 @@
('Walking content.', 4, 5),
('Walking content.', 5, 5)])
# all lines must be seen at least once
- self.assertTrue(lines['base\n'] > 0)
- self.assertTrue(lines['lancestor\n'] > 0)
- self.assertTrue(lines['rancestor\n'] > 0)
- self.assertTrue(lines['child\n'] > 0)
- self.assertTrue(lines['otherchild\n'] > 0)
+ self.assertTrue(lines[('base\n', 'base')] > 0)
+ self.assertTrue(lines[('lancestor\n', 'lancestor')] > 0)
+ self.assertTrue(lines[('rancestor\n', 'rancestor')] > 0)
+ self.assertTrue(lines[('child\n', 'child')] > 0)
+ self.assertTrue(lines[('otherchild\n', 'otherchild')] > 0)
def test_add_lines_with_ghosts(self):
# some versioned file formats allow lines to be added with parent
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2007-10-17 09:39:41 +0000
+++ b/bzrlib/versionedfile.py 2007-11-13 21:39:37 +0000
@@ -420,15 +420,16 @@
version_ids,
ignore_missing)
- def iter_lines_added_or_present_in_versions(self, version_ids=None,
+ def iter_lines_added_or_present_in_versions(self, version_ids=None,
pb=None):
"""Iterate over the lines in the versioned file from version_ids.
- This may return lines from other versions, and does not return the
- specific version marker at this point. The api may be changed
- during development to include the version that the versioned file
- thinks is relevant, but given that such hints are just guesses,
- its better not to have it if we don't need it.
+ This may return lines from other versions. Each item the returned
+ iterator yields is a tuple of a line and a text version that that line
+ is present in (not introduced in).
+
+ Ordering of results is in whatever order is most suitable for the
+ underlying storage format.
If a progress bar is supplied, it may be used to indicate progress.
The caller is responsible for cleaning up progress bars (because this
@@ -436,6 +437,8 @@
NOTES: Lines are normalised: they will all have \n terminators.
Lines are returned in arbitrary order.
+
+ :return: An iterator over (line, version_id).
"""
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2007-10-05 05:52:45 +0000
+++ b/bzrlib/weave.py 2007-11-09 17:50:31 +0000
@@ -463,9 +463,9 @@
# properly, we do not filter down to that
# if inserted not in version_ids: continue
if line[-1] != '\n':
- yield line + '\n'
+ yield line + '\n', inserted
else:
- yield line
+ yield line, inserted
def _walk_internal(self, version_ids=None):
"""Helper method for weave actions."""
More information about the bazaar-commits
mailing list