Rev 2988: (robertc) change VersionedFile.iter_lines_added_or_present_in_versions to return the version_id of the text the line is present in. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Nov 14 00:01:29 GMT 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2988
revision-id: pqm at pqm.ubuntu.com-20071114000124-2rkxwrcwid2wgcqn
parent: pqm at pqm.ubuntu.com-20071113231602-qy0rskctbikrs59x
parent: robertc at robertcollins.net-20071113213937-zguxb7m0j6ofy2au
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2007-11-14 00:01:24 +0000
message:
  (robertc) change VersionedFile.iter_lines_added_or_present_in_versions to return the version_id of the text the line is present in. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 2975.3.2
    merged: robertc at robertcollins.net-20071113213937-zguxb7m0j6ofy2au
    parent: robertc at robertcollins.net-20071109175031-agaiy6530rvbprmb
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: iter-lines
    timestamp: Wed 2007-11-14 08:39:37 +1100
    message:
      Review feedback - document the API change and improve readability in pack's _do_copy_nodes.
    ------------------------------------------------------------
    revno: 2975.3.1
    merged: robertc at robertcollins.net-20071109175031-agaiy6530rvbprmb
    parent: pqm at pqm.ubuntu.com-20071107140948-l3p8njdhgwstdkri
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: iter-lines
    timestamp: Sat 2007-11-10 04:50:31 +1100
    message:
      Change (without backwards compatibility) the
      iter_lines_added_or_present_in_versions VersionedFile API to yield the
      text version that each line is being returned from. This is useful for
      reconcile in determining what inventories reference what texts.
      (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS	2007-11-13 22:26:57 +0000
+++ b/NEWS	2007-11-14 00:01:24 +0000
@@ -89,6 +89,12 @@
    * ``osutils.backup_file`` is deprecated. Actually it's not used in bzrlib
      during very long time. (Alexander Belchenko)
 
+   * The return value of
+     ``VersionedFile.iter_lines_added_or_present_in_versions`` has been
+     changed. Previously it was an iterator of lines, now it is an iterator of
+     (line, version_id) tuples. This change has been made to aid reconcile and
+     fetch operations. (Robert Collins)
+
   INTERNALS:
 
    * Added ``ContainerSerialiser`` and ``ContainerPushParser`` to

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-10-26 08:56:09 +0000
+++ b/bzrlib/knit.py	2007-11-09 17:50:31 +0000
@@ -1117,8 +1117,11 @@
                 line_iterator = self.factory.get_fulltext_content(data)
             else:
                 line_iterator = self.factory.get_linedelta_content(data)
+            # XXX: It might be more efficient to yield (version_id,
+            # line_iterator) in the future. However for now, this is a simpler
+            # change to integrate into the rest of the codebase. RBC 20071110
             for line in line_iterator:
-                yield line
+                yield line, version_id
 
         pb.update('Walking content.', total, total)
         

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-11-07 13:10:37 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-11-13 21:39:37 +0000
@@ -708,7 +708,7 @@
         """Copy knit nodes between packs.
 
         :param output_lines: Return lines present in the copied data as
-            an iterator.
+            an iterator of line,version_id.
         """
         pb = ui.ui_factory.nested_progress_bar()
         try:
@@ -754,18 +754,19 @@
             for (names, read_func), (_1, _2, (key, eol_flag, references)) in \
                 izip(reader.iter_records(), pack_readv_requests):
                 raw_data = read_func(None)
+                version_id = key[-1]
                 if output_lines:
                     # read the entire thing
-                    content, _ = knit_data._parse_record(key[-1], raw_data)
+                    content, _ = knit_data._parse_record(version_id, raw_data)
                     if len(references[-1]) == 0:
                         line_iterator = factory.get_fulltext_content(content)
                     else:
                         line_iterator = factory.get_linedelta_content(content)
                     for line in line_iterator:
-                        yield line
+                        yield line, version_id
                 else:
                     # check the header only
-                    df, _ = knit_data._parse_record_header(key[-1], raw_data)
+                    df, _ = knit_data._parse_record_header(version_id, raw_data)
                     df.close()
                 pos, size = writer.add_bytes_record(raw_data, names)
                 write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-10-31 22:33:53 +0000
+++ b/bzrlib/repository.py	2007-11-09 17:50:31 +0000
@@ -1061,7 +1061,7 @@
 
         This performs the translation of xml lines to revision ids.
 
-        :param line_iterator: An iterator of lines
+        :param line_iterator: An iterator of lines, origin_version_id
         :param revision_ids: The revision ids to filter for. This should be a
             set or other type which supports efficient __contains__ lookups, as
             the revision id from each parsed line will be looked up in the
@@ -1091,7 +1091,7 @@
         search = self._file_ids_altered_regex.search
         unescape = _unescape_xml
         setdefault = result.setdefault
-        for line in line_iterator:
+        for line, version_id in line_iterator:
             match = search(line)
             if match is None:
                 continue
@@ -1115,6 +1115,8 @@
                 unescape_revid_cache[revision_id] = unescaped
                 revision_id = unescaped
 
+            # Once all data is ensured to be consistent, this check becomes
+            # ``if revision_id == version_id``.
             if revision_id in revision_ids:
                 try:
                     file_id = unescape_fileid_cache[file_id]

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/test_knit.py	2007-11-09 17:50:31 +0000
@@ -1406,7 +1406,7 @@
         self.assertEqual(
             [('readv', 'id.knit', [(0, 87), (87, 89)], False, None)],
             instrumented_t._activity)
-        self.assertEqual(['text\n', 'text2\n'], results)
+        self.assertEqual([('text\n', 'base'), ('text2\n', 'base2')], results)
 
     def test_create_empty_annotated(self):
         k1 = self.make_test_knit(True)

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-11-09 17:50:31 +0000
@@ -560,18 +560,14 @@
                      ['base\n', 'lancestor\n', 'otherchild\n'])
         def iter_with_versions(versions, expected):
             # now we need to see what lines are returned, and how often.
-            lines = {'base\n':0,
-                     'lancestor\n':0,
-                     'rancestor\n':0,
-                     'child\n':0,
-                     'otherchild\n':0,
-                     }
+            lines = {}
             progress = InstrumentedProgress()
             # iterate over the lines
-            for line in vf.iter_lines_added_or_present_in_versions(versions, 
+            for line in vf.iter_lines_added_or_present_in_versions(versions,
                 pb=progress):
+                lines.setdefault(line, 0)
                 lines[line] += 1
-            if []!= progress.updates: 
+            if []!= progress.updates:
                 self.assertEqual(expected, progress.updates)
             return lines
         lines = iter_with_versions(['child', 'otherchild'],
@@ -579,8 +575,8 @@
                                     ('Walking content.', 1, 2),
                                     ('Walking content.', 2, 2)])
         # we must see child and otherchild
-        self.assertTrue(lines['child\n'] > 0)
-        self.assertTrue(lines['otherchild\n'] > 0)
+        self.assertTrue(lines[('child\n', 'child')] > 0)
+        self.assertTrue(lines[('otherchild\n', 'otherchild')] > 0)
         # we dont care if we got more than that.
         
         # test all lines
@@ -591,11 +587,11 @@
                                           ('Walking content.', 4, 5),
                                           ('Walking content.', 5, 5)])
         # all lines must be seen at least once
-        self.assertTrue(lines['base\n'] > 0)
-        self.assertTrue(lines['lancestor\n'] > 0)
-        self.assertTrue(lines['rancestor\n'] > 0)
-        self.assertTrue(lines['child\n'] > 0)
-        self.assertTrue(lines['otherchild\n'] > 0)
+        self.assertTrue(lines[('base\n', 'base')] > 0)
+        self.assertTrue(lines[('lancestor\n', 'lancestor')] > 0)
+        self.assertTrue(lines[('rancestor\n', 'rancestor')] > 0)
+        self.assertTrue(lines[('child\n', 'child')] > 0)
+        self.assertTrue(lines[('otherchild\n', 'otherchild')] > 0)
 
     def test_add_lines_with_ghosts(self):
         # some versioned file formats allow lines to be added with parent

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-10-17 09:39:41 +0000
+++ b/bzrlib/versionedfile.py	2007-11-13 21:39:37 +0000
@@ -420,15 +420,16 @@
             version_ids,
             ignore_missing)
 
-    def iter_lines_added_or_present_in_versions(self, version_ids=None, 
+    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                 pb=None):
         """Iterate over the lines in the versioned file from version_ids.
 
-        This may return lines from other versions, and does not return the
-        specific version marker at this point. The api may be changed
-        during development to include the version that the versioned file
-        thinks is relevant, but given that such hints are just guesses,
-        its better not to have it if we don't need it.
+        This may return lines from other versions. Each item the returned
+        iterator yields is a tuple of a line and a text version that that line
+        is present in (not introduced in).
+
+        Ordering of results is in whatever order is most suitable for the
+        underlying storage format.
 
         If a progress bar is supplied, it may be used to indicate progress.
         The caller is responsible for cleaning up progress bars (because this
@@ -436,6 +437,8 @@
 
         NOTES: Lines are normalised: they will all have \n terminators.
                Lines are returned in arbitrary order.
+
+        :return: An iterator over (line, version_id).
         """
         raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
 

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2007-10-05 05:52:45 +0000
+++ b/bzrlib/weave.py	2007-11-09 17:50:31 +0000
@@ -463,9 +463,9 @@
             # properly, we do not filter down to that
             # if inserted not in version_ids: continue
             if line[-1] != '\n':
-                yield line + '\n'
+                yield line + '\n', inserted
             else:
-                yield line
+                yield line, inserted
 
     def _walk_internal(self, version_ids=None):
         """Helper method for weave actions."""




More information about the bazaar-commits mailing list