Rev 2703: Decouple parsing and iterating the lines in knit records from getting the data, making it suitable for use in pack repositories. in http://people.ubuntu.com/~robertc/baz2.0/knits

Robert Collins robertc at robertcollins.net
Thu Aug 16 09:39:59 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/knits

------------------------------------------------------------
revno: 2703
revision-id: robertc at robertcollins.net-20070816083953-sbfb70vw6tmh3vak
parent: robertc at robertcollins.net-20070816081927-rhroje8susrd3a40
committer: Robert Collins <robertc at robertcollins.net>
branch nick: knits
timestamp: Thu 2007-08-16 18:39:53 +1000
message:
  Decouple parsing and iterating the lines in knit records from getting the data, making it suitable for use in pack repositories.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-08-16 08:19:27 +0000
+++ b/bzrlib/knit.py	2007-08-16 08:39:53 +0000
@@ -925,33 +925,22 @@
             version_ids = self.versions()
         else:
             version_ids = [osutils.safe_revision_id(v) for v in version_ids]
-        if pb is None:
-            pb = progress.DummyProgress()
         # we don't care about inclusions, the caller cares.
         # but we need to setup a list of records to visit.
         # we need version_id, position, length
         version_id_records = []
         requested_versions = set(version_ids)
+        methods = {}
         # create set of records to read:
         for version_id in requested_versions:
             index_memo = self._index.get_position(version_id)
+            method  = self._index.get_method(version_id)
             version_id_records.append((version_id, index_memo))
-
+            methods[version_id] = method
         total = len(version_id_records)
-        for version_idx, (version_id, data, sha_value) in \
-            enumerate(self._data.read_records_iter(version_id_records)):
-            pb.update('Walking content.', version_idx, total)
-            method = self._index.get_method(version_id)
-
-            assert method in ('fulltext', 'line-delta')
-            if method == 'fulltext':
-                line_iterator = self.factory.get_fulltext_content(data)
-            else:
-                line_iterator = self.factory.get_linedelta_content(data)
-            for line in line_iterator:
-                yield line
-
-        pb.update('Walking content.', total, total)
+        return self._data.iter_lines_added_or_present_in_records(
+            self._data.read_records_iter(version_id_records),
+            methods, self.factory, pb, total)
         
     def iter_parents(self, version_ids):
         """Iterate through the parents for many version ids.
@@ -1895,6 +1884,36 @@
     def _open_file(self):
         return self._access.open_file()
 
+    def iter_lines_added_or_present_in_records(self, record_iterator, methods, 
+        record_parser, pb=None, record_count=0):
+        """Read, parse and yield the contents of records as lines.
+
+        :param record_iterator: An iterable of version_id, data, sha_value for
+            the records to process.
+        :param methods: A dict of version_id -> method.
+        :param record_parser: A knit record parser which can parse each
+            record.
+        :param pb: A progress bar, or None.
+        :param record_count: A total for the progress bar if one is supplied.
+        :return: An iterator over all the lines in no particular order.
+        """
+        if pb is None:
+            pb = progress.DummyProgress()
+        for version_idx, (version_id, data, sha_value) in \
+            enumerate(record_iterator):
+            pb.update('Walking content.', version_idx, record_count)
+            method = methods[version_id]
+
+            assert method in ('fulltext', 'line-delta')
+            if method == 'fulltext':
+                line_iterator = record_parser.get_fulltext_content(data)
+            else:
+                line_iterator = record_parser.get_linedelta_content(data)
+            for line in line_iterator:
+                yield line
+
+        pb.update('Walking content.', record_count, record_count)
+
     def _record_to_data(self, version_id, digest, lines):
         """Convert version_id, digest, lines into a raw data block.
         



More information about the bazaar-commits mailing list