Rev 2907: Play around with a different knit data format that has the parents in the data. in http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents

John Arbash Meinel john at arbash-meinel.com
Fri Oct 19 17:45:54 BST 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.92-dev/knit_parents

------------------------------------------------------------
revno: 2907
revision-id: john at arbash-meinel.com-20071019164517-2q6gx1ynkn8mdrx4
parent: pqm at pqm.ubuntu.com-20071012085726-lyq36i8bo7ew28ba
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parents
timestamp: Fri 2007-10-19 11:45:17 -0500
message:
  Play around with a different knit data format that has the parents in the data.
  This makes the index completely redundant with the data, which means
  it can be regenerated on demand.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-10-12 05:26:46 +0000
+++ b/bzrlib/knit.py	2007-10-19 16:45:17 +0000
@@ -916,7 +916,7 @@
             options.append('line-delta')
             store_lines = self.factory.lower_line_delta(delta_hunks)
             size, bytes = self._data._record_to_data(version_id, digest,
-                store_lines)
+                store_lines, parents=parents)
         else:
             options.append('fulltext')
             # isinstance is slower and we have no hierarchy.
@@ -924,13 +924,13 @@
                 # Use the already joined bytes saving iteration time in
                 # _record_to_data.
                 size, bytes = self._data._record_to_data(version_id, digest,
-                    lines, [line_bytes])
+                    lines, [line_bytes], parents=parents)
             else:
                 # get mixed annotation + content and feed it into the
                 # serialiser.
                 store_lines = self.factory.lower_fulltext(content)
                 size, bytes = self._data._record_to_data(version_id, digest,
-                    store_lines)
+                    store_lines, parents=parents)
 
         access_memo = self._data.add_raw_records([size], bytes)[0]
         self._index.add_versions(
@@ -1990,7 +1990,8 @@
     def _open_file(self):
         return self._access.open_file()
 
-    def _record_to_data(self, version_id, digest, lines, dense_lines=None):
+    def _record_to_data(self, version_id, digest, lines, dense_lines=None,
+                        parents=None):
         """Convert version_id, digest, lines into a raw data block.
         
         :param dense_lines: The bytes of lines but in a denser form. For
@@ -2171,6 +2172,51 @@
         return components
 
 
+class _KnitDataWithParents(_KnitData):
+    """Knit data whose record header lines also list the parents."""
+
+    def _record_to_data(self, version_id, digest, lines, dense_lines=None,
+                        parents=None):
+        """Convert version_id, digest, lines, parents into a raw data block.
+
+        :param dense_lines: The same bytes as lines in fewer, larger strings;
+            joined instead of lines when given, as that join resizes less.
+        :param parents: Version ids written, space separated, after the
+            digest on the header line. None or empty writes nothing extra.
+        :return: (length of the compressed bytes, the compressed bytes).
+        """
+        # Note: using a string copy here increases memory pressure with e.g.
+        # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
+        # when doing the initial commit of a mozilla tree. RBC 20070921
+        if parents:
+            parents_str = ' ' + ' '.join(parents)
+        else:
+            parents_str = ''
+
+        bytes = ''.join(chain(
+            ["version %s %d %s%s\n" % (version_id,
+                                     len(lines),
+                                     digest,
+                                     parents_str)],
+            dense_lines or lines,
+            ["end %s\n" % version_id]))
+        assert bytes.__class__ == str
+        compressed_bytes = bytes_to_gzip(bytes)
+        return len(compressed_bytes), compressed_bytes
+
+    def _check_header(self, version_id, line):
+        """Return the fields of a header line, raising KnitCorrupt if bad."""
+        rec = line.split()
+        if len(rec) < 4:  # 'version', id, count, digest; extras are parents
+            raise KnitCorrupt(self._access,
+                              'unexpected number of elements in record header')
+        if rec[1] != version_id:
+            raise KnitCorrupt(self._access,
+                              'unexpected version, wanted %r, got %r'
+                              % (version_id, rec[1]))
+        return rec
+
+
 class InterKnit(InterVersionedFile):
     """Optimised code paths for knit to knit operations."""
     
@@ -2285,7 +2331,8 @@
             delta = self.source.factory.parse_line_delta(data, version_id,
                 plain=True)
             lines = self.target.factory.lower_line_delta(delta)
-        return self.target._data._record_to_data(version_id, digest, lines)
+        return self.target._data._record_to_data(version_id, digest, lines,
+                                                 parents=parents)
 
 
 InterVersionedFile.register_optimiser(InterKnit)



More information about the bazaar-commits mailing list