Rev 3817: Merge in the knit parent delta hacks. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

John Arbash Meinel john at arbash-meinel.com
Tue Dec 23 21:37:37 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

------------------------------------------------------------
revno: 3817
revision-id: john at arbash-meinel.com-20081223213719-3004u6x38jnh436l
parent: john at arbash-meinel.com-20081223212037-ntp3edz1rqg4wm6y
parent: john at arbash-meinel.com-20081203023542-pquorzck2xwr3p32
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Tue 2008-12-23 15:37:19 -0600
message:
  Merge in the knit parent delta hacks.
modified:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
    ------------------------------------------------------------
    revno: 3791.2.3
    revision-id: john at arbash-meinel.com-20081203023542-pquorzck2xwr3p32
    parent: john at arbash-meinel.com-20081203022937-82s2xlcafakdqy0v
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: knit_parent_hack
    timestamp: Tue 2008-12-02 20:35:42 -0600
    message:
      Hack the inventory serialization to use multiple lines.
    modified:
      bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
    ------------------------------------------------------------
    revno: 3791.2.2
    revision-id: john at arbash-meinel.com-20081203022937-82s2xlcafakdqy0v
    parent: john at arbash-meinel.com-20081203013227-rou9cbbpgd6bh9fh
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: knit_parent_hack
    timestamp: Tue 2008-12-02 20:29:37 -0600
    message:
      Hack up the serialized form so that we can support multi-line values.
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
    ------------------------------------------------------------
    revno: 3791.2.1
    revision-id: john at arbash-meinel.com-20081203013227-rou9cbbpgd6bh9fh
    parent: john at arbash-meinel.com-20081202235625-h5fo44xy2hxeopo2
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: knit_parent_hack
    timestamp: Tue 2008-12-02 19:32:27 -0600
    message:
      Quick hack to allow chk inventory pages to refer to whatever possible page they came from.
      Causes a lot of collisions in the Knit layer, and fails to convert bzrtools completely.
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
      bzrlib/errors.py               errors.py-20050309040759-20512168c4e14fbd
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-12-23 21:20:37 +0000
+++ b/bzrlib/chk_map.py	2008-12-23 21:37:19 +0000
@@ -43,7 +43,7 @@
 
 from bzrlib import lazy_import
 lazy_import.lazy_import(globals(), """
-from bzrlib import versionedfile
+from bzrlib import errors, trace, versionedfile
 """)
 from bzrlib import osutils
 from bzrlib.lru_cache import LRUCache
@@ -396,6 +396,7 @@
         :param key_width: The width of keys for this node.
         """
         self._key = None
+        self._orig_key = None
         # Current number of elements
         self._len = 0
         self._maximum_size = 0
@@ -519,20 +520,29 @@
         :param key: The key that the serialised node has.
         """
         result = LeafNode()
-        # splitlines splits on '\r' as well as '\n' so we use split()
-        lines = bytes.split('\n')
-        assert lines.pop() == ''
+        # splitlines splits on '\r' as well as '\n' so we use 'chunks_to_lines'
+        lines = osutils.chunks_to_lines([bytes])
         items = {}
-        if lines[0] != 'chkleaf:':
+        if lines[0] != 'chkleaf:\n':
             raise ValueError("not a serialised leaf node: %r" % bytes)
         maximum_size = int(lines[1])
         width = int(lines[2])
         length = int(lines[3])
+        pos = 5
         prefix = lines[4]
-        for line in lines[5:]:
-            line = prefix + line
-            elements = line.split('\x00', width)
-            items[tuple(elements[:-1])] = elements[-1]
+        assert prefix.pop() == '\n' # Remove the trailing newline
+        while pos < len(lines):
+            elements = (prefix + lines[pos]).split('\x00')
+            assert len(elements) == width + 1
+            num_value_lines = int(elements[-1])
+            pos += 1
+            value_lines = lines[pos:pos+num_value_lines]
+            pos += num_value_lines
+            value = ''.join(value_lines)
+            # We always add an extra '\n' to the end of the value
+            assert value[-1] == '\n'
+            value = value[:-1]
+            items[tuple(elements[:-1])] = value
         if len(items) != length:
             raise AssertionError("item count mismatch")
         result._items = items
@@ -540,6 +550,7 @@
         assert length == len(lines) - 5
         result._maximum_size = maximum_size
         result._key = key
+        result._orig_key = key
         result._key_width = width
         result._raw_size = (sum(map(len, lines[5:])) # the length of the suffix
             + (length)*(len(prefix)+1)) # prefix + '\n'
@@ -669,15 +680,23 @@
             lines.append('%s\n' % (self._common_serialised_prefix,))
             prefix_len = len(self._common_serialised_prefix)
         for key, value in sorted(self._items.items()):
-            serialized = "%s\x00%s\n" % (self._serialise_key(key), value)
+            value_lines = osutils.chunks_to_lines([value + '\n'])
+            serialized = "%s\x00%s\n" % (self._serialise_key(key),
+                                         len(value_lines))
             assert serialized.startswith(self._common_serialised_prefix)
             lines.append(serialized[prefix_len:])
+            lines.extend(value_lines)
         sha1 = osutils.sha_strings(lines)
         key = ('sha1:' + sha1,)
+        if self._orig_key is None:
+            parents = ()
+        else:
+            parents = (self._orig_key,)
         if not store.has_key(key):
             # We know the key doesn't exist, because we just checked
-            store.add_lines(key, (), lines, random_id=True)
+            store.add_lines(key, parents, lines, random_id=True)
         self._key = key
+        self._orig_key = self._key
         bytes = ''.join(lines)
         if len(bytes) != self._current_size():
             import pdb; pdb.set_trace()
@@ -817,6 +836,7 @@
         result._len = length
         result._maximum_size = maximum_size
         result._key = key
+        result._orig_key = key
         result._key_width = width
         # XXX: InternalNodes don't really care about their size, and this will
         #      change if we add prefix compression
@@ -987,10 +1007,15 @@
             lines.append(serialised[prefix_len:])
         sha1 = osutils.sha_strings(lines)
         key = ('sha1:' + sha1,)
+        if self._orig_key is None:
+            parents = ()
+        else:
+            parents = (self._orig_key,)
         if not store.has_key(key):
             # We know the key doesn't exist, because we just checked
-            store.add_lines(key, (), lines, random_id=True)
+            store.add_lines(key, parents, lines, random_id=True)
         self._key = key
+        self._orig_key = self._key
         _page_cache.add(self._key, ''.join(lines))
         yield self._key
 

=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py	2008-12-19 16:48:04 +0000
+++ b/bzrlib/errors.py	2008-12-23 21:37:19 +0000
@@ -1442,6 +1442,13 @@
         self.content = content
 
 
+class DuplicateKeyKnitCorrupt(KnitCorrupt):
+
+    def __init__(self, filename, how, key):
+        KnitCorrupt.__init__(self, filename, how)
+        self.key = key
+
+
 class KnitDataStreamIncompatible(KnitError):
     # Not raised anymore, as we can convert data streams.  In future we may
     # need it again for more exotic cases, so we're keeping it around for now.

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2008-12-19 23:07:32 +0000
+++ b/bzrlib/inventory.py	2008-12-23 21:37:19 +0000
@@ -1390,18 +1390,18 @@
                 exec_str = "Y"
             else:
                 exec_str = "N"
-            return "file: %s\x00%s\x00%s\x00%s\x00%s\x00%d\x00%s" % (
+            return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
                 entry.file_id, parent_str, name_str, entry.revision,
                 entry.text_sha1, entry.text_size, exec_str)
         elif entry.kind == 'directory':
-            return "dir: %s\x00%s\x00%s\x00%s" % (
+            return "dir: %s\n%s\n%s\n%s" % (
                 entry.file_id, parent_str, name_str, entry.revision)
         elif entry.kind == 'symlink':
-            return "symlink: %s\x00%s\x00%s\x00%s\x00%s" % (
+            return "symlink: %s\n%s\n%s\n%s\n%s" % (
                 entry.file_id, parent_str, name_str, entry.revision,
                 entry.symlink_target.encode("utf8"))
         elif entry.kind == 'tree-reference':
-            return "tree: %s\x00%s\x00%s\x00%s\x00%s" % (
+            return "tree: %s\n%s\n%s\n%s\n%s" % (
                 entry.file_id, parent_str, name_str, entry.revision,
                 entry.reference_revision)
         else:
@@ -1409,7 +1409,7 @@
 
     def _bytes_to_entry(self, bytes):
         """Deserialise a serialised entry."""
-        sections = bytes.split('\x00')
+        sections = bytes.split('\n')
         if sections[0].startswith("file: "):
             result = InventoryFile(sections[0][6:],
                 sections[2].decode('utf8'),

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-12-19 23:07:32 +0000
+++ b/bzrlib/knit.py	2008-12-23 21:37:19 +0000
@@ -2257,8 +2257,10 @@
             for (index, key, value, node_refs) in present_nodes:
                 if (value[0] != keys[key][0][0] or
                     node_refs != keys[key][1]):
-                    raise KnitCorrupt(self, "inconsistent details in add_records"
-                        ": %s %s" % ((value, node_refs), keys[key]))
+                    raise errors.DuplicateKeyKnitCorrupt(self,
+                        "inconsistent details in add_records"
+                        ":\n%s\n%s" % ((value, node_refs), keys[key]),
+                        key)
                 del keys[key]
         result = []
         if self._parents:

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-12-23 21:18:26 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-12-23 21:37:19 +0000
@@ -240,7 +240,7 @@
         # passed in because the caller has them, so as to avoid object churn.
         index_builder_class = pack_collection._index_builder_class
         if pack_collection.chk_index is not None:
-            chk_index = index_builder_class(reference_lists=0)
+            chk_index = index_builder_class(reference_lists=2)
         else:
             chk_index = None
         Pack.__init__(self,
@@ -2009,9 +2009,9 @@
             self.chk_bytes = KnitVersionedFiles(
                 _KnitGraphIndex(self._pack_collection.chk_index.combined_index,
                     add_callback=self._pack_collection.chk_index.add_callback,
-                    deltas=False, parents=False, is_locked=self.is_locked),
+                    deltas=True, parents=True, is_locked=self.is_locked),
                 data_access=self._pack_collection.chk_index.data_access,
-                max_delta_chain=0)
+                max_delta_chain=200)
         else:
             self.chk_bytes = None
         # True when the repository object is 'write locked' (as opposed to the



More information about the bazaar-commits mailing list