Rev 3817: Merge in the knit parent delta hacks. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack
John Arbash Meinel
john at arbash-meinel.com
Tue Dec 23 21:37:37 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack
------------------------------------------------------------
revno: 3817
revision-id: john at arbash-meinel.com-20081223213719-3004u6x38jnh436l
parent: john at arbash-meinel.com-20081223212037-ntp3edz1rqg4wm6y
parent: john at arbash-meinel.com-20081203023542-pquorzck2xwr3p32
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Tue 2008-12-23 15:37:19 -0600
message:
Merge in the knit parent delta hacks.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
------------------------------------------------------------
revno: 3791.2.3
revision-id: john at arbash-meinel.com-20081203023542-pquorzck2xwr3p32
parent: john at arbash-meinel.com-20081203022937-82s2xlcafakdqy0v
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parent_hack
timestamp: Tue 2008-12-02 20:35:42 -0600
message:
Hack the inventory serialization to use multiple lines.
modified:
bzrlib/inventory.py inventory.py-20050309040759-6648b84ca2005b37
------------------------------------------------------------
revno: 3791.2.2
revision-id: john at arbash-meinel.com-20081203022937-82s2xlcafakdqy0v
parent: john at arbash-meinel.com-20081203013227-rou9cbbpgd6bh9fh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parent_hack
timestamp: Tue 2008-12-02 20:29:37 -0600
message:
Hack up the serialized form so that we can support multi-line values.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
------------------------------------------------------------
revno: 3791.2.1
revision-id: john at arbash-meinel.com-20081203013227-rou9cbbpgd6bh9fh
parent: john at arbash-meinel.com-20081202235625-h5fo44xy2hxeopo2
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_parent_hack
timestamp: Tue 2008-12-02 19:32:27 -0600
message:
Quick hack to allow chk inventory pages to refer to whatever possible page they came from.
Causes a lot of collisions in the Knit layer, and fails to convert bzrtools completely.
modified:
bzrlib/chk_map.py chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py 2008-12-23 21:20:37 +0000
+++ b/bzrlib/chk_map.py 2008-12-23 21:37:19 +0000
@@ -43,7 +43,7 @@
from bzrlib import lazy_import
lazy_import.lazy_import(globals(), """
-from bzrlib import versionedfile
+from bzrlib import errors, trace, versionedfile
""")
from bzrlib import osutils
from bzrlib.lru_cache import LRUCache
@@ -396,6 +396,7 @@
:param key_width: The width of keys for this node.
"""
self._key = None
+ self._orig_key = None
# Current number of elements
self._len = 0
self._maximum_size = 0
@@ -519,20 +520,29 @@
:param key: The key that the serialised node has.
"""
result = LeafNode()
- # splitlines splits on '\r' as well as '\n' so we use split()
- lines = bytes.split('\n')
- assert lines.pop() == ''
+ # splitlines splits on '\r' as well as '\n' so we use 'chunks_to_lines'
+ lines = osutils.chunks_to_lines([bytes])
items = {}
- if lines[0] != 'chkleaf:':
+ if lines[0] != 'chkleaf:\n':
raise ValueError("not a serialised leaf node: %r" % bytes)
maximum_size = int(lines[1])
width = int(lines[2])
length = int(lines[3])
+ pos = 5
prefix = lines[4]
- for line in lines[5:]:
- line = prefix + line
- elements = line.split('\x00', width)
- items[tuple(elements[:-1])] = elements[-1]
+ assert prefix.pop() == '\n' # Remove the trailing newline
+ while pos < len(lines):
+ elements = (prefix + lines[pos]).split('\x00')
+ assert len(elements) == width + 1
+ num_value_lines = int(elements[-1])
+ pos += 1
+ value_lines = lines[pos:pos+num_value_lines]
+ pos += num_value_lines
+ value = ''.join(value_lines)
+ # We always add an extra '\n' to the end of the value
+ assert value[-1] == '\n'
+ value = value[:-1]
+ items[tuple(elements[:-1])] = value
if len(items) != length:
raise AssertionError("item count mismatch")
result._items = items
@@ -540,6 +550,7 @@
assert length == len(lines) - 5
result._maximum_size = maximum_size
result._key = key
+ result._orig_key = key
result._key_width = width
result._raw_size = (sum(map(len, lines[5:])) # the length of the suffix
+ (length)*(len(prefix)+1)) # prefix + '\n'
@@ -669,15 +680,23 @@
lines.append('%s\n' % (self._common_serialised_prefix,))
prefix_len = len(self._common_serialised_prefix)
for key, value in sorted(self._items.items()):
- serialized = "%s\x00%s\n" % (self._serialise_key(key), value)
+ value_lines = osutils.chunks_to_lines([value + '\n'])
+ serialized = "%s\x00%s\n" % (self._serialise_key(key),
+ len(value_lines))
assert serialized.startswith(self._common_serialised_prefix)
lines.append(serialized[prefix_len:])
+ lines.extend(value_lines)
sha1 = osutils.sha_strings(lines)
key = ('sha1:' + sha1,)
+ if self._orig_key is None:
+ parents = ()
+ else:
+ parents = (self._orig_key,)
if not store.has_key(key):
# We know the key doesn't exist, because we just checked
- store.add_lines(key, (), lines, random_id=True)
+ store.add_lines(key, parents, lines, random_id=True)
self._key = key
+ self._orig_key = self._key
bytes = ''.join(lines)
if len(bytes) != self._current_size():
import pdb; pdb.set_trace()
@@ -817,6 +836,7 @@
result._len = length
result._maximum_size = maximum_size
result._key = key
+ result._orig_key = key
result._key_width = width
# XXX: InternalNodes don't really care about their size, and this will
# change if we add prefix compression
@@ -987,10 +1007,15 @@
lines.append(serialised[prefix_len:])
sha1 = osutils.sha_strings(lines)
key = ('sha1:' + sha1,)
+ if self._orig_key is None:
+ parents = ()
+ else:
+ parents = (self._orig_key,)
if not store.has_key(key):
# We know the key doesn't exist, because we just checked
- store.add_lines(key, (), lines, random_id=True)
+ store.add_lines(key, parents, lines, random_id=True)
self._key = key
+ self._orig_key = self._key
_page_cache.add(self._key, ''.join(lines))
yield self._key
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2008-12-19 16:48:04 +0000
+++ b/bzrlib/errors.py 2008-12-23 21:37:19 +0000
@@ -1442,6 +1442,13 @@
self.content = content
+class DuplicateKeyKnitCorrupt(KnitCorrupt):
+
+ def __init__(self, filename, how, key):
+ KnitCorrupt.__init__(self, filename, how)
+ self.key = key
+
+
class KnitDataStreamIncompatible(KnitError):
# Not raised anymore, as we can convert data streams. In future we may
# need it again for more exotic cases, so we're keeping it around for now.
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2008-12-19 23:07:32 +0000
+++ b/bzrlib/inventory.py 2008-12-23 21:37:19 +0000
@@ -1390,18 +1390,18 @@
exec_str = "Y"
else:
exec_str = "N"
- return "file: %s\x00%s\x00%s\x00%s\x00%s\x00%d\x00%s" % (
+ return "file: %s\n%s\n%s\n%s\n%s\n%d\n%s" % (
entry.file_id, parent_str, name_str, entry.revision,
entry.text_sha1, entry.text_size, exec_str)
elif entry.kind == 'directory':
- return "dir: %s\x00%s\x00%s\x00%s" % (
+ return "dir: %s\n%s\n%s\n%s" % (
entry.file_id, parent_str, name_str, entry.revision)
elif entry.kind == 'symlink':
- return "symlink: %s\x00%s\x00%s\x00%s\x00%s" % (
+ return "symlink: %s\n%s\n%s\n%s\n%s" % (
entry.file_id, parent_str, name_str, entry.revision,
entry.symlink_target.encode("utf8"))
elif entry.kind == 'tree-reference':
- return "tree: %s\x00%s\x00%s\x00%s\x00%s" % (
+ return "tree: %s\n%s\n%s\n%s\n%s" % (
entry.file_id, parent_str, name_str, entry.revision,
entry.reference_revision)
else:
@@ -1409,7 +1409,7 @@
def _bytes_to_entry(self, bytes):
"""Deserialise a serialised entry."""
- sections = bytes.split('\x00')
+ sections = bytes.split('\n')
if sections[0].startswith("file: "):
result = InventoryFile(sections[0][6:],
sections[2].decode('utf8'),
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-12-19 23:07:32 +0000
+++ b/bzrlib/knit.py 2008-12-23 21:37:19 +0000
@@ -2257,8 +2257,10 @@
for (index, key, value, node_refs) in present_nodes:
if (value[0] != keys[key][0][0] or
node_refs != keys[key][1]):
- raise KnitCorrupt(self, "inconsistent details in add_records"
- ": %s %s" % ((value, node_refs), keys[key]))
+ raise errors.DuplicateKeyKnitCorrupt(self,
+ "inconsistent details in add_records"
+ ":\n%s\n%s" % ((value, node_refs), keys[key]),
+ key)
del keys[key]
result = []
if self._parents:
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-12-23 21:18:26 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-12-23 21:37:19 +0000
@@ -240,7 +240,7 @@
# passed in because the caller has them, so as to avoid object churn.
index_builder_class = pack_collection._index_builder_class
if pack_collection.chk_index is not None:
- chk_index = index_builder_class(reference_lists=0)
+ chk_index = index_builder_class(reference_lists=2)
else:
chk_index = None
Pack.__init__(self,
@@ -2009,9 +2009,9 @@
self.chk_bytes = KnitVersionedFiles(
_KnitGraphIndex(self._pack_collection.chk_index.combined_index,
add_callback=self._pack_collection.chk_index.add_callback,
- deltas=False, parents=False, is_locked=self.is_locked),
+ deltas=True, parents=True, is_locked=self.is_locked),
data_access=self._pack_collection.chk_index.data_access,
- max_delta_chain=0)
+ max_delta_chain=200)
else:
self.chk_bytes = None
# True when the repository object is 'write locked' (as opposed to the
More information about the bazaar-commits
mailing list