Rev 3176: Implement sha chaining to prevent data altering or corruption in a journal. in http://people.ubuntu.com/~robertc/baz2.0/inventory.journalled
Robert Collins
robertc at robertcollins.net
Sun Jan 6 22:54:33 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/inventory.journalled
------------------------------------------------------------
revno: 3176
revision-id:robertc at robertcollins.net-20080106225428-subkfjp082a0t1ya
parent: robertc at robertcollins.net-20080106212137-84leoi2cp6vorge0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: inventory.journalled
timestamp: Mon 2008-01-07 09:54:28 +1100
message:
Implement sha chaining to prevent data altering or corruption in a journal.
modified:
bzrlib/journalled_inventory.py journalled_inventory-20080103020931-0ht5n40kwc0p7fy1-1
bzrlib/tests/test_journalled_inv.py test_journalled_inv.-20080103012121-ny2w9slze5jgty8i-1
=== modified file 'bzrlib/journalled_inventory.py'
--- a/bzrlib/journalled_inventory.py 2008-01-06 21:21:37 +0000
+++ b/bzrlib/journalled_inventory.py 2008-01-06 22:54:28 +0000
@@ -26,7 +26,7 @@
__all__ = ['EntryAccess', 'InventoryJournal']
from bzrlib import errors
-from bzrlib.osutils import basename
+from bzrlib.osutils import basename, sha_string
from bzrlib import inventory
from bzrlib.revision import NULL_REVISION
from bzrlib.tsort import topo_sort
@@ -112,19 +112,24 @@
class _JournalEntry(object):
"""An individual entry in a journalled inventory."""
- def __init__(self, version, parent_revision, by_id, versioned_root,
- tree_references):
+ def __init__(self, version, version_sha1, parent_revision,
+ parent_validator, by_id, versioned_root, tree_references):
"""Create a _JournalEntry.
:param version: The version of this InventoryJournal.
+ :param version_sha1: The sha1 of the serialised journal entry.
:param parent_revision: The parent revision this entry is written
against. null: indicates the start of a new delta chain.
+ :param parent_validator: The validator of the parent revision. See
+ delta_to_lines for the validator's definition (or the journalled
+ inventory documentation).
:param by_id: The text-split items in this entry indexed by id.
:param versioned_root: True if / paths will be versioned.
:param tree_references: True if tree references are supported.
"""
self.version = version
self.parent_revision = parent_revision
+ self.validator_chain = [(parent_validator, version_sha1)]
self.by_id = by_id
self.versioned_root = versioned_root
self.tree_references = tree_references
@@ -134,6 +139,7 @@
After combining:
- the parent of this entry will be the parent of its parent.
+ - the parent validator will have been moved too.
- all ids listed in parent_entry not already in by_id will have been
copied to by_id.
- parent_entry will be unchanged.
@@ -147,8 +153,30 @@
for line in parent_entry.by_id.iteritems():
if line[0] not in self.by_id:
self.by_id[line[0]] = line[1]
+ if self.parent_revision != NULL_REVISION:
+ self.validator_chain = parent_entry.validator_chain \
+ + self.validator_chain
self.parent_revision = parent_entry.parent_revision
+ def get_validator(self):
+ """Get the validator for this _JournalEntry.
+
+ With no parent and no combining this is the sha1 of the bytes
+ (version_sha1 in the constructor).
+ Parents are combined by sha1-hashing their validator and the sha1 of the
+ bytes recursively.
+ """
+ current_validator = ''
+ for base_validator, entry_sha in self.validator_chain:
+ if current_validator != base_validator:
+ raise errors.BzrError("failed to match validators %s and %s"
+ % (current_validator, base_validator))
+ if base_validator:
+ current_validator = sha_string(base_validator + entry_sha)
+ else:
+ current_validator = entry_sha
+ return current_validator
+
def to_inventory(self):
"""Convert this JournalEntry to an inventory object.
@@ -213,7 +241,7 @@
class InventoryJournal(object):
"""Serialise and deserialise inventories using a journal."""
- FORMAT_1 = 'bzr journalled inventory v1 (bzr 1.1)'
+ FORMAT_1 = 'bzr journalled inventory v1 (bzr 1.2)'
def __init__(self, versioned_root, tree_references):
"""Create an InventoryJournal.
@@ -232,18 +260,23 @@
if tree_references:
self._entry_to_content['tree-reference'] = _reference_content
- def delta_to_lines(self, old_inventory_name, new_name, delta_to_new):
+ def delta_to_lines(self, old_inventory_name, old_validator, new_name,
+ delta_to_new):
"""Return a line sequence for delta_to_new.
:param old_inventory_name: A UTF8 revision id for the old inventory.
May be NULL_REVISION if there is no older inventory and
delta_to_new includes the entire inventory contents.
+ :param old_validator: The current validator of the delta chain. This is
+ the sha1 of the lines of the journal entry for an entry with parent
+ null:, and the sha1 of the concatenated old validator and the sha1
+ of the lines of the journal entry for all other entries.
:param new_name: The version name of the inventory we create with this
delta.
:param delta_to_new: An inventory delta such as Inventory.apply_delta
takes.
"""
- lines = ['', '', '']
+ lines = ['', '', '', '']
to_line = self._delta_item_to_line
for delta_item in delta_to_new:
lines.append(to_line(delta_item))
@@ -253,7 +286,8 @@
lines.sort()
lines[0] = "format: %s\n" % InventoryJournal.FORMAT_1
lines[1] = "parent: %s\n" % old_inventory_name
- lines[2] = "version: %s\n" % new_name
+ lines[2] = "parent_validator: %s\n" % old_validator
+ lines[3] = "version: %s\n" % new_name
return lines
def _delta_item_to_line(self, delta_item):
@@ -294,17 +328,21 @@
delta_to_lines and then doing ''.join(delta_lines).
:return: A _JournalEntry for the bytes.
"""
+ sha1 = sha_string(bytes)
lines = bytes.split('\n')[:-1] # discard the last empty line
if not lines or lines[0] != 'format: %s' % InventoryJournal.FORMAT_1:
raise errors.BzrError('unknown format %r' % lines[0:1])
if len(lines) < 2 or not lines[1].startswith('parent: '):
raise errors.BzrError('missing parent: marker')
journal_parent_id = lines[1][8:]
- if len(lines) < 3 or not lines[2].startswith('version: '):
+ if len(lines) < 3 or not lines[2].startswith('parent_validator: '):
+ raise errors.BzrError('missing parent_validator: marker')
+ journal_parent_validator = lines[2][18:]
+ if len(lines) < 4 or not lines[3].startswith('version: '):
raise errors.BzrError('missing version: marker')
- journal_version_id = lines[2][9:]
+ journal_version_id = lines[3][9:]
by_id = {}
- for line in lines[3:]:
+ for line in lines[4:]:
newpath_utf8, file_id, parent_id, last_modified, content \
= line.split('\x00', 4)
parent_id = parent_id or None
@@ -317,8 +355,9 @@
raise errors.BzrError('special revisionid found: %r' % line)
if not self._tree_references and content.startswith('tree\x00'):
raise errors.BzrError("Tree reference found: %r" % line)
- if len(by_id) + 3 != len(lines):
+ if len(by_id) + 4 != len(lines):
raise errors.BzrError(
"duplicate file id in journal entry %r" % lines)
- return _JournalEntry(journal_version_id, journal_parent_id, by_id,
- self._versioned_root, self._tree_references)
+ return _JournalEntry(journal_version_id, sha1, journal_parent_id,
+ journal_parent_validator, by_id, self._versioned_root,
+ self._tree_references)
=== modified file 'bzrlib/tests/test_journalled_inv.py'
--- a/bzrlib/tests/test_journalled_inv.py 2008-01-06 21:21:37 +0000
+++ b/bzrlib/tests/test_journalled_inv.py 2008-01-06 22:54:28 +0000
@@ -26,47 +26,67 @@
inventory,
journalled_inventory,
)
+from bzrlib.osutils import sha_string
from bzrlib.inventory import Inventory
from bzrlib.revision import NULL_REVISION
from bzrlib.tests import TestCase
### DO NOT REFLOW THESE TEXTS. NEW LINES ARE SIGNIFICANT. ###
-empty_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+empty_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: null:
"""
-root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+root_only_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: entry-version
/\x00an-id\x00\x00a at e\xc3\xa5ample.com--2004\x00dir\x00\x00
"""
-root_change_lines = """format: bzr journalled inventory v1 (bzr 1.1)
-parent: entry-version
-version: changed-root
-/\x00an-id\x00\x00different-version\x00dir\x00\x00
-"""
-
-root_only_unversioned = """format: bzr journalled inventory v1 (bzr 1.1)
+root_change_lines = """format: bzr journalled inventory v1 (bzr 1.2)
+parent: entry-version
+parent_validator: 4064d5f6ecde08d963e14b426fd10a6624d33a07
+version: changed-root
+/\x00an-id\x00\x00different-version\x00dir\x00\x00
+"""
+
+corrupt_parent_lines = """format: bzr journalled inventory v1 (bzr 1.2)
+parent: entry-version
+parent_validator: 4064d5f6ecde08d963e14b426fd10a6624d33a08
+version: changed-root
+/\x00an-id\x00\x00different-version\x00dir\x00\x00
+"""
+
+root_only_unversioned = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: entry-version
/\x00TREE_ROOT\x00\x00null:\x00dir\x00\x00
"""
-reference_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+reference_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: entry-version
/\x00TREE_ROOT\x00\x00a at e\xc3\xa5ample.com--2004\x00dir\x00\x00
/foo\x00id\x00TREE_ROOT\x00changed\x00tree\x00subtree-version\x00
"""
-change_tree_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+change_tree_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: entry-version
+parent_validator:
version: change-tree
/foo\x00id\x00TREE_ROOT\x00changed-twice\x00tree\x00subtree-version2\x00
"""
+validator_only = """format: bzr journalled inventory v1 (bzr 1.2)
+parent: null:
+parent_validator: abc
+version: entry-version
+"""
+
class TestSerializer(TestCase):
"""Test journalled inventory serialisation."""
@@ -78,7 +98,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertEqual(StringIO(empty_lines).readlines(),
- journal.delta_to_lines(NULL_REVISION, NULL_REVISION, delta))
+ journal.delta_to_lines(NULL_REVISION, "", NULL_REVISION, delta))
def test_root_only_to_lines(self):
old_inv = Inventory(None)
@@ -90,7 +110,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertEqual(StringIO(root_only_lines).readlines(),
- journal.delta_to_lines(NULL_REVISION, 'entry-version', delta))
+ journal.delta_to_lines(NULL_REVISION, "", 'entry-version', delta))
def test_unversioned_root(self):
old_inv = Inventory(None)
@@ -101,7 +121,16 @@
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=False)
self.assertEqual(StringIO(root_only_unversioned).readlines(),
- journal.delta_to_lines(NULL_REVISION, 'entry-version', delta))
+ journal.delta_to_lines(NULL_REVISION, "", 'entry-version', delta))
+
+ def test_validator_stored(self):
+ old_inv = Inventory(None)
+ new_inv = Inventory(None)
+ delta = inventory.make_inv_delta(old_inv, new_inv)
+ journal = journalled_inventory.InventoryJournal(versioned_root=False,
+ tree_references=False)
+ self.assertEqual(StringIO(validator_only).readlines(),
+ journal.delta_to_lines(NULL_REVISION, "abc", 'entry-version', delta))
def test_unversioned_non_root_errors(self):
old_inv = Inventory(None)
@@ -115,7 +144,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertRaises(errors.BzrError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_richroot_unversioned_root_errors(self):
old_inv = Inventory(None)
@@ -126,7 +155,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertRaises(errors.BzrError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_nonrichroot_versioned_root_errors(self):
old_inv = Inventory(None)
@@ -138,7 +167,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=True)
self.assertRaises(errors.BzrError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_nonrichroot_non_TREE_ROOT_id_errors(self):
old_inv = Inventory(None)
@@ -149,7 +178,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=True)
self.assertRaises(errors.BzrError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_unknown_kind_errors(self):
old_inv = Inventory(None)
@@ -167,7 +196,7 @@
# we expect keyerror because there is little value wrapping this.
# This test aims to prove that it errors more than how it errors.
self.assertRaises(KeyError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_tree_reference_disabled(self):
old_inv = Inventory(None)
@@ -186,7 +215,7 @@
# we expect keyerror because there is little value wrapping this.
# This test aims to prove that it errors more than how it errors.
self.assertRaises(KeyError,
- journal.delta_to_lines, NULL_REVISION, 'entry-version', delta)
+ journal.delta_to_lines, NULL_REVISION, "", 'entry-version', delta)
def test_tree_reference_enabled(self):
old_inv = Inventory(None)
@@ -203,7 +232,7 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertEqual(StringIO(reference_lines).readlines(),
- journal.delta_to_lines(NULL_REVISION, 'entry-version', delta))
+ journal.delta_to_lines(NULL_REVISION, "", 'entry-version', delta))
def test_parse_no_bytes(self):
journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -221,15 +250,24 @@
tree_references=True)
self.assertRaises(errors.BzrError,
journal.parse_text_bytes,
- 'format: bzr journalled inventory v1 (bzr 1.1)\n')
+ 'format: bzr journalled inventory v1 (bzr 1.2)\n')
+
+ def test_parse_no_validator(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ self.assertRaises(errors.BzrError,
+ journal.parse_text_bytes,
+ 'format: bzr journalled inventory v1 (bzr 1.2)\n'
+ 'parent: null:\n')
def test_parse_no_version(self):
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
self.assertRaises(errors.BzrError,
journal.parse_text_bytes,
- 'format: bzr journalled inventory v1 (bzr 1.1)\n'
- 'parent: null:\n')
+ 'format: bzr journalled inventory v1 (bzr 1.2)\n'
+ 'parent: null:\n'
+ 'parent_validator: \n')
def test_parse_empty(self):
# quick loop to check that the parameters propagate to the generated
@@ -241,6 +279,9 @@
tree_references=tree_references)
journal_entry = journal.parse_text_bytes(empty_lines)
self.assertEqual(NULL_REVISION, journal_entry.parent_revision)
+ self.assertEqual(
+ [('', '47177375263abcfd2b41eafde1ca79c0c69b3a44')],
+ journal_entry.validator_chain)
self.assertEqual(NULL_REVISION, journal_entry.version)
self.assertEqual({}, journal_entry.by_id)
self.assertEqual(versioned_root, journal_entry.versioned_root)
@@ -250,8 +291,9 @@
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
double_root_lines = \
-"""format: bzr journalled inventory v1 (bzr 1.1)
+"""format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: null:
/\x00an-id\x00\x00a at e\xc3\xa5ample.com--2004\x00dir\x00\x00
/\x00an-id\x00\x00a at e\xc3\xa5ample.com--2004\x00dir\x00\x00
@@ -274,8 +316,9 @@
def test_parse_special_revid_not_valid_last_mod(self):
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=True)
- root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+ root_only_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: null:
/\x00TREE_ROOT\x00\x00null:\x00dir\x00\x00
"""
@@ -285,8 +328,9 @@
def test_parse_versioned_root_versioned_disabled(self):
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=True)
- root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+ root_only_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: null:
/\x00TREE_ROOT\x00\x00a at e\xc3\xa5ample.com--2004\x00dir\x00\x00
"""
@@ -296,8 +340,9 @@
def test_parse_unique_root_id_root_versioned_disabled(self):
journal = journalled_inventory.InventoryJournal(versioned_root=False,
tree_references=True)
- root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+ root_only_lines = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: null:
/\x00an-id\x00\x00null:\x00dir\x00\x00
"""
@@ -349,6 +394,52 @@
('dir', '', '')),
}, root_change.by_id)
+ def test_combine_accrues_validators(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ root = journal.parse_text_bytes(root_only_lines)
+ root_change = journal.parse_text_bytes(root_change_lines)
+ root_change.combine_with(root)
+ self.assertEqual([
+ ('', '4064d5f6ecde08d963e14b426fd10a6624d33a07'),
+ ('4064d5f6ecde08d963e14b426fd10a6624d33a07', '4ac18343c8337629b06895d80b4256214603d069')],
+ root_change.validator_chain)
+
+ def test_validate_null_parent(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ root = journal.parse_text_bytes(root_only_lines)
+ # The lines sha is the validator
+ self.assertEqual(
+ '4064d5f6ecde08d963e14b426fd10a6624d33a07', root.get_validator())
+
+ def test_validate_uncombined_errors(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ root_change = journal.parse_text_bytes(root_change_lines)
+ self.assertRaises(errors.BzrError, root_change.get_validator)
+
+ def test_validate_combined(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ root = journal.parse_text_bytes(root_only_lines)
+ base_validator = root.get_validator()
+ root_change = journal.parse_text_bytes(root_change_lines)
+ root_change.combine_with(root)
+ # sha(base_validator, sha(change_lines)) is the validator
+ expected = sha_string(base_validator + sha_string(root_change_lines))
+ self.assertEqual(expected, root_change.get_validator())
+
+ def test_validate_corrupt_chain_raises(self):
+ journal = journalled_inventory.InventoryJournal(versioned_root=True,
+ tree_references=True)
+ root = journal.parse_text_bytes(root_only_lines)
+ # root_change thinks the parent has been corrupted, so get_validator should error
+ root_change = journal.parse_text_bytes(corrupt_parent_lines)
+ root_change.combine_with(root)
+ # XXX: This one will want a custom error class I think
+ self.assertRaises(errors.BzrError, root_change.get_validator)
+
def test_combine_adds_new_ids(self):
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
@@ -362,9 +453,12 @@
('tree', 'subtree-version2', '')),
}, change_tree.by_id)
+ def test_check_validators(self):
+ pass
+
def test_to_inventory_non_null_parent(self):
journal_entry = journalled_inventory._JournalEntry(
- 'something', 'a-change', {}, True, True)
+ 'something', '', 'a-change', '', {}, True, True)
self.assertRaises(errors.BzrError, journal_entry.to_inventory)
def test_to_inventory_root_id_versioned_not_permitted(self):
@@ -373,7 +467,7 @@
('dir', '', '')),
}
journal_entry = journalled_inventory._JournalEntry(
- 'something', NULL_REVISION, entries, False, True)
+ 'something', '', NULL_REVISION, '', entries, False, True)
self.assertRaises(errors.BzrError, journal_entry.to_inventory)
def test_to_inventory_root_id_unique_not_permitted(self):
@@ -382,7 +476,7 @@
('dir', '', '')),
}
journal_entry = journalled_inventory._JournalEntry(
- 'something', NULL_REVISION, entries, False, True)
+ 'something', '', NULL_REVISION, '', entries, False, True)
self.assertRaises(errors.BzrError, journal_entry.to_inventory)
def test_to_inventory_root_id_not_versioned(self):
@@ -391,7 +485,7 @@
('dir', '', '')),
}
journal_entry = journalled_inventory._JournalEntry(
- 'something', NULL_REVISION, entries, True, True)
+ 'something', '', NULL_REVISION, '', entries, True, True)
self.assertRaises(errors.BzrError, journal_entry.to_inventory)
def test_to_inventory_has_tree_not_meant_to(self):
@@ -403,7 +497,7 @@
# a file that followed the root move
}
journal_entry = journalled_inventory._JournalEntry(
- 'something', NULL_REVISION, entries, True, False)
+ 'something', '', NULL_REVISION, '', entries, True, False)
self.assertRaises(errors.BzrError, journal_entry.to_inventory)
def test_to_inventory_torture(self):
@@ -436,12 +530,14 @@
('dir', '', '')),
}
journal_entry = journalled_inventory._JournalEntry(
- 'something', NULL_REVISION, entries, True, True)
+ 'something', '13b796ac5738c663107a9340e19def21a92e3f9f',
+ NULL_REVISION, '', entries, True, True)
inv = journal_entry.to_inventory()
# easiest way to check we got the right inventory is to serialise it
# and compare the serialised forms.
- expected = """format: bzr journalled inventory v1 (bzr 1.1)
+ expected = """format: bzr journalled inventory v1 (bzr 1.2)
parent: null:
+parent_validator:
version: something
/\x00an-id\x00\x00changed-in\x00dir\x00\x00
/ref\x00ref-id\x00an-id\x00new-rev\x00tree\x00tree-reference-id\x00
@@ -454,7 +550,7 @@
delta = inventory.make_inv_delta(old_inv, inv)
journal = journalled_inventory.InventoryJournal(versioned_root=True,
tree_references=True)
- lines = journal.delta_to_lines(NULL_REVISION, inv.revision_id, delta)
+ lines = journal.delta_to_lines(NULL_REVISION, "", inv.revision_id, delta)
serialised = ''.join(lines)
self.assertIsInstance(serialised, str)
self.assertEqual(expected, serialised)
More information about the bazaar-commits
mailing list