[MERGE][RFC] Add simple revision serializer based on RIO.
Aaron Bentley
aaron at aaronbentley.com
Mon Apr 27 20:53:31 BST 2009
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
bb:comment Where is this tested?
Jelmer Vernooij wrote:
> === modified file 'bzrlib/chk_serializer.py'
> --- bzrlib/chk_serializer.py 2009-04-09 20:23:07 +0000
> +++ bzrlib/chk_serializer.py 2009-04-15 01:37:51 +0000
> @@ -16,14 +16,94 @@
>
> """Serializer object for CHK based inventory storage."""
>
> +from cStringIO import (
> + StringIO,
> + )
> +
> from bzrlib import (
> + cache_utf8,
> inventory,
> + osutils,
> + revision as _mod_revision,
> + rio,
> xml5,
> xml6,
> )
>
> -
> -class CHKSerializerSubtree(xml6.Serializer_v6):
> +class RIORevisionSerializer1(object):
> + """Simple line-based revision serializer.
> +
> + This writes simple key/value pairs for all elements on the revision,
> + each on a separate line and using colons to separate key and value. Newlines
> + are backslash-escaped where necessary.
I think this docstring describes your original implementation, not RIO.
RIO does not backslash-escape newlines.
> +
> + The commit message is always serialized last and can span multiple lines.
> + """
Although this is accurate, I don't see a reason to guarantee it by
enshrining it in a docstring.
> +
> + def write_revision(self, rev, f):
> + decode_utf8 = cache_utf8.decode
> + w = rio.RioWriter(f)
> + s = rio.Stanza()
This method would be a lot shorter if you used the kwargs argument of
the Stanza constructor, e.g.
s = rio.Stanza(revision_id=rev.revision_id, timestamp=rev.timestamp,
inventory_sha1=rev.inventory_sha1, ...
> + revision_id = rev.revision_id
> + if isinstance(revision_id, str):
> + revision_id = decode_utf8(revision_id)
Shouldn't we define write_revision as either requiring unicode or utf-8
throughout?
> + s.add("revision-id", revision_id)
> + s.add("timestamp", "%.3f" % rev.timestamp)
> + if rev.parent_ids:
> + s.add("parent-ids",
> + " ".join([decode_utf8(p) for p in rev.parent_ids]))
RIO is designed to allow the same key to be used multiple times, so I
think it's more natural to handle it as follows:
for parent_id in rev.parent_ids:
s.add("parent-id", parent_id)
> + s.add("inventory-sha1", rev.inventory_sha1)
> + s.add("committer", rev.committer)
> + if rev.timezone is not None:
> + s.add("timezone", str(rev.timezone))
> + if rev.properties:
> + revprops_stanza = rio.Stanza()
> + for k, v in rev.properties.iteritems():
> + if isinstance(v, str):
> + v = decode_utf8(v)
> + revprops_stanza.add(decode_utf8(k), v)
> + s.add("properties", revprops_stanza.to_unicode())
> + s.add("message", rev.message)
> + w.write_stanza(s)
> +
> + def write_revision_to_string(self, rev):
> + f = StringIO()
> + self.write_revision(rev, f)
> + return f.getvalue()
> +
> + def read_revision(self, f):
> + s = rio.read_stanza(f)
> + rev = _mod_revision.Revision(None)
> + for (field, value) in s.iter_pairs():
> + if field == "revision-id":
> + rev.revision_id = cache_utf8.encode(value)
> + elif field == "parent-ids":
> + rev.parent_ids = [cache_utf8.encode(p) for p in value.split(" ")]
> + elif field == "committer":
> + rev.committer = value
> + elif field == "inventory-sha1":
> + rev.inventory_sha1 = value
> + elif field == "timezone":
> + rev.timezone = int(value)
> + elif field == "timestamp":
> + rev.timestamp = float(value)
> + elif field == "message":
> + rev.message = value
> + elif field == "properties":
> + rev.properties = rio.read_stanza_unicode(
> + osutils.split_lines(value)).as_dict()
> + else:
> + raise AssertionError("Unknown field %s" % field)
> + l = f.readline()
> + return rev
> +
> + def read_revision_from_string(self, xml_string):
> + f = StringIO(xml_string)
> + rev = self.read_revision(f)
> + return rev
> +
> +
> +class CHKSerializerSubtree(RIORevisionSerializer1,xml6.Serializer_v6):
I think there should be a space after RIORevisionSerializer1.
Also, isn't this a massive change to the behaviour of CHKSerializerSubtree?
> """A CHKInventory based serializer that supports tree references"""
>
> supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
> @@ -51,7 +131,7 @@
> self.search_key_name = search_key_name
>
>
> -class CHKSerializer(xml5.Serializer_v5):
> +class CHKSerializer(RIORevisionSerializer1,xml5.Serializer_v5):
I think there should be a space after RIORevisionSerializer1.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iEYEARECAAYFAkn2DTcACgkQ0F+nu1YWqI0dzwCghiBHFZkUmZwELmQLvur5YYih
i6oAn1VkzHAV04NV9tlhmKZjU9I5LCQ1
=4lpA
-----END PGP SIGNATURE-----
More information about the bazaar
mailing list