Rev 4444: (Jelmer) Move squashing of XML-invalid characters to XMLSerializer. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Jun 15 21:11:36 BST 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4444
revision-id: pqm at pqm.ubuntu.com-20090615201132-p32zthmt3rjn2tfk
parent: pqm at pqm.ubuntu.com-20090615170647-5zu6h93br8c4ue6i
parent: jelmer at samba.org-20090615191156-s7nuxxxfewe11pwj
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2009-06-15 21:11:32 +0100
message:
  (Jelmer) Move squashing of XML-invalid characters to XMLSerializer.
modified:
  bzrlib/chk_serializer.py       chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
  bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
  bzrlib/serializer.py           serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
  bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
  bzrlib/xml4.py                 xml4.py-20050916091259-db5ab55e7e6ca324
  bzrlib/xml8.py                 xml5.py-20050907032657-aac8f960815b66b1
  bzrlib/xml_serializer.py       xml.py-20050309040759-57d51586fdec365d
    ------------------------------------------------------------
    revno: 4443.1.2
    revision-id: jelmer at samba.org-20090615191156-s7nuxxxfewe11pwj
    parent: jelmer at samba.org-20090615190438-3yy3e0gviyej2rtv
    committer: Jelmer Vernooij <jelmer at samba.org>
    branch nick: bzr.dev
    timestamp: Mon 2009-06-15 21:11:56 +0200
    message:
      Fix comment about XML escaping in tests.
    modified:
      bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
    ------------------------------------------------------------
    revno: 4443.1.1
    revision-id: jelmer at samba.org-20090615190438-3yy3e0gviyej2rtv
    parent: pqm at pqm.ubuntu.com-20090615170647-5zu6h93br8c4ue6i
    parent: jelmer at samba.org-20090609005951-apv900cdk35o2ygh
    committer: Jelmer Vernooij <jelmer at samba.org>
    branch nick: bzr.dev
    timestamp: Mon 2009-06-15 21:04:38 +0200
    message:
      Merge in patch to squash XML invalid characters in serializer.
    modified:
      bzrlib/chk_serializer.py       chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
      bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
      bzrlib/serializer.py           serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
      bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
      bzrlib/xml4.py                 xml4.py-20050916091259-db5ab55e7e6ca324
      bzrlib/xml8.py                 xml5.py-20050907032657-aac8f960815b66b1
      bzrlib/xml_serializer.py       xml.py-20050309040759-57d51586fdec365d
    ------------------------------------------------------------
    revno: 4416.5.1
    revision-id: jelmer at samba.org-20090609005951-apv900cdk35o2ygh
    parent: pqm at pqm.ubuntu.com-20090606005240-2qms4n1eqdshzp0w
    committer: Jelmer Vernooij <jelmer at samba.org>
    branch nick: bzr.dev
    timestamp: Tue 2009-06-09 02:59:51 +0200
    message:
      Move squashing of XML-invalid characters to XMLSerializer.
    modified:
      bzrlib/chk_serializer.py       chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
      bzrlib/commit.py               commit.py-20050511101309-79ec1a0168e0e825
      bzrlib/serializer.py           serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
      bzrlib/tests/per_repository/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
      bzrlib/xml4.py                 xml4.py-20050916091259-db5ab55e7e6ca324
      bzrlib/xml8.py                 xml5.py-20050907032657-aac8f960815b66b1
      bzrlib/xml_serializer.py       xml.py-20050309040759-57d51586fdec365d
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py	2009-06-10 03:56:49 +0000
+++ b/bzrlib/chk_serializer.py	2009-06-15 19:04:38 +0000
@@ -50,6 +50,8 @@
     """Simple revision serializer based around bencode.
     """
 
+    squashes_xml_invalid_characters = False
+
     # Maps {key:(Revision attribute, bencode_type, validator)}
     # This tells us what kind we expect bdecode to create, what variable on
     # Revision we should be using, and a function to call to validate/transform

=== modified file 'bzrlib/commit.py'
--- a/bzrlib/commit.py	2009-06-10 03:56:49 +0000
+++ b/bzrlib/commit.py	2009-06-15 19:04:38 +0000
@@ -108,9 +108,6 @@
     def deleted(self, path):
         pass
 
-    def escaped(self, escape_count, message):
-        pass
-
     def missing(self, path):
         pass
 
@@ -153,9 +150,6 @@
     def deleted(self, path):
         self._note('deleted %s', path)
 
-    def escaped(self, escape_count, message):
-        self._note("replaced %d control characters in message", escape_count)
-
     def missing(self, path):
         self._note('missing %s', path)
 
@@ -373,7 +367,6 @@
                 # Prompt the user for a commit message if none provided
                 message = message_callback(self)
                 self.message = message
-                self._escape_commit_message()
 
                 # Add revision data to the local branch
                 self.rev_id = self.builder.commit(self.message)
@@ -602,17 +595,6 @@
         if self.master_locked:
             self.master_branch.unlock()
 
-    def _escape_commit_message(self):
-        """Replace xml-incompatible control characters."""
-        # FIXME: RBC 20060419 this should be done by the revision
-        # serialiser not by commit. Then we can also add an unescaper
-        # in the deserializer and start roundtripping revision messages
-        # precisely. See repository_implementations/test_repository.py
-        self.message, escape_count = xml_serializer.escape_invalid_chars(
-            self.message)
-        if escape_count:
-            self.reporter.escaped(escape_count, self.message)
-
     def _gather_parents(self):
         """Record the parents of a merge for merge detection."""
         # TODO: Make sure that this list doesn't contain duplicate

=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py	2009-06-10 03:56:49 +0000
+++ b/bzrlib/serializer.py	2009-06-15 19:04:38 +0000
@@ -24,6 +24,8 @@
 class Serializer(object):
     """Inventory and revision serialization/deserialization."""
 
+    squashes_xml_invalid_characters = False
+
     def write_inventory(self, inv, f):
         """Write inventory to a file"""
         raise NotImplementedError(self.write_inventory)

=== modified file 'bzrlib/tests/per_repository/test_repository.py'
--- a/bzrlib/tests/per_repository/test_repository.py	2009-05-11 07:18:30 +0000
+++ b/bzrlib/tests/per_repository/test_repository.py	2009-06-15 19:11:56 +0000
@@ -28,6 +28,7 @@
     osutils,
     remote,
     repository,
+    xml_serializer,
     )
 from bzrlib.branch import BzrBranchFormat6
 from bzrlib.delta import TreeDelta
@@ -527,16 +528,16 @@
         tree = self.make_branch_and_tree('.')
         tree.commit(message, rev_id='a', allow_pointless=True)
         rev = tree.branch.repository.get_revision('a')
-        # we have to manually escape this as we dont try to
-        # roundtrip xml invalid characters at this point.
-        # when escaping is moved to the serialiser, this test
-        # can check against the literal message rather than
-        # this escaped version.
-        escaped_message, escape_count = re.subn(
-            u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
-            lambda match: match.group(0).encode('unicode_escape'),
-            message)
-        self.assertEqual(rev.message, escaped_message)
+        if tree.branch.repository._serializer.squashes_xml_invalid_characters:
+            # we have to manually escape this as we don't try to
+            # roundtrip xml invalid characters in the xml-based serializers.
+            escaped_message, escape_count = re.subn(
+                u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
+                lambda match: match.group(0).encode('unicode_escape'),
+                message)
+            self.assertEqual(rev.message, escaped_message)
+        else:
+            self.assertEqual(rev.message, message)
         # insist the class is unicode no matter what came in for
         # consistency.
         self.assertIsInstance(rev.message, unicode)

=== modified file 'bzrlib/xml4.py'
--- a/bzrlib/xml4.py	2009-04-02 15:28:11 +0000
+++ b/bzrlib/xml4.py	2009-06-09 00:59:51 +0000
@@ -19,6 +19,7 @@
     ElementTree,
     SubElement,
     XMLSerializer,
+    escape_invalid_chars,
     )
 from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
 import bzrlib.inventory as inventory
@@ -125,7 +126,7 @@
         root.text = '\n'
 
         msg = SubElement(root, 'message')
-        msg.text = rev.message
+        msg.text = escape_invalid_chars(rev.message)[0]
         msg.tail = '\n'
 
         if rev.parents:

=== modified file 'bzrlib/xml8.py'
--- a/bzrlib/xml8.py	2009-04-08 20:21:06 +0000
+++ b/bzrlib/xml8.py	2009-06-09 00:59:51 +0000
@@ -28,6 +28,7 @@
     Element,
     SubElement,
     XMLSerializer,
+    escape_invalid_chars,
     )
 from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
 from bzrlib.revision import Revision
@@ -345,7 +346,7 @@
             root.set('timezone', str(rev.timezone))
         root.text = '\n'
         msg = SubElement(root, 'message')
-        msg.text = rev.message
+        msg.text = escape_invalid_chars(rev.message)[0]
         msg.tail = '\n'
         if rev.parent_ids:
             pelts = SubElement(root, 'parents')

=== modified file 'bzrlib/xml_serializer.py'
--- a/bzrlib/xml_serializer.py	2009-05-12 23:05:21 +0000
+++ b/bzrlib/xml_serializer.py	2009-06-09 00:59:51 +0000
@@ -52,6 +52,8 @@
 class XMLSerializer(Serializer):
     """Abstract XML object serialize/deserialize"""
 
+    squashes_xml_invalid_characters = True
+
     def read_inventory_from_string(self, xml_string, revision_id=None,
                                    entry_cache=None):
         """Read xml_string into an inventory object.
@@ -174,6 +176,8 @@
     :param message: Commit message to escape
     :return: tuple with escaped message and number of characters escaped
     """
+    if message is None:
+        return None, 0
     # Python strings can include characters that can't be
     # represented in well-formed XML; escape characters that
     # aren't listed in the XML specification




More information about the bazaar-commits mailing list