Rev 2818: * Trees with bad state such as files with no length or sha1, or symlinks... in http://people.ubuntu.com/~robertc/baz2.0/xml.writer

Robert Collins robertc at robertcollins.net
Wed Sep 12 23:15:22 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/xml.writer

------------------------------------------------------------
revno: 2818
revision-id: robertc at robertcollins.net-20070912221441-z4ww2z99uvjp7aqc
parent: robertc at robertcollins.net-20070912213108-hghkqespjomlqu43
committer: Robert Collins <robertc at robertcollins.net>
branch nick: xml.writer
timestamp: Thu 2007-09-13 08:14:41 +1000
message:
  * Trees with bad state such as files with no length or sha1, or symlinks
    with a length will no longer be silently accepted by the repository XML
    serialiser. This has no effect on the bzrlib core as we do not generate
    such corrupt inventories. (Robert Collins)
  * The XML inventory serializer has changed to be more aware of the data
    being written and as such performs fewer attribute checks while being
    approximately 15% faster. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/tests/test_xml.py       test_xml.py-20050905091053-80b45588931a9b35
  bzrlib/xml5.py                 xml5.py-20050907032657-aac8f960815b66b1
  bzrlib/xml6.py                 xml6.py-20060823042456-dbaaq4atrche7xy5-1
=== modified file 'NEWS'
--- a/NEWS	2007-09-12 07:56:16 +0000
+++ b/NEWS	2007-09-12 22:14:41 +0000
@@ -17,7 +17,18 @@
 
   BUG FIXES:
 
+   * Tree's with bad state such as files with no length or sha1, or symlinks
+     with a length will no longer be silently accepted by the repository XML
+     serialiser. This has no effect on the bzrlib core as we do not generate
+     such corrupt inventories. (Robert Collins)
+
   API BREAKS:
+
+  INTERNALS:
+
+   * The XML inventory serializer has changed to be more aware of the data
+     being written and as such performs less attribute checks while being
+     approximately 15% faster. (Robert Collins)
   
   TESTING:
 

=== modified file 'bzrlib/tests/test_xml.py'
--- a/bzrlib/tests/test_xml.py	2007-06-26 19:31:00 +0000
+++ b/bzrlib/tests/test_xml.py	2007-09-12 22:14:41 +0000
@@ -81,14 +81,14 @@
 _committed_inv_v5 = """<inventory>
 <file file_id="bar-20050901064931-73b4b1138abc9cd2" 
       name="bar" parent_id="TREE_ROOT" 
-      revision="mbp at foo-123123"/>
+      revision="mbp at foo-123123" text_sha1="123cba" text_size="1"/>
 <directory name="subdir"
            file_id="foo-20050801201819-4139aa4a272f4250"
            parent_id="TREE_ROOT" 
            revision="mbp at foo-00"/>
 <file executable="yes" file_id="bar-20050824000535-6bc48cfad47ed134" 
       name="bar" parent_id="foo-20050801201819-4139aa4a272f4250" 
-      revision="mbp at foo-00"/>
+      revision="mbp at foo-00" text_sha1="abcdef" text_size="0" />
 </inventory>
 """
 
@@ -121,25 +121,25 @@
 
 # DO NOT REFLOW THIS. Its the exact inventory we want.
 _expected_inv_v5 = """<inventory format="5">
-<file file_id="bar-20050901064931-73b4b1138abc9cd2" name="bar" revision="mbp at foo-123123" />
+<file file_id="bar-20050901064931-73b4b1138abc9cd2" name="bar" revision="mbp at foo-123123" text_sha1="123cba" text_size="1" />
 <directory file_id="foo-20050801201819-4139aa4a272f4250" name="subdir" revision="mbp at foo-00" />
-<file executable="yes" file_id="bar-20050824000535-6bc48cfad47ed134" name="bar" parent_id="foo-20050801201819-4139aa4a272f4250" revision="mbp at foo-00" />
+<file file_id="bar-20050824000535-6bc48cfad47ed134" name="bar" parent_id="foo-20050801201819-4139aa4a272f4250" revision="mbp at foo-00" executable="yes" text_sha1="abcdef" text_size="0" />
 </inventory>
 """
 
 
 _expected_inv_v5_root = """<inventory file_id="f&lt;" format="5" revision_id="mother!">
-<file file_id="bar-20050901064931-73b4b1138abc9cd2" name="bar" parent_id="f&lt;" revision="mbp at foo-123123" />
+<file file_id="bar-20050901064931-73b4b1138abc9cd2" name="bar" parent_id="f&lt;" revision="mbp at foo-123123" text_sha1="abcdefg" text_size="1" />
 <directory file_id="foo-20050801201819-4139aa4a272f4250" name="subdir" parent_id="f&lt;" revision="mbp at foo-00" />
-<file executable="yes" file_id="bar-20050824000535-6bc48cfad47ed134" name="bar" parent_id="foo-20050801201819-4139aa4a272f4250" revision="mbp at foo-00" />
+<file file_id="bar-20050824000535-6bc48cfad47ed134" name="bar" parent_id="foo-20050801201819-4139aa4a272f4250" revision="mbp at foo-00" executable="yes" text_sha1="abcdef" text_size="0" />
 </inventory>
 """
 
 _expected_inv_v7 = """<inventory format="7" revision_id="rev_outer">
 <directory file_id="tree-root-321" name="" revision="rev_outer" />
 <directory file_id="dir-id" name="dir" parent_id="tree-root-321" revision="rev_outer" />
-<file file_id="file-id" name="file" parent_id="tree-root-321" revision="rev_outer" />
-<symlink file_id="link-id" name="link" parent_id="tree-root-321" revision="rev_outer" />
+<file file_id="file-id" name="file" parent_id="tree-root-321" revision="rev_outer" text_sha1="a-sha" text_size="1" />
+<symlink file_id="link-id" name="link" parent_id="tree-root-321" revision="rev_outer" symlink_target="" />
 <tree-reference file_id="nested-id" name="nested" parent_id="tree-root-321" revision="rev_outer" reference_revision="rev_inner" />
 </inventory>
 """
@@ -308,9 +308,12 @@
         inv.add(inventory.TreeReference('nested-id', 'nested', 'tree-root-321',
                                         'rev_outer', 'rev_inner'))
         inv.add(inventory.InventoryFile('file-id', 'file', 'tree-root-321'))
+        inv['file-id'].text_sha1 = "a-sha"
+        inv['file-id'].text_size = 1
         inv.add(inventory.InventoryDirectory('dir-id', 'dir', 
                                              'tree-root-321'))
         inv.add(inventory.InventoryLink('link-id', 'link', 'tree-root-321'))
+        inv['link-id'].symlink_target = ''
         inv['tree-root-321'].revision = 'rev_outer'
         inv['dir-id'].revision = 'rev_outer'
         inv['file-id'].revision = 'rev_outer'

=== modified file 'bzrlib/xml5.py'
--- a/bzrlib/xml5.py	2007-07-17 13:27:14 +0000
+++ b/bzrlib/xml5.py	2007-09-12 22:14:41 +0000
@@ -144,7 +144,7 @@
     Packs objects into XML and vice versa.
     """
     
-    __slots__ = []
+    __slots__ = ['root_id']
 
     support_altered_by_hack = True
     # This format supports the altered-by hack that reads file ids directly out
@@ -153,6 +153,9 @@
     supported_kinds = set(['file', 'directory', 'symlink'])
     format_num = '5'
 
+    def __init__(self):
+        self.root_id = ROOT_ID
+        
     def write_inventory_to_string(self, inv):
         """Just call write_inventory with a StringIO and return the value"""
         sio = cStringIO.StringIO()
@@ -168,12 +171,49 @@
         _ensure_utf8_re()
         output = []
         append = output.append
+        __encode_and_escape = _encode_and_escape
         self._append_inventory_root(append, inv)
         entries = inv.iter_entries()
         # Skip the root
         root_path, root_ie = entries.next()
         for path, ie in entries:
-            self._append_entry(append, ie)
+            append("<")
+            append(ie.kind)
+            append(' file_id="')
+            append(__encode_and_escape(ie.file_id))
+            append(' name="')
+            append(__encode_and_escape(ie.name))
+            if ie.parent_id != self.root_id:
+                assert isinstance(ie.parent_id, basestring)
+                append(' parent_id="')
+                append(__encode_and_escape(ie.parent_id))
+            assert ie.revision is not None
+            append(' revision="')
+            append(__encode_and_escape(ie.revision))
+            if ie.kind == 'file':
+                if ie.executable:
+                    append(' executable="yes"')
+                assert ie.text_sha1 is not None
+                append(' text_sha1="')
+                append(ie.text_sha1)
+                append('"')
+                append(' text_size="%d"' % ie.text_size)
+            elif ie.kind == 'symlink':
+                assert ie.symlink_target is not None
+                append(' symlink_target="')
+                append(__encode_and_escape(ie.symlink_target))
+            elif ie.kind == 'tree-reference':
+                # tree-reference's are not supported by all serialisers.
+                if ie.kind not in self.supported_kinds:
+                    raise errors.UnsupportedInventoryKind(ie.kind)
+                assert ie.reference_revision is not None
+                append(' reference_revision="')
+                append(__encode_and_escape(ie.reference_revision))
+            else:
+                # directories only have meta-fields.
+                if ie.kind not in self.supported_kinds:
+                    raise errors.UnsupportedInventoryKind(ie.kind)
+            append(" />\n")
         append('</inventory>\n')
         f.writelines(output)
         # Just to keep the cache from growing without bounds
@@ -206,7 +246,7 @@
         append(_encode_and_escape(ie.file_id))
         append(' name="')
         append(_encode_and_escape(ie.name))
-        if self._parent_condition(ie):
+        if ie.parent_id != self.root_id:
             assert isinstance(ie.parent_id, basestring)
             append(' parent_id="')
             append(_encode_and_escape(ie.parent_id))
@@ -228,9 +268,6 @@
         append(" />\n")
         return
 
-    def _parent_condition(self, ie):
-        return ie.parent_id != ROOT_ID
-
     def _pack_revision(self, rev):
         """Revision object -> xml tree"""
         # For the XML format, we need to write them as Unicode rather than as

=== modified file 'bzrlib/xml6.py'
--- a/bzrlib/xml6.py	2007-03-02 08:55:16 +0000
+++ b/bzrlib/xml6.py	2007-09-12 22:14:41 +0000
@@ -19,8 +19,13 @@
 
 class Serializer_v6(xml5.Serializer_v5):
 
+    __slots__ = ['root_id']
+
     format_num = '6'
 
+    def __init__(self):
+        self.root_id = None
+
     def _append_inventory_root(self, append, inv):
         """Append the inventory root to output."""
         append('<inventory')
@@ -31,9 +36,6 @@
         append('>\n')
         self._append_entry(append, inv.root)
 
-    def _parent_condition(self, ie):
-        return ie.parent_id is not None
-
     def _unpack_inventory(self, elt):
         """Construct from XML Element"""
         if elt.tag != 'inventory':



More information about the bazaar-commits mailing list