Rev 5729: Properly decode basename. In the map it is always stored as UTF-8, but in in-memory objects it needs to be Unicode. (in http://bazaar.launchpad.net/~jameinel/bzr/2.4-cheaper-iter-entries-by-dir)

John Arbash Meinel john at arbash-meinel.com
Tue Mar 22 11:03:53 UTC 2011


At http://bazaar.launchpad.net/~jameinel/bzr/2.4-cheaper-iter-entries-by-dir

------------------------------------------------------------
revno: 5729
revision-id: john at arbash-meinel.com-20110322110347-6syzog40fq3b8kl0
parent: john at arbash-meinel.com-20110319082100-l5w627s2ypwsulhw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-cheaper-iter-entries-by-dir
timestamp: Tue 2011-03-22 12:03:47 +0100
message:
  Properly decode basename. In the map it is always stored as UTF-8, but
  in in-memory objects it needs to be Unicode.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2011-03-19 08:21:00 +0000
+++ b/bzrlib/inventory.py	2011-03-22 11:03:47 +0000
@@ -1970,7 +1970,7 @@
 
     def iter_just_entries(self):
         """Iterate over all entries.
-        
+
         Unlike iter_entries(), just the entries are returned (not (path, ie))
         and the order of entries is undefined.
 
@@ -1983,7 +1983,7 @@
                 ie = self._bytes_to_entry(entry)
                 self._fileid_to_entry_cache[file_id] = ie
             yield ie
-            
+
     def _preload_cache(self):
         """Make sure all file-ids are in _fileid_to_entry_cache"""
         if self._fully_cached:
@@ -2020,6 +2020,7 @@
                     ' is %r not "directory"' % (parent_id, parent_ie.kind))
             if parent_ie._children is None:
                 parent_ie._children = {}
+            basename = basename.decode('utf-8')
             if basename in parent_ie._children:
                 raise ValueError('Data inconsistency detected.'
                     ' Two entries with basename %r were found'

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2011-03-18 11:51:58 +0000
+++ b/bzrlib/tests/test_inv.py	2011-03-22 11:03:47 +0000
@@ -1222,7 +1222,37 @@
         self.assertIsInstance(ie2.name, unicode)
         self.assertEqual(('tree\xce\xa9name', 'tree-root-id', 'tree-rev-id'),
                          inv._bytes_to_utf8name_key(bytes))
-        
+
+    def test__preload_handles_utf8(self):
+        inv = Inventory()
+        inv.revision_id = "revid"
+        inv.root.revision = "rootrev"
+        root_id = inv.root.file_id
+        inv.add(InventoryFile("fileid", u'f\xefle', root_id))
+        inv["fileid"].revision = "filerev"
+        inv["fileid"].text_sha1 = "ffff"
+        inv["fileid"].text_size = 0
+        inv.add(InventoryDirectory("dirid", u'dir-\N{EURO SIGN}', root_id))
+        inv.add(InventoryFile("childid", u'ch\xefld', "dirid"))
+        inv["childid"].revision = "filerev"
+        inv["childid"].text_sha1 = "ffff"
+        inv["childid"].text_size = 0
+        chk_bytes = self.get_chk_bytes()
+        chk_inv = CHKInventory.from_inventory(chk_bytes, inv)
+        bytes = ''.join(chk_inv.to_lines())
+        new_inv = CHKInventory.deserialise(chk_bytes, bytes, ("revid",))
+        self.assertEqual({}, new_inv._fileid_to_entry_cache)
+        self.assertFalse(new_inv._fully_cached)
+        new_inv._preload_cache()
+        self.assertEqual(
+            sorted([root_id, "fileid", "dirid", "childid"]),
+            sorted(new_inv._fileid_to_entry_cache.keys()))
+        ie_root = new_inv._fileid_to_entry_cache[root_id]
+        self.assertEqual([u'dir-\N{EURO SIGN}', u'f\xefle'],
+                         sorted(ie_root._children.keys()))
+        ie_dir = new_inv._fileid_to_entry_cache['dirid']
+        self.assertEqual([u'ch\xefld'], sorted(ie_dir._children.keys()))
+
     def test__preload_populates_cache(self):
         inv = Inventory()
         inv.revision_id = "revid"



More information about the bazaar-commits mailing list