Rev 3208: (jam, #185458) Switch from NFKC => NFC for normalization checks. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Jan 30 19:47:14 GMT 2008


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 3208
revision-id:pqm at pqm.ubuntu.com-20080130194701-iykyel7v3q52qsol
parent: pqm at pqm.ubuntu.com-20080130100306-p0uqnxt3hodnyiej
parent: john at arbash-meinel.com-20080130180731-oq4t83eju0t15wzq
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2008-01-30 19:47:01 +0000
message:
  (jam, #185458) Switch from NFKC => NFC for normalization checks.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/tests/test_nonascii.py  testnonascii.py-20051018022645-ea1d8b6477b058a6
  bzrlib/tests/workingtree_implementations/test_rename_one.py test_rename_one.py-20070226161242-2d8ibdedl700jgio-1
    ------------------------------------------------------------
    revno: 3201.1.3
    revision-id:john at arbash-meinel.com-20080130180731-oq4t83eju0t15wzq
    parent: john at arbash-meinel.com-20080128224125-d88zoykrfspon9z5
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: nfc
    timestamp: Wed 2008-01-30 12:07:31 -0600
    message:
      NEWS
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
    ------------------------------------------------------------
    revno: 3201.1.2
    revision-id:john at arbash-meinel.com-20080128224125-d88zoykrfspon9z5
    parent: jameinel at samus-20080128165952-nqybfnus41chsd4x
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: nfc
    timestamp: Mon 2008-01-28 16:41:25 -0600
    message:
      Test needs to be updated now that µ is allowed.
    modified:
      bzrlib/tests/workingtree_implementations/test_rename_one.py test_rename_one.py-20070226161242-2d8ibdedl700jgio-1
    ------------------------------------------------------------
    revno: 3201.1.1
    revision-id:jameinel at samus-20080128165952-nqybfnus41chsd4x
    parent: pqm at pqm.ubuntu.com-20080125195940-iwy2onaiiges6k4t
    committer: jameinel <jameinel at SAMUS>
    branch nick: nfc
    timestamp: Mon 2008-01-28 10:59:52 -0600
    message:
      Fix bug #185458, switch from NFKC to NFC and add tests for filenames that would be broken under NFKC
    modified:
      bzrlib/inventory.py            inventory.py-20050309040759-6648b84ca2005b37
      bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
      bzrlib/tests/test_nonascii.py  testnonascii.py-20051018022645-ea1d8b6477b058a6
=== modified file 'NEWS'
--- a/NEWS	2008-01-30 08:20:23 +0000
+++ b/NEWS	2008-01-30 19:47:01 +0000
@@ -60,6 +60,10 @@
     * ``reconfigure`` preserves tags when converting to and from lightweight
       checkouts.  (Aaron Bentley, #182040)
 
+    * Switch from NFKC => NFC for normalization checks. NFC allows a few
+      more characters which should be considered valid.
+      (John Arbash Meinel, #185458)
+
     * Unknown hostnames when connecting to a ``bzr://`` URL no longer cause
       tracebacks.  (Andrew Bennetts, #182849)
 

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2007-12-18 18:25:12 +0000
+++ b/bzrlib/inventory.py	2008-01-28 16:59:52 +0000
@@ -1422,7 +1422,7 @@
 
     :raises InvalidNormalization: When name is not normalized, and cannot be
         accessed on this platform by the normalized path.
-    :return: The NFC/NFKC normalised version of name.
+    :return: The NFC normalised version of name.
     """
     #------- This has been copied to bzrlib.dirstate.DirState.add, please
     # keep them synchronised.

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2007-12-27 08:25:04 +0000
+++ b/bzrlib/osutils.py	2008-01-28 16:59:52 +0000
@@ -360,7 +360,7 @@
 
 
 def _mac_getcwd():
-    return unicodedata.normalize('NFKC', os.getcwdu())
+    return unicodedata.normalize('NFC', os.getcwdu())
 
 
 # Default is to just use the python builtins, but these can be rebound on
@@ -1014,20 +1014,20 @@
     On platforms where the system does not normalize filenames 
     (Windows, Linux), you have to access a file by its exact path.
 
-    Internally, bzr only supports NFC/NFKC normalization, since that is 
+    Internally, bzr only supports NFC normalization, since that is 
     the standard for XML documents.
 
     So return the normalized path, and a flag indicating if the file
     can be accessed by that path.
     """
 
-    return unicodedata.normalize('NFKC', unicode(path)), True
+    return unicodedata.normalize('NFC', unicode(path)), True
 
 
 def _inaccessible_normalized_filename(path):
     __doc__ = _accessible_normalized_filename.__doc__
 
-    normalized = unicodedata.normalize('NFKC', unicode(path))
+    normalized = unicodedata.normalize('NFC', unicode(path))
     return normalized, normalized == path
 
 

=== modified file 'bzrlib/tests/test_nonascii.py'
--- a/bzrlib/tests/test_nonascii.py	2006-12-22 08:52:28 +0000
+++ b/bzrlib/tests/test_nonascii.py	2008-01-28 16:59:52 +0000
@@ -46,18 +46,26 @@
 a_dots_d = u'a\u0308'
 z_umlat_c = u'\u017d'
 z_umlat_d = u'Z\u030c'
+squared_c = u'\xbc' # NOTE: U+00BC is really 1/4; NFK[CD] maps it to u'1\u20444', not '2'
+squared_d = u'\xbc'
+quarter_c = u'\xb2' # NOTE: U+00B2 is really superscript two; NFK[CD] maps it to '2'
+quarter_d = u'\xb2'
 
 
 class TestNormalization(TestCase):
     """Verify that we have our normalizations correct."""
 
     def test_normalize(self):
-        self.assertEqual(a_circle_d, normalize('NFKD', a_circle_c))
-        self.assertEqual(a_circle_c, normalize('NFKC', a_circle_d))
-        self.assertEqual(a_dots_d, normalize('NFKD', a_dots_c))
-        self.assertEqual(a_dots_c, normalize('NFKC', a_dots_d))
-        self.assertEqual(z_umlat_d, normalize('NFKD', z_umlat_c))
-        self.assertEqual(z_umlat_c, normalize('NFKC', z_umlat_d))
+        self.assertEqual(a_circle_d, normalize('NFD', a_circle_c))
+        self.assertEqual(a_circle_c, normalize('NFC', a_circle_d))
+        self.assertEqual(a_dots_d, normalize('NFD', a_dots_c))
+        self.assertEqual(a_dots_c, normalize('NFC', a_dots_d))
+        self.assertEqual(z_umlat_d, normalize('NFD', z_umlat_c))
+        self.assertEqual(z_umlat_c, normalize('NFC', z_umlat_d))
+        self.assertEqual(squared_d, normalize('NFC', squared_c))
+        self.assertEqual(squared_c, normalize('NFD', squared_d))
+        self.assertEqual(quarter_d, normalize('NFC', quarter_c))
+        self.assertEqual(quarter_c, normalize('NFD', quarter_d))
 
 
 class NormalizedFilename(TestCaseWithTransport):
@@ -74,6 +82,10 @@
         self.assertEqual((a_dots_c, True), anf(a_dots_d))
         self.assertEqual((z_umlat_c, True), anf(z_umlat_c))
         self.assertEqual((z_umlat_c, True), anf(z_umlat_d))
+        self.assertEqual((squared_c, True), anf(squared_c))
+        self.assertEqual((squared_c, True), anf(squared_d))
+        self.assertEqual((quarter_c, True), anf(quarter_c))
+        self.assertEqual((quarter_c, True), anf(quarter_d))
 
     def test__inaccessible_normalized_filename(self):
         inf = osutils._inaccessible_normalized_filename
@@ -86,6 +98,10 @@
         self.assertEqual((a_dots_c, False), inf(a_dots_d))
         self.assertEqual((z_umlat_c, True), inf(z_umlat_c))
         self.assertEqual((z_umlat_c, False), inf(z_umlat_d))
+        self.assertEqual((squared_c, True), inf(squared_c))
+        self.assertEqual((squared_c, True), inf(squared_d))
+        self.assertEqual((quarter_c, True), inf(quarter_c))
+        self.assertEqual((quarter_c, True), inf(quarter_d))
 
     def test_functions(self):
         if osutils.normalizes_filenames():
@@ -121,7 +137,8 @@
         # a_circle_c and a_dots_c actually map to the same file
         # adding a suffix kicks in the 'preserving but insensitive'
         # route, and maintains the right files
-        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3']
+        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3',
+                 squared_c+'.4', quarter_c+'.5']
         try:
             self.build_tree(files, line_endings='native')
         except UnicodeError:

=== modified file 'bzrlib/tests/workingtree_implementations/test_rename_one.py'
--- a/bzrlib/tests/workingtree_implementations/test_rename_one.py	2007-09-17 05:33:56 +0000
+++ b/bzrlib/tests/workingtree_implementations/test_rename_one.py	2008-01-28 22:41:25 +0000
@@ -314,4 +314,4 @@
         self.build_tree(['a'])
         tree.add(['a'])
         self.assertRaises((errors.InvalidNormalization, UnicodeEncodeError),
-            tree.rename_one, 'a', u'b\xb5rry')
+            tree.rename_one, 'a', u'ba\u030arry')




More information about the bazaar-commits mailing list