Rev 5122: (mbp, for jspashett) cope with non-utf8 data in the ignore file in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Tue Mar 30 06:12:30 BST 2010
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 5122 [merge]
revision-id: pqm at pqm.ubuntu.com-20100330051224-a8ubi9w2cd0upyb5
parent: pqm at pqm.ubuntu.com-20100329080616-84azimjwafaukcey
parent: mbp at sourcefrog.net-20100330040915-waagxocidz7mr07h
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2010-03-30 06:12:24 +0100
message:
(mbp, for jspashett) cope with non-utf8 data in the ignore file
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/ignores.py ignores.py-20060712153832-2von9l0t7p43ixsv-1
bzrlib/tests/test_ignores.py test_ignores.py-20060712172354-vqq9ln0t8di27v53-1
=== modified file 'NEWS'
--- a/NEWS 2010-03-29 06:37:23 +0000
+++ b/NEWS 2010-03-30 04:09:15 +0000
@@ -139,6 +139,9 @@
``add``.
(Parth Malwankar, #335033, #300001)
+* Cope with non-utf8 characters inside ``.bzrignore``.
+ (Jason Spashett, #183504)
+
* Correctly interpret "451 Rename/move failure: Directory not empty" from
ftp servers while trying to take a lock.
(Martin Pool, #528722)
=== modified file 'bzrlib/ignores.py'
--- a/bzrlib/ignores.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/ignores.py 2010-03-29 00:54:27 +0000
@@ -25,6 +25,8 @@
globbing,
)
+from trace import warning
+
# This was the full ignore list for bzr 0.8
# please keep these sorted (in C locale order) to aid merging
OLD_DEFAULTS = [
@@ -100,10 +102,34 @@
]
+
def parse_ignore_file(f):
- """Read in all of the lines in the file and turn it into an ignore list"""
+ """Read in all of the lines in the file and turn it into an ignore list
+
+ Continue in the case of utf8 decoding errors, and emit a warning when
+ such an error is found. Optimise for the common case -- no decoding
+ errors.
+ """
ignored = set()
- for line in f.read().decode('utf8').split('\n'):
+ ignore_file = f.read()
+ try:
+ # Try and parse whole ignore file at once.
+ unicode_lines = ignore_file.decode('utf8').split('\n')
+ except UnicodeDecodeError:
+ # Otherwise go through line by line and pick out the 'good'
+ # decodable lines
+ lines = ignore_file.split('\n')
+ unicode_lines = []
+ for line_number, line in enumerate(lines):
+ try:
+ unicode_lines.append(line.decode('utf-8'))
+ except UnicodeDecodeError:
+ # report error about line (line_number+1)
+ warning('.bzrignore: On Line #%d, malformed utf8 character. '
+ 'Ignoring line.' % (line_number+1))
+
+ # Append each line to ignore list if it's not a comment line
+ for line in unicode_lines:
line = line.rstrip('\r\n')
if not line or line.startswith('#'):
continue
=== modified file 'bzrlib/tests/test_ignores.py'
--- a/bzrlib/tests/test_ignores.py 2010-02-23 07:43:11 +0000
+++ b/bzrlib/tests/test_ignores.py 2009-12-26 21:31:56 +0000
@@ -50,6 +50,18 @@
def test_parse_empty(self):
ignored = ignores.parse_ignore_file(StringIO(''))
self.assertEqual(set([]), ignored)
+
+ def test_parse_non_utf8(self):
+ """Lines with non utf 8 characters should be discarded."""
+ ignored = ignores.parse_ignore_file(StringIO(
+ 'utf8filename_a\n'
+ 'invalid utf8\x80\n'
+ 'utf8filename_b\n'
+ ))
+ self.assertEqual(set([
+ 'utf8filename_a',
+ 'utf8filename_b',
+ ]), ignored)
class TestUserIgnores(TestCaseInTempDir):
More information about the bazaar-commits
mailing list