Rev 4412: Some small tweaks to decoding strings (avoid passing over the length 2x) in lp:///~jameinel/bzr/bencode_serializer
John Arbash Meinel
john at arbash-meinel.com
Thu Jun 4 18:12:48 BST 2009
At lp:///~jameinel/bzr/bencode_serializer
------------------------------------------------------------
revno: 4412
revision-id: john at arbash-meinel.com-20090604171229-kbgfatt63y3u3uh1
parent: john at arbash-meinel.com-20090604165033-bfdo0lyf4yt4vjcz
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: bencode_serializer
timestamp: Thu 2009-06-04 12:12:29 -0500
message:
Some small tweaks to decoding strings (avoid passing over the length 2x)
Down to 1.1s (from 1.4s) for decoding all of bzr.dev.
Also, favor decoding strings and then lists in _decode_object, since that is the
frequency we have those types inside Revisions.
-------------- next part --------------
=== modified file 'bzrlib/_bencode_pyx.pyx'
--- a/bzrlib/_bencode_pyx.pyx 2009-06-04 16:50:33 +0000
+++ b/bzrlib/_bencode_pyx.pyx 2009-06-04 17:12:29 +0000
@@ -98,14 +98,14 @@
raise RuntimeError("too deeply nested")
try:
ch = self.tail[0]
- if ch == c'i':
- D_UPDATE_TAIL(self, 1)
- return self._decode_int()
- elif c'0' <= ch <= c'9':
+ if c'0' <= ch <= c'9':
return self._decode_string()
elif ch == c'l':
D_UPDATE_TAIL(self, 1)
return self._decode_list()
+ elif ch == c'i':
+ D_UPDATE_TAIL(self, 1)
+ return self._decode_int()
elif ch == c'd':
D_UPDATE_TAIL(self, 1)
return self._decode_dict()
@@ -144,10 +144,20 @@
return ret
cdef object _decode_string(self):
- cdef int n, i
- i = self._read_digits(c':')
- n = strtol(self.tail, NULL, 10)
- D_UPDATE_TAIL(self, i+1)
+ cdef int n
+ cdef char *next_tail
+ # strtol allows leading whitespace, negatives, and leading zeros
+ # however, all callers have already checked that '0' <= tail[0] <= '9'
+ # or they wouldn't have called _decode_string
+ # strtol will stop at trailing whitespace, etc
+ n = strtol(self.tail, &next_tail, 10)
+ if next_tail == NULL or next_tail[0] != c':':
+ raise ValueError('string len not terminated by ":"')
+ # strtol allows leading zeros, so validate that we don't have that
+ if (self.tail[0] == c'0'
+ and (n != 0 or (next_tail - self.tail != 1))):
+ raise ValueError('leading zeros are not allowed')
+ D_UPDATE_TAIL(self, next_tail - self.tail + 1)
if n == 0:
return ''
if n > self.size:
@@ -170,6 +180,9 @@
else:
return result
else:
+ # As a quick shortcut, check to see if the next object is a
+ # string, since we know that won't be creating recursion
+ # if self.tail[0] >= c'0' and self.tail[0] <= c'9':
PyList_Append(result, self._decode_object())
raise ValueError('malformed list')
@@ -187,6 +200,8 @@
return result
else:
# keys should be strings only
+ if self.tail[0] < c'0' or self.tail[0] > c'9':
+ raise ValueError('key was not a simple string.')
key = self._decode_string()
if lastkey >= key:
raise ValueError('dict keys disordered')
=== modified file 'bzrlib/tests/test_bencode.py'
--- a/bzrlib/tests/test_bencode.py 2009-06-03 14:14:31 +0000
+++ b/bzrlib/tests/test_bencode.py 2009-06-04 17:12:29 +0000
@@ -101,7 +101,11 @@
self._run_check_error(ValueError, '00:')
self._run_check_error(ValueError, '35208734823ljdahflajhdf')
self._run_check_error(ValueError, '432432432432432:foo')
- self._run_check_error(ValueError, 'i' + ('1' * 1000) + ':')
+ self._run_check_error(ValueError, ' 1:x') # leading whitespace
+ self._run_check_error(ValueError, '-1:x') # negative
+ self._run_check_error(ValueError, '1 x') # space vs colon
+ self._run_check_error(ValueError, '1x') # missing colon
+ self._run_check_error(ValueError, ('1' * 1000) + ':')
def test_list(self):
self._check([], 'le')
More information about the bazaar-commits
mailing list