Rev 2384: Start cleaning up the code, and fix one more edge case in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
John Arbash Meinel
john at arbash-meinel.com
Fri Feb 23 22:52:08 GMT 2007
At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
------------------------------------------------------------
revno: 2384
revision-id: john at arbash-meinel.com-20070223225101-92gi3mujiugb0tk0
parent: john at arbash-meinel.com-20070223220821-yjinik850b8szgqj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Fri 2007-02-23 16:51:01 -0600
message:
Start cleaning up the code, and fix one more edge case:
when last_entry == first_entry, we don't want duplicated entries
in the output.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/test_dirstate.py test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
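
The edge case itself is easy to reproduce in isolation. Here is a minimal
standalone sketch (hypothetical field values, not bzrlib code) of why the
paths dict must not be seeded with both the first and last record when they
are the same entry:

first_entry_num = last_entry_num = 3             # the block held one record
first_fields = ['', 'b', 'c', 'rest-of-record']  # hypothetical parsed entry
last_fields = first_fields                       # same record when nums match

first_dir_name = (first_fields[1], first_fields[2])
last_dir_name = (last_fields[1], last_fields[2])

paths = {first_dir_name: [first_fields]}
# Only record the last entry when it is genuinely a different record.
if last_entry_num != first_entry_num:
    paths.setdefault(last_dir_name, []).append(last_fields)

assert paths[('b', 'c')] == [first_fields]       # one copy, not two
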
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-23 22:08:21 +0000
+++ b/bzrlib/dirstate.py 2007-02-23 22:51:01 +0000
@@ -379,7 +379,6 @@
# ensure that we read the whole record, and we should have a precursor
# '' which ensures that we start after the previous '\n'
entry_field_count = self._fields_per_entry() + 1
- # print '\nfield_count:', entry_field_count
low = self._end_of_header
high = file_size - 1 # Ignore the final '\0'
@@ -418,8 +417,6 @@
if count > max_count:
raise errors.BzrError('Too many seeks, most likely a bug.')
- # print low, high, cur_files
-
mid = max(low, (low+high-page_size)/2)
state_file.seek(mid)
@@ -429,7 +426,6 @@
block = state_file.read(read_size)
start = mid
- after = mid + len(block)
entries = block.split('\n')
if len(entries) < 2:
@@ -462,6 +458,7 @@
else:
# Find what entries we are looking for, which occur before and
# after this first record.
+ after = start
first_dir_name = (first_fields[1], first_fields[2])
first_loc = bisect.bisect_left(cur_files, first_dir_name)
@@ -469,22 +466,10 @@
pre = cur_files[:first_loc]
# These occur after the current location, which may be in the
# data we read, or might be after the last entry
- middle_files = cur_files[first_loc:]
-
- if len(first_fields) < entry_field_count:
- # We didn't actually get a full record, so just mark
- # everything as pending and continue
- if middle_files:
- pending.append((start, high, middle_files))
- if pre:
- pending.append((low, start-1, pre))
- continue
-
- # These are only after the last entry
- post = []
-
- if middle_files:
- # We have something to look for
+ post = cur_files[first_loc:]
+
+ if post and len(first_fields) >= entry_field_count:
+ # We have files after the first entry
# Parse the last entry
last_entry_num = len(entries)-1
@@ -492,17 +477,17 @@
if len(last_fields) < entry_field_count:
# The very last hunk was not complete,
# read the previous hunk
- # TODO: jam 20070217 There may be an edge case if there are
- # not enough entries that were read.
- after -= len(entries[-1])
+ after = mid + len(block) - len(entries[-1])
last_entry_num -= 1
last_fields = entries[last_entry_num].split('\0')
+ else:
+ after = mid + len(block)
last_dir_name = (last_fields[1], last_fields[2])
- last_loc = bisect.bisect_right(middle_files, last_dir_name)
+ last_loc = bisect.bisect_right(post, last_dir_name)
- post = middle_files[last_loc:]
- middle_files = middle_files[:last_loc]
+ middle_files = post[:last_loc]
+ post = post[last_loc:]
if middle_files:
# We have files that should occur in this block
@@ -512,13 +497,16 @@
if middle_files[0] == first_dir_name:
# We might need to go before this location
- pre.append(middle_files[0])
+ pre.append(first_dir_name)
if middle_files[-1] == last_dir_name:
- post.insert(0, middle_files[-1])
+ post.insert(0, last_dir_name)
# Find out what paths we have
- paths = {first_dir_name:[first_fields],
- last_dir_name:[last_fields]}
+ paths = {first_dir_name:[first_fields]}
+ # last_dir_name might == first_dir_name, so we need to be
+ # careful about whether to append rather than overwrite
+ if last_entry_num != first_entry_num:
+ paths.setdefault(last_dir_name, []).append(last_fields)
for num in xrange(first_entry_num+1, last_entry_num):
# TODO: jam 20070223 We are already splitting here, so
# shouldn't we just split the whole thing rather
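
For reference, the pre/middle/post partitioning that this hunk reworks can be
shown standalone with bisect (hypothetical query tuples, not bzrlib code):

import bisect

cur_files = [('', 'a'), ('', 'f'), ('b', 'c'), ('b', 'd')]  # sorted queries
first_dir_name = ('b', 'c')   # first complete record in the block read
last_dir_name = ('b', 'd')    # last complete record in the block read

first_loc = bisect.bisect_left(cur_files, first_dir_name)
pre = cur_files[:first_loc]          # must lie before this block
post = cur_files[first_loc:]         # at or after the first record
last_loc = bisect.bisect_right(post, last_dir_name)
middle_files = post[:last_loc]       # answerable from this block's data
post = post[last_loc:]               # must lie after this block

assert pre == [('', 'a'), ('', 'f')]
assert middle_files == [('b', 'c'), ('b', 'd')]
assert post == []
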
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2007-02-23 22:08:21 +0000
+++ b/bzrlib/tests/test_dirstate.py 2007-02-23 22:51:01 +0000
@@ -1122,6 +1122,11 @@
self.assertBisect([[expected['']]], state, [''])
self.assertBisect([[expected['a'], expected['a2']]], state, ['a'])
self.assertBisect([[expected['b'], expected['b2']]], state, ['b'])
+ self.assertBisect([[expected['b/c'], expected['b/c2']]], state, ['b/c'])
+ self.assertBisect([[expected['b/d'], expected['b/d2']]], state, ['b/d'])
+ self.assertBisect([[expected['b/d/e'], expected['b/d/e2']]],
+ state, ['b/d/e'])
+ self.assertBisect([[expected['f'], expected['f2']]], state, ['f'])
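
One more illustration, this time of the `after` offset bookkeeping changed in
the dirstate.py hunks above: when the raw read ends in the middle of a record,
`after` has to stop at the end of the last complete record so the truncated
tail gets picked up by a later read. A standalone sketch (hypothetical record
layout, not bzrlib code):

entry_field_count = 4                    # hypothetical fields per record
mid = 1000                               # hypothetical seek offset
block = 'a\0b\0c\0d\nw\0x\0y\0z\npart'   # raw read; final record truncated
entries = block.split('\n')

if len(entries[-1].split('\0')) < entry_field_count:
    # Partial trailing record: back 'after' up past it.
    after = mid + len(block) - len(entries[-1])
else:
    after = mid + len(block)

assert block[:after - mid] == 'a\0b\0c\0d\nw\0x\0y\0z\n'
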
def test_bisect_page_size_too_small(self):
"""If the page size is too small, we will auto increase it."""