Rev 4482: Continue breaking things to build it up cleanly. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
John Arbash Meinel
john at arbash-meinel.com
Sat Jun 20 06:12:28 BST 2009
At http://bazaar.launchpad.net/~jameinel/bzr/1.17-rework-annotate
------------------------------------------------------------
revno: 4482
revision-id: john at arbash-meinel.com-20090620051151-1b86gqpfsuhckbth
parent: john at arbash-meinel.com-20090620032259-dqrl35nuwd604ut6
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-rework-annotate
timestamp: Sat 2009-06-20 00:11:51 -0500
message:
Continue breaking things to build it up cleanly.
At this point we have an _expand_record method which is used to turn record
streams into text lines.
Eventually it will do things like track 'num_compression_children' so that
it can re-use content objects, etc.
The code already handles removing 'text' entries as part of Annotator.
At the moment, nodes get queued up, but we haven't worked out when/how to pull
them back out of the queue.
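
The queueing scheme is roughly the following (a minimal standalone sketch of
the pending-delta pattern, not the bzrlib code; 'records', 'apply_delta' and
the dict shapes are illustrative assumptions):

    # Sketch: expand delta records whose basis text may arrive later.
    def expand_all(records):
        # records: iterable of (key, compression_parent, data) in read order
        contents = {}   # key -> expanded text
        pending = {}    # basis key -> [records waiting on that basis]

        def apply_delta(base, delta):
            # Stand-in for real knit delta application.
            return base + delta

        def expand(key, parent, data):
            if parent is not None and parent not in contents:
                # Basis not seen yet: queue this delta for later.
                pending.setdefault(parent, []).append((key, parent, data))
                return
            if parent is None:
                contents[key] = data        # fulltext record
            else:
                contents[key] = apply_delta(contents[parent], data)
            # Expanding this text may unblock deltas queued against it.
            for child in pending.pop(key, []):
                expand(*child)

        for key, parent, data in records:
            expand(key, parent, data)
        return contents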
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2009-06-19 21:33:30 +0000
+++ b/bzrlib/knit.py 2009-06-20 05:11:51 +0000
@@ -664,8 +664,6 @@
see parse_fulltext which this inverts.
"""
- # TODO: jam 20070209 We only do the caching thing to make sure that
- # the origin is a valid utf-8 line, eventually we could remove it
return ['%s %s' % (o, t) for o, t in content._lines]
def lower_line_delta(self, delta):
@@ -3321,6 +3319,9 @@
self._content_objects = {}
# The number of children that depend on this fulltext content object
self._num_compression_children = {}
+ # Delta records that need their compression parent before they can be
+ # expanded
+ self._pending_deltas = {}
self._all_build_details = {}
@@ -3363,6 +3364,11 @@
self._num_needed_children[parent_key] += 1
else:
self._num_needed_children[parent_key] = 1
+ if compression_parent:
+ if compression_parent in self._num_compression_children:
+ self._num_compression_children[compression_parent] += 1
+ else:
+ self._num_compression_children[compression_parent] = 1
missing_versions = this_iteration.difference(build_details.keys())
if missing_versions:
@@ -3382,7 +3388,6 @@
try:
records = self._get_build_graph(key)
for key, text, num_lines in self._extract_texts(records):
- self._text_cache[key] = text
yield key, text, num_lines
return
except errors.RetryWithNewPacks, e:
@@ -3390,6 +3395,43 @@
# The cached build_details are no longer valid
self._all_build_details.clear()
+ def _expand_record(self, key, parent_keys, compression_parent, record,
+ record_details):
+ if compression_parent:
+ if compression_parent not in self._content_objects:
+ # Waiting for the parent
+ self._pending_deltas.setdefault(compression_parent, []).append(
+ (key, parent_keys, record, record_details))
+ return None
+ # We have the basis parent, so expand the delta
+ base_content = self._content_objects[compression_parent]
+ content, _ = self._vf._factory.parse_record(
+ key, record, record_details, base_content, copy_base_content=True)
+ else:
+ # Fulltext record
+ content, _ = self._vf._factory.parse_record(
+ key, record, record_details, None)
+ self._content_objects[key] = content
+ lines = content.text()
+ self._text_cache[key] = lines
+ return lines
+ # TODO: once a record is expanded, check self._pending_deltas for
+ # compression children that were waiting on it, for example:
+ # if key in self._pending_deltas:
+ #     children = self._pending_deltas.pop(key)
+ #     for child_key, child_parents, child_record, child_details in children:
+ #         lines = self._expand_record(child_key, child_parents, key,
+ #                                     child_record, child_details)
+ #         to_process.append((child_key, child_parents, lines))
+
def _extract_texts(self, records):
"""Extract the various texts needed based on records"""
# We iterate in the order read, rather than a strict order requested
@@ -3401,64 +3443,27 @@
pending_annotation = {}
# Children that are missing their compression parent
pending_deltas = {}
- for (key, record,
- digest) in self._vf._read_records_iter(records):
- # parent_ids = [p for p in parent_ids if p not in self._ghosts]
+ for (key, record, digest) in self._vf._read_records_iter(records):
+ # ghosts?
details = self._all_build_details[key]
- (index_memo, compression_parent, parent_keys,
- record_details) = details
+ (_, compression_parent, parent_keys, record_details) = details
assert parent_keys == self._parent_map[key]
- # TODO: Remove the punning between compression parents, and
- # parent_ids, we should be able to do this without assuming
- # the build order
- if compression_parent:
- # This is a delta, do we have the parent fulltext?
- if compression_parent not in self._content_objects:
- # Waiting for the parent
- pending_deltas.setdefault(compression_parent, []).append(
- (key, parent_keys, record, record_details))
- continue
- # We can build this as a fulltext
- parent_content = self._content_objects[compression_parent]
- content, delta = self._vf._factory.parse_record(
- key, record, record_details,
- parent_content,
- # TODO: track when we can copy the base
- copy_base_content=True)
+ lines = self._expand_record(key, parent_keys, compression_parent,
+ record, record_details)
+ if lines is None:
+ # Pending delta should be queued up
+ continue
+ # At this point, we may be able to yield this content, if all
+ # parents are also finished
+ for parent_key in parent_keys:
+ if parent_key not in self._annotations_cache:
+ # still waiting on a parent text
+ pending_annotation.setdefault(parent_key,
[]).append((key, parent_keys, lines))
+ break
else:
- # No compression parent means we have a fulltext
- content, delta = self._vf._factory.parse_record(
- key, record, record_details, None)
- self._content_objects[key] = content
- to_process = [(key, parent_keys, content)]
- # Check for compression children that we can expand
- if key in pending_deltas:
- children = pending_deltas.pop(key)
- for child_key, child_parent_keys, child_record, child_details in children:
- child_content, child_delta = self._vf._factory.parse_record(
- child_key, child_record, child_details,
- content,
- # TODO: track when we can copy the base
- copy_base_content=True)
- self._content_objects[child_key] = child_content
- to_process.append((child_key, child_parent_keys, child_content))
- while to_process:
- cur = to_process
- to_process = []
- for key, parent_keys, content in cur:
- # Check if all parents are present
- for parent_key in parent_keys:
- if parent_key not in self._annotations_cache:
- # still waiting on a parent text
- pending_annotation.setdefault(parent_key,
- []).append((key, parent_keys, content))
- break
- else:
- # All parents present
- lines = content.text()
- yield key, lines, len(lines)
- if key in pending_annotation:
- to_process.extend(pending_annotation.pop(key))
+ # All parents present
+ yield key, lines, len(lines)
try:
from bzrlib._knit_load_data_c import _load_data_c as _load_data
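
For reference, the yield-only-when-parents-are-done bookkeeping that
_extract_texts is converging on can be sketched on its own (illustrative
only; 'arrivals' and 'parent_map' are assumed inputs, not bzrlib API):

    # Sketch: emit keys only after all of their parents have been emitted,
    # mirroring the pending_annotation bookkeeping above.
    def annotation_order(arrivals, parent_map):
        # arrivals: keys in the order their texts become available
        # parent_map: key -> tuple of parent keys
        done = set()
        blocked = {}    # parent key -> [keys waiting on that parent]
        order = []
        for key in arrivals:
            to_process = [key]
            while to_process:
                cur, to_process = to_process, []
                for k in cur:
                    missing = [p for p in parent_map[k] if p not in done]
                    if missing:
                        # Still waiting on a parent text; park on one of them.
                        blocked.setdefault(missing[0], []).append(k)
                        continue
                    done.add(k)
                    order.append(k)
                    # Completing k may unblock keys parked on it.
                    to_process.extend(blocked.pop(k, []))
        return order

For example, annotation_order(['B', 'A'], {'A': (), 'B': ('A',)}) returns
['A', 'B']: B arrives first but is parked until its parent A is done.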
=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py 2009-06-16 13:57:14 +0000
+++ b/bzrlib/tests/test_knit.py 2009-06-20 05:11:51 +0000
@@ -1313,6 +1313,48 @@
return _KndxIndex(transport, mapper, lambda:None, allow_writes, lambda:True)
+class Test_KnitAnnotator(TestCaseWithMemoryTransport):
+
+ def make_annotator(self):
+ factory = knit.make_pack_factory(True, True, 1)
+ vf = factory(self.get_transport())
+ return knit._KnitAnnotator(vf)
+
+ def test__expand_fulltext(self):
+ ann = self.make_annotator()
+ rev_key = ('rev-id',)
+ res = ann._expand_record(rev_key, (('parent-id',),), None,
+ ['line1\n', 'line2\n'], ('fulltext', True))
+ # The content object and text lines should be cached appropriately
+ self.assertEqual(['line1\n', 'line2\n'], res)
+ content_obj = ann._content_objects[rev_key]
+ self.assertEqual(['line1\n', 'line2\n'], content_obj._lines)
+ self.assertEqual(res, content_obj.text())
+ self.assertEqual(res, ann._text_cache[rev_key])
+
+ def test__expand_delta_no_parent(self):
+ # Parent isn't available yet, so we return nothing, but queue up this
+ # node for later processing
+ ann = self.make_annotator()
+ rev_key = ('rev-id',)
+ parent_key = ('parent-id',)
+ record = ['0,1,1\n', 'new-line\n']
+ details = ('line-delta', False)
+ res = ann._expand_record(rev_key, (parent_key,), parent_key,
+ record, details)
+ self.assertEqual(None, res)
+ self.assertTrue(parent_key in ann._pending_deltas)
+ pending = ann._pending_deltas[parent_key]
+ self.assertEqual(1, len(pending))
+ self.assertEqual((rev_key, (parent_key,), record, details), pending[0])
+
+ def test_record_delta_removes_basis(self):
+ ann = self.make_annotator()
+ ann._expand_record(('parent-id',), (), None,
+ ['line1\n', 'line2\n'], ('fulltext', False))
+ ann._num_compression_children[('parent-id',)] = 2
+
+
class KnitTests(TestCaseWithTransport):
"""Class containing knit test helper routines."""