Rev 3239: Finish removing method and noeol from general knowledge, in http://bzr.arbash-meinel.com/branches/bzr/1.3-dev/annotate_cleanup
John Arbash Meinel
john at arbash-meinel.com
Wed Mar 5 17:33:41 GMT 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.3-dev/annotate_cleanup
------------------------------------------------------------
revno: 3239
revision-id:john at arbash-meinel.com-20080305173058-l2x5d0qxpciuva1u
parent: john at arbash-meinel.com-20080305163617-87y5f6xz7s3gjdm7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: annotate_cleanup
timestamp: Wed 2008-03-05 17:30:58 +0000
message:
Finish removing method and noeol from general knowledge,
pushing them down into the Content and Factory objects.
modified:
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
-------------- next part --------------
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-03-05 16:36:17 +0000
+++ b/bzrlib/knit.py 2008-03-05 17:30:58 +0000
@@ -135,6 +135,9 @@
class KnitContent(object):
"""Content of a knit version to which deltas can be applied."""
+ def __init__(self):
+ self._should_strip_eol = False
+
def annotate(self):
"""Return a list of (origin, text) tuples."""
return list(self.annotate_iter())
@@ -143,6 +146,12 @@
"""Apply delta to this object to become new_version_id."""
raise NotImplementedError(self.apply_delta)
+ def cleanup_eol(self, copy_on_mutate=True):
+ if self._should_strip_eol:
+ if copy_on_mutate:
+ self._lines = self._lines[:]
+ self.strip_last_line_newline()
+
def line_delta_iter(self, new_lines):
"""Generate line-based delta from this content to new_lines."""
new_texts = new_lines.text()
@@ -188,6 +197,7 @@
"""Annotated content."""
def __init__(self, lines):
+ KnitContent.__init__(self)
self._lines = lines
def annotate_iter(self):
@@ -205,6 +215,7 @@
def strip_last_line_newline(self):
line = self._lines[-1][1].rstrip('\n')
self._lines[-1] = (self._lines[-1][0], line)
+ self._should_strip_eol = False
def text(self):
try:
@@ -217,6 +228,17 @@
"line in annotated knit missing annotation information: %s"
% (e,))
+ def text_lines(self):
+ """Return the official fulltext for this content.
+
+ This includes stripping the final newline if it should be done.
+ """
+ lines = [text for o, l in self._lines]
+ if self._should_strip_eol:
+ anno, line = lines[-1]
+ lines[-1] = (anno, line.rstrip('\n'))
+ return lines
+
def copy(self):
return AnnotatedKnitContent(self._lines[:])
@@ -230,6 +252,7 @@
"""
def __init__(self, lines, version_id):
+ KnitContent.__init__(self)
self._lines = lines
self._version_id = version_id
@@ -252,12 +275,59 @@
def strip_last_line_newline(self):
self._lines[-1] = self._lines[-1].rstrip('\n')
+ self._should_strip_eol = False
def text(self):
return self._lines
-
-class KnitAnnotateFactory(object):
+ def text_lines(self):
+ """Return the official fulltext for this content.
+
+ This includes stripping the final newline if it should be done.
+ """
+ lines = self._lines
+ if self._should_strip_eol:
+ lines = lines[:]
+ lines[-1] = lines[-1].rstrip('\n')
+ return lines
+
+
+class _KnitFactory(object):
+ """Base class for common Factory functions."""
+
+ def parse_record(self, version_id, record, record_details,
+ base_content, copy_base_content=True):
+ """Parse a record into a full content object.
+
+ :param version_id: The official version id for this content
+ :param record: The data returned by read_records_iter()
+ :param record_details: Details about the record returned by
+ get_build_details
+ :param base_content: If get_build_details returns a compression_parent,
+ you must supply a base_content here, otherwise pass None
+ :param copy_base_content: When building from the base_content, decide
+ whether to copy it and return a new object, or to modify it in
+ place.
+ :return: (content, delta) A Content object and possibly a line-delta,
+ delta may be None
+ """
+ method, noeol = record_details
+ if method == 'line-delta':
+ assert base_content is not None
+ if copy_base_content:
+ content = base_content.copy()
+ else:
+ content = base_content
+ delta = self.parse_line_delta(record, version_id)
+ content.apply_delta(delta, version_id)
+ else:
+ content = self.parse_fulltext(record, version_id)
+ delta = None
+ content._should_strip_eol = noeol
+ return (content, delta)
+
+
+class KnitAnnotateFactory(_KnitFactory):
"""Factory for creating annotated Content objects."""
annotated = True
@@ -369,7 +439,7 @@
return content.annotate_iter()
-class KnitPlainFactory(object):
+class KnitPlainFactory(_KnitFactory):
"""Factory for creating plain Content objects."""
annotated = False
@@ -876,14 +946,13 @@
This data is intended to be used for retrieving the knit records.
- A dict of version_id to (method, index_memo, next, parents, noeol) is
+ A dict of version_id to (record_details, index_memo, next) is
returned.
method is the way referenced data should be applied.
index_memo is the handle to pass to the data access to actually get the
data
next is the build-parent of the version, or None for fulltexts.
parents is the version_ids of the parents of this version
- noeol is a flag indicating if there is a final newline character
"""
component_data = {}
pending_components = version_ids
@@ -891,11 +960,11 @@
build_details = self._index.get_build_details(pending_components)
pending_components = set()
for version_id, details in build_details.items():
- (index_memo, compression_parent, parents, content_details) = details
- method = content_details[0]
+ (index_memo, compression_parent, parents, record_details) = details
+ method = record_details[0]
if compression_parent is not None:
pending_components.add(compression_parent)
- component_data[version_id] = (method, index_memo,
+ component_data[version_id] = (record_details, index_memo,
compression_parent)
return component_data
@@ -1040,8 +1109,8 @@
def _get_record_map(self, version_ids):
"""Produce a dictionary of knit records.
- The keys are version_ids, the values are tuples of (method, content,
- digest, next).
+ The keys are version_ids, the values are tuples of (record_details,
+ content, digest, next).
method is the way the content should be applied.
content is a KnitContent object.
digest is the SHA1 digest of this version id after all steps are done
@@ -1049,14 +1118,14 @@
If the method is fulltext, next will be None.
"""
position_map = self._get_components_positions(version_ids)
- # c = component_id, m = method, i_m = index_memo, n = next
- records = [(c, i_m) for c, (m, i_m, n)
+ # c = component_id, r = record_details, i_m = index_memo, n = next
+ records = [(c, i_m) for c, (r, i_m, n)
in position_map.iteritems()]
record_map = {}
for component_id, content, digest in \
self._data.read_records_iter(records):
- (method, index_memo, next) = position_map[component_id]
- record_map[component_id] = method, content, digest, next
+ (record_details, index_memo, next) = position_map[component_id]
+ record_map[component_id] = record_details, content, digest, next
return record_map
@@ -1098,35 +1167,25 @@
components = []
cursor = version_id
while cursor is not None:
- method, data, digest, next = record_map[cursor]
- components.append((cursor, method, data, digest))
+ record_details, data, digest, next = record_map[cursor]
+ components.append((cursor, record_details, data, digest))
if cursor in content_map:
break
cursor = next
content = None
- for component_id, method, data, digest in reversed(components):
+ for (component_id, record_details, data,
+ digest) in reversed(components):
if component_id in content_map:
content = content_map[component_id]
else:
- if method == 'fulltext':
- assert content is None
- content = self.factory.parse_fulltext(data, version_id)
- elif method == 'line-delta':
- delta = self.factory.parse_line_delta(data, version_id)
- if multiple_versions:
- # only doing this when we want multiple versions
- # output avoids list copies - which reference and
- # dereference many strings.
- content = content.copy()
- content.apply_delta(delta, version_id)
+ content, delta = self.factory.parse_record(version_id,
+ data, record_details, content,
+ copy_base_content=multiple_versions)
if multiple_versions:
content_map[component_id] = content
- if 'no-eol' in self._index.get_options(version_id):
- if multiple_versions:
- content = content.copy()
- content.strip_last_line_newline()
+ content.cleanup_eol(copy_on_mutate=multiple_versions)
final_content[version_id] = content
# digest here is the digest from the last applied component.
@@ -1435,7 +1494,7 @@
:param version_ids: An iterable of version_ids.
:return: A dict of version_id:(index_memo, compression_parent,
- parents, content_details).
+ parents, record_details).
index_memo
opaque structure to pass to read_records to extract the raw
data
@@ -1443,9 +1502,9 @@
Content that this record is built upon, may be None
parents
Logical parents of this node
- content_details
+ record_details
extra information about the content which needs to be passed to
- Factory.parse_raw_data
+ Factory.parse_record
"""
result = {}
for version_id in version_ids:
@@ -2805,12 +2864,9 @@
self._heads_provider = None
- def _add_fulltext_content(self, revision_id, content_obj, noeol_flag):
+ def _add_fulltext_content(self, revision_id, content_obj):
self._fulltext_contents[revision_id] = content_obj
- if noeol_flag:
- content_obj = content_obj.copy()
- content_obj.strip_last_line_newline()
- fulltext = content_obj.text()
+ fulltext = content_obj.text_lines()
self._fulltexts[revision_id] = fulltext
# XXX: It would probably be good to check the sha1digest here
return fulltext
@@ -2878,7 +2934,7 @@
pending = set()
for rev_id, details in build_details.iteritems():
(index_memo, compression_parent, parents,
- content_details) = details
+ record_details) = details
self._revision_id_graph[rev_id] = parents
records.append((rev_id, index_memo))
pending.update(p for p in parents
@@ -2899,15 +2955,14 @@
# We iterate in the order read, rather than a strict order requested
# However, process what we can, and put off to the side things that still
# need parents, cleaning them up when those parents are processed.
- for (rev_id, raw_content,
+ for (rev_id, record,
digest) in self._knit._data.read_records_iter(records):
if rev_id in self._annotated_lines:
continue
parent_ids = self._revision_id_graph[rev_id]
details = self._all_build_details[rev_id]
(index_memo, compression_parent, parents,
- content_details) = details
- method, noeol = content_details
+ record_details) = details
nodes_to_annotate = []
# TODO: Remove the punning between compression parents, and
# parent_ids, we should be able to do this without assuming
@@ -2916,39 +2971,34 @@
# There are no parents for this node, so just add it
# TODO: This probably needs to be decoupled
assert compression_parent is None
- fulltext_content = self._knit.factory.parse_fulltext(
- raw_content, rev_id)
- fulltext = self._add_fulltext_content(rev_id, fulltext_content,
- noeol)
+ fulltext_content, delta = self._knit.factory.parse_record(
+ rev_id, record, record_details, None)
+ fulltext = self._add_fulltext_content(rev_id, fulltext_content)
nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
parent_ids, left_matching_blocks=None))
else:
- child = (rev_id, parent_ids, raw_content)
+ child = (rev_id, parent_ids, record)
# Check if all the parents are present
self._check_parents(child, nodes_to_annotate)
while nodes_to_annotate:
# Should we use a queue here instead of a stack?
- (rev_id, parent_ids, raw_content) = nodes_to_annotate.pop()
+ (rev_id, parent_ids, record) = nodes_to_annotate.pop()
(index_memo, compression_parent, parents,
- content_details) = self._all_build_details[rev_id]
- method, noeol = content_details
+ record_details) = self._all_build_details[rev_id]
if compression_parent is not None:
parent_fulltext_content = self._fulltext_contents[compression_parent]
- delta = self._knit.factory.parse_line_delta(raw_content,
- rev_id)
- # TODO: only copy when the parent is still needed elsewhere
- fulltext_content = parent_fulltext_content.copy()
- fulltext_content.apply_delta(delta, rev_id)
- fulltext = self._add_fulltext_content(rev_id,
- fulltext_content, noeol)
+ fulltext_content, delta = self._knit.factory.parse_record(
+ rev_id, record, record_details, parent_fulltext_content,
+ copy_base_content=True)
+ fulltext = self._add_fulltext_content(rev_id, fulltext_content)
parent_fulltext = self._fulltexts[parent_ids[0]]
blocks = KnitContent.get_line_delta_blocks(delta,
parent_fulltext, fulltext)
else:
fulltext_content = self._knit.factory.parse_fulltext(
- raw_content, rev_id)
+ record, rev_id)
fulltext = self._add_fulltext_content(rev_id,
- fulltext_content, noeol)
+ fulltext_content)
blocks = None
nodes_to_annotate.extend(
self._add_annotation(rev_id, fulltext, parent_ids,
More information about the bazaar-commits
mailing list