Rev 18: Refactoring: make ComponentIndexBuilder responsible for returning a component index and doing the upload, rather than being an index itself. in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
Robert Collins
robertc at robertcollins.net
Wed Jun 11 04:59:41 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 18
revision-id: robertc at robertcollins.net-20080611035938-hw90ua35seca1x1y
parent: robertc at robertcollins.net-20080610070818-tjhe1vvf007ayff0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-06-11 13:59:38 +1000
message:
Refactoring: make ComponentIndexBuilder responsible for returning a component index and doing the upload, rather than being an index itself.
modified:
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'index.py'
--- a/index.py 2008-06-09 12:34:11 +0000
+++ b/index.py 2008-06-11 03:59:38 +0000
@@ -210,43 +210,21 @@
# here: index inventory/paths
# here: index revisions
_ensure_regexes()
- index = ComponentIndexBuilder()
+ builder = ComponentIndexBuilder()
terms = self._terms_for_revs(locked_branch.repository,
revisions_to_index)
- self._add_terms(index, terms)
+ self._add_terms(builder, terms)
for rev_id in revisions_to_index:
- index.add_revision(rev_id)
- self._add_index(index)
+ builder.add_revision(rev_id)
+ self._add_index(builder)
- def _add_index(self, index):
+ def _add_index(self, builder):
"""Add a new component index to the list of indices."""
# The index name is the md5sum of the revision index serialised form.
- index_bytes = index.revision_index.finish().read()
- index_name = md5.new(index_bytes).hexdigest()
+ index_name, index_value, elements = builder.upload_index(
+ self._upload_transport)
if index_name in self._current_names:
raise Exception("md5 collision! rad! %s" % index_name)
- # Upload preparatory to renaming into place.
- # write to disc.
- self._upload_transport.put_bytes_non_atomic(index_name + ".rix",
- index_bytes)
- rev_length = len(index_bytes)
- # XXX: We should be able to do:
- # term_length = self._upload_transport.put_file_non_atomic(
- # index_name + '.tix', index.term_index.finish())
- # doc_length = self._upload_transport.put_file_non_atomic(
- # index_name + '.dix', index.document_index.finish())
- # but, put_file_non_atomic is not interface tested to return the byte
- # length.
- index_bytes = index.term_index.finish().read()
- term_length = len(index_bytes)
- self._upload_transport.put_bytes_non_atomic(
- index_name + '.tix', index_bytes)
- index_bytes = index.document_index.finish().read()
- doc_length = len(index_bytes)
- self._upload_transport.put_bytes_non_atomic(
- index_name + '.dix', index_bytes)
- del index_bytes
- index_value = "%d %d %d" % (rev_length, term_length, doc_length)
# The indices are uploaded, we only need to rename to activate.
self._refresh_indices()
if index_name in self._current_names:
@@ -261,10 +239,11 @@
new_names.add_node((name,), value, ())
# Now, as the last step, rename the new index into place and update
# the disk list of names.
- for suffix in [".rix", ".dix", ".tix"]:
- self._upload_transport.rename(index_name + suffix,
- '../indices/' + index_name + suffix)
+ for element in elements:
+ self._upload_transport.rename(element,
+ '../indices/' + element)
self._transport.put_file('names', new_names.finish())
+ index = builder.get_index()
self._orig_names[index_name] = (index_value, index)
finally:
self._lock.unlock()
@@ -340,7 +319,8 @@
found_documents = []
# Use a set to remove duplicates
term_keys = set([(term,) for term in termlist])
- for term_index in self._term_indices:
+ for value, component in self._current_names.values():
+ term_index = component.term_index
doc_references = {}
for node in term_index.iter_entries(term_keys):
doc_references[node[1]] = node[2]
@@ -479,3 +459,44 @@
def add_revision(self, revision_id):
"""List a revision as having been indexed by this index."""
self.revision_index.add_node((revision_id,), '', ())
+
+ def get_index(self):
+ """Returns the created index."""
+ return ComponentIndex(self.revision_index, self.term_index,
+ self.document_index)
+
+ def upload_index(self, upload_transport):
+ """Upload the index in preparation for insertion.
+
+ :param upload_transport: The transport to upload to.
+ :return: The index name, the value for the names list, and a list of
+ the filenames that comprise the index.
+ """
+ # Upload preparatory to renaming into place.
+ # write to disc.
+ index_bytes = self.revision_index.finish().read()
+ index_name = md5.new(index_bytes).hexdigest()
+ upload_transport.put_bytes_non_atomic(index_name + ".rix",
+ index_bytes)
+ rev_length = len(index_bytes)
+ # XXX: We should be able to do:
+ # term_length = upload_transport.put_file_non_atomic(
+ # index_name + '.tix', index.term_index.finish())
+ # doc_length = upload_transport.put_file_non_atomic(
+ # index_name + '.dix', index.document_index.finish())
+ # but, put_file_non_atomic is not interface tested to return the byte
+ # length.
+ index_bytes = self.term_index.finish().read()
+ term_length = len(index_bytes)
+ upload_transport.put_bytes_non_atomic(
+ index_name + '.tix', index_bytes)
+ index_bytes = self.document_index.finish().read()
+ doc_length = len(index_bytes)
+ upload_transport.put_bytes_non_atomic(
+ index_name + '.dix', index_bytes)
+ del index_bytes
+ index_value = "%d %d %d" % (rev_length, term_length, doc_length)
+ elements = []
+ for suffix in [".rix", ".dix", ".tix"]:
+ elements.append(index_name + suffix)
+ return index_name, index_value, elements
More information about the bazaar-commits mailing list