Rev 50: Giving - as the first letter of a search term will exclude hits for it from the search results. e.g. bzr search -- foo -bar. (Robert Collins) in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
Robert Collins
robertc at robertcollins.net
Fri Aug 1 02:35:33 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 50
revision-id: robertc at robertcollins.net-20080801013531-9mzodxpbhh9bucm2
parent: robertc at robertcollins.net-20080714174419-wqgue26g1t2347mh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Fri 2008-08-01 11:35:31 +1000
message:
Giving - as the first letter of a search term will exclude hits for it from the search results. e.g. bzr search -- foo -bar. (Robert Collins)
modified:
NEWS news-20080608052041-z5bahsl8kwl0uf4x-2
__init__.py __init__.py-20080608052041-z5bahsl8kwl0uf4x-4
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
setup.py setup.py-20080608052041-z5bahsl8kwl0uf4x-6
tests/test_blackbox.py test_blackbox.py-20080608052041-z5bahsl8kwl0uf4x-9
tests/test_index.py test_index.py-20080608055509-hnimeek7q8tctkqf-4
=== modified file 'NEWS'
--- a/NEWS 2008-06-29 22:33:07 +0000
+++ b/NEWS 2008-08-01 01:35:31 +0000
@@ -27,6 +27,10 @@
IMPROVEMENTS:
+ * Giving ``-`` as the first letter of a search term will exclude hits for
+ it from the search results. e.g. ``bzr search -- foo -bar``.
+ (Robert Collins)
+
BUGFIXES:
* Handles file ids and paths containing any of '"&<> - characters that were
=== modified file '__init__.py'
--- a/__init__.py 2008-07-11 12:41:59 +0000
+++ b/__init__.py 2008-08-01 01:35:31 +0000
@@ -45,7 +45,7 @@
bzrlib.commands.register_command(getattr(commands, 'cmd_' + command))
-version_info = (1, 6, 0, 'dev', 2)
+version_info = (1, 6, 0, 'dev', 3)
def auto_index_branch(result):
=== modified file 'index.py'
--- a/index.py 2008-07-14 17:44:19 +0000
+++ b/index.py 2008-08-01 01:35:31 +0000
@@ -480,22 +480,41 @@
_ensure_regexes()
self._refresh_indices()
# Use a set to remove duplicates
- termlist = set(termlist)
+ new_termlist = set()
+ exclude_terms = set()
+ for term in termlist:
+ if term[0][0] == '-':
+ # exclude this term
+ exclude_terms.add((term[0][1:],) + term[1:])
+ else:
+ new_termlist.add(term)
+ # remove duplicates that were included *and* excluded
+ termlist = new_termlist - exclude_terms
term_keys = [None, set(), set()]
for term in termlist:
term_keys[len(term)].add(term)
+ for term in exclude_terms:
+ term_keys[len(term)].add(term)
for value, component in self._current_names.values():
term_index = component.term_index
# TODO: push into Component
- # TODO: use a dequeue?
+ found_term_count = 0
+ # TODO: use deques?
term_info = []
+ exclude_info = []
for node in chain(term_index.iter_entries(term_keys[1]),
component.term_2_index.iter_entries(term_keys[2])):
term_id, posting_count, posting_start, posting_length = \
node[2].split(" ")
- term_info.append((int(posting_count), term_id,
- int(posting_start), int(posting_length)))
+ info = (int(posting_count), term_id, int(posting_start),
+ int(posting_length))
+ if node[1] not in exclude_terms:
+ term_info.append(info)
+ found_term_count += 1
+ else:
+ exclude_info.append(info)
+ excluded = 1
if not termlist:
yield component, termlist, None
continue
@@ -525,6 +544,20 @@
post_index = GraphIndex(view, post_name, posting_length)
common_doc_keys = set([node[1] for node in
post_index.iter_entries(common_doc_keys)])
+ if common_doc_keys:
+ # exclude from largest-first, which should give us fewer
+ # exclusion steps.
+ exclude_info.sort(reverse=True)
+ while common_doc_keys and exclude_info:
+ _, term_id, posting_start, posting_length = exclude_info.pop(0)
+ posting_stop = posting_start + posting_length
+ post_name = "term_list." + term_id
+ filemap = {post_name:(posting_start, posting_stop)}
+ view = FileView(self._indices_transport,
+ component.name + '.pack', filemap)
+ post_index = GraphIndex(view, post_name, posting_length)
+ common_doc_keys.difference_update(set([node[1] for node in
+ post_index.iter_entries(common_doc_keys)]))
yield component, termlist, common_doc_keys
def search(self, termlist):
=== modified file 'setup.py'
--- a/setup.py 2008-07-11 12:41:59 +0000
+++ b/setup.py 2008-08-01 01:35:31 +0000
@@ -3,7 +3,7 @@
bzr_plugin_name = 'search'
-bzr_plugin_version = (1, 6, 0, 'dev', 2)
+bzr_plugin_version = (1, 6, 0, 'dev', 3)
bzr_commands = ['index', 'search']
bzr_minimum_version = (1, 6, 0)
=== modified file 'tests/test_blackbox.py'
--- a/tests/test_blackbox.py 2008-07-01 13:28:34 +0000
+++ b/tests/test_blackbox.py 2008-08-01 01:35:31 +0000
@@ -48,6 +48,17 @@
self.assertEqual('', err)
self.assertEqual("Revision id '%s'. Summary: 'first post'\n" % rev_id1, out)
+ def test_simple_exclusion(self):
+ tree = self.make_branch_and_tree('.')
+ init_index(tree.branch)
+ rev_id1 = tree.commit('first post')
+ rev_id2 = tree.commit('second post')
+ index_url(self.get_url('.'))
+ index = open_index_url(self.get_url('.'))
+ out, err = self.run_bzr(['search', '--', 'post', '-first'])
+ self.assertEqual('', err)
+ self.assertEqual("Revision id '%s'. Summary: 'second post'\n" % rev_id2, out)
+
def test_directory_option(self):
tree = self.make_branch_and_tree('otherdir')
init_index(tree.branch)
=== modified file 'tests/test_index.py'
--- a/tests/test_index.py 2008-07-11 12:40:46 +0000
+++ b/tests/test_index.py 2008-08-01 01:35:31 +0000
@@ -263,6 +263,32 @@
self.assertIsInstance(results[0], index.RevisionHit)
self.assertEqual((revid,), results[0].revision_key)
+ def test_search_trivial_exclude(self):
+ tree = self.make_branch_and_tree('tree')
+ rev_index = index.init_index(tree.branch)
+ # The double-space is a cheap smoke test for the tokeniser.
+ revid1 = tree.commit('first post')
+ revid2 = tree.commit('second post')
+ rev_index.index_revisions(tree.branch, [revid1, revid2])
+ results = list(rev_index.search([('post',), ('-first',)]))
+ self.assertEqual(1, len(results))
+ self.assertIsInstance(results[0], index.RevisionHit)
+ self.assertEqual((revid2,), results[0].revision_key)
+
+ def test_search_only_exclude(self):
+ tree = self.make_branch_and_tree('tree')
+ rev_index = index.init_index(tree.branch)
+ # The double-space is a cheap smoke test for the tokeniser.
+ revid1 = tree.commit('first post')
+ revid2 = tree.commit('second post')
+ rev_index.index_revisions(tree.branch, [revid1, revid2])
+ self.assertRaises(TypeError, list, rev_index.search([('-first',)]))
+ self.knownFailure('exclude-only searches not implemented')
+ results = list(rev_index.search([('-first',)]))
+ self.assertEqual(1, len(results))
+ self.assertIsInstance(results[0], index.RevisionHit)
+ self.assertEqual((revid2,), results[0].revision_key)
+
def test_suggestions_trivial(self):
tree = self.make_branch_and_tree('tree')
rev_index = index.init_index(tree.branch)
More information about the bazaar-commits
mailing list