Rev 43: Partial support for bzr-svn branches - they are indexed without full texts for now (in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk)
Robert Collins
robertc at robertcollins.net
Sun Jun 22 11:04:39 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 43
revision-id: robertc at robertcollins.net-20080622100438-raipn9bo8wq9sezf
parent: robertc at robertcollins.net-20080622054515-k4qwckj2gfgoip8j
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Sun 2008-06-22 20:04:38 +1000
message:
Partial support for bzr-svn branches - they are indexed without full texts for now.
modified:
DESIGN design-20080608072426-vjoj110dtykfyb7g-1
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'DESIGN'
--- a/DESIGN 2008-06-22 05:45:15 +0000
+++ b/DESIGN 2008-06-22 10:04:38 +0000
@@ -90,12 +90,14 @@
There are many possible places to store indices. Ideally they get deleted when
a branch is deleted, can be shared between branches, update automatically etc.
For now, I'm going to store them in .bzr/bzr-search when the bzrdir is a
-MetaDir, and just refuse to index other branches. To index an svn branch, I
-would expect to use a look-aside table and store the index in e.g.
-~/.bazaar/search/<encoded_url>. Storing in bzr-search is a bit of an
-abstraction violation, but we have a split-out structure for a reason, and by
-prefixing it with bzr-search I leave 'search' available for a 'core' version of
-this feature.
+MetaDir, and just refuse to index other branches. Storing in bzr-search is a
+bit of an abstraction violation, but we have a split-out structure for a
+reason, and by prefixing it with bzr-search I leave 'search' available for a
+'core' version of this feature.
+
+When indexing a bzr-svn branch (actual svn branch, not a branch created *from*
+svn), the index is stored in
+$bzrconfig/bzr-search/svn-lookaside/SVN_UUID/BRANCHPATH.
Search engine to use
++++++++++++++++++++
=== modified file 'index.py'
--- a/index.py 2008-06-22 05:45:15 +0000
+++ b/index.py 2008-06-22 10:04:38 +0000
@@ -25,6 +25,7 @@
from bzrlib import branch as _mod_branch
from bzrlib import ui
from bzrlib.bzrdir import BzrDirMeta1
+import bzrlib.config
from bzrlib.errors import NotBranchError, NoSuchFile, UnknownFormatError
from bzrlib.index import CombinedGraphIndex, GraphIndex, InMemoryGraphIndex
from bzrlib.lockdir import LockDir
@@ -35,6 +36,7 @@
from bzrlib.plugins.search.transport import FileView
from bzrlib.multiparent import NewText
from bzrlib.revision import NULL_REVISION
+from bzrlib.transport import get_transport
from bzrlib.tsort import topo_sort
@@ -53,12 +55,29 @@
def init_index(branch):
"""Initialise an index on branch."""
- if not isinstance(branch.bzrdir, BzrDirMeta1):
+ if isinstance(branch.bzrdir, BzrDirMeta1):
+ transport = branch.bzrdir.transport
+ transport.mkdir('bzr-search')
+ index_transport = transport.clone('bzr-search')
+ else:
# We don't know how to handle this format.
- raise errors.CannotIndex(branch)
- transport = branch.bzrdir.transport
- transport.mkdir('bzr-search')
- index_transport = transport.clone('bzr-search')
+ try:
+ from bzrlib.plugins.svn.branch import SvnBranch
+ except ImportError:
+ SvnBranch = None
+ if type(branch) != SvnBranch:
+ raise errors.CannotIndex(branch)
+ # We can't write to the 'bzrdir' as it is virtual
+ uuid = branch.repository.uuid
+ branch_path = branch.get_branch_path()
+ config = bzrlib.config.config_dir()
+ transport = get_transport(bzrlib.config.config_dir())
+ path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
+ paths = path.split('/')
+ for path in paths:
+ transport = transport.clone(path)
+ transport.ensure_base()
+ index_transport = transport
lockdir = LockDir(index_transport, 'names-lock')
lockdir.create()
lockdir.lock_write()
@@ -127,7 +146,23 @@
:param branch: The branch to get an index for.
:raises: NoSearchIndex if no index can be located.
"""
- return Index(branch.bzrdir.transport.clone('bzr-search'), branch)
+ try:
+ from bzrlib.plugins.svn.branch import SvnBranch
+ except ImportError:
+ SvnBranch = None
+ if type(branch) == SvnBranch:
+ # We can't write to the 'bzrdir' as it is virtual
+ uuid = branch.repository.uuid
+ branch_path = branch.get_branch_path()
+ config = bzrlib.config.config_dir()
+ transport = get_transport(bzrlib.config.config_dir())
+ path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
+ transport = transport.clone(path)
+ commits_only = True
+ else:
+ transport = branch.bzrdir.transport.clone('bzr-search')
+ commits_only = False
+ return Index(transport, branch, commits_only=commits_only)
# XXX: This wants to be a PackCollection subclass with RepositoryPackCollection
@@ -135,11 +170,15 @@
class Index(object):
"""A bzr content index."""
- def __init__(self, index_transport, branch):
+ def __init__(self, index_transport, branch, commits_only=False):
"""Create an index stored at index_transport.
:param index_transport: The path where the index data should be stored.
:param branch: The branch this Index is indexing.
+ :param commits_only: If True, when indexing only attempt to index
+ commits, not file texts. Useful for foreign formats (often commits
+ are the most mature part of such plugins), or for some projects
+ where file contents may not be useful to index.
"""
self._transport = index_transport
try:
@@ -160,6 +199,7 @@
# CombinedGraphIndex for grouping the term indices or doc indices.
self._lock = LockDir(index_transport, 'names-lock')
self._branch = branch
+ self._commits_only = commits_only
def _add_terms(self, index, terms):
"""Add a set of term posting lists to a in progress index.
@@ -276,13 +316,14 @@
steps = ui.ui_factory.nested_progress_bar()
try:
steps.update("Indexing texts", 0, 4)
- terms = self._terms_for_texts(locked_branch.repository,
- revision_group)
- self._add_terms(builder, terms)
- steps.update("Indexing paths", 1, 4)
- terms = self._terms_for_file_terms(locked_branch.repository,
- terms, order_dict)
- self._add_terms(builder, terms)
+ if not self._commits_only:
+ terms = self._terms_for_texts(locked_branch.repository,
+ revision_group)
+ self._add_terms(builder, terms)
+ steps.update("Indexing paths", 1, 4)
+ terms = self._terms_for_file_terms(
+ locked_branch.repository, terms, order_dict)
+ self._add_terms(builder, terms)
steps.update("Indexing commits", 2, 4)
terms = self._terms_for_revs(locked_branch.repository,
revision_group)
More information about the bazaar-commits
mailing list