[MERGE] WSGI backend for HTTP smart server, and deployment documentation
Andrew Bennetts
andrew at canonical.com
Fri Oct 6 08:46:07 BST 2006
The branch at http://people.ubuntu.com/~andrew/bzr/wsgi-smart-server/ implements
a WSGI backend for the HTTP smart server, and adds some documentation
demonstrating how to deploy it with Apache and mod_fastcgi.
The branch includes the changes in my HTTP smart server branch
http://people.ubuntu.com/~andrew/bzr/http-smart-server/. I've attached a diff
relative to that branch.
It currently has a serious security issue: it will allow access to files outside
of the directory it serves. I've put a warning in the documentation about this,
but I intend to fix this fairly soon.
Please review and give feedback -- I'm particularly interested in ways to
further simplify deployment, and to make sure the documentation is clear. I'm
no Apache guru, so please let me know if you think there's a better way to
intercept requests for .bzr/smart URLs and hand them to bzr via FastCGI or
whatever. Also let me know if you have thoughts on the glue script.
-Andrew.
-------------- next part --------------
=== added file 'bzrlib/tests/test_wsgi.py'
--- bzrlib/tests/test_wsgi.py 1970-01-01 00:00:00 +0000
+++ bzrlib/tests/test_wsgi.py 2006-10-05 09:46:33 +0000
@@ -0,0 +1,189 @@
+# Copyright (C) 2006 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Tests for WSGI application"""
+
+from cStringIO import StringIO
+
+from bzrlib import tests
+from bzrlib.transport.http import wsgi
+from bzrlib.transport import memory
+
+class TestWSGI(tests.TestCase):
+
+ def setUp(self):
+ tests.TestCase.setUp(self)
+ self.status = None
+ self.headers = None
+
+ def build_environ(self, **kw):
+ """Builds an environ dict with all fields required by PEP 333.
+
+ The resulting environ dict will be updated with any **kw that are passed.
+ """
+ environ = {
+ # Required CGI variables
+ 'REQUEST_METHOD': 'GET',
+ 'SCRIPT_NAME': '/script/name/',
+ 'PATH_INFO': 'path/info',
+ 'SERVER_NAME': 'test',
+ 'SERVER_PORT': '9999',
+ 'SERVER_PROTOCOL': 'HTTP/1.0',
+
+ # Required WSGI variables
+ 'wsgi.version': (1,0),
+ 'wsgi.url_scheme': 'http',
+ 'wsgi.input': StringIO(''),
+ 'wsgi.errors': StringIO(),
+ 'wsgi.multithread': False,
+ 'wsgi.multiprocess': False,
+ 'wsgi.run_once': True,
+ }
+ environ.update(kw)
+ return environ
+
+ def read_response(self, iterable):
+ response = ''
+ for string in iterable:
+ response += string
+ return response
+
+ def start_response(self, status, headers):
+ self.status = status
+ self.headers = headers
+
+ def test_construct(self):
+ wsgi.SmartWSGIApp(None)
+
+ def test_http_get_rejected(self):
+ # GET requests are rejected.
+ app = wsgi.SmartWSGIApp(None)
+ environ = self.build_environ(REQUEST_METHOD='GET')
+ iterable = app(environ, self.start_response)
+ self.read_response(iterable)
+ self.assertEqual('405 Method not allowed', self.status)
+ self.assertTrue(('Allow', 'POST') in self.headers)
+
+ def test_smart_wsgi_app_uses_given_relpath(self):
+ # The SmartWSGIApp should use the "bzrlib.relpath" field from the
+ # WSGI environ to construct the transport for this request, by cloning
+ # its base transport with the given relpath.
+ transport = FakeTransport()
+ wsgi_app = wsgi.SmartWSGIApp(transport)
+ def make_request(transport, write_func):
+ request = FakeRequest(transport, write_func)
+ self.request = request
+ return request
+ wsgi_app.make_request = make_request
+ fake_input = StringIO('fake request')
+ environ = self.build_environ()
+ environ.update({
+ 'REQUEST_METHOD': 'POST',
+ 'CONTENT_LENGTH': len(fake_input.getvalue()),
+ 'wsgi.input': fake_input,
+ 'bzrlib.relpath': 'foo/bar',
+ })
+ iterable = wsgi_app(environ, self.start_response)
+ response = self.read_response(iterable)
+ self.assertEqual([('clone', 'foo/bar')] , transport.calls)
+
+ def test_smart_wsgi_app_request_and_response(self):
+ # SmartWSGIApp reads the smart request from the 'wsgi.input' file-like
+ # object in the environ dict, and returns the response via the iterable
+ # returned to the WSGI handler.
+ transport = memory.MemoryTransport()
+ transport.put_bytes('foo', 'some bytes')
+ wsgi_app = wsgi.SmartWSGIApp(transport)
+ def make_request(transport, write_func):
+ request = FakeRequest(transport, write_func)
+ self.request = request
+ return request
+ wsgi_app.make_request = make_request
+ fake_input = StringIO('fake request')
+ environ = self.build_environ()
+ environ.update({
+ 'REQUEST_METHOD': 'POST',
+ 'CONTENT_LENGTH': len(fake_input.getvalue()),
+ 'wsgi.input': fake_input,
+ 'bzrlib.relpath': 'foo',
+ })
+ iterable = wsgi_app(environ, self.start_response)
+ response = self.read_response(iterable)
+ self.assertEqual('200 OK', self.status)
+ self.assertEqual('got bytes: fake request', response)
+
+ def test_relpath_setter(self):
+ # wsgi.RelpathSetter is WSGI "middleware" to set the 'bzrlib.relpath'
+ # variable.
+ calls = []
+ def fake_app(environ, start_response):
+ calls.append(environ['bzrlib.relpath'])
+ wrapped_app = wsgi.RelpathSetter(
+ fake_app, prefix='/abc/', path_var='FOO')
+ wrapped_app({'FOO': '/abc/xyz/.bzr/smart'}, None)
+ self.assertEqual(['xyz'], calls)
+
+ def test_relpath_setter_bad_path(self):
+ # wsgi.RelpathSetter will reject paths that don't match the prefix
+ # or suffix with a 404. This is probably a sign of misconfiguration; a
+ # server shouldn't ever be invoking our WSGI application with bad paths.
+ def fake_app(environ, start_response):
+ self.fail('The app should never be called when the path is wrong')
+ wrapped_app = wsgi.RelpathSetter(
+ fake_app, prefix='/abc/', path_var='FOO')
+ iterable = wrapped_app(
+ {'FOO': 'AAA/abc/xyz/.bzr/smart'}, self.start_response)
+ self.read_response(iterable)
+ self.assertTrue(self.status.startswith('404'))
+
+ def test_make_app(self):
+ # The make_app helper constructs a SmartWSGIApp wrapped in a
+ # RelpathSetter.
+ app = wsgi.make_app(
+ root='a root',
+ prefix='a prefix',
+ path_var='a path_var')
+ self.assertIsInstance(app, wsgi.RelpathSetter)
+ self.assertIsInstance(app.app, wsgi.SmartWSGIApp)
+ self.assertEndsWith(app.app.backing_transport.base, 'a%20root/')
+ self.assertEqual(app.prefix, 'a prefix')
+ self.assertEqual(app.path_var, 'a path_var')
+
+
+class FakeRequest(object):
+
+ def __init__(self, transport, write_func):
+ self.transport = transport
+ self.write_func = write_func
+ self.accepted_bytes = ''
+
+ def accept_bytes(self, bytes):
+ self.accepted_bytes = bytes
+ self.write_func('got bytes: ' + bytes)
+
+ def next_read_size(self):
+ return 0
+
+
+class FakeTransport(object):
+
+ def __init__(self):
+ self.calls = []
+
+ def clone(self, relpath):
+ self.calls.append(('clone', relpath))
+ return self
+
=== added file 'bzrlib/transport/http/wsgi.py'
--- bzrlib/transport/http/wsgi.py 1970-01-01 00:00:00 +0000
+++ bzrlib/transport/http/wsgi.py 2006-10-05 10:22:54 +0000
@@ -0,0 +1,114 @@
+# Copyright (C) 2006 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""WSGI application for bzr HTTP smart server.
+
+For more information about WSGI, see PEP 333:
+ http://www.python.org/dev/peps/pep-0333/
+"""
+
+from cStringIO import StringIO
+
+from bzrlib.transport import get_transport, smart
+from bzrlib.urlutils import local_path_to_url
+
+
+def make_app(root, prefix, path_var):
+ """Convenience function to construct a WSGI bzr smart server.
+
+ :param root: a local path that requests will be relative to.
+ :param prefix: See RelpathSetter.
+ :param path_var: See RelpathSetter.
+ """
+ base_transport = get_transport('readonly+' + local_path_to_url(root))
+ app = SmartWSGIApp(base_transport)
+ app = RelpathSetter(app, prefix, path_var)
+ return app
+
+
+class RelpathSetter(object):
+ """WSGI middleware to set 'bzrlib.relpath' in the environ.
+
+ Different servers can invoke a SmartWSGIApp in different ways. This
+ middleware allows an administrator to configure how the SmartWSGIApp will
+ determine what path it should be serving for a given request for many common
+ situations.
+
+ For example, a request for "/some/prefix/repo/branch/.bzr/smart" received by
+ a typical Apache and mod_fastcgi configuration will set `REQUEST_URI` to
+ "/some/prefix/repo/branch/.bzr/smart". A RelpathSetter with
+ prefix="/some/prefix/" and path_var="REQUEST_URI" will set that request's
+ 'bzrlib.relpath' variable to "repo/branch".
+ """
+
+ def __init__(self, app, prefix='', path_var='REQUEST_URI'):
+ """Constructor.
+
+ :param app: WSGI app to wrap, e.g. a SmartWSGIApp instance.
+ :param path_var: the variable in the WSGI environ to calculate the
+ 'bzrlib.relpath' variable from.
+ :param prefix: a prefix to strip from the variable specified in
+ path_var before setting 'bzrlib.relpath'.
+ """
+ self.app = app
+ self.prefix = prefix
+ self.path_var = path_var
+
+ def __call__(self, environ, start_response):
+ path = environ[self.path_var]
+ suffix = '/.bzr/smart'
+ if not (path.startswith(self.prefix) and path.endswith(suffix)):
+ start_response('404 Not Found', {})
+ return []
+ environ['bzrlib.relpath'] = path[len(self.prefix):-len(suffix)]
+ return self.app(environ, start_response)
+
+
+class SmartWSGIApp(object):
+ """A WSGI application for the bzr smart server."""
+
+ def __init__(self, backing_transport):
+ """Constructor.
+
+ :param backing_transport: a transport. Requests will be processed
+ relative to this transport.
+ """
+ self.backing_transport = backing_transport
+
+ def __call__(self, environ, start_response):
+ """WSGI application callable."""
+ if environ['REQUEST_METHOD'] != 'POST':
+ start_response('405 Method not allowed', [('Allow', 'POST')])
+ return []
+
+ relpath = environ['bzrlib.relpath']
+ transport = self.backing_transport.clone(relpath)
+ #assert transport.base.startswith(self.backing_transport.base)
+ out_buffer = StringIO()
+ smart_protocol_request = self.make_request(transport, out_buffer.write)
+ request_data_length = int(environ['CONTENT_LENGTH'])
+ request_data_bytes = environ['wsgi.input'].read(request_data_length)
+ smart_protocol_request.accept_bytes(request_data_bytes)
+ assert smart_protocol_request.next_read_size() == 0, (
+ "not finished reading, but all data sent to protocol.")
+ response_data = out_buffer.getvalue()
+ headers = [('Content-type', 'application/octet-stream')]
+ headers.append(("Content-Length", str(len(response_data))))
+ start_response('200 OK', headers)
+ return [response_data]
+
+ def make_request(self, transport, write_func):
+ return smart.SmartServerRequestProtocolOne(transport, write_func)
=== added file 'doc/http_smart_server.txt'
--- doc/http_smart_server.txt 1970-01-01 00:00:00 +0000
+++ doc/http_smart_server.txt 2006-10-06 07:34:48 +0000
@@ -0,0 +1,128 @@
+===========================
+Serving Bazaar with FastCGI
+===========================
+
+**This feature is EXPERIMENTAL and is NOT SECURE. It will allow access to
+arbitrary files on your server.**
+
+This document describes one way to set up a Bazaar HTTP smart server, using
+Apache 2.0 and FastCGI.
+
+Example
+=======
+
+You have a webserver already publishing `/srv/example.com/www/code` as
+`http://example.com/code/...` with plain HTTP. It contains bzr branches and
+directories like `/srv/example.com/www/code/branch-one` and
+`/srv/example.com/www/code/my-repo/branch-two`. You want to provide read-only
+smart server access to these directories in addition to the existing HTTP
+access.
+
+Configuring Apache 2.0
+----------------------
+
+First, configure mod_fastcgi, e.g. by adding lines like these to your
+httpd.conf::
+
+ LoadModule fastcgi_module /usr/lib/apache2/modules/mod_fastcgi.so
+ FastCgiIpcDir /var/lib/apache2/fastcgi
+
+In our example, we're already serving `/srv/example.com/www/code` at
+`http://example.com/code`, so our existing Apache configuration would look
+like::
+
+ Alias /code /srv/example.com/www/code
+ <Directory /srv/example.com/www/code>
+ Options Indexes
+ # ...
+ </Directory>
+
+We need to change it to handle all requests for URLs ending in `.bzr/smart`. It
+will look like::
+
+ Alias /code /srv/example.com/www/code
+ <Directory /srv/example.com/www/code>
+ Options Indexes FollowSymLinks
+ RewriteEngine On
+ RewriteBase /code
+ RewriteRule ^(.*)/\.bzr/smart$ /srv/example.com/scripts/bzr-smart.fcgi
+ </Directory>
+
+ Alias /srv/example.com/scripts/bzr-smart.fcgi /srv/example.com/scripts/bzr-smart.fcgi
+ <Directory /srv/example.com/scripts>
+ Options ExecCGI
+ <Files bzr-smart.fcgi>
+ SetHandler fastcgi-script
+ </Files>
+ </Directory>
+
+This instructs Apache to hand requests for any URL ending with `/.bzr/smart`
+inside `/code` to a Bazaar smart server via FastCGI.
+
+Refer to the mod_rewrite_ and mod_fastcgi_ documentation for further
+information.
+
+.. _mod_rewrite: http://httpd.apache.org/docs/2.0/mod/mod_rewrite.html
+.. _mod_fastcgi: http://www.fastcgi.com/mod_fastcgi/docs/mod_fastcgi.html
+
+Configuring Bazaar
+------------------
+
+We've configured Apache to run the smart server at
+`/srv/example.com/scripts/bzr-smart.fcgi`. This is just a simple script we need
+to write to configure a smart server, and glue it to the FastCGI gateway.
+Here's what it looks like::
+
+ import fcgi
+ from bzrlib.transport.http import wsgi
+
+ smart_server_app = wsgi.make_app(
+ root='/srv/example.com/www/code',
+ prefix='/code/',
+ path_var='REQUEST_URI')
+
+ fcgi.WSGIServer(smart_server_app).run()
+
+The `fcgi` module can be found at http://svn.saddi.com/py-lib/trunk/fcgi.py. It
+is part of flup_.
+
+.. _flup: http://www.saddi.com/software/flup/
+
+Clients
+-------
+
+Now you can use `bzr+http://` URLs, e.g.::
+
+ bzr log bzr+http://example.com/code/my-branch
+
+Plain HTTP access should continue to work::
+
+ bzr log http://example.com/code/my-branch
+
+
+Advanced configuration
+======================
+
+Because the Bazaar HTTP smart server is a WSGI application, it can be used with
+any 3rd-party WSGI middleware or server that conforms to the WSGI standard. The
+only requirements are:
+
+ * to construct a `SmartWSGIApp`, you need to specify a **root transport** that it
+ will serve.
+ * each request's `environ` dict must have a **'bzrlib.relpath'** variable set.
+
+The `make_app` helper used in the example constructs a `SmartWSGIApp` with a
+transport based on the `root` path given to it, and calculates the
+'bzrlib.relpath' for each request based on the `prefix` and `path_var`
+arguments. In the example above, it will take the 'REQUEST_URI' (which is set
+by Apache), strip the '/code/' prefix and the '/.bzr/smart' suffix, and set that
+as the 'bzrlib.relpath', so that a request for '/code/foo/bar/.bzr/smart' will
+result in a 'bzrlib.relpath' of 'foo/bar'.
+
+It's possible to configure a smart server for a non-local transport, or that
+does arbitrary path translations, etc, by constructing a `SmartWSGIApp`
+directly. Refer to the docstrings of `bzrlib.transport.http.wsgi` and the `WSGI
+standard`_ for further information.
+
+.. _WSGI standard: http://www.python.org/dev/peps/pep-0333/
+
=== modified file 'BRANCH.TODO'
--- BRANCH.TODO 2006-10-04 02:24:48 +0000
+++ BRANCH.TODO 2006-10-06 07:30:21 +0000
@@ -1,3 +1,10 @@
# This file is for listing TODOs for branches that are being worked on.
# It should ALWAYS be empty in the mainline or in integration branches.
#
+
+Security: it should be impossible, by default, to access files above the base of
+the backing transport of the SmartServerRequestHandler. Currently '..' and the
+like are not vetted, however.
+
+Similarly, the SmartWSGIApp should also be careful to disallow '..' and the
+like.
=== modified file 'bzrlib/tests/__init__.py'
--- bzrlib/tests/__init__.py 2006-09-25 19:29:26 +0000
+++ bzrlib/tests/__init__.py 2006-10-04 06:28:51 +0000
@@ -1605,6 +1605,7 @@
'bzrlib.tests.test_weave',
'bzrlib.tests.test_whitebox',
'bzrlib.tests.test_workingtree',
+ 'bzrlib.tests.test_wsgi',
'bzrlib.tests.test_xml',
]
test_transport_implementations = [
=== modified file 'doc/index.txt'
--- doc/index.txt 2006-09-13 04:48:33 +0000
+++ doc/index.txt 2006-10-05 10:22:24 +0000
@@ -47,6 +47,10 @@
How to run a server to allow remote access to Bazaar branches.
+* `Running a Bazaar HTTP server <http_smart_server.htm>`_
+
+ How to run a smart HTTP server to allow remote access to Bazaar branches.
+
This document automatically created based on output of **bzr help**:
* `Man page (help for bzr commands) <bzr_man.htm>`_
More information about the bazaar
mailing list