[MERGE] WSGI backend for HTTP smart server, and deployment documentation

Andrew Bennetts andrew at canonical.com
Fri Oct 6 08:46:07 BST 2006


The branch at http://people.ubuntu.com/~andrew/bzr/wsgi-smart-server/ implements
a WSGI backend for the HTTP smart server, and adds some documentation
demonstrating how to deploy it with Apache and mod_fastcgi.

The branch includes the changes in my HTTP smart server branch
http://people.ubuntu.com/~andrew/bzr/http-smart-server/.  I've attached a diff
relative to that branch.

It currently has a serious security issue: it will allow access to files outside
of the directory it serves.  I've put a warning in the documentation about this,
but I intend to fix this fairly soon.

Please review and give feedback -- I'm particularly interested in ways to
further simplify deployment, and to make sure the documentation is clear.  I'm
no Apache guru, so please let me know if you think there's a better way to
intercept requests for .bzr/smart URLs and hand them to bzr via FastCGI or
whatever.  Also let me know if you have thoughts on the glue script.

-Andrew.

-------------- next part --------------
=== added file 'bzrlib/tests/test_wsgi.py'
--- bzrlib/tests/test_wsgi.py	1970-01-01 00:00:00 +0000
+++ bzrlib/tests/test_wsgi.py	2006-10-05 09:46:33 +0000
@@ -0,0 +1,189 @@
+# Copyright (C) 2006 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for WSGI application"""
+
+from cStringIO import StringIO
+
+from bzrlib import tests
+from bzrlib.transport.http import wsgi
+from bzrlib.transport import memory
+
+class TestWSGI(tests.TestCase):
+
+    def setUp(self):
+        tests.TestCase.setUp(self)
+        self.status = None
+        self.headers = None
+
+    def build_environ(self, **kw):
+        """Builds an environ dict with all fields required by PEP 333.
+        
+        The resulting environ dict will be updated with any **kw that are passed.
+        """
+        environ = {
+            # Required CGI variables
+            'REQUEST_METHOD': 'GET',
+            'SCRIPT_NAME': '/script/name/',
+            'PATH_INFO': 'path/info',
+            'SERVER_NAME': 'test',
+            'SERVER_PORT': '9999',
+            'SERVER_PROTOCOL': 'HTTP/1.0',
+
+            # Required WSGI variables
+            'wsgi.version': (1,0),
+            'wsgi.url_scheme': 'http',
+            'wsgi.input': StringIO(''),
+            'wsgi.errors': StringIO(),
+            'wsgi.multithread': False,
+            'wsgi.multiprocess': False,
+            'wsgi.run_once': True,
+        }
+        environ.update(kw)
+        return environ
+        
+    def read_response(self, iterable):
+        response = ''
+        for string in iterable:
+            response += string
+        return response
+
+    def start_response(self, status, headers):
+        self.status = status
+        self.headers = headers
+
+    def test_construct(self):
+        wsgi.SmartWSGIApp(None)
+
+    def test_http_get_rejected(self):
+        # GET requests are rejected.
+        app = wsgi.SmartWSGIApp(None)
+        environ = self.build_environ(REQUEST_METHOD='GET')
+        iterable = app(environ, self.start_response)
+        self.read_response(iterable)
+        self.assertEqual('405 Method not allowed', self.status)
+        self.assertTrue(('Allow', 'POST') in self.headers)
+        
+    def test_smart_wsgi_app_uses_given_relpath(self):
+        # The SmartWSGIApp should use the "bzrlib.relpath" field from the
+        # WSGI environ to construct the transport for this request, by cloning
+        # its base transport with the given relpath.
+        transport = FakeTransport()
+        wsgi_app = wsgi.SmartWSGIApp(transport)
+        def make_request(transport, write_func):
+            request = FakeRequest(transport, write_func)
+            self.request = request
+            return request
+        wsgi_app.make_request = make_request
+        fake_input = StringIO('fake request')
+        environ = self.build_environ()
+        environ.update({
+            'REQUEST_METHOD': 'POST',
+            'CONTENT_LENGTH': len(fake_input.getvalue()),
+            'wsgi.input': fake_input,
+            'bzrlib.relpath': 'foo/bar',
+        })
+        iterable = wsgi_app(environ, self.start_response)
+        response = self.read_response(iterable)
+        self.assertEqual([('clone', 'foo/bar')] , transport.calls)
+
+    def test_smart_wsgi_app_request_and_response(self):
+        # SmartWSGIApp reads the smart request from the 'wsgi.input' file-like
+        # object in the environ dict, and returns the response via the iterable
+        # returned to the WSGI handler.
+        transport = memory.MemoryTransport()
+        transport.put_bytes('foo', 'some bytes')
+        wsgi_app = wsgi.SmartWSGIApp(transport)
+        def make_request(transport, write_func):
+            request = FakeRequest(transport, write_func)
+            self.request = request
+            return request
+        wsgi_app.make_request = make_request
+        fake_input = StringIO('fake request')
+        environ = self.build_environ()
+        environ.update({
+            'REQUEST_METHOD': 'POST',
+            'CONTENT_LENGTH': len(fake_input.getvalue()),
+            'wsgi.input': fake_input,
+            'bzrlib.relpath': 'foo',
+        })
+        iterable = wsgi_app(environ, self.start_response)
+        response = self.read_response(iterable)
+        self.assertEqual('200 OK', self.status)
+        self.assertEqual('got bytes: fake request', response)
+
+    def test_relpath_setter(self):
+        # wsgi.RelpathSetter is WSGI "middleware" to set the 'bzrlib.relpath'
+        # variable.
+        calls = []
+        def fake_app(environ, start_response):
+            calls.append(environ['bzrlib.relpath'])
+        wrapped_app = wsgi.RelpathSetter(
+            fake_app, prefix='/abc/', path_var='FOO')
+        wrapped_app({'FOO': '/abc/xyz/.bzr/smart'}, None)
+        self.assertEqual(['xyz'], calls)
+       
+    def test_relpath_setter_bad_path(self):
+        # wsgi.RelpathSetter will reject paths that don't match the prefix
+        # or suffix with a 404.  This is probably a sign of misconfiguration; a
+        # server shouldn't ever be invoking our WSGI application with bad paths.
+        def fake_app(environ, start_response):
+            self.fail('The app should never be called when the path is wrong')
+        wrapped_app = wsgi.RelpathSetter(
+            fake_app, prefix='/abc/', path_var='FOO')
+        iterable = wrapped_app(
+            {'FOO': 'AAA/abc/xyz/.bzr/smart'}, self.start_response)
+        self.read_response(iterable)
+        self.assertTrue(self.status.startswith('404'))
+        
+    def test_make_app(self):
+        # The make_app helper constructs a SmartWSGIApp wrapped in a
+        # RelpathSetter.
+        app = wsgi.make_app(
+            root='a root',
+            prefix='a prefix',
+            path_var='a path_var')
+        self.assertIsInstance(app, wsgi.RelpathSetter)
+        self.assertIsInstance(app.app, wsgi.SmartWSGIApp)
+        self.assertEndsWith(app.app.backing_transport.base, 'a%20root/')
+        self.assertEqual(app.prefix, 'a prefix')
+        self.assertEqual(app.path_var, 'a path_var')
+
+
+class FakeRequest(object):
+    
+    def __init__(self, transport, write_func):
+        self.transport = transport
+        self.write_func = write_func
+        self.accepted_bytes = ''
+
+    def accept_bytes(self, bytes):
+        self.accepted_bytes = bytes
+        self.write_func('got bytes: ' + bytes)
+
+    def next_read_size(self):
+        return 0
+
+
+class FakeTransport(object):
+
+    def __init__(self):
+        self.calls = []
+
+    def clone(self, relpath):
+        self.calls.append(('clone', relpath))
+        return self
+

=== added file 'bzrlib/transport/http/wsgi.py'
--- bzrlib/transport/http/wsgi.py	1970-01-01 00:00:00 +0000
+++ bzrlib/transport/http/wsgi.py	2006-10-05 10:22:54 +0000
@@ -0,0 +1,114 @@
+# Copyright (C) 2006 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""WSGI application for bzr HTTP smart server.
+
+For more information about WSGI, see PEP 333:
+    http://www.python.org/dev/peps/pep-0333/
+"""
+
+from cStringIO import StringIO
+
+from bzrlib.transport import get_transport, smart
+from bzrlib.urlutils import local_path_to_url
+    
+
+def make_app(root, prefix, path_var):
+    """Convenience function to construct a WSGI bzr smart server.
+    
+    :param root: a local path that requests will be relative to.
+    :param prefix: See RelpathSetter.
+    :param path_var: See RelpathSetter.
+    """
+    base_transport = get_transport('readonly+' + local_path_to_url(root))
+    app = SmartWSGIApp(base_transport)
+    app = RelpathSetter(app, prefix, path_var)
+    return app
+
+
+class RelpathSetter(object):
+    """WSGI middleware to set 'bzrlib.relpath' in the environ.
+    
+    Different servers can invoke a SmartWSGIApp in different ways.  This
+    middleware allows an administrator to configure how the SmartWSGIApp will
+    determine what path it should be serving for a given request for many common
+    situations.
+
+    For example, a request for "/some/prefix/repo/branch/.bzr/smart" received by
+    a typical Apache and mod_fastcgi configuration will set `REQUEST_URI` to
+    "/some/prefix/repo/branch/.bzr/smart".  A RelpathSetter with
+    prefix="/some/prefix/" and path_var="REQUEST_URI" will set that request's
+    'bzrlib.relpath' variable to "repo/branch".
+    """
+    
+    def __init__(self, app, prefix='', path_var='REQUEST_URI'):
+        """Constructor.
+
+        :param app: WSGI app to wrap, e.g. a SmartWSGIApp instance.
+        :param path_var: the variable in the WSGI environ to calculate the
+            'bzrlib.relpath' variable from.
+        :param prefix: a prefix to strip from the variable specified in
+            path_var before setting 'bzrlib.relpath'.
+        """
+        self.app = app
+        self.prefix = prefix
+        self.path_var = path_var
+
+    def __call__(self, environ, start_response):
+        path = environ[self.path_var]
+        suffix = '/.bzr/smart'
+        if not (path.startswith(self.prefix) and path.endswith(suffix)):
+            start_response('404 Not Found', {})
+            return []
+        environ['bzrlib.relpath'] = path[len(self.prefix):-len(suffix)]
+        return self.app(environ, start_response)
+
+
+class SmartWSGIApp(object):
+    """A WSGI application for the bzr smart server."""
+
+    def __init__(self, backing_transport):
+        """Constructor.
+
+        :param backing_transport: a transport.  Requests will be processed
+            relative to this transport.
+        """
+        self.backing_transport = backing_transport
+
+    def __call__(self, environ, start_response):
+        """WSGI application callable."""
+        if environ['REQUEST_METHOD'] != 'POST':
+            start_response('405 Method not allowed', [('Allow', 'POST')])
+            return []
+
+        relpath = environ['bzrlib.relpath']
+        transport = self.backing_transport.clone(relpath)
+        #assert transport.base.startswith(self.backing_transport.base)
+        out_buffer = StringIO()
+        smart_protocol_request = self.make_request(transport, out_buffer.write)
+        request_data_length = int(environ['CONTENT_LENGTH'])
+        request_data_bytes = environ['wsgi.input'].read(request_data_length)
+        smart_protocol_request.accept_bytes(request_data_bytes)
+        assert smart_protocol_request.next_read_size() == 0, (
+            "not finished reading, but all data sent to protocol.")
+        response_data = out_buffer.getvalue()
+        headers = [('Content-type', 'application/octet-stream')]
+        headers.append(("Content-Length", str(len(response_data))))
+        start_response('200 OK', headers)
+        return [response_data]
+
+    def make_request(self, transport, write_func):
+        return smart.SmartServerRequestProtocolOne(transport, write_func)

=== added file 'doc/http_smart_server.txt'
--- doc/http_smart_server.txt	1970-01-01 00:00:00 +0000
+++ doc/http_smart_server.txt	2006-10-06 07:34:48 +0000
@@ -0,0 +1,128 @@
+===========================
+Serving Bazaar with FastCGI
+===========================
+
+**This feature is EXPERIMENTAL and is NOT SECURE.  It will allow access to
+arbitrary files on your server.**
+
+This document describes one way to set up a Bazaar HTTP smart server, using
+Apache 2.0 and FastCGI.
+
+Example
+=======
+
+You have a webserver already publishing `/srv/example.com/www/code` as
+`http://example.com/code/...` with plain HTTP.  It contains bzr branches and
+directories like `/srv/example.com/www/code/branch-one` and
+`/srv/example.com/www/code/my-repo/branch-two`.  You want to provide read-only
+smart server access to these directories in addition to the existing HTTP
+access.
+
+Configuring Apache 2.0
+----------------------
+
+First, configure mod_fastcgi, e.g. by adding lines like these to your
+httpd.conf::
+
+    LoadModule fastcgi_module /usr/lib/apache2/modules/mod_fastcgi.so
+    FastCgiIpcDir /var/lib/apache2/fastcgi
+    
+In our example, we're already serving `/srv/example.com/www/code` at
+`http://example.com/code`, so our existing Apache configuration would look
+like::
+
+    Alias /code /srv/example.com/www/code
+    <Directory /srv/example.com/www/code>
+        Options Indexes
+        # ...
+    </Directory>
+
+We need to change it to handle all requests for URLs ending in `.bzr/smart`.  It
+will look like::
+
+    Alias /code /srv/example.com/www/code
+    <Directory /srv/example.com/www/code>
+        Options Indexes FollowSymLinks
+        RewriteEngine On
+        RewriteBase /code
+        RewriteRule ^(.*)/\.bzr/smart$ /srv/example.com/scripts/bzr-smart.fcgi
+    </Directory>
+    
+    Alias /srv/example.com/scripts/bzr-smart.fcgi /srv/example.com/scripts/bzr-smart.fcgi
+    <Directory /srv/example.com/scripts>
+        Options ExecCGI
+        <Files bzr-smart.fcgi>
+            SetHandler fastcgi-script
+        </Files>
+    </Directory>
+    
+This instructs Apache to hand requests for any URL ending with `/.bzr/smart`
+inside `/code` to a Bazaar smart server via FastCGI.
+
+Refer to the mod_rewrite_ and mod_fastcgi_ documentation for further
+information.
+
+.. _mod_rewrite: http://httpd.apache.org/docs/2.0/mod/mod_rewrite.html
+.. _mod_fastcgi: http://www.fastcgi.com/mod_fastcgi/docs/mod_fastcgi.html
+
+Configuring Bazaar
+------------------
+
+We've configured Apache to run the smart server at
+`/srv/example.com/scripts/bzr-smart.fcgi`.  This is just a simple script we need
+to write to configure a smart server, and glue it to the FastCGI gateway.
+Here's what it looks like::
+
+    import fcgi
+    from bzrlib.transport.http import wsgi
+
+    smart_server_app = wsgi.make_app(
+        root='/srv/example.com/www/code',
+        prefix='/code/',
+        path_var='REQUEST_URI')
+
+    fcgi.WSGIServer(smart_server_app).run()
+        
+The `fcgi` module can be found at http://svn.saddi.com/py-lib/trunk/fcgi.py.  It
+is part of flup_.
+
+.. _flup: http://www.saddi.com/software/flup/
+
+Clients
+-------
+
+Now you can use `bzr+http://` URLs, e.g.::
+
+    bzr log bzr+http://example.com/code/my-branch
+
+Plain HTTP access should continue to work::
+
+    bzr log http://example.com/code/my-branch
+
+
+Advanced configuration
+======================
+
+Because the Bazaar HTTP smart server is a WSGI application, it can be used with
+any 3rd-party WSGI middleware or server that conforms to the WSGI standard.  The
+only requirements are:
+
+  * to construct a `SmartWSGIApp`, you need to specify a **root transport** that it
+    will serve.
+  * each request's `environ` dict must have a **'bzrlib.relpath'** variable set.
+
+The `make_app` helper used in the example constructs a `SmartWSGIApp` with a
+transport based on the `root` path given to it, and calculates the
+'bzrlib.relpath' for each request based on the `prefix` and `path_var`
+arguments.  In the example above, it will take the 'REQUEST_URI' (which is set
+by Apache), strip the '/code/' prefix and the '/.bzr/smart' suffix, and set that
+as the 'bzrlib.relpath', so that a request for '/code/foo/bar/.bzr/smart' will
+result in a 'bzrlib.relpath' of 'foo/bar'.
+
+It's possible to configure a smart server for a non-local transport, or that
+does arbitrary path translations, etc, by constructing a `SmartWSGIApp`
+directly.  Refer to the docstrings of `bzrlib.transport.http.wsgi` and the `WSGI
+standard`_ for further information.
+
+.. _WSGI standard: http://www.python.org/dev/peps/pep-0333/
+

=== modified file 'BRANCH.TODO'
--- BRANCH.TODO	2006-10-04 02:24:48 +0000
+++ BRANCH.TODO	2006-10-06 07:30:21 +0000
@@ -1,3 +1,10 @@
 # This file is for listing TODOs for branches that are being worked on.
 # It should ALWAYS be empty in the mainline or in integration branches.
 # 
+
+Security: it should be impossible, by default, to access files above the base of
+the backing transport of the SmartServerRequestHandler.  Currently '..' and the
+like are not vetted, however.
+
+Similarly, the SmartWSGIApp should also be careful to disallow '..' and the
+like.

=== modified file 'bzrlib/tests/__init__.py'
--- bzrlib/tests/__init__.py	2006-09-25 19:29:26 +0000
+++ bzrlib/tests/__init__.py	2006-10-04 06:28:51 +0000
@@ -1605,6 +1605,7 @@
                    'bzrlib.tests.test_weave',
                    'bzrlib.tests.test_whitebox',
                    'bzrlib.tests.test_workingtree',
+                   'bzrlib.tests.test_wsgi',
                    'bzrlib.tests.test_xml',
                    ]
     test_transport_implementations = [

=== modified file 'doc/index.txt'
--- doc/index.txt	2006-09-13 04:48:33 +0000
+++ doc/index.txt	2006-10-05 10:22:24 +0000
@@ -47,6 +47,10 @@
   
     How to run a server to allow remote access to Bazaar branches.
 
+* `Running a Bazaar HTTP server <http_smart_server.htm>`_
+
+    How to run a smart HTTP server to allow remote access to Bazaar branches.
+
 This document automatically created based on output of **bzr help**:
 
 * `Man page (help for bzr commands) <bzr_man.htm>`_



More information about the bazaar mailing list