[MERGE] bzr+http:// should always try POSTing to the same location, not lots of child locations.
Andrew Bennetts
andrew at canonical.com
Fri Dec 22 06:11:51 GMT 2006
There's a problem with the current bzr+http:// transport. If you do e.g. "bzr
branch bzr+http://host/foo", it ought to just POST lots of requests to
http://host/foo/.bzr/smart, but it ends up POSTing to many different URLs below
that point, e.g. http://host/foo/.bzr/branch/.bzr/smart. That is, instead of
sending "get\x01.bzr/branch/format" to $URL/.bzr/smart, it will send
"get\x01format" to $URL/.bzr/branch/.bzr/smart.
This is preventing bzr.dev from working with bzr+http://bazaar.launchpad.net/*
URLs at the moment.
This bundle fixes that, by making cloning of SmartHTTPTransport remember the
initial URL, so that it can always POST to the intended location.
It also teaches urlutils.normalize_url to unescape certain characters that STD
66 says can be safely normalised like that. E.g. "%7E" can safely be converted
to "~". This is helpful when comparing http://host/~bob and http://host/%7Ebob,
and the SmartHTTPTransport fix I describe needs it because at some point the
HTTP transport over-zealously escapes URLs.
-Andrew.
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
# Always POST to the same .bzr/smart URL for a given branch, even when accessing files in subdirectories.
# committer: Andrew Bennetts <andrew.bennetts at canonical.com>
# date: Fri 2006-12-22 17:04:24.269999981 +1100
=== modified file NEWS // last-changed:andrew.bennetts at canonical.com-2006122205
... 5608-t1cfil9ws1l95k0f
--- NEWS
+++ NEWS
@@ -77,6 +77,9 @@
* Single-letter short options are no longer globally declared. (Martin
Pool)
+ * urlutils.normalize_url now unescapes unreserved characters, such as "~".
+ (Andrew Bennetts)
+
BUG FIXES:
* ``bzr missing --verbose`` was showing adds/removals in the wrong
=== modified file bzrlib/tests/test_smart_transport.py
--- bzrlib/tests/test_smart_transport.py
+++ bzrlib/tests/test_smart_transport.py
@@ -1508,6 +1508,21 @@
else:
return self.writefile
+
+class SmartHTTPTransportTestCase(tests.TestCase):
+
+ def test_remote_path_after_close(self):
+ # If a user enters "bzr+http://host/foo", we want to sent all smart
+ # requests to that URL, even when accessing child URLs. i.e., we want
+ # to POST to "bzr+http://host/foo/.bzr/smart" and never something like
+ # "bzr+http://host/foo/.bzr/brancch/.bzr/smart". So, a cloned
+ # SmartHTTPTransport remembers the initial URL, and adjusts the relpaths
+ # it sends in smart requests accordingly.
+ base_transport = smart.SmartHTTPTransport('bzr+http://host/path')
+ new_transport = base_transport.clone('child_dir')
+ self.assertEqual(base_transport._http_transport, new_transport._http_transport)
+ self.assertEqual('child_dir/foo', new_transport._remote_path('foo'))
+
# TODO: Client feature that does get_bundle and then installs that into a
# branch; this can be used in place of the regular pull/fetch operation when
=== modified file bzrlib/tests/test_urlutils.py // last-changed:andrew.bennetts
... @canonical.com-20061222055608-t1cfil9ws1l95k0f
--- bzrlib/tests/test_urlutils.py
+++ bzrlib/tests/test_urlutils.py
@@ -115,6 +115,12 @@
eq('http://host/ab/%C2%B5/%C2%B5',
normalize_url(u'http://host/ab/%C2%B5/\xb5'))
+ # Unescape characters that don't need to be escaped
+ eq('http://host/~bob%2525-._',
+ normalize_url('http://host/%7Ebob%2525%2D%2E%5F'))
+ eq('http://host/~bob%2525-._',
+ normalize_url(u'http://host/%7Ebob%2525%2D%2E%5F'))
+
# Normalize verifies URLs when they are not unicode
# (indicating they did not come from the user)
self.assertRaises(InvalidURL, normalize_url, 'http://host/\xb5')
=== modified file bzrlib/transport/smart.py
--- bzrlib/transport/smart.py
+++ bzrlib/transport/smart.py
@@ -1781,10 +1781,12 @@
def _remote_path(self, relpath):
"""After connecting HTTP Transport only deals in relative URLs."""
- if relpath == '.':
- return ''
- else:
- return relpath
+ # Adjust the relpath based on which URL this smart transport is
+ # connected to.
+ base = self._http_transport.base
+ url = urlutils.join(self.base[len('bzr+'):], relpath)
+ url = urlutils.normalize_url(url)
+ return urlutils.relative_url(base, url)
def abspath(self, relpath):
"""Return the full url to the given relative path.
@@ -1800,15 +1802,20 @@
This is re-implemented rather than using the default
SmartTransport.clone() because we must be careful about the underlying
http transport.
+
+ Also, the cloned smart transport will POST to the same .bzr/smart
+ location as this transport (although obviously the relative paths in the
+ smart requests may be different). This is so that the server doesn't
+ have to handle .bzr/smart requests at arbitrarily places, just at the
+ initial URL the user uses.
"""
if relative_url:
abs_url = self.abspath(relative_url)
else:
abs_url = self.base
- # By cloning the underlying http_transport, we are able to share the
+ # By sharing the underlying http_transport, we are able to share the
# connection.
- new_transport = self._http_transport.clone(relative_url)
- return SmartHTTPTransport(abs_url, http_transport=new_transport)
+ return SmartHTTPTransport(abs_url, http_transport=self._http_transport)
def get_test_permutations():
=== modified file bzrlib/urlutils.py // last-changed:andrew.bennetts at canonical.
... com-20061222055608-t1cfil9ws1l95k0f
--- bzrlib/urlutils.py
+++ bzrlib/urlutils.py
@@ -233,12 +233,26 @@
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
+_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
+
+
+def _unescape_safe_chars(matchobj):
+ """re.sub callback to convert hex-escapes to plain characters (if safe).
+
+ e.g. '%7E' will be converted to '~'.
+ """
+ hex_digits = matchobj.group(0)[1:]
+ char = chr(int(hex_digits, 16))
+ if char in _url_dont_escape_characters:
+ return char
+ else:
+ return matchobj.group(0).upper()
def normalize_url(url):
"""Make sure that a path string is in fully normalized URL form.
- This handles URLs which have unicode characters, spaces,
+ This handles URLs which have unicode characters, spaces,
special characters, etc.
It has two basic modes of operation, depending on whether the
@@ -257,21 +271,27 @@
m = _url_scheme_re.match(url)
if not m:
return local_path_to_url(url)
+ scheme = m.group('scheme')
+ path = m.group('path')
if not isinstance(url, unicode):
for c in url:
if c not in _url_safe_characters:
raise errors.InvalidURL(url, 'URLs can only contain specific'
' safe characters (not %r)' % c)
- return url
+ path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
+ return str(scheme + '://' + ''.join(path))
+
# We have a unicode (hybrid) url
- scheme = m.group('scheme')
- path = list(m.group('path'))
+ path_chars = list(path)
- for i in xrange(len(path)):
- if path[i] not in _url_safe_characters:
- chars = path[i].encode('utf-8')
- path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])
- return str(scheme + '://' + ''.join(path))
+ for i in xrange(len(path_chars)):
+ if path_chars[i] not in _url_safe_characters:
+ chars = path_chars[i].encode('utf-8')
+ path_chars[i] = ''.join(
+ ['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
+ path = ''.join(path_chars)
+ path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
+ return str(scheme + '://' + path)
def relative_url(base, other):
@@ -457,6 +477,15 @@
#These entries get mapped to themselves
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
+# These characters shouldn't be percent-encoded, and it's always safe to
+# unencode them if they are.
+_url_dont_escape_characters = set(
+ "abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
+ "0123456789" # Numbers
+ "-._~" # Unreserved characters
+)
+
# These characters should not be escaped
_url_safe_characters = set(
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
=== modified directory // last-changed:andrew.bennetts at canonical.com-200612220
... 60424-psweqla98ig12mra
# revision id: andrew.bennetts at canonical.com-20061222060424-psweqla98ig12mra
# sha1: 1a65d7294f1a8de456bf22019b372b4a78905343
# inventory sha1: 006090fdf3ff2c78b1cf18bd652d2e56bee8493d
# parent ids:
# andrew.bennetts at canonical.com-20061222055608-t1cfil9ws1l95k0f
# base id: pqm at pqm.ubuntu.com-20061221043820-0b56b176269f173a
# properties:
# branch-nick: bzr
# message:
# normalize_url should normalise escaping of unreserved characters, like '~'.
# committer: Andrew Bennetts <andrew.bennetts at canonical.com>
# date: Fri 2006-12-22 16:56:08.933000088 +1100
=== modified file NEWS // encoding:base64
LS0tIE5FV1MKKysrIE5FV1MKQEAgLTc3LDYgKzc3LDkgQEAKICAgICAqIFNpbmdsZS1sZXR0ZXIg
c2hvcnQgb3B0aW9ucyBhcmUgbm8gbG9uZ2VyIGdsb2JhbGx5IGRlY2xhcmVkLiAgKE1hcnRpbgog
ICAgICAgUG9vbCkKIAorICAgICogdXJsdXRpbHMubm9ybWFsaXplX3VybCBub3cgdW5lc2NhcGVz
IHVucmVzZXJ2ZWQgY2hhcmFjdGVycywgc3VjaCBhcyAifiIuCisgICAgICAoQW5kcmV3IEJlbm5l
dHRzKQorCiAgIEJVRyBGSVhFUzoKIAogICAgICogYGBienIgbWlzc2luZyAtLXZlcmJvc2VgYCB3
YXMgc2hvd2luZyBhZGRzL3JlbW92YWxzIGluIHRoZSB3cm9uZwoK
=== modified file bzrlib/tests/test_urlutils.py // encoding:base64
LS0tIGJ6cmxpYi90ZXN0cy90ZXN0X3VybHV0aWxzLnB5CisrKyBienJsaWIvdGVzdHMvdGVzdF91
cmx1dGlscy5weQpAQCAtMTE1LDYgKzExNSwxMiBAQAogICAgICAgICBlcSgnaHR0cDovL2hvc3Qv
YWIvJUMyJUI1LyVDMiVCNScsCiAgICAgICAgICAgICBub3JtYWxpemVfdXJsKHUnaHR0cDovL2hv
c3QvYWIvJUMyJUI1L1x4YjUnKSkKIAorICAgICAgICAjIFVuZXNjYXBlIGNoYXJhY3RlcnMgdGhh
dCBkb24ndCBuZWVkIHRvIGJlIGVzY2FwZWQKKyAgICAgICAgZXEoJ2h0dHA6Ly9ob3N0L35ib2Il
MjUyNS0uXycsCisgICAgICAgICAgICAgICAgbm9ybWFsaXplX3VybCgnaHR0cDovL2hvc3QvJTdF
Ym9iJTI1MjUlMkQlMkUlNUYnKSkKKyAgICAgICAgZXEoJ2h0dHA6Ly9ob3N0L35ib2IlMjUyNS0u
XycsCisgICAgICAgICAgICAgICAgbm9ybWFsaXplX3VybCh1J2h0dHA6Ly9ob3N0LyU3RWJvYiUy
NTI1JTJEJTJFJTVGJykpCisKICAgICAgICAgIyBOb3JtYWxpemUgdmVyaWZpZXMgVVJMcyB3aGVu
IHRoZXkgYXJlIG5vdCB1bmljb2RlCiAgICAgICAgICMgKGluZGljYXRpbmcgdGhleSBkaWQgbm90
IGNvbWUgZnJvbSB0aGUgdXNlcikKICAgICAgICAgc2VsZi5hc3NlcnRSYWlzZXMoSW52YWxpZFVS
TCwgbm9ybWFsaXplX3VybCwgJ2h0dHA6Ly9ob3N0L1x4YjUnKQoK
=== modified file bzrlib/urlutils.py // encoding:base64
LS0tIGJ6cmxpYi91cmx1dGlscy5weQorKysgYnpybGliL3VybHV0aWxzLnB5CkBAIC0yMzMsMTIg
KzIzMywyNiBAQAogCiAKIF91cmxfc2NoZW1lX3JlID0gcmUuY29tcGlsZShyJ14oP1A8c2NoZW1l
PlteOi9dezIsfSk6Ly8oP1A8cGF0aD4uKikkJykKK191cmxfaGV4X2VzY2FwZXNfcmUgPSByZS5j
b21waWxlKHInKCVbMC05YS1mQS1GXXsyfSknKQorCisKK2RlZiBfdW5lc2NhcGVfc2FmZV9jaGFy
cyhtYXRjaG9iaik6CisgICAgIiIicmUuc3ViIGNhbGxiYWNrIHRvIGNvbnZlcnQgaGV4LWVzY2Fw
ZXMgdG8gcGxhaW4gY2hhcmFjdGVycyAoaWYgc2FmZSkuCisgICAgCisgICAgZS5nLiAnJTdFJyB3
aWxsIGJlIGNvbnZlcnRlZCB0byAnficuCisgICAgIiIiCisgICAgaGV4X2RpZ2l0cyA9IG1hdGNo
b2JqLmdyb3VwKDApWzE6XQorICAgIGNoYXIgPSBjaHIoaW50KGhleF9kaWdpdHMsIDE2KSkKKyAg
ICBpZiBjaGFyIGluIF91cmxfZG9udF9lc2NhcGVfY2hhcmFjdGVyczoKKyAgICAgICAgcmV0dXJu
IGNoYXIKKyAgICBlbHNlOgorICAgICAgICByZXR1cm4gbWF0Y2hvYmouZ3JvdXAoMCkudXBwZXIo
KQogCiAKIGRlZiBub3JtYWxpemVfdXJsKHVybCk6CiAgICAgIiIiTWFrZSBzdXJlIHRoYXQgYSBw
YXRoIHN0cmluZyBpcyBpbiBmdWxseSBub3JtYWxpemVkIFVSTCBmb3JtLgogICAgIAotICAgIFRo
aXMgaGFuZGxlcyBVUkxzIHdoaWNoIGhhdmUgdW5pY29kZSBjaGFyYWN0ZXJzLCBzcGFjZXMsIAor
ICAgIFRoaXMgaGFuZGxlcyBVUkxzIHdoaWNoIGhhdmUgdW5pY29kZSBjaGFyYWN0ZXJzLCBzcGFj
ZXMsCiAgICAgc3BlY2lhbCBjaGFyYWN0ZXJzLCBldGMuCiAKICAgICBJdCBoYXMgdHdvIGJhc2lj
IG1vZGVzIG9mIG9wZXJhdGlvbiwgZGVwZW5kaW5nIG9uIHdoZXRoZXIgdGhlCkBAIC0yNTcsMjEg
KzI3MSwyNyBAQAogICAgIG0gPSBfdXJsX3NjaGVtZV9yZS5tYXRjaCh1cmwpCiAgICAgaWYgbm90
IG06CiAgICAgICAgIHJldHVybiBsb2NhbF9wYXRoX3RvX3VybCh1cmwpCisgICAgc2NoZW1lID0g
bS5ncm91cCgnc2NoZW1lJykKKyAgICBwYXRoID0gbS5ncm91cCgncGF0aCcpCiAgICAgaWYgbm90
IGlzaW5zdGFuY2UodXJsLCB1bmljb2RlKToKICAgICAgICAgZm9yIGMgaW4gdXJsOgogICAgICAg
ICAgICAgaWYgYyBub3QgaW4gX3VybF9zYWZlX2NoYXJhY3RlcnM6CiAgICAgICAgICAgICAgICAg
cmFpc2UgZXJyb3JzLkludmFsaWRVUkwodXJsLCAnVVJMcyBjYW4gb25seSBjb250YWluIHNwZWNp
ZmljJwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnIHNhZmUg
Y2hhcmFjdGVycyAobm90ICVyKScgJSBjKQotICAgICAgICByZXR1cm4gdXJsCisgICAgICAgIHBh
dGggPSBfdXJsX2hleF9lc2NhcGVzX3JlLnN1YihfdW5lc2NhcGVfc2FmZV9jaGFycywgcGF0aCkK
KyAgICAgICAgcmV0dXJuIHN0cihzY2hlbWUgKyAnOi8vJyArICcnLmpvaW4ocGF0aCkpCisKICAg
ICAjIFdlIGhhdmUgYSB1bmljb2RlIChoeWJyaWQpIHVybAotICAgIHNjaGVtZSA9IG0uZ3JvdXAo
J3NjaGVtZScpCi0gICAgcGF0aCA9IGxpc3QobS5ncm91cCgncGF0aCcpKQorICAgIHBhdGhfY2hh
cnMgPSBsaXN0KHBhdGgpCiAKLSAgICBmb3IgaSBpbiB4cmFuZ2UobGVuKHBhdGgpKToKLSAgICAg
ICAgaWYgcGF0aFtpXSBub3QgaW4gX3VybF9zYWZlX2NoYXJhY3RlcnM6Ci0gICAgICAgICAgICBj
aGFycyA9IHBhdGhbaV0uZW5jb2RlKCd1dGYtOCcpCi0gICAgICAgICAgICBwYXRoW2ldID0gJycu
am9pbihbJyUlJTAyWCcgJSBvcmQoYykgZm9yIGMgaW4gcGF0aFtpXS5lbmNvZGUoJ3V0Zi04Jyld
KQotICAgIHJldHVybiBzdHIoc2NoZW1lICsgJzovLycgKyAnJy5qb2luKHBhdGgpKQorICAgIGZv
ciBpIGluIHhyYW5nZShsZW4ocGF0aF9jaGFycykpOgorICAgICAgICBpZiBwYXRoX2NoYXJzW2ld
IG5vdCBpbiBfdXJsX3NhZmVfY2hhcmFjdGVyczoKKyAgICAgICAgICAgIGNoYXJzID0gcGF0aF9j
aGFyc1tpXS5lbmNvZGUoJ3V0Zi04JykKKyAgICAgICAgICAgIHBhdGhfY2hhcnNbaV0gPSAnJy5q
b2luKAorICAgICAgICAgICAgICAgIFsnJSUlMDJYJyAlIG9yZChjKSBmb3IgYyBpbiBwYXRoX2No
YXJzW2ldLmVuY29kZSgndXRmLTgnKV0pCisgICAgcGF0aCA9ICcnLmpvaW4ocGF0aF9jaGFycykK
KyAgICBwYXRoID0gX3VybF9oZXhfZXNjYXBlc19yZS5zdWIoX3VuZXNjYXBlX3NhZmVfY2hhcnMs
IHBhdGgpCisgICAgcmV0dXJuIHN0cihzY2hlbWUgKyAnOi8vJyArIHBhdGgpCiAKIAogZGVmIHJl
bGF0aXZlX3VybChiYXNlLCBvdGhlcik6CkBAIC00NTcsNiArNDc3LDE1IEBACiAjVGhlc2UgZW50
cmllcyBnZXQgbWFwcGVkIHRvIHRoZW1zZWx2ZXMKIF9oZXhfZGlzcGxheV9tYXAudXBkYXRlKCho
ZXgsJyUnK2hleCkgZm9yIGhleCBpbiBfbm9fZGVjb2RlX2hleCkKIAorIyBUaGVzZSBjaGFyYWN0
ZXJzIHNob3VsZG4ndCBiZSBwZXJjZW50LWVuY29kZWQsIGFuZCBpdCdzIGFsd2F5cyBzYWZlIHRv
CisjIHVuZW5jb2RlIHRoZW0gaWYgdGhleSBhcmUuCitfdXJsX2RvbnRfZXNjYXBlX2NoYXJhY3Rl
cnMgPSBzZXQoCisgICAiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXoiICMgTG93ZXJjYXNlIGFs
cGhhCisgICAiQUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVoiICMgVXBwZXJjYXNlIGFscGhhCisg
ICAiMDEyMzQ1Njc4OSIgIyBOdW1iZXJzCisgICAiLS5ffiIgICMgVW5yZXNlcnZlZCBjaGFyYWN0
ZXJzCispCisKICMgVGhlc2UgY2hhcmFjdGVycyBzaG91bGQgbm90IGJlIGVzY2FwZWQKIF91cmxf
c2FmZV9jaGFyYWN0ZXJzID0gc2V0KAogICAgImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6IiAj
IExvd2VyY2FzZSBhbHBoYQoK
=== modified directory // last-changed:andrew.bennetts at canonical.com-200612220
... 55608-t1cfil9ws1l95k0f
# revision id: andrew.bennetts at canonical.com-20061222055608-t1cfil9ws1l95k0f
# sha1: 65915a900385dc539ab4213dbd518571169aa11e
# inventory sha1: b9541aa00fe5778207f5645620b20d313801416a
# parent ids:
# pqm at pqm.ubuntu.com-20061221043820-0b56b176269f173a
# properties:
# branch-nick: bzr
More information about the bazaar
mailing list