UnknownSec Bypass
403
:
/
lib
/
python3
/
dist-packages
/
hyperlink
/
test
/ [
drwxr-xr-x
]
Menu
Upload
Mass depes
Mass delete
Terminal
Info server
About
name :
test_url.py
# -*- coding: utf-8 -*- # Copyright (c) Twisted Matrix Laboratories. # See LICENSE for details. from __future__ import unicode_literals import sys import socket from .common import HyperlinkTestCase from .. import URL, URLParseError # automatically import the py27 windows implementation when appropriate from .. import _url from .._url import inet_pton, SCHEME_PORT_MAP, parse_host PY2 = (sys.version_info[0] == 2) unicode = type(u'') BASIC_URL = "http://www.foo.com/a/nice/path/?zot=23&zut" # Examples from RFC 3986 section 5.4, Reference Resolution Examples relativeLinkBaseForRFC3986 = 'http://a/b/c/d;p?q' relativeLinkTestsForRFC3986 = [ # "Normal" # ('g:h', 'g:h'), # can't click on a scheme-having url without an abs path ('g', 'http://a/b/c/g'), ('./g', 'http://a/b/c/g'), ('g/', 'http://a/b/c/g/'), ('/g', 'http://a/g'), ('//g', 'http://g'), ('?y', 'http://a/b/c/d;p?y'), ('g?y', 'http://a/b/c/g?y'), ('#s', 'http://a/b/c/d;p?q#s'), ('g#s', 'http://a/b/c/g#s'), ('g?y#s', 'http://a/b/c/g?y#s'), (';x', 'http://a/b/c/;x'), ('g;x', 'http://a/b/c/g;x'), ('g;x?y#s', 'http://a/b/c/g;x?y#s'), ('', 'http://a/b/c/d;p?q'), ('.', 'http://a/b/c/'), ('./', 'http://a/b/c/'), ('..', 'http://a/b/'), ('../', 'http://a/b/'), ('../g', 'http://a/b/g'), ('../..', 'http://a/'), ('../../', 'http://a/'), ('../../g', 'http://a/g'), # Abnormal examples # ".." cannot be used to change the authority component of a URI. ('../../../g', 'http://a/g'), ('../../../../g', 'http://a/g'), # Only include "." and ".." when they are only part of a larger segment, # not by themselves. ('/./g', 'http://a/g'), ('/../g', 'http://a/g'), ('g.', 'http://a/b/c/g.'), ('.g', 'http://a/b/c/.g'), ('g..', 'http://a/b/c/g..'), ('..g', 'http://a/b/c/..g'), # Unnecessary or nonsensical forms of "." and "..". ('./../g', 'http://a/b/g'), ('./g/.', 'http://a/b/c/g/'), ('g/./h', 'http://a/b/c/g/h'), ('g/../h', 'http://a/b/c/h'), ('g;x=1/./y', 'http://a/b/c/g;x=1/y'), ('g;x=1/../y', 'http://a/b/c/y'), # Separating the reference's query and fragment components from the path. ('g?y/./x', 'http://a/b/c/g?y/./x'), ('g?y/../x', 'http://a/b/c/g?y/../x'), ('g#s/./x', 'http://a/b/c/g#s/./x'), ('g#s/../x', 'http://a/b/c/g#s/../x') ] ROUNDTRIP_TESTS = ( "http://localhost", "http://localhost/", "http://127.0.0.1/", "http://[::127.0.0.1]/", "http://[::1]/", "http://localhost/foo", "http://localhost/foo/", "http://localhost/foo!!bar/", "http://localhost/foo%20bar/", "http://localhost/foo%2Fbar/", "http://localhost/foo?n", "http://localhost/foo?n=v", "http://localhost/foo?n=/a/b", "http://example.com/foo!@$bar?b!@z=123", "http://localhost/asd?a=asd%20sdf/345", "http://(%2525)/(%2525)?(%2525)&(%2525)=(%2525)#(%2525)", "http://(%C3%A9)/(%C3%A9)?(%C3%A9)&(%C3%A9)=(%C3%A9)#(%C3%A9)", "?sslrootcert=/Users/glyph/Downloads/rds-ca-2015-root.pem&sslmode=verify", # from boltons.urlutils' tests 'http://googlewebsite.com/e-shops.aspx', 'http://example.com:8080/search?q=123&business=Nothing%20Special', 'http://hatnote.com:9000/?arg=1&arg=2&arg=3', 'https://xn--bcher-kva.ch', 'http://xn--ggbla1c4e.xn--ngbc5azd/', 'http://tools.ietf.org/html/rfc3986#section-3.4', # 'http://wiki:pedia@hatnote.com', 'ftp://ftp.rfc-editor.org/in-notes/tar/RFCs0001-0500.tar.gz', 'http://[1080:0:0:0:8:800:200C:417A]/index.html', 'ssh://192.0.2.16:2222/', 'https://[::101.45.75.219]:80/?hi=bye', 'ldap://[::192.9.5.5]/dc=example,dc=com??sub?(sn=Jensen)', 'mailto:me@example.com?to=me@example.com&body=hi%20http://wikipedia.org', 'news:alt.rec.motorcycle', 'tel:+1-800-867-5309', 'urn:oasis:member:A00024:x', ('magnet:?xt=urn:btih:1a42b9e04e122b97a5254e3df77ab3c4b7da725f&dn=Puppy%' '20Linux%20precise-5.7.1.iso&tr=udp://tracker.openbittorrent.com:80&' 'tr=udp://tracker.publicbt.com:80&tr=udp://tracker.istole.it:6969&' 'tr=udp://tracker.ccc.de:80&tr=udp://open.demonii.com:1337'), # percent-encoded delimiters in percent-encodable fields 'https://%3A@example.com/', # colon in username 'https://%40@example.com/', # at sign in username 'https://%2f@example.com/', # slash in username 'https://a:%3a@example.com/', # colon in password 'https://a:%40@example.com/', # at sign in password 'https://a:%2f@example.com/', # slash in password 'https://a:%3f@example.com/', # question mark in password 'https://example.com/%2F/', # slash in path 'https://example.com/%3F/', # question mark in path 'https://example.com/%23/', # hash in path 'https://example.com/?%23=b', # hash in query param name 'https://example.com/?%3D=b', # equals in query param name 'https://example.com/?%26=b', # ampersand in query param name 'https://example.com/?a=%23', # hash in query param value 'https://example.com/?a=%26', # ampersand in query param value 'https://example.com/?a=%3D', # equals in query param value # double-encoded percent sign in all percent-encodable positions: "http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)", # colon in first part of schemeless relative url 'first_seg_rel_path__colon%3Anotok/second_seg__colon%3Aok', ) class TestURL(HyperlinkTestCase): """ Tests for L{URL}. """ def assertUnicoded(self, u): """ The given L{URL}'s components should be L{unicode}. @param u: The L{URL} to test. """ self.assertTrue(isinstance(u.scheme, unicode) or u.scheme is None, repr(u)) self.assertTrue(isinstance(u.host, unicode) or u.host is None, repr(u)) for seg in u.path: self.assertEqual(type(seg), unicode, repr(u)) for (k, v) in u.query: self.assertEqual(type(seg), unicode, repr(u)) self.assertTrue(v is None or isinstance(v, unicode), repr(u)) self.assertEqual(type(u.fragment), unicode, repr(u)) def assertURL(self, u, scheme, host, path, query, fragment, port, userinfo=''): """ The given L{URL} should have the given components. @param u: The actual L{URL} to examine. @param scheme: The expected scheme. @param host: The expected host. @param path: The expected path. @param query: The expected query. @param fragment: The expected fragment. @param port: The expected port. @param userinfo: The expected userinfo. """ actual = (u.scheme, u.host, u.path, u.query, u.fragment, u.port, u.userinfo) expected = (scheme, host, tuple(path), tuple(query), fragment, port, u.userinfo) self.assertEqual(actual, expected) def test_initDefaults(self): """ L{URL} should have appropriate default values. """ def check(u): self.assertUnicoded(u) self.assertURL(u, 'http', '', [], [], '', 80, '') check(URL('http', '')) check(URL('http', '', [], [])) check(URL('http', '', [], [], '')) def test_init(self): """ L{URL} should accept L{unicode} parameters. """ u = URL('s', 'h', ['p'], [('k', 'v'), ('k', None)], 'f') self.assertUnicoded(u) self.assertURL(u, 's', 'h', ['p'], [('k', 'v'), ('k', None)], 'f', None) self.assertURL(URL('http', '\xe0', ['\xe9'], [('\u03bb', '\u03c0')], '\u22a5'), 'http', '\xe0', ['\xe9'], [('\u03bb', '\u03c0')], '\u22a5', 80) def test_initPercent(self): """ L{URL} should accept (and not interpret) percent characters. """ u = URL('s', '%68', ['%70'], [('%6B', '%76'), ('%6B', None)], '%66') self.assertUnicoded(u) self.assertURL(u, 's', '%68', ['%70'], [('%6B', '%76'), ('%6B', None)], '%66', None) def test_repr(self): """ L{URL.__repr__} will display the canonical form of the URL, wrapped in a L{URL.from_text} invocation, so that it is C{eval}-able but still easy to read. """ self.assertEqual( repr(URL(scheme='http', host='foo', path=['bar'], query=[('baz', None), ('k', 'v')], fragment='frob')), "URL.from_text(%s)" % (repr(u"http://foo/bar?baz&k=v#frob"),) ) def test_from_text(self): """ Round-tripping L{URL.from_text} with C{str} results in an equivalent URL. """ urlpath = URL.from_text(BASIC_URL) self.assertEqual(BASIC_URL, urlpath.to_text()) def test_roundtrip(self): """ L{URL.to_text} should invert L{URL.from_text}. """ for test in ROUNDTRIP_TESTS: result = URL.from_text(test).to_text(with_password=True) self.assertEqual(test, result) def test_roundtrip_double_iri(self): for test in ROUNDTRIP_TESTS: url = URL.from_text(test) iri = url.to_iri() double_iri = iri.to_iri() assert iri == double_iri iri_text = iri.to_text(with_password=True) double_iri_text = double_iri.to_text(with_password=True) assert iri_text == double_iri_text return def test_equality(self): """ Two URLs decoded using L{URL.from_text} will be equal (C{==}) if they decoded same URL string, and unequal (C{!=}) if they decoded different strings. """ urlpath = URL.from_text(BASIC_URL) self.assertEqual(urlpath, URL.from_text(BASIC_URL)) self.assertNotEqual( urlpath, URL.from_text('ftp://www.anotherinvaliddomain.com/' 'foo/bar/baz/?zot=21&zut') ) def test_fragmentEquality(self): """ An URL created with the empty string for a fragment compares equal to an URL created with an unspecified fragment. """ self.assertEqual(URL(fragment=''), URL()) self.assertEqual(URL.from_text(u"http://localhost/#"), URL.from_text(u"http://localhost/")) def test_child(self): """ L{URL.child} appends a new path segment, but does not affect the query or fragment. """ urlpath = URL.from_text(BASIC_URL) self.assertEqual("http://www.foo.com/a/nice/path/gong?zot=23&zut", urlpath.child('gong').to_text()) self.assertEqual("http://www.foo.com/a/nice/path/gong%2F?zot=23&zut", urlpath.child('gong/').to_text()) self.assertEqual( "http://www.foo.com/a/nice/path/gong%2Fdouble?zot=23&zut", urlpath.child('gong/double').to_text() ) self.assertEqual( "http://www.foo.com/a/nice/path/gong%2Fdouble%2F?zot=23&zut", urlpath.child('gong/double/').to_text() ) def test_multiChild(self): """ L{URL.child} receives multiple segments as C{*args} and appends each in turn. """ url = URL.from_text('http://example.com/a/b') self.assertEqual(url.child('c', 'd', 'e').to_text(), 'http://example.com/a/b/c/d/e') def test_childInitRoot(self): """ L{URL.child} of a L{URL} without a path produces a L{URL} with a single path segment. """ childURL = URL(host=u"www.foo.com").child(u"c") self.assertTrue(childURL.rooted) self.assertEqual("http://www.foo.com/c", childURL.to_text()) def test_emptyChild(self): """ L{URL.child} without any new segments returns the original L{URL}. """ url = URL(host=u"www.foo.com") self.assertEqual(url.child(), url) def test_sibling(self): """ L{URL.sibling} of a L{URL} replaces the last path segment, but does not affect the query or fragment. """ urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/sister?zot=23&zut", urlpath.sibling('sister').to_text() ) # Use an url without trailing '/' to check child removal. url_text = "http://www.foo.com/a/nice/path?zot=23&zut" urlpath = URL.from_text(url_text) self.assertEqual( "http://www.foo.com/a/nice/sister?zot=23&zut", urlpath.sibling('sister').to_text() ) def test_click(self): """ L{URL.click} interprets the given string as a relative URI-reference and returns a new L{URL} interpreting C{self} as the base absolute URI. """ urlpath = URL.from_text(BASIC_URL) # A null uri should be valid (return here). self.assertEqual("http://www.foo.com/a/nice/path/?zot=23&zut", urlpath.click("").to_text()) # A simple relative path remove the query. self.assertEqual("http://www.foo.com/a/nice/path/click", urlpath.click("click").to_text()) # An absolute path replace path and query. self.assertEqual("http://www.foo.com/click", urlpath.click("/click").to_text()) # Replace just the query. self.assertEqual("http://www.foo.com/a/nice/path/?burp", urlpath.click("?burp").to_text()) # One full url to another should not generate '//' between authority. # and path self.assertTrue("//foobar" not in urlpath.click('http://www.foo.com/foobar').to_text()) # From a url with no query clicking a url with a query, the query # should be handled properly. u = URL.from_text('http://www.foo.com/me/noquery') self.assertEqual('http://www.foo.com/me/17?spam=158', u.click('/me/17?spam=158').to_text()) # Check that everything from the path onward is removed when the click # link has no path. u = URL.from_text('http://localhost/foo?abc=def') self.assertEqual(u.click('http://www.python.org').to_text(), 'http://www.python.org') # https://twistedmatrix.com/trac/ticket/8184 u = URL.from_text('http://hatnote.com/a/b/../c/./d/e/..') res = 'http://hatnote.com/a/c/d/' self.assertEqual(u.click('').to_text(), res) # test click default arg is same as empty string above self.assertEqual(u.click().to_text(), res) # test click on a URL instance u = URL.fromText('http://localhost/foo/?abc=def') u2 = URL.from_text('bar') u3 = u.click(u2) self.assertEqual(u3.to_text(), 'http://localhost/foo/bar') def test_clickRFC3986(self): """ L{URL.click} should correctly resolve the examples in RFC 3986. """ base = URL.from_text(relativeLinkBaseForRFC3986) for (ref, expected) in relativeLinkTestsForRFC3986: self.assertEqual(base.click(ref).to_text(), expected) def test_clickSchemeRelPath(self): """ L{URL.click} should not accept schemes with relative paths. """ base = URL.from_text(relativeLinkBaseForRFC3986) self.assertRaises(NotImplementedError, base.click, 'g:h') self.assertRaises(NotImplementedError, base.click, 'http:h') def test_cloneUnchanged(self): """ Verify that L{URL.replace} doesn't change any of the arguments it is passed. """ urlpath = URL.from_text('https://x:1/y?z=1#A') self.assertEqual(urlpath.replace(urlpath.scheme, urlpath.host, urlpath.path, urlpath.query, urlpath.fragment, urlpath.port), urlpath) self.assertEqual(urlpath.replace(), urlpath) def test_clickCollapse(self): """ L{URL.click} collapses C{.} and C{..} according to RFC 3986 section 5.2.4. """ tests = [ ['http://localhost/', '.', 'http://localhost/'], ['http://localhost/', '..', 'http://localhost/'], ['http://localhost/a/b/c', '.', 'http://localhost/a/b/'], ['http://localhost/a/b/c', '..', 'http://localhost/a/'], ['http://localhost/a/b/c', './d/e', 'http://localhost/a/b/d/e'], ['http://localhost/a/b/c', '../d/e', 'http://localhost/a/d/e'], ['http://localhost/a/b/c', '/./d/e', 'http://localhost/d/e'], ['http://localhost/a/b/c', '/../d/e', 'http://localhost/d/e'], ['http://localhost/a/b/c/', '../../d/e/', 'http://localhost/a/d/e/'], ['http://localhost/a/./c', '../d/e', 'http://localhost/d/e'], ['http://localhost/a/./c/', '../d/e', 'http://localhost/a/d/e'], ['http://localhost/a/b/c/d', './e/../f/../g', 'http://localhost/a/b/c/g'], ['http://localhost/a/b/c', 'd//e', 'http://localhost/a/b/d//e'], ] for start, click, expected in tests: actual = URL.from_text(start).click(click).to_text() self.assertEqual( actual, expected, "{start}.click({click}) => {actual} not {expected}".format( start=start, click=repr(click), actual=actual, expected=expected, ) ) def test_queryAdd(self): """ L{URL.add} adds query parameters. """ self.assertEqual( "http://www.foo.com/a/nice/path/?foo=bar", URL.from_text("http://www.foo.com/a/nice/path/") .add(u"foo", u"bar").to_text()) self.assertEqual( "http://www.foo.com/?foo=bar", URL(host=u"www.foo.com").add(u"foo", u"bar") .to_text()) urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp", urlpath.add(u"burp").to_text()) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx", urlpath.add(u"burp", u"xxx").to_text()) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zing", urlpath.add(u"burp", u"xxx").add(u"zing").to_text()) # Note the inversion! self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&zing&burp=xxx", urlpath.add(u"zing").add(u"burp", u"xxx").to_text()) # Note the two values for the same name. self.assertEqual( "http://www.foo.com/a/nice/path/?zot=23&zut&burp=xxx&zot=32", urlpath.add(u"burp", u"xxx").add(u"zot", '32') .to_text()) def test_querySet(self): """ L{URL.set} replaces query parameters by name. """ urlpath = URL.from_text(BASIC_URL) self.assertEqual( "http://www.foo.com/a/nice/path/?zot=32&zut", urlpath.set(u"zot", '32').to_text()) # Replace name without value with name/value and vice-versa. self.assertEqual( "http://www.foo.com/a/nice/path/?zot&zut=itworked", urlpath.set(u"zot").set(u"zut", u"itworked").to_text() ) # Q: what happens when the query has two values and we replace? # A: we replace both values with a single one self.assertEqual( "http://www.foo.com/a/nice/path/?zot=32&zut", urlpath.add(u"zot", u"xxx").set(u"zot", '32').to_text() ) def test_queryRemove(self): """ L{URL.remove} removes instances of a query parameter. """ url = URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3") self.assertEqual( url.remove(u"foo"), URL.from_text(u"https://example.com/a/b/?bar=2") ) self.assertEqual( url.remove(name=u"foo", value=u"1"), URL.from_text(u"https://example.com/a/b/?bar=2&foo=3") ) self.assertEqual( url.remove(name=u"foo", limit=1), URL.from_text(u"https://example.com/a/b/?bar=2&foo=3") ) self.assertEqual( url.remove(name=u"foo", value=u"1", limit=0), URL.from_text(u"https://example.com/a/b/?foo=1&bar=2&foo=3") ) def test_parseEqualSignInParamValue(self): """ Every C{=}-sign after the first in a query parameter is simply included in the value of the parameter. """ u = URL.from_text('http://localhost/?=x=x=x') self.assertEqual(u.get(''), ['x=x=x']) self.assertEqual(u.to_text(), 'http://localhost/?=x=x=x') u = URL.from_text('http://localhost/?foo=x=x=x&bar=y') self.assertEqual(u.query, (('foo', 'x=x=x'), ('bar', 'y'))) self.assertEqual(u.to_text(), 'http://localhost/?foo=x=x=x&bar=y') u = URL.from_text('https://example.com/?argument=3&argument=4&operator=%3D') iri = u.to_iri() self.assertEqual(iri.get('operator'), ['=']) # assert that the equals is not unnecessarily escaped self.assertEqual(iri.to_uri().get('operator'), ['=']) def test_empty(self): """ An empty L{URL} should serialize as the empty string. """ self.assertEqual(URL().to_text(), '') def test_justQueryText(self): """ An L{URL} with query text should serialize as just query text. """ u = URL(query=[(u"hello", u"world")]) self.assertEqual(u.to_text(), '?hello=world') def test_identicalEqual(self): """ L{URL} compares equal to itself. """ u = URL.from_text('http://localhost/') self.assertEqual(u, u) def test_similarEqual(self): """ URLs with equivalent components should compare equal. """ u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') self.assertEqual(u1, u2) def test_differentNotEqual(self): """ L{URL}s that refer to different resources are both unequal (C{!=}) and also not equal (not C{==}). """ u1 = URL.from_text('http://localhost/a') u2 = URL.from_text('http://localhost/b') self.assertFalse(u1 == u2, "%r != %r" % (u1, u2)) self.assertNotEqual(u1, u2) def test_otherTypesNotEqual(self): """ L{URL} is not equal (C{==}) to other types. """ u = URL.from_text('http://localhost/') self.assertFalse(u == 42, "URL must not equal a number.") self.assertFalse(u == object(), "URL must not equal an object.") self.assertNotEqual(u, 42) self.assertNotEqual(u, object()) def test_identicalNotUnequal(self): """ Identical L{URL}s are not unequal (C{!=}) to each other. """ u = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') self.assertFalse(u != u, "%r == itself" % u) def test_similarNotUnequal(self): """ Structurally similar L{URL}s are not unequal (C{!=}) to each other. """ u1 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') u2 = URL.from_text('http://u@localhost:8080/p/a/t/h?q=p#f') self.assertFalse(u1 != u2, "%r == %r" % (u1, u2)) def test_differentUnequal(self): """ Structurally different L{URL}s are unequal (C{!=}) to each other. """ u1 = URL.from_text('http://localhost/a') u2 = URL.from_text('http://localhost/b') self.assertTrue(u1 != u2, "%r == %r" % (u1, u2)) def test_otherTypesUnequal(self): """ L{URL} is unequal (C{!=}) to other types. """ u = URL.from_text('http://localhost/') self.assertTrue(u != 42, "URL must differ from a number.") self.assertTrue(u != object(), "URL must be differ from an object.") def test_asURI(self): """ L{URL.asURI} produces an URI which converts any URI unicode encoding into pure US-ASCII and returns a new L{URL}. """ unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=' '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}' '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}') iri = URL.from_text(unicodey) uri = iri.asURI() self.assertEqual(iri.host, '\N{LATIN SMALL LETTER E WITH ACUTE}.com') self.assertEqual(iri.path[0], '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}') self.assertEqual(iri.to_text(), unicodey) expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' actualURI = uri.to_text() self.assertEqual(actualURI, expectedURI, '%r != %r' % (actualURI, expectedURI)) def test_asIRI(self): """ L{URL.asIRI} decodes any percent-encoded text in the URI, making it more suitable for reading by humans, and returns a new L{URL}. """ asciiish = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' uri = URL.from_text(asciiish) iri = uri.asIRI() self.assertEqual(uri.host, 'xn--9ca.com') self.assertEqual(uri.path[0], '%C3%A9') self.assertEqual(uri.to_text(), asciiish) expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '\N{LATIN SMALL LETTER E WITH ACUTE}' '?\N{LATIN SMALL LETTER A WITH ACUTE}=' '\N{LATIN SMALL LETTER I WITH ACUTE}' '#\N{LATIN SMALL LETTER U WITH ACUTE}') actualIRI = iri.to_text() self.assertEqual(actualIRI, expectedIRI, '%r != %r' % (actualIRI, expectedIRI)) def test_badUTF8AsIRI(self): """ Bad UTF-8 in a path segment, query parameter, or fragment results in that portion of the URI remaining percent-encoded in the IRI. """ urlWithBinary = 'http://xn--9ca.com/%00%FF/%C3%A9' uri = URL.from_text(urlWithBinary) iri = uri.asIRI() expectedIRI = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '%00%FF/' '\N{LATIN SMALL LETTER E WITH ACUTE}') actualIRI = iri.to_text() self.assertEqual(actualIRI, expectedIRI, '%r != %r' % (actualIRI, expectedIRI)) def test_alreadyIRIAsIRI(self): """ A L{URL} composed of non-ASCII text will result in non-ASCII text. """ unicodey = ('http://\N{LATIN SMALL LETTER E WITH ACUTE}.com/' '\N{LATIN SMALL LETTER E}\N{COMBINING ACUTE ACCENT}' '?\N{LATIN SMALL LETTER A}\N{COMBINING ACUTE ACCENT}=' '\N{LATIN SMALL LETTER I}\N{COMBINING ACUTE ACCENT}' '#\N{LATIN SMALL LETTER U}\N{COMBINING ACUTE ACCENT}') iri = URL.from_text(unicodey) alsoIRI = iri.asIRI() self.assertEqual(alsoIRI.to_text(), unicodey) def test_alreadyURIAsURI(self): """ A L{URL} composed of encoded text will remain encoded. """ expectedURI = 'http://xn--9ca.com/%C3%A9?%C3%A1=%C3%AD#%C3%BA' uri = URL.from_text(expectedURI) actualURI = uri.asURI().to_text() self.assertEqual(actualURI, expectedURI) def test_userinfo(self): """ L{URL.from_text} will parse the C{userinfo} portion of the URI separately from the host and port. """ url = URL.from_text( 'http://someuser:somepassword@example.com/some-segment@ignore' ) self.assertEqual(url.authority(True), 'someuser:somepassword@example.com') self.assertEqual(url.authority(False), 'someuser:@example.com') self.assertEqual(url.userinfo, 'someuser:somepassword') self.assertEqual(url.user, 'someuser') self.assertEqual(url.to_text(), 'http://someuser:@example.com/some-segment@ignore') self.assertEqual( url.replace(userinfo=u"someuser").to_text(), 'http://someuser@example.com/some-segment@ignore' ) def test_portText(self): """ L{URL.from_text} parses custom port numbers as integers. """ portURL = URL.from_text(u"http://www.example.com:8080/") self.assertEqual(portURL.port, 8080) self.assertEqual(portURL.to_text(), u"http://www.example.com:8080/") def test_mailto(self): """ Although L{URL} instances are mainly for dealing with HTTP, other schemes (such as C{mailto:}) should work as well. For example, L{URL.from_text}/L{URL.to_text} round-trips cleanly for a C{mailto:} URL representing an email address. """ self.assertEqual(URL.from_text(u"mailto:user@example.com").to_text(), u"mailto:user@example.com") def test_queryIterable(self): """ When a L{URL} is created with a C{query} argument, the C{query} argument is converted into an N-tuple of 2-tuples, sensibly handling dictionaries. """ expected = (('alpha', 'beta'),) url = URL(query=[['alpha', 'beta']]) self.assertEqual(url.query, expected) url = URL(query={'alpha': 'beta'}) self.assertEqual(url.query, expected) def test_pathIterable(self): """ When a L{URL} is created with a C{path} argument, the C{path} is converted into a tuple. """ url = URL(path=['hello', 'world']) self.assertEqual(url.path, ('hello', 'world')) def test_invalidArguments(self): """ Passing an argument of the wrong type to any of the constructor arguments of L{URL} will raise a descriptive L{TypeError}. L{URL} typechecks very aggressively to ensure that its constitutent parts are all properly immutable and to prevent confusing errors when bad data crops up in a method call long after the code that called the constructor is off the stack. """ class Unexpected(object): def __str__(self): return "wrong" def __repr__(self): return "<unexpected>" defaultExpectation = "unicode" if bytes is str else "str" def assertRaised(raised, expectation, name): self.assertEqual(str(raised.exception), "expected {0} for {1}, got {2}".format( expectation, name, "<unexpected>")) def check(param, expectation=defaultExpectation): with self.assertRaises(TypeError) as raised: URL(**{param: Unexpected()}) assertRaised(raised, expectation, param) check("scheme") check("host") check("fragment") check("rooted", "bool") check("userinfo") check("port", "int or NoneType") with self.assertRaises(TypeError) as raised: URL(path=[Unexpected()]) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: URL(query=[(u"name", Unexpected())]) assertRaised(raised, defaultExpectation + " or NoneType", "query parameter value") with self.assertRaises(TypeError) as raised: URL(query=[(Unexpected(), u"value")]) assertRaised(raised, defaultExpectation, "query parameter name") # No custom error message for this one, just want to make sure # non-2-tuples don't get through. with self.assertRaises(TypeError): URL(query=[Unexpected()]) with self.assertRaises(ValueError): URL(query=[('k', 'v', 'vv')]) with self.assertRaises(ValueError): URL(query=[('k',)]) url = URL.from_text("https://valid.example.com/") with self.assertRaises(TypeError) as raised: url.child(Unexpected()) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: url.sibling(Unexpected()) assertRaised(raised, defaultExpectation, "path segment") with self.assertRaises(TypeError) as raised: url.click(Unexpected()) assertRaised(raised, defaultExpectation, "relative URL") def test_technicallyTextIsIterableBut(self): """ Technically, L{str} (or L{unicode}, as appropriate) is iterable, but C{URL(path="foo")} resulting in C{URL.from_text("f/o/o")} is never what you want. """ with self.assertRaises(TypeError) as raised: URL(path='foo') self.assertEqual( str(raised.exception), "expected iterable of text for path, not: {0}" .format(repr('foo')) ) def test_netloc(self): url = URL(scheme='https') self.assertEqual(url.uses_netloc, True) url = URL(scheme='git+https') self.assertEqual(url.uses_netloc, True) url = URL(scheme='mailto') self.assertEqual(url.uses_netloc, False) url = URL(scheme='ztp') self.assertEqual(url.uses_netloc, None) url = URL.from_text('ztp://test.com') self.assertEqual(url.uses_netloc, True) url = URL.from_text('ztp:test:com') self.assertEqual(url.uses_netloc, False) def test_ipv6_with_port(self): t = 'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:80/' url = URL.from_text(t) assert url.host == '2001:0db8:85a3:0000:0000:8a2e:0370:7334' assert url.port == 80 assert SCHEME_PORT_MAP[url.scheme] != url.port def test_basic(self): text = 'https://user:pass@example.com/path/to/here?k=v#nice' url = URL.from_text(text) assert url.scheme == 'https' assert url.userinfo == 'user:pass' assert url.host == 'example.com' assert url.path == ('path', 'to', 'here') assert url.fragment == 'nice' text = 'https://user:pass@127.0.0.1/path/to/here?k=v#nice' url = URL.from_text(text) assert url.scheme == 'https' assert url.userinfo == 'user:pass' assert url.host == '127.0.0.1' assert url.path == ('path', 'to', 'here') text = 'https://user:pass@[::1]/path/to/here?k=v#nice' url = URL.from_text(text) assert url.scheme == 'https' assert url.userinfo == 'user:pass' assert url.host == '::1' assert url.path == ('path', 'to', 'here') def test_invalid_url(self): self.assertRaises(URLParseError, URL.from_text, '#\n\n') def test_invalid_authority_url(self): self.assertRaises(URLParseError, URL.from_text, 'http://abc:\n\n/#') def test_invalid_ipv6(self): invalid_ipv6_ips = ['2001::0234:C1ab::A0:aabc:003F', '2001::1::3F', ':', '::::', '::256.0.0.1'] for ip in invalid_ipv6_ips: url_text = 'http://[' + ip + ']' self.assertRaises(socket.error, inet_pton, socket.AF_INET6, ip) self.assertRaises(URLParseError, URL.from_text, url_text) def test_invalid_port(self): self.assertRaises(URLParseError, URL.from_text, 'ftp://portmouth:smash') self.assertRaises(ValueError, URL.from_text, 'http://reader.googlewebsite.com:neverforget') def test_idna(self): u1 = URL.from_text('http://bücher.ch') self.assertEquals(u1.host, 'bücher.ch') self.assertEquals(u1.to_text(), 'http://bücher.ch') self.assertEquals(u1.to_uri().to_text(), 'http://xn--bcher-kva.ch') u2 = URL.from_text('https://xn--bcher-kva.ch') self.assertEquals(u2.host, 'xn--bcher-kva.ch') self.assertEquals(u2.to_text(), 'https://xn--bcher-kva.ch') self.assertEquals(u2.to_iri().to_text(), u'https://bücher.ch') def test_netloc_slashes(self): # basic sanity checks url = URL.from_text('mailto:mahmoud@hatnote.com') self.assertEquals(url.scheme, 'mailto') self.assertEquals(url.to_text(), 'mailto:mahmoud@hatnote.com') url = URL.from_text('http://hatnote.com') self.assertEquals(url.scheme, 'http') self.assertEquals(url.to_text(), 'http://hatnote.com') # test that unrecognized schemes stay consistent with '//' url = URL.from_text('newscheme:a:b:c') self.assertEquals(url.scheme, 'newscheme') self.assertEquals(url.to_text(), 'newscheme:a:b:c') url = URL.from_text('newerscheme://a/b/c') self.assertEquals(url.scheme, 'newerscheme') self.assertEquals(url.to_text(), 'newerscheme://a/b/c') # test that reasonable guesses are made url = URL.from_text('git+ftp://gitstub.biz/glyph/lefkowitz') self.assertEquals(url.scheme, 'git+ftp') self.assertEquals(url.to_text(), 'git+ftp://gitstub.biz/glyph/lefkowitz') url = URL.from_text('what+mailto:freerealestate@enotuniq.org') self.assertEquals(url.scheme, 'what+mailto') self.assertEquals(url.to_text(), 'what+mailto:freerealestate@enotuniq.org') url = URL(scheme='ztp', path=('x', 'y', 'z'), rooted=True) self.assertEquals(url.to_text(), 'ztp:/x/y/z') # also works when the input doesn't include '//' url = URL(scheme='git+ftp', path=('x', 'y', 'z' ,''), rooted=True, uses_netloc=True) # broken bc urlunsplit self.assertEquals(url.to_text(), 'git+ftp:///x/y/z/') # really why would this ever come up but ok url = URL.from_text('file:///path/to/heck') url2 = url.replace(scheme='mailto') self.assertEquals(url2.to_text(), 'mailto:/path/to/heck') url_text = 'unregisteredscheme:///a/b/c' url = URL.from_text(url_text) no_netloc_url = url.replace(uses_netloc=False) self.assertEquals(no_netloc_url.to_text(), 'unregisteredscheme:/a/b/c') netloc_url = url.replace(uses_netloc=True) self.assertEquals(netloc_url.to_text(), url_text) return def test_wrong_constructor(self): with self.assertRaises(ValueError): # whole URL not allowed URL(BASIC_URL) with self.assertRaises(ValueError): # explicitly bad scheme not allowed URL('HTTP_____more_like_imHoTTeP') def test_encoded_userinfo(self): url = URL.from_text('http://user:pass@example.com') assert url.userinfo == 'user:pass' url = url.replace(userinfo='us%20her:pass') iri = url.to_iri() assert iri.to_text(with_password=True) == 'http://us her:pass@example.com' assert iri.to_text(with_password=False) == 'http://us her:@example.com' assert iri.to_uri().to_text(with_password=True) == 'http://us%20her:pass@example.com' def test_hash(self): url_map = {} url1 = URL.from_text('http://blog.hatnote.com/ask?utm_source=geocity') assert hash(url1) == hash(url1) # sanity url_map[url1] = 1 url2 = URL.from_text('http://blog.hatnote.com/ask') url2 = url2.set('utm_source', 'geocity') url_map[url2] = 2 assert len(url_map) == 1 assert list(url_map.values()) == [2] assert hash(URL()) == hash(URL()) # slightly more sanity def test_dir(self): url = URL() res = dir(url) assert len(res) > 15 # twisted compat assert 'fromText' not in res assert 'asText' not in res assert 'asURI' not in res assert 'asIRI' not in res def test_twisted_compat(self): url = URL.fromText(u'http://example.com/a%20té%C3%A9st') assert url.asText() == 'http://example.com/a%20té%C3%A9st' assert url.asURI().asText() == 'http://example.com/a%20t%C3%A9%C3%A9st' # TODO: assert url.asIRI().asText() == u'http://example.com/a%20téést' def test_set_ordering(self): # TODO url = URL.from_text('http://example.com/?a=b&c') url = url.set(u'x', u'x') url = url.add(u'x', u'y') assert url.to_text() == u'http://example.com/?a=b&x=x&c&x=y' # Would expect: # assert url.to_text() == u'http://example.com/?a=b&c&x=x&x=y' def test_schemeless_path(self): "See issue #4" u1 = URL.from_text("urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob") u2 = URL.from_text(u1.to_text()) assert u1 == u2 # sanity testing roundtripping u3 = URL.from_text(u1.to_iri().to_text()) assert u1 == u3 assert u2 == u3 # test that colons are ok past the first segment u4 = URL.from_text("first-segment/urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob") u5 = u4.to_iri() assert u5.to_text() == u'first-segment/urn:ietf:wg:oauth:2.0:oob' u6 = URL.from_text(u5.to_text()).to_uri() assert u5 == u6 # colons stay decoded bc they're not in the first seg def test_emoji_domain(self): "See issue #7, affecting only narrow builds (2.6-3.3)" url = URL.from_text('https://xn--vi8hiv.ws') iri = url.to_iri() iri.to_text() # as long as we don't get ValueErrors, we're good def test_delim_in_param(self): "Per issue #6 and #8" self.assertRaises(ValueError, URL, scheme=u'http', host=u'a/c') self.assertRaises(ValueError, URL, path=(u"?",)) self.assertRaises(ValueError, URL, path=(u"#",)) self.assertRaises(ValueError, URL, query=((u"&", "test"))) def test_empty_paths_eq(self): u1 = URL.from_text('http://example.com/') u2 = URL.from_text('http://example.com') assert u1 == u2 u1 = URL.from_text('http://example.com') u2 = URL.from_text('http://example.com') assert u1 == u2 u1 = URL.from_text('http://example.com') u2 = URL.from_text('http://example.com/') assert u1 == u2 u1 = URL.from_text('http://example.com/') u2 = URL.from_text('http://example.com/') assert u1 == u2 def test_from_text_type(self): assert URL.from_text(u'#ok').fragment == u'ok' # sanity self.assertRaises(TypeError, URL.from_text, b'bytes://x.y.z') self.assertRaises(TypeError, URL.from_text, object()) def test_from_text_bad_authority(self): # bad ipv6 brackets self.assertRaises(URLParseError, URL.from_text, 'http://[::1/') self.assertRaises(URLParseError, URL.from_text, 'http://::1]/') self.assertRaises(URLParseError, URL.from_text, 'http://[[::1]/') self.assertRaises(URLParseError, URL.from_text, 'http://[::1]]/') # empty port self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:') # non-integer port self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1:hi') # extra port colon (makes for an invalid host) self.assertRaises(URLParseError, URL.from_text, 'http://127.0.0.1::80') def test_normalize(self): url = URL.from_text('HTTP://Example.com/A%61/./../A%61?B%62=C%63#D%64') assert url.get('Bb') == [] assert url.get('B%62') == ['C%63'] assert len(url.path) == 4 # test that most expected normalizations happen norm_url = url.normalize() assert norm_url.scheme == 'http' assert norm_url.host == 'example.com' assert norm_url.path == ('Aa',) assert norm_url.get('Bb') == ['Cc'] assert norm_url.fragment == 'Dd' assert norm_url.to_text() == 'http://example.com/Aa?Bb=Cc#Dd' # test that flags work noop_norm_url = url.normalize(scheme=False, host=False, path=False, query=False, fragment=False) assert noop_norm_url == url # test that empty paths get at least one slash slashless_url = URL.from_text('http://example.io') slashful_url = slashless_url.normalize() assert slashful_url.to_text() == 'http://example.io/' # test case normalization for percent encoding delimited_url = URL.from_text('/a%2fb/cd%3f?k%3d=v%23#test') norm_delimited_url = delimited_url.normalize() assert norm_delimited_url.to_text() == '/a%2Fb/cd%3F?k%3D=v%23#test' # test invalid percent encoding during normalize assert URL(path=('', '%te%sts')).normalize(percents=False).to_text() == '/%te%sts' assert URL(path=('', '%te%sts')).normalize().to_text() == '/%25te%25sts' percenty_url = URL(scheme='ftp', path=['%%%', '%a%b'], query=[('%', '%%')], fragment='%', userinfo='%:%') assert percenty_url.to_text(with_password=True) == 'ftp://%:%@/%%%/%a%b?%=%%#%' assert percenty_url.normalize().to_text(with_password=True) == 'ftp://%25:%25@/%25%25%25/%25a%25b?%25=%25%25#%25' def test_str(self): # see also issue #49 text = u'http://example.com/á/y%20a%20y/?b=%25' url = URL.from_text(text) assert unicode(url) == text assert bytes(url) == b'http://example.com/%C3%A1/y%20a%20y/?b=%25' if PY2: assert isinstance(str(url), bytes) assert isinstance(unicode(url), unicode) else: assert isinstance(str(url), unicode) assert isinstance(bytes(url), bytes) def test_idna_corners(self): text = u'http://abé.com/' url = URL.from_text(text) assert url.to_iri().host == u'abé.com' assert url.to_uri().host == u'xn--ab-cja.com' url = URL.from_text("http://ドメイン.テスト.co.jp#test") assert url.to_iri().host == u'ドメイン.テスト.co.jp' assert url.to_uri().host == u'xn--eckwd4c7c.xn--zckzah.co.jp' assert url.to_uri().get_decoded_url().host == u'ドメイン.テスト.co.jp' assert URL.from_text('http://Example.com').to_uri().get_decoded_url().host == 'example.com'
Copyright © 2025 - UnknownSec