| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- from __future__ import annotations
- import dataclasses
- import urllib.parse
- import urllib.request
- from .exceptions import InvalidProxy, InvalidURI
- __all__ = ["parse_uri", "WebSocketURI"]
- # All characters from the gen-delims and sub-delims sets in RFC 3987.
- DELIMS = ":/?#[]@!$&'()*+,;="
- @dataclasses.dataclass
- class WebSocketURI:
- """
- WebSocket URI.
- Attributes:
- secure: :obj:`True` for a ``wss`` URI, :obj:`False` for a ``ws`` URI.
- host: Normalized to lower case.
- port: Always set even if it's the default.
- path: May be empty.
- query: May be empty if the URI doesn't include a query component.
- username: Available when the URI contains `User Information`_.
- password: Available when the URI contains `User Information`_.
- .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1
- """
- secure: bool
- host: str
- port: int
- path: str
- query: str
- username: str | None = None
- password: str | None = None
- @property
- def resource_name(self) -> str:
- if self.path:
- resource_name = self.path
- else:
- resource_name = "/"
- if self.query:
- resource_name += "?" + self.query
- return resource_name
- @property
- def user_info(self) -> tuple[str, str] | None:
- if self.username is None:
- return None
- assert self.password is not None
- return (self.username, self.password)
- def parse_uri(uri: str) -> WebSocketURI:
- """
- Parse and validate a WebSocket URI.
- Args:
- uri: WebSocket URI.
- Returns:
- Parsed WebSocket URI.
- Raises:
- InvalidURI: If ``uri`` isn't a valid WebSocket URI.
- """
- parsed = urllib.parse.urlparse(uri)
- if parsed.scheme not in ["ws", "wss"]:
- raise InvalidURI(uri, "scheme isn't ws or wss")
- if parsed.hostname is None:
- raise InvalidURI(uri, "hostname isn't provided")
- if parsed.fragment != "":
- raise InvalidURI(uri, "fragment identifier is meaningless")
- secure = parsed.scheme == "wss"
- host = parsed.hostname
- port = parsed.port or (443 if secure else 80)
- path = parsed.path
- query = parsed.query
- username = parsed.username
- password = parsed.password
- # urllib.parse.urlparse accepts URLs with a username but without a
- # password. This doesn't make sense for HTTP Basic Auth credentials.
- if username is not None and password is None:
- raise InvalidURI(uri, "username provided without password")
- try:
- uri.encode("ascii")
- except UnicodeEncodeError:
- # Input contains non-ASCII characters.
- # It must be an IRI. Convert it to a URI.
- host = host.encode("idna").decode()
- path = urllib.parse.quote(path, safe=DELIMS)
- query = urllib.parse.quote(query, safe=DELIMS)
- if username is not None:
- assert password is not None
- username = urllib.parse.quote(username, safe=DELIMS)
- password = urllib.parse.quote(password, safe=DELIMS)
- return WebSocketURI(secure, host, port, path, query, username, password)
- @dataclasses.dataclass
- class Proxy:
- """
- Proxy.
- Attributes:
- scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``,
- ``"https"``, or ``"http"``.
- host: Normalized to lower case.
- port: Always set even if it's the default.
- username: Available when the proxy address contains `User Information`_.
- password: Available when the proxy address contains `User Information`_.
- .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1
- """
- scheme: str
- host: str
- port: int
- username: str | None = None
- password: str | None = None
- @property
- def user_info(self) -> tuple[str, str] | None:
- if self.username is None:
- return None
- assert self.password is not None
- return (self.username, self.password)
- def parse_proxy(proxy: str) -> Proxy:
- """
- Parse and validate a proxy.
- Args:
- proxy: proxy.
- Returns:
- Parsed proxy.
- Raises:
- InvalidProxy: If ``proxy`` isn't a valid proxy.
- """
- parsed = urllib.parse.urlparse(proxy)
- if parsed.scheme not in ["socks5h", "socks5", "socks4a", "socks4", "https", "http"]:
- raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported")
- if parsed.hostname is None:
- raise InvalidProxy(proxy, "hostname isn't provided")
- if parsed.path not in ["", "/"]:
- raise InvalidProxy(proxy, "path is meaningless")
- if parsed.query != "":
- raise InvalidProxy(proxy, "query is meaningless")
- if parsed.fragment != "":
- raise InvalidProxy(proxy, "fragment is meaningless")
- scheme = parsed.scheme
- host = parsed.hostname
- port = parsed.port or (443 if parsed.scheme == "https" else 80)
- username = parsed.username
- password = parsed.password
- # urllib.parse.urlparse accepts URLs with a username but without a
- # password. This doesn't make sense for HTTP Basic Auth credentials.
- if username is not None and password is None:
- raise InvalidProxy(proxy, "username provided without password")
- try:
- proxy.encode("ascii")
- except UnicodeEncodeError:
- # Input contains non-ASCII characters.
- # It must be an IRI. Convert it to a URI.
- host = host.encode("idna").decode()
- if username is not None:
- assert password is not None
- username = urllib.parse.quote(username, safe=DELIMS)
- password = urllib.parse.quote(password, safe=DELIMS)
- return Proxy(scheme, host, port, username, password)
- def get_proxy(uri: WebSocketURI) -> str | None:
- """
- Return the proxy to use for connecting to the given WebSocket URI, if any.
- """
- if urllib.request.proxy_bypass(f"{uri.host}:{uri.port}"):
- return None
- # According to the _Proxy Usage_ section of RFC 6455, use a SOCKS5 proxy if
- # available, else favor the proxy for HTTPS connections over the proxy for
- # HTTP connections.
- # The priority of a proxy for WebSocket connections is unspecified. We give
- # it the highest priority. This makes it easy to configure a specific proxy
- # for websockets.
- # getproxies() may return SOCKS proxies as {"socks": "http://host:port"} or
- # as {"https": "socks5h://host:port"} depending on whether they're declared
- # in the operating system or in environment variables.
- proxies = urllib.request.getproxies()
- if uri.secure:
- schemes = ["wss", "socks", "https"]
- else:
- schemes = ["ws", "socks", "https", "http"]
- for scheme in schemes:
- proxy = proxies.get(scheme)
- if proxy is not None:
- if scheme == "socks" and proxy.startswith("http://"):
- proxy = "socks5h://" + proxy[7:]
- return proxy
- else:
- return None
|