_url.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. import ipaddress
  2. import os
  3. from typing import Optional
  4. from urllib.parse import unquote, urlparse
  5. from ._exceptions import WebSocketProxyException
  6. """
  7. _url.py
  8. websocket - WebSocket client library for Python
  9. Copyright 2025 engn33r
  10. Licensed under the Apache License, Version 2.0 (the "License");
  11. you may not use this file except in compliance with the License.
  12. You may obtain a copy of the License at
  13. http://www.apache.org/licenses/LICENSE-2.0
  14. Unless required by applicable law or agreed to in writing, software
  15. distributed under the License is distributed on an "AS IS" BASIS,
  16. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. See the License for the specific language governing permissions and
  18. limitations under the License.
  19. """
# Names exported by ``from <module> import *``; the underscore-prefixed
# helpers defined in this file are intentionally left out.
__all__ = ["parse_url", "get_proxy_info"]
  21. def parse_url(url: str) -> tuple:
  22. """
  23. parse url and the result is tuple of
  24. (hostname, port, resource path and the flag of secure mode)
  25. Parameters
  26. ----------
  27. url: str
  28. url string.
  29. """
  30. if ":" not in url:
  31. raise ValueError("url is invalid")
  32. scheme, url = url.split(":", 1)
  33. parsed = urlparse(url, scheme="http")
  34. if parsed.hostname:
  35. hostname = parsed.hostname
  36. else:
  37. raise ValueError("hostname is invalid")
  38. port = 0
  39. if parsed.port:
  40. port = parsed.port
  41. is_secure = False
  42. if scheme == "ws":
  43. if not port:
  44. port = 80
  45. elif scheme == "wss":
  46. is_secure = True
  47. if not port:
  48. port = 443
  49. else:
  50. raise ValueError("scheme %s is invalid" % scheme)
  51. if parsed.path:
  52. resource = parsed.path
  53. else:
  54. resource = "/"
  55. if parsed.query:
  56. resource += f"?{parsed.query}"
  57. return hostname, port, resource, is_secure
  58. def _is_ip_address(addr: str) -> bool:
  59. if not isinstance(addr, str):
  60. raise TypeError("_is_ip_address() argument 1 must be str")
  61. try:
  62. ipaddress.ip_address(addr)
  63. except ValueError:
  64. return False
  65. else:
  66. return True
  67. def _is_subnet_address(hostname: str) -> bool:
  68. try:
  69. ipaddress.ip_network(hostname)
  70. except ValueError:
  71. return False
  72. else:
  73. return True
  74. def _is_address_in_network(ip: str, net: str) -> bool:
  75. try:
  76. return ipaddress.ip_network(ip).subnet_of(ipaddress.ip_network(net))
  77. except TypeError:
  78. return False
  79. def _is_no_proxy_host(hostname: str, no_proxy: Optional[list[str]]) -> bool:
  80. if not no_proxy:
  81. if v := os.environ.get("no_proxy", os.environ.get("NO_PROXY", "")).replace(
  82. " ", ""
  83. ):
  84. no_proxy = v.split(",")
  85. if not no_proxy:
  86. no_proxy = []
  87. if "*" in no_proxy:
  88. return True
  89. if hostname in no_proxy:
  90. return True
  91. if _is_ip_address(hostname):
  92. return any(
  93. [
  94. _is_address_in_network(hostname, subnet)
  95. for subnet in no_proxy
  96. if _is_subnet_address(subnet)
  97. ]
  98. )
  99. for domain in [domain for domain in no_proxy if domain.startswith(".")]:
  100. endDomain = domain.lstrip(".")
  101. if hostname.endswith(endDomain):
  102. return True
  103. return False
  104. def get_proxy_info(
  105. hostname: str,
  106. is_secure: bool,
  107. proxy_host: Optional[str] = None,
  108. proxy_port: int = 0,
  109. proxy_auth: Optional[tuple] = None,
  110. no_proxy: Optional[list[str]] = None,
  111. proxy_type: str = "http",
  112. ) -> tuple:
  113. """
  114. Try to retrieve proxy host and port from environment
  115. if not provided in options.
  116. Result is (proxy_host, proxy_port, proxy_auth).
  117. proxy_auth is tuple of username and password
  118. of proxy authentication information.
  119. Parameters
  120. ----------
  121. hostname: str
  122. Websocket server name.
  123. is_secure: bool
  124. Is the connection secure? (wss) looks for "https_proxy" in env
  125. instead of "http_proxy"
  126. proxy_host: str
  127. http proxy host name.
  128. proxy_port: str or int
  129. http proxy port.
  130. no_proxy: list
  131. Whitelisted host names that don't use the proxy.
  132. proxy_auth: tuple
  133. HTTP proxy auth information. Tuple of username and password. Default is None.
  134. proxy_type: str
  135. Specify the proxy protocol (http, socks4, socks4a, socks5, socks5h). Default is "http".
  136. Use socks4a or socks5h if you want to send DNS requests through the proxy.
  137. """
  138. if _is_no_proxy_host(hostname, no_proxy):
  139. return None, 0, None
  140. if proxy_host:
  141. if not proxy_port:
  142. raise WebSocketProxyException("Cannot use port 0 when proxy_host specified")
  143. port = proxy_port
  144. auth = proxy_auth
  145. return proxy_host, port, auth
  146. env_key = "https_proxy" if is_secure else "http_proxy"
  147. value = os.environ.get(env_key, os.environ.get(env_key.upper(), "")).replace(
  148. " ", ""
  149. )
  150. if value:
  151. proxy = urlparse(value)
  152. auth = (
  153. (unquote(proxy.username or ""), unquote(proxy.password or ""))
  154. if proxy.username
  155. else None
  156. )
  157. return proxy.hostname, proxy.port, auth
  158. return None, 0, None