From 884f7666fa7d95d64bf2266f0d08b1a014b83ef7 Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Sat, 21 Sep 2024 12:35:50 +0200 Subject: [PATCH] Integrate advocate and change netiface requirement --- cps/cw_advocate/__init__.py | 22 +++ cps/cw_advocate/adapters.py | 48 +++++ cps/cw_advocate/addrvalidator.py | 281 ++++++++++++++++++++++++++++++ cps/cw_advocate/api.py | 280 +++++++++++++++++++++++++++++ cps/cw_advocate/connection.py | 201 +++++++++++++++++++++ cps/cw_advocate/connectionpool.py | 39 +++++ cps/cw_advocate/exceptions.py | 39 +++++ cps/cw_advocate/poolmanager.py | 61 +++++++ cps/helper.py | 8 +- optional-requirements.txt | 2 +- requirements.txt | 5 +- 11 files changed, 979 insertions(+), 7 deletions(-) create mode 100644 cps/cw_advocate/__init__.py create mode 100644 cps/cw_advocate/adapters.py create mode 100644 cps/cw_advocate/addrvalidator.py create mode 100644 cps/cw_advocate/api.py create mode 100644 cps/cw_advocate/connection.py create mode 100644 cps/cw_advocate/connectionpool.py create mode 100644 cps/cw_advocate/exceptions.py create mode 100644 cps/cw_advocate/poolmanager.py diff --git a/cps/cw_advocate/__init__.py b/cps/cw_advocate/__init__.py new file mode 100644 index 00000000..58407b7b --- /dev/null +++ b/cps/cw_advocate/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Source: https://github.com/JordanMilne/Advocate + + +from .adapters import ValidatingHTTPAdapter +from .api import * +from .addrvalidator import AddrValidator +from .exceptions import UnacceptableAddressException diff --git a/cps/cw_advocate/adapters.py b/cps/cw_advocate/adapters.py new file mode 100644 index 00000000..b15a141d --- /dev/null +++ b/cps/cw_advocate/adapters.py @@ -0,0 +1,48 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Source: https://github.com/JordanMilne/Advocate + +from requests.adapters import HTTPAdapter, DEFAULT_POOLBLOCK + +from .addrvalidator import AddrValidator +from .exceptions import ProxyDisabledException +from .poolmanager import ValidatingPoolManager + + +class ValidatingHTTPAdapter(HTTPAdapter): + __attrs__ = HTTPAdapter.__attrs__ + ['_validator'] + + def __init__(self, *args, **kwargs): + self._validator = kwargs.pop('validator', None) + if not self._validator: + self._validator = AddrValidator() + super().__init__(*args, **kwargs) + + def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, + **pool_kwargs): + self._pool_connections = connections + self._pool_maxsize = maxsize + self._pool_block = block + self.poolmanager = ValidatingPoolManager( + num_pools=connections, + maxsize=maxsize, + block=block, + validator=self._validator, + **pool_kwargs + ) + + def proxy_manager_for(self, proxy, **proxy_kwargs): + raise ProxyDisabledException("Proxies cannot be used with 
Advocate") diff --git a/cps/cw_advocate/addrvalidator.py b/cps/cw_advocate/addrvalidator.py new file mode 100644 index 00000000..0f14ce85 --- /dev/null +++ b/cps/cw_advocate/addrvalidator.py @@ -0,0 +1,281 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Source: https://github.com/JordanMilne/Advocate + +import functools +import fnmatch +import ipaddress +import re + +try: + import netifaces + HAVE_NETIFACES = True +except ImportError: + netifaces = None + HAVE_NETIFACES = False + +from .exceptions import NameserverException, ConfigException + + +def canonicalize_hostname(hostname): + """Lowercase and punycodify a hostname""" + # We do the lowercasing after IDNA encoding because we only want to + # lowercase the *ASCII* chars. + # TODO: The differences between IDNA2003 and IDNA2008 might be relevant + # to us, but both specs are damn confusing. 
+ return str(hostname.encode("idna").lower(), 'utf-8') + + +def determine_local_addresses(): + """Get all IPs that refer to this machine according to netifaces""" + if not HAVE_NETIFACES: + raise ConfigException("Tried to determine local addresses, " + "but netifaces module was not importable") + ips = [] + for interface in netifaces.interfaces(): + if_families = netifaces.ifaddresses(interface) + for family_kind in {netifaces.AF_INET, netifaces.AF_INET6}: + addrs = if_families.get(family_kind, []) + for addr in (x.get("addr", "") for x in addrs): + if family_kind == netifaces.AF_INET6: + # We can't do anything sensible with the scope here + addr = addr.split("%")[0] + ips.append(ipaddress.ip_network(addr)) + return ips + + +def add_local_address_arg(func): + """Add the "_local_addresses" kwarg if it's missing + + IMO this information shouldn't be cached between calls (what if one of the + adapters got a new IP at runtime?,) and we don't want each function to + recalculate it. Just recalculate it if the caller didn't provide it for us. + """ + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + if "_local_addresses" not in kwargs: + if self.autodetect_local_addresses: + kwargs["_local_addresses"] = determine_local_addresses() + else: + kwargs["_local_addresses"] = [] + return func(self, *args, **kwargs) + return wrapper + + +class AddrValidator: + _6TO4_RELAY_NET = ipaddress.ip_network("192.88.99.0/24") + # Just the well known prefix, DNS64 servers can set their own + # prefix, but in practice most probably don't. 
+ _DNS64_WK_PREFIX = ipaddress.ip_network("64:ff9b::/96") + DEFAULT_PORT_WHITELIST = {80, 8080, 443, 8443, 8000} + + def __init__( + self, + ip_blacklist=None, + ip_whitelist=None, + port_whitelist=None, + port_blacklist=None, + hostname_blacklist=None, + allow_ipv6=False, + allow_teredo=False, + allow_6to4=False, + allow_dns64=False, + # Must be explicitly set to "False" if you don't want to try + # detecting local interface addresses with netifaces. + autodetect_local_addresses=True, + ): + if not port_blacklist and not port_whitelist: + # An assortment of common HTTPS? ports. + port_whitelist = self.DEFAULT_PORT_WHITELIST.copy() + self.ip_blacklist = ip_blacklist or set() + self.ip_whitelist = ip_whitelist or set() + self.port_blacklist = port_blacklist or set() + self.port_whitelist = port_whitelist or set() + # TODO: ATM this can contain either regexes or globs that are converted + # to regexes upon every check. Create a collection that automagically + # converts them to regexes on insert? + self.hostname_blacklist = hostname_blacklist or set() + self.allow_ipv6 = allow_ipv6 + self.allow_teredo = allow_teredo + self.allow_6to4 = allow_6to4 + self.allow_dns64 = allow_dns64 + self.autodetect_local_addresses = autodetect_local_addresses + + @add_local_address_arg + def is_ip_allowed(self, addr_ip, _local_addresses=None): + if not isinstance(addr_ip, + (ipaddress.IPv4Address, ipaddress.IPv6Address)): + addr_ip = ipaddress.ip_address(addr_ip) + + # The whitelist should take precedence over the blacklist so we can + # punch holes in blacklisted ranges + if any(addr_ip in net for net in self.ip_whitelist): + return True + + if any(addr_ip in net for net in self.ip_blacklist): + return False + + if any(addr_ip in net for net in _local_addresses): + return False + + if addr_ip.version == 4: + if not addr_ip.is_private: + # IPs for carrier-grade NAT. 
Seems weird that it doesn't set + # `is_private`, but we need to check `not is_global` + if not ipaddress.ip_network(addr_ip).is_global: + return False + elif addr_ip.version == 6: + # You'd better have a good reason for enabling IPv6 + # because Advocate's techniques don't work well without NAT. + if not self.allow_ipv6: + return False + + # v6 addresses can also map to IPv4 addresses! Tricky! + v4_nested = [] + if addr_ip.ipv4_mapped: + v4_nested.append(addr_ip.ipv4_mapped) + # WTF IPv6? Why you gotta have a billion tunneling mechanisms? + # XXX: Do we even really care about these? If we're tunneling + # through public servers we shouldn't be able to access + # addresses on our private network, right? + if addr_ip.sixtofour: + if not self.allow_6to4: + return False + v4_nested.append(addr_ip.sixtofour) + if addr_ip.teredo: + if not self.allow_teredo: + return False + # Check both the client *and* server IPs + v4_nested.extend(addr_ip.teredo) + if addr_ip in self._DNS64_WK_PREFIX: + if not self.allow_dns64: + return False + # When using the well-known prefix the last 4 bytes + # are the IPv4 addr + v4_nested.append(ipaddress.ip_address(addr_ip.packed[-4:])) + + if not all(self.is_ip_allowed(addr_v4) for addr_v4 in v4_nested): + return False + + # fec0::*, apparently deprecated? + if addr_ip.is_site_local: + return False + else: + raise ValueError("Unsupported IP version(?): %r" % addr_ip) + + # 169.254.XXX.XXX, AWS uses these for autoconfiguration + if addr_ip.is_link_local: + return False + # 127.0.0.1, ::1, etc. + if addr_ip.is_loopback: + return False + if addr_ip.is_multicast: + return False + # 192.168.XXX.XXX, 10.XXX.XXX.XXX + if addr_ip.is_private: + return False + # 255.255.255.255, ::ffff:XXXX:XXXX (v6->v4) mapping + if addr_ip.is_reserved: + return False + # There's no reason to connect directly to a 6to4 relay + if addr_ip in self._6TO4_RELAY_NET: + return False + # 0.0.0.0 + if addr_ip.is_unspecified: + return False + + # It doesn't look bad, so... 
it's must be ok! + return True + + def _hostname_matches_pattern(self, hostname, pattern): + # If they specified a string, just assume they only want basic globbing. + # This stops people from not realizing they're dealing in REs and + # not escaping their periods unless they specifically pass in an RE. + # This has the added benefit of letting us sanely handle globbed + # IDNs by default. + if isinstance(pattern, str): + # convert the glob to a punycode glob, then a regex + pattern = fnmatch.translate(canonicalize_hostname(pattern)) + + hostname = canonicalize_hostname(hostname) + # Down the line the hostname may get treated as a null-terminated string + # (as with `socket.getaddrinfo`.) Try to account for that. + # + # >>> socket.getaddrinfo("example.com\x00aaaa", 80) + # [(2, 1, 6, '', ('93.184.216.34', 80)), [...] + no_null_hostname = hostname.split("\x00")[0] + + return any(re.match(pattern, x.strip(".")) for x + in (no_null_hostname, hostname)) + + def is_hostname_allowed(self, hostname): + # Sometimes (like with "external" services that your IP has privileged + # access to) you might not always know the IP range to blacklist access + # to, or the `A` record might change without you noticing. + # For e.x.: `foocorp.external.org`. + # + # Another option is doing something like: + # + # for addrinfo in socket.getaddrinfo("foocorp.external.org", 80): + # global_validator.ip_blacklist.add(ip_address(addrinfo[4][0])) + # + # but that's not always a good idea if they're behind a third-party lb. + for pattern in self.hostname_blacklist: + if self._hostname_matches_pattern(hostname, pattern): + return False + return True + + @add_local_address_arg + def is_addrinfo_allowed(self, addrinfo, _local_addresses=None): + assert(len(addrinfo) == 5) + # XXX: Do we care about any of the other elements? Guessing not. 
+ family, socktype, proto, canonname, sockaddr = addrinfo + + # The 4th elem inaddrinfo may either be a touple of two or four items, + # depending on whether we're dealing with IPv4 or v6 + if len(sockaddr) == 2: + # v4 + ip, port = sockaddr + elif len(sockaddr) == 4: + # v6 + # XXX: what *are* `flow_info` and `scope_id`? Anything useful? + # Seems like we can figure out all we need about the scope from + # the `is_` properties. + ip, port, flow_info, scope_id = sockaddr + else: + raise ValueError("Unexpected addrinfo format %r" % sockaddr) + + # Probably won't help protect against SSRF, but might prevent our being + # used to attack others' non-HTTP services. See + # http://www.remote.org/jochen/sec/hfpa/ + if self.port_whitelist and port not in self.port_whitelist: + return False + if port in self.port_blacklist: + return False + + if self.hostname_blacklist: + if not canonname: + raise NameserverException( + "addrinfo must contain the canon name to do blacklisting " + "based on hostname. Make sure you use the " + "`socket.AI_CANONNAME` flag, and that each record contains " + "the canon name. Your DNS server might also be garbage." + ) + + if not self.is_hostname_allowed(canonname): + return False + + return self.is_ip_allowed(ip, _local_addresses=_local_addresses) diff --git a/cps/cw_advocate/api.py b/cps/cw_advocate/api.py new file mode 100644 index 00000000..c9ed5f58 --- /dev/null +++ b/cps/cw_advocate/api.py @@ -0,0 +1,280 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# Source: https://github.com/JordanMilne/Advocate + +""" +advocate.api +~~~~~~~~~~~~ + +This module implements the Requests API, largely a copy/paste from `requests` +itself. + +:copyright: (c) 2015 by Jordan Milne. +:license: Apache2, see LICENSE for more details. + +""" +from collections import OrderedDict +import hashlib +import pickle + +from requests import Session as RequestsSession + +# import cw_advocate +from .adapters import ValidatingHTTPAdapter +from .exceptions import MountDisabledException + + +class Session(RequestsSession): + """Convenience wrapper around `requests.Session` set up for `advocate`ing""" + + __attrs__ = RequestsSession.__attrs__ + ["validator"] + DEFAULT_VALIDATOR = None + """ + User-replaceable default validator to use for all Advocate sessions, + includes sessions created by advocate.get() + """ + + def __init__(self, *args, **kwargs): + self.validator = kwargs.pop("validator", None) or self.DEFAULT_VALIDATOR + adapter_kwargs = kwargs.pop("_adapter_kwargs", {}) + + # `Session.__init__()` calls `mount()` internally, so we need to allow + # it temporarily + self.__mount_allowed = True + RequestsSession.__init__(self, *args, **kwargs) + + # Drop any existing adapters + self.adapters = OrderedDict() + + self.mount("http://", ValidatingHTTPAdapter(validator=self.validator, **adapter_kwargs)) + self.mount("https://", ValidatingHTTPAdapter(validator=self.validator, **adapter_kwargs)) + self.__mount_allowed = False + + def mount(self, *args, **kwargs): + """Wrapper around `mount()` to prevent a protection bypass""" + if self.__mount_allowed: + super().mount(*args, **kwargs) + else: + raise MountDisabledException( + "mount() is disabled to prevent protection bypasses" + ) + + +def session(*args, **kwargs): + return Session(*args, **kwargs) + + +def request(method, url, **kwargs): + """Constructs and sends a :class:`Request `. 
+ + :param method: method for the new :class:`Request` object. + :param url: URL for the new :class:`Request` object. + :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. + :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. + :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. + :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. + :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. + :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. + :param timeout: (optional) How long to wait for the server to send data + before giving up, as a float, or a (`connect timeout, read timeout + `_) tuple. + :type timeout: float or tuple + :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :type allow_redirects: bool + :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. + :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. + :param stream: (optional) if ``False``, the response content will be immediately downloaded. + :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + validator = kwargs.pop("validator", None) + with Session(validator=validator) as sess: + response = sess.request(method=method, url=url, **kwargs) + return response + + +def get(url, **kwargs): + """Sends a GET request. + + :param url: URL for the new :class:`Request` object. + :param \*\*kwargs: Optional arguments that ``request`` takes. 
+ :return: :class:`Response ` object + :rtype: requests.Response + """ + + kwargs.setdefault('allow_redirects', True) + return request('get', url, **kwargs) + + +'''def options(url, **kwargs): + """Sends a OPTIONS request. + + :param url: URL for the new :class:`Request` object. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + kwargs.setdefault('allow_redirects', True) + return request('options', url, **kwargs) + + +def head(url, **kwargs): + """Sends a HEAD request. + + :param url: URL for the new :class:`Request` object. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + kwargs.setdefault('allow_redirects', False) + return request('head', url, **kwargs) + + +def post(url, data=None, json=None, **kwargs): + """Sends a POST request. + + :param url: URL for the new :class:`Request` object. + :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + return request('post', url, data=data, json=json, **kwargs) + + +def put(url, data=None, **kwargs): + """Sends a PUT request. + + :param url: URL for the new :class:`Request` object. + :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + return request('put', url, data=data, **kwargs) + + +def patch(url, data=None, **kwargs): + """Sends a PATCH request. + + :param url: URL for the new :class:`Request` object. 
+ :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + return request('patch', url, data=data, **kwargs) + + +def delete(url, **kwargs): + """Sends a DELETE request. + + :param url: URL for the new :class:`Request` object. + :param \*\*kwargs: Optional arguments that ``request`` takes. + :return: :class:`Response ` object + :rtype: requests.Response + """ + + return request('delete', url, **kwargs)''' + + +class RequestsAPIWrapper: + """Provides a `requests.api`-like interface with a specific validator""" + + # Due to how the classes are dynamically constructed pickling may not work + # correctly unless loaded within the same interpreter instance. + # Enable at your peril. + SUPPORT_WRAPPER_PICKLING = False + + def __init__(self, validator): + # Do this here to avoid circular import issues + try: + from .futures import FuturesSession + have_requests_futures = True + except ImportError as e: + have_requests_futures = False + + self.validator = validator + outer_self = self + + class _WrappedSession(Session): + """An `advocate.Session` that uses the wrapper's blacklist + + the wrapper is meant to be a transparent replacement for `requests`, + so people should be able to subclass `wrapper.Session` and still + get the desired validation behaviour + """ + DEFAULT_VALIDATOR = outer_self.validator + + self._make_wrapper_cls_global(_WrappedSession) + + if have_requests_futures: + + class _WrappedFuturesSession(FuturesSession): + """Like _WrappedSession, but for `FuturesSession`s""" + DEFAULT_VALIDATOR = outer_self.validator + self._make_wrapper_cls_global(_WrappedFuturesSession) + + self.FuturesSession = _WrappedFuturesSession + + self.request = self._default_arg_wrapper(request) + self.get = self._default_arg_wrapper(get) + self.Session = _WrappedSession + + def __getattr__(self, 
item): + # This class is meant to mimic the requests base module, so if we don't + # have this attribute, it might be on the base module (like the Request + # class, etc.) + try: + return object.__getattribute__(self, item) + except AttributeError: + from . import cw_advocate + return getattr(cw_advocate, item) + + def _default_arg_wrapper(self, fun): + def wrapped_func(*args, **kwargs): + kwargs.setdefault("validator", self.validator) + return fun(*args, **kwargs) + return wrapped_func + + def _make_wrapper_cls_global(self, cls): + if not self.SUPPORT_WRAPPER_PICKLING: + return + # Gnarly, but necessary to give pickle a consistent module-level + # reference for each wrapper. + wrapper_hash = hashlib.sha256(pickle.dumps(self)).hexdigest() + cls.__name__ = "_".join((cls.__name__, wrapper_hash)) + cls.__qualname__ = ".".join((__name__, cls.__name__)) + if not globals().get(cls.__name__): + globals()[cls.__name__] = cls + + +__all__ = ( + "get", + "request", + "session", + "Session", + "RequestsAPIWrapper", +) diff --git a/cps/cw_advocate/connection.py b/cps/cw_advocate/connection.py new file mode 100644 index 00000000..ce790ada --- /dev/null +++ b/cps/cw_advocate/connection.py @@ -0,0 +1,201 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Source: https://github.com/JordanMilne/Advocate + +import ipaddress +import socket +from socket import timeout as SocketTimeout + +from urllib3.connection import HTTPSConnection, HTTPConnection +from urllib3.exceptions import ConnectTimeoutError +from urllib3.util.connection import _set_socket_options +from urllib3.util.connection import create_connection as old_create_connection + +from . import addrvalidator +from .exceptions import UnacceptableAddressException + + +def advocate_getaddrinfo(host, port, get_canonname=False): + addrinfo = socket.getaddrinfo( + host, + port, + 0, + socket.SOCK_STREAM, + 0, + # We need what the DNS client sees the hostname as, correctly handles + # IDNs and tricky things like `private.foocorp.org\x00.google.com`. + # All IDNs will be converted to punycode. + socket.AI_CANONNAME if get_canonname else 0, + ) + return fix_addrinfo(addrinfo) + + +def fix_addrinfo(records): + """ + Propagate the canonname across records and parse IPs + + I'm not sure if this is just the behaviour of `getaddrinfo` on Linux, but + it seems like only the first record in the set has the canonname field + populated. + """ + def fix_record(record, canonname): + sa = record[4] + sa = (ipaddress.ip_address(sa[0]),) + sa[1:] + return record[0], record[1], record[2], canonname, sa + + canonname = None + if records: + # Apparently the canonical name is only included in the first record? + # Add it to all of them. + assert(len(records[0]) == 5) + canonname = records[0][3] + return tuple(fix_record(x, canonname) for x in records) + + +# Lifted from requests' urllib3, which in turn lifted it from `socket.py`. Oy! +def validating_create_connection(address, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None, + validator=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. 
Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + # We can skip asking for the canon name if we're not doing hostname-based + # blacklisting. + need_canonname = False + if validator.hostname_blacklist: + need_canonname = True + # We check both the non-canonical and canonical hostnames so we can + # catch both of these: + # CNAME from nonblacklisted.com -> blacklisted.com + # CNAME from blacklisted.com -> nonblacklisted.com + if not validator.is_hostname_allowed(host): + raise UnacceptableAddressException(host) + + err = None + addrinfo = advocate_getaddrinfo(host, port, get_canonname=need_canonname) + if addrinfo: + if validator.autodetect_local_addresses: + local_addresses = addrvalidator.determine_local_addresses() + else: + local_addresses = [] + for res in addrinfo: + # Are we allowed to connect with this result? + if not validator.is_addrinfo_allowed( + res, + _local_addresses=local_addresses, + ): + continue + af, socktype, proto, canonname, sa = res + # Unparse the validated IP + sa = (sa[0].exploded,) + sa[1:] + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. 
+ _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + sock = None + + if err is None: + # If we got here, none of the results were acceptable + err = UnacceptableAddressException(address) + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +# TODO: Is there a better way to add this to multiple classes with different +# base classes? I tried a mixin, but it used the base method instead. +def _validating_new_conn(self): + """ Establish a socket connection and set nodelay settings on it. + + :return: New socket connection. + """ + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options + + try: + # Hack around HTTPretty's patched sockets + # TODO: some better method of hacking around it that checks if we + # _would have_ connected to a private addr? + conn_func = validating_create_connection + if socket.getaddrinfo.__module__.startswith("httpretty"): + conn_func = old_create_connection + else: + extra_kw["validator"] = self._validator + + conn = conn_func( + (self.host, self.port), + self.timeout, + **extra_kw + ) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. 
(connect timeout=%s)" % + (self.host, self.timeout)) + + return conn + + +# Don't silently break if the private API changes across urllib3 versions +assert(hasattr(HTTPConnection, '_new_conn')) +assert(hasattr(HTTPSConnection, '_new_conn')) + + +class ValidatingHTTPConnection(HTTPConnection): + _new_conn = _validating_new_conn + + def __init__(self, *args, **kwargs): + self._validator = kwargs.pop("validator") + HTTPConnection.__init__(self, *args, **kwargs) + + +class ValidatingHTTPSConnection(HTTPSConnection): + _new_conn = _validating_new_conn + + def __init__(self, *args, **kwargs): + self._validator = kwargs.pop("validator") + HTTPSConnection.__init__(self, *args, **kwargs) diff --git a/cps/cw_advocate/connectionpool.py b/cps/cw_advocate/connectionpool.py new file mode 100644 index 00000000..3bbbfac7 --- /dev/null +++ b/cps/cw_advocate/connectionpool.py @@ -0,0 +1,39 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Source: https://github.com/JordanMilne/Advocate + +from urllib3 import HTTPConnectionPool, HTTPSConnectionPool + +from .connection import ( + ValidatingHTTPConnection, + ValidatingHTTPSConnection, +) + +# Don't silently break if the private API changes across urllib3 versions +assert(hasattr(HTTPConnectionPool, 'ConnectionCls')) +assert(hasattr(HTTPSConnectionPool, 'ConnectionCls')) +assert(hasattr(HTTPConnectionPool, 'scheme')) +assert(hasattr(HTTPSConnectionPool, 'scheme')) + + +class ValidatingHTTPConnectionPool(HTTPConnectionPool): + scheme = 'http' + ConnectionCls = ValidatingHTTPConnection + + +class ValidatingHTTPSConnectionPool(HTTPSConnectionPool): + scheme = 'https' + ConnectionCls = ValidatingHTTPSConnection diff --git a/cps/cw_advocate/exceptions.py b/cps/cw_advocate/exceptions.py new file mode 100644 index 00000000..5ff9852b --- /dev/null +++ b/cps/cw_advocate/exceptions.py @@ -0,0 +1,39 @@ +# +# Copyright 2015 Jordan Milne +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+# Source: https://github.com/JordanMilne/Advocate
+
+class AdvocateException(Exception):  # common base class for every exception defined in this module
+    pass
+
+
+class UnacceptableAddressException(AdvocateException):  # imported by cps/helper.py; presumably signals a rejected address - confirm at the raise sites
+    pass
+
+
+class NameserverException(AdvocateException):
+    pass
+
+
+class MountDisabledException(AdvocateException):
+    pass
+
+
+class ProxyDisabledException(NotImplementedError, AdvocateException):  # also a NotImplementedError, so handlers for that type catch it too
+    pass
+
+
+class ConfigException(AdvocateException):
+    pass
diff --git a/cps/cw_advocate/poolmanager.py b/cps/cw_advocate/poolmanager.py
new file mode 100644
index 00000000..d912d65d
--- /dev/null
+++ b/cps/cw_advocate/poolmanager.py
@@ -0,0 +1,61 @@
+#
+# Copyright 2015 Jordan Milne
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Source: https://github.com/JordanMilne/Advocate
+
+import collections
+import functools
+
+from urllib3 import PoolManager
+from urllib3.poolmanager import _default_key_normalizer, PoolKey
+
+from .connectionpool import (
+    ValidatingHTTPSConnectionPool,
+    ValidatingHTTPConnectionPool,
+)
+
+pool_classes_by_scheme = {  # URL scheme -> validating pool class; installed on ValidatingPoolManager instances below
+    "http": ValidatingHTTPConnectionPool,
+    "https": ValidatingHTTPSConnectionPool,
+}
+
+AdvocatePoolKey = collections.namedtuple('AdvocatePoolKey',
+                                         PoolKey._fields + ('key_validator',))  # PoolKey's fields plus one extra 'key_validator' field
+
+
+def key_normalizer(key_class, request_context):  # build a pool key in which the validator is represented by its id()
+    request_context = request_context.copy()  # work on a copy so the caller's dict is left untouched
+    # TODO: add ability to serialize validator rules to dict,
+    # allowing pool to be shared between sessions with the same
+    # rules.
+    request_context["validator"] = id(request_context["validator"])  # keyed by object identity, so equal-but-distinct validators get separate pools
+    return _default_key_normalizer(key_class, request_context)  # delegates to urllib3's private normalizer with the requested key class
+
+
+key_fn_by_scheme = {  # URL scheme -> key function; both bind key_normalizer to AdvocatePoolKey
+    'http': functools.partial(key_normalizer, AdvocatePoolKey),
+    'https': functools.partial(key_normalizer, AdvocatePoolKey),
+}
+
+
+class ValidatingPoolManager(PoolManager):  # PoolManager that swaps in the validating pool classes and key functions
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Make sure the API hasn't changed
+        assert (hasattr(self, 'pool_classes_by_scheme'))  # NOTE(review): assert statements are skipped under `python -O`
+
+        self.pool_classes_by_scheme = pool_classes_by_scheme  # the module-level dict itself, shared by all instances
+        self.key_fn_by_scheme = key_fn_by_scheme.copy()  # per-instance shallow copy of the module-level mapping
diff --git a/cps/helper.py b/cps/helper.py
index 728d9964..d6f4dd7d 100644
--- a/cps/helper.py
+++ b/cps/helper.py
@@ -43,10 +43,10 @@ from markupsafe import escape
 from urllib.parse import quote
 
 try:
-    import advocate
-    from advocate.exceptions import UnacceptableAddressException
+    from . import cw_advocate  # copy of Advocate added under cps/cw_advocate by this patch
+    from .cw_advocate.exceptions import UnacceptableAddressException
     use_advocate = True
-except ImportError:
+except ImportError as e:  # NOTE(review): the fallback below still binds 'advocate'; 'cw_advocate' exists only if the import succeeded
     use_advocate = False
     advocate = requests
     UnacceptableAddressException = MissingSchema = BaseException
@@ -841,7 +841,7 @@ def save_cover_from_url(url, book_path):
         if cli_param.allow_localhost:
             img = requests.get(url, timeout=(10, 200), allow_redirects=False)  # ToDo: Error Handling
         elif use_advocate:
-            img = advocate.get(url, timeout=(10, 200), allow_redirects=False)  # ToDo: Error Handling
+            img = cw_advocate.get(url, timeout=(10, 200), allow_redirects=False)  # ToDo: Error Handling
         else:
             log.error("python module advocate is not installed but is needed")
             return False, _("Python module 'advocate' is not installed but is needed for cover uploads")
diff --git a/optional-requirements.txt b/optional-requirements.txt
index 4ab6ef0f..b9d98544 100644
--- a/optional-requirements.txt
+++ b/optional-requirements.txt
@@ -17,7 +17,7 @@ google-api-python-client>=1.7.11,<2.200.0
 
 # goodreads
 goodreads>=0.3.2,<0.4.0
-python-Levenshtein>=0.12.0,<0.26.0
+python-Levenshtein>=0.12.0,<0.27.0 # ldap login python-ldap>=3.0.0,<3.5.0 diff --git a/requirements.txt b/requirements.txt index 71074892..8fc03665 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ Flask-Babel>=0.11.1,<4.1.0 Flask-Principal>=0.3.2,<0.5.1 Flask>=1.0.2,<3.1.0 iso-639>=0.4.5,<0.5.0 -PyPDF>=3.15.6,<4.3.0 +PyPDF>=3.15.6,<5.1.0 pytz>=2016.10 requests>=2.28.0,<2.32.0 SQLAlchemy>=1.3.0,<2.1.0 @@ -14,7 +14,8 @@ unidecode>=0.04.19,<1.4.0 lxml>=4.9.1,<5.3.0 flask-wtf>=0.14.2,<1.3.0 chardet>=3.0.0,<5.3.0 -advocate>=1.0.0,<1.1.0 +netifaces-plus>0.12.0,<0.13.0 +urllib3<2.0, >=1.22 Flask-Limiter>=2.3.0,<3.9.0 regex>=2022.3.2,<2024.6.25 bleach>=6.0.0,<6.2.0