Source code for django.http.request

import cgi
import codecs
import copy
import warnings
from io import BytesIO
from itertools import chain
from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlsplit

from django.conf import settings
from django.core import signing
from django.core.exceptions import (
    DisallowedHost, ImproperlyConfigured, RequestDataTooBig, TooManyFieldsSent,
)
from django.core.files import uploadhandler
from django.http.multipartparser import MultiPartParser, MultiPartParserError
from django.utils.datastructures import (
    CaseInsensitiveMapping, ImmutableList, MultiValueDict,
)
from django.utils.deprecation import RemovedInDjango40Warning
from django.utils.encoding import escape_uri_path, iri_to_uri
from django.utils.functional import cached_property
from django.utils.http import is_same_domain
from django.utils.inspect import func_supports_parameter
from django.utils.regex_helper import _lazy_re_compile

from .multipartparser import parse_header

# TODO: Remove when dropping support for PY37. inspect.signature() is used to
# detect whether the max_num_fields argument is available as this security fix
# was backported to Python 3.6.8 and 3.7.2, and may also have been applied by
# downstream package maintainers to other versions in their repositories.
if (
    not func_supports_parameter(parse_qsl, 'max_num_fields') or
    not func_supports_parameter(parse_qsl, 'separator')
):
    from django.utils.http import parse_qsl


RAISE_ERROR = object()
host_validation_re = _lazy_re_compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9\.:]+\])(:\d+)?$")


[docs]class UnreadablePostError(OSError): pass
class RawPostDataException(Exception): """ You cannot access raw_post_data from a request that has multipart/* POST data if it has been accessed via POST, FILES, etc.. """ pass
[docs]class HttpRequest: """A basic HTTP request.""" # The encoding used in GET/POST dicts. None means use default setting. _encoding = None _upload_handlers = [] def __init__(self): # WARNING: The `WSGIRequest` subclass doesn't call `super`. # Any variable assignment made here should also happen in # `WSGIRequest.__init__()`. self.GET = QueryDict(mutable=True) self.POST = QueryDict(mutable=True) self.COOKIES = {} self.META = {} self.FILES = MultiValueDict() self.path = '' self.path_info = '' self.method = None self.resolver_match = None self.content_type = None self.content_params = None def __repr__(self): if self.method is None or not self.get_full_path(): return '<%s>' % self.__class__.__name__ return '<%s: %s %r>' % (self.__class__.__name__, self.method, self.get_full_path()) @cached_property def headers(self): return HttpHeaders(self.META) @cached_property def accepted_types(self): """Return a list of MediaType instances.""" return parse_accept_header(self.headers.get('Accept', '*/*'))
[docs] def accepts(self, media_type): return any( accepted_type.match(media_type) for accepted_type in self.accepted_types )
def _set_content_type_params(self, meta): """Set content_type, content_params, and encoding.""" self.content_type, self.content_params = cgi.parse_header(meta.get('CONTENT_TYPE', '')) if 'charset' in self.content_params: try: codecs.lookup(self.content_params['charset']) except LookupError: pass else: self.encoding = self.content_params['charset'] def _get_raw_host(self): """ Return the HTTP host using the environment or request headers. Skip allowed hosts protection, so may return an insecure host. """ # We try three options, in order of decreasing preference. if settings.USE_X_FORWARDED_HOST and ( 'HTTP_X_FORWARDED_HOST' in self.META): host = self.META['HTTP_X_FORWARDED_HOST'] elif 'HTTP_HOST' in self.META: host = self.META['HTTP_HOST'] else: # Reconstruct the host using the algorithm from PEP 333. host = self.META['SERVER_NAME'] server_port = self.get_port() if server_port != ('443' if self.is_secure() else '80'): host = '%s:%s' % (host, server_port) return host
[docs] def get_host(self): """Return the HTTP host using the environment or request headers.""" host = self._get_raw_host() # Allow variants of localhost if ALLOWED_HOSTS is empty and DEBUG=True. allowed_hosts = settings.ALLOWED_HOSTS if settings.DEBUG and not allowed_hosts: allowed_hosts = ['.localhost', '127.0.0.1', '[::1]'] domain, port = split_domain_port(host) if domain and validate_host(domain, allowed_hosts): return host else: msg = "Invalid HTTP_HOST header: %r." % host if domain: msg += " You may need to add %r to ALLOWED_HOSTS." % domain else: msg += " The domain name provided is not valid according to RFC 1034/1035." raise DisallowedHost(msg)
[docs] def get_port(self): """Return the port number for the request as a string.""" if settings.USE_X_FORWARDED_PORT and 'HTTP_X_FORWARDED_PORT' in self.META: port = self.META['HTTP_X_FORWARDED_PORT'] else: port = self.META['SERVER_PORT'] return str(port)
[docs] def get_full_path(self, force_append_slash=False): return self._get_full_path(self.path, force_append_slash)
[docs] def get_full_path_info(self, force_append_slash=False): return self._get_full_path(self.path_info, force_append_slash)
def _get_full_path(self, path, force_append_slash): # RFC 3986 requires query string arguments to be in the ASCII range. # Rather than crash if this doesn't happen, we encode defensively. return '%s%s%s' % ( escape_uri_path(path), '/' if force_append_slash and not path.endswith('/') else '', ('?' + iri_to_uri(self.META.get('QUERY_STRING', ''))) if self.META.get('QUERY_STRING', '') else '' ) def get_raw_uri(self): """ Return an absolute URI from variables available in this request. Skip allowed hosts protection, so may return insecure URI. """ return '{scheme}://{host}{path}'.format( scheme=self.scheme, host=self._get_raw_host(), path=self.get_full_path(), )
[docs] def build_absolute_uri(self, location=None): """ Build an absolute URI from the location and the variables available in this request. If no ``location`` is specified, build the absolute URI using request.get_full_path(). If the location is absolute, convert it to an RFC 3987 compliant URI and return it. If location is relative or is scheme-relative (i.e., ``//example.com/``), urljoin() it to a base URL constructed from the request variables. """ if location is None: # Make it an absolute url (but schemeless and domainless) for the # edge case that the path starts with '//'. location = '//%s' % self.get_full_path() else: # Coerce lazy locations. location = str(location) bits = urlsplit(location) if not (bits.scheme and bits.netloc): # Handle the simple, most common case. If the location is absolute # and a scheme or host (netloc) isn't provided, skip an expensive # urljoin() as long as no path segments are '.' or '..'. if (bits.path.startswith('/') and not bits.scheme and not bits.netloc and '/./' not in bits.path and '/../' not in bits.path): # If location starts with '//' but has no netloc, reuse the # schema and netloc from the current request. Strip the double # slashes and continue as if it wasn't specified. if location.startswith('//'): location = location[2:] location = self._current_scheme_host + location else: # Join the constructed URL with the provided location, which # allows the provided location to apply query strings to the # base path. location = urljoin(self._current_scheme_host + self.path, location) return iri_to_uri(location)
@cached_property def _current_scheme_host(self): return '{}://{}'.format(self.scheme, self.get_host()) def _get_scheme(self): """ Hook for subclasses like WSGIRequest to implement. Return 'http' by default. """ return 'http' @property def scheme(self): if settings.SECURE_PROXY_SSL_HEADER: try: header, secure_value = settings.SECURE_PROXY_SSL_HEADER except ValueError: raise ImproperlyConfigured( 'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.' ) header_value = self.META.get(header) if header_value is not None: return 'https' if header_value == secure_value else 'http' return self._get_scheme()
[docs] def is_secure(self): return self.scheme == 'https'
def is_ajax(self): warnings.warn( 'request.is_ajax() is deprecated. See Django 3.1 release notes ' 'for more details about this deprecation.', RemovedInDjango40Warning, stacklevel=2, ) return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest' @property def encoding(self): return self._encoding @encoding.setter def encoding(self, val): """ Set the encoding used for GET/POST accesses. If the GET or POST dictionary has already been created, remove and recreate it on the next access (so that it is decoded correctly). """ self._encoding = val if hasattr(self, 'GET'): del self.GET if hasattr(self, '_post'): del self._post def _initialize_handlers(self): self._upload_handlers = [uploadhandler.load_handler(handler, self) for handler in settings.FILE_UPLOAD_HANDLERS] @property def upload_handlers(self): if not self._upload_handlers: # If there are no upload handlers defined, initialize them from settings. self._initialize_handlers() return self._upload_handlers @upload_handlers.setter def upload_handlers(self, upload_handlers): if hasattr(self, '_files'): raise AttributeError("You cannot set the upload handlers after the upload has been processed.") self._upload_handlers = upload_handlers def parse_file_upload(self, META, post_data): """Return a tuple of (POST QueryDict, FILES MultiValueDict).""" self.upload_handlers = ImmutableList( self.upload_handlers, warning="You cannot alter upload handlers after the upload has been processed." ) parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) return parser.parse() @property def body(self): if not hasattr(self, '_body'): if self._read_started: raise RawPostDataException("You cannot access body after reading from request's data stream") # Limit the maximum request data size that will be handled in-memory. if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and int(self.META.get('CONTENT_LENGTH') or 0) > settings.DATA_UPLOAD_MAX_MEMORY_SIZE): raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.') try: self._body = self.read() except OSError as e: raise UnreadablePostError(*e.args) from e self._stream = BytesIO(self._body) return self._body def _mark_post_parse_error(self): self._post = QueryDict() self._files = MultiValueDict() def _load_post_and_files(self): """Populate self._post and self._files if the content-type is a form type""" if self.method != 'POST': self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict() return if self._read_started and not hasattr(self, '_body'): self._mark_post_parse_error() return if self.content_type == 'multipart/form-data': if hasattr(self, '_body'): # Use already read data data = BytesIO(self._body) else: data = self try: self._post, self._files = self.parse_file_upload(self.META, data) except MultiPartParserError: # An error occurred while parsing POST data. Since when # formatting the error the request handler might access # self.POST, set self._post and self._file to prevent # attempts to parse POST data again. self._mark_post_parse_error() raise elif self.content_type == 'application/x-www-form-urlencoded': self._post, self._files = QueryDict(self.body, encoding=self._encoding), MultiValueDict() else: self._post, self._files = QueryDict(encoding=self._encoding), MultiValueDict() def close(self): if hasattr(self, '_files'): for f in chain.from_iterable(list_[1] for list_ in self._files.lists()): f.close() # File-like and iterator interface. # # Expects self._stream to be set to an appropriate source of bytes by # a corresponding request subclass (e.g. WSGIRequest). # Also when request data has already been read by request.POST or # request.body, self._stream points to a BytesIO instance # containing that data.
[docs] def read(self, *args, **kwargs): self._read_started = True try: return self._stream.read(*args, **kwargs) except OSError as e: raise UnreadablePostError(*e.args) from e
[docs] def readline(self, *args, **kwargs): self._read_started = True try: return self._stream.readline(*args, **kwargs) except OSError as e: raise UnreadablePostError(*e.args) from e
[docs] def __iter__(self): return iter(self.readline, b'')
[docs] def readlines(self): return list(self)
class HttpHeaders(CaseInsensitiveMapping): HTTP_PREFIX = 'HTTP_' # PEP 333 gives two headers which aren't prepended with HTTP_. UNPREFIXED_HEADERS = {'CONTENT_TYPE', 'CONTENT_LENGTH'} def __init__(self, environ): headers = {} for header, value in environ.items(): name = self.parse_header_name(header) if name: headers[name] = value super().__init__(headers) def __getitem__(self, key): """Allow header lookup using underscores in place of hyphens.""" return super().__getitem__(key.replace('_', '-')) @classmethod def parse_header_name(cls, header): if header.startswith(cls.HTTP_PREFIX): header = header[len(cls.HTTP_PREFIX):] elif header not in cls.UNPREFIXED_HEADERS: return None return header.replace('_', '-').title()
[docs]class QueryDict(MultiValueDict): """ A specialized MultiValueDict which represents a query string. A QueryDict can be used to represent GET or POST data. It subclasses MultiValueDict since keys in such data can be repeated, for instance in the data from a form with a <select multiple> field. By default QueryDicts are immutable, though the copy() method will always return a mutable copy. Both keys and values set on this class are converted from the given encoding (DEFAULT_CHARSET by default) to str. """ # These are both reset in __init__, but is specified here at the class # level so that unpickling will have valid values _mutable = True _encoding = None
[docs] def __init__(self, query_string=None, mutable=False, encoding=None): super().__init__() self.encoding = encoding or settings.DEFAULT_CHARSET query_string = query_string or '' parse_qsl_kwargs = { 'keep_blank_values': True, 'encoding': self.encoding, 'max_num_fields': settings.DATA_UPLOAD_MAX_NUMBER_FIELDS, } if isinstance(query_string, bytes): # query_string normally contains URL-encoded data, a subset of ASCII. try: query_string = query_string.decode(self.encoding) except UnicodeDecodeError: # ... but some user agents are misbehaving :-( query_string = query_string.decode('iso-8859-1') try: for key, value in parse_qsl(query_string, **parse_qsl_kwargs): self.appendlist(key, value) except ValueError as e: # ValueError can also be raised if the strict_parsing argument to # parse_qsl() is True. As that is not used by Django, assume that # the exception was raised by exceeding the value of max_num_fields # instead of fragile checks of exception message strings. raise TooManyFieldsSent( 'The number of GET/POST parameters exceeded ' 'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.' ) from e self._mutable = mutable
[docs] @classmethod def fromkeys(cls, iterable, value='', mutable=False, encoding=None): """ Return a new QueryDict with keys (may be repeated) from an iterable and values from value. """ q = cls('', mutable=True, encoding=encoding) for key in iterable: q.appendlist(key, value) if not mutable: q._mutable = False return q
@property def encoding(self): if self._encoding is None: self._encoding = settings.DEFAULT_CHARSET return self._encoding @encoding.setter def encoding(self, value): self._encoding = value def _assert_mutable(self): if not self._mutable: raise AttributeError("This QueryDict instance is immutable")
[docs] def __setitem__(self, key, value): self._assert_mutable() key = bytes_to_text(key, self.encoding) value = bytes_to_text(value, self.encoding) super().__setitem__(key, value)
def __delitem__(self, key): self._assert_mutable() super().__delitem__(key) def __copy__(self): result = self.__class__('', mutable=True, encoding=self.encoding) for key, value in self.lists(): result.setlist(key, value) return result def __deepcopy__(self, memo): result = self.__class__('', mutable=True, encoding=self.encoding) memo[id(self)] = result for key, value in self.lists(): result.setlist(copy.deepcopy(key, memo), copy.deepcopy(value, memo)) return result
[docs] def setlist(self, key, list_): self._assert_mutable() key = bytes_to_text(key, self.encoding) list_ = [bytes_to_text(elt, self.encoding) for elt in list_] super().setlist(key, list_)
[docs] def setlistdefault(self, key, default_list=None): self._assert_mutable() return super().setlistdefault(key, default_list)
[docs] def appendlist(self, key, value): self._assert_mutable() key = bytes_to_text(key, self.encoding) value = bytes_to_text(value, self.encoding) super().appendlist(key, value)
[docs] def pop(self, key, *args): self._assert_mutable() return super().pop(key, *args)
[docs] def popitem(self): self._assert_mutable() return super().popitem()
def clear(self): self._assert_mutable() super().clear()
[docs] def setdefault(self, key, default=None): self._assert_mutable() key = bytes_to_text(key, self.encoding) default = bytes_to_text(default, self.encoding) return super().setdefault(key, default)
[docs] def copy(self): """Return a mutable copy of this object.""" return self.__deepcopy__({})
[docs] def urlencode(self, safe=None): """ Return an encoded string of all query string arguments. `safe` specifies characters which don't require quoting, for example:: >>> q = QueryDict(mutable=True) >>> q['next'] = '/a&b/' >>> q.urlencode() 'next=%2Fa%26b%2F' >>> q.urlencode(safe='/') 'next=/a%26b/' """ output = [] if safe: safe = safe.encode(self.encoding) def encode(k, v): return '%s=%s' % ((quote(k, safe), quote(v, safe))) else: def encode(k, v): return urlencode({k: v}) for k, list_ in self.lists(): output.extend( encode(k.encode(self.encoding), str(v).encode(self.encoding)) for v in list_ ) return '&'.join(output)
class MediaType: def __init__(self, media_type_raw_line): full_type, self.params = parse_header( media_type_raw_line.encode('ascii') if media_type_raw_line else b'' ) self.main_type, _, self.sub_type = full_type.partition('/') def __str__(self): params_str = ''.join( '; %s=%s' % (k, v.decode('ascii')) for k, v in self.params.items() ) return '%s%s%s' % ( self.main_type, ('/%s' % self.sub_type) if self.sub_type else '', params_str, ) def __repr__(self): return '<%s: %s>' % (self.__class__.__qualname__, self) @property def is_all_types(self): return self.main_type == '*' and self.sub_type == '*' def match(self, other): if self.is_all_types: return True other = MediaType(other) if self.main_type == other.main_type and self.sub_type in {'*', other.sub_type}: return True return False # It's neither necessary nor appropriate to use # django.utils.encoding.force_str() for parsing URLs and form inputs. Thus, # this slightly more restricted function, used by QueryDict. def bytes_to_text(s, encoding): """ Convert bytes objects to strings, using the given encoding. Illegally encoded input characters are replaced with Unicode "unknown" codepoint (\ufffd). Return any non-bytes objects without change. """ if isinstance(s, bytes): return str(s, encoding, 'replace') else: return s def split_domain_port(host): """ Return a (domain, port) tuple from a given host. Returned domain is lowercased. If the host is invalid, the domain will be empty. """ host = host.lower() if not host_validation_re.match(host): return '', '' if host[-1] == ']': # It's an IPv6 address without a port. return host, '' bits = host.rsplit(':', 1) domain, port = bits if len(bits) == 2 else (bits[0], '') # Remove a trailing dot (if present) from the domain. domain = domain[:-1] if domain.endswith('.') else domain return domain, port def validate_host(host, allowed_hosts): """ Validate the given host for this site. Check that the host looks valid and matches a host or host pattern in the given list of ``allowed_hosts``. Any pattern beginning with a period matches a domain and all its subdomains (e.g. ``.example.com`` matches ``example.com`` and any subdomain), ``*`` matches anything, and anything else must match exactly. Note: This function assumes that the given host is lowercased and has already had the port, if any, stripped off. Return ``True`` for a valid host, ``False`` otherwise. """ return any(pattern == '*' or is_same_domain(host, pattern) for pattern in allowed_hosts) def parse_accept_header(header): return [MediaType(token) for token in header.split(',') if token.strip()]
Back to Top