Source code for w3lib.http

from __future__ import annotations

from base64 import b64encode
from collections.abc import Mapping, MutableMapping, Sequence
from io import BytesIO
from typing import Any, TypeAlias, overload

from w3lib.util import to_bytes, to_unicode

HeadersDictInput: TypeAlias = Mapping[bytes, Any | Sequence[bytes]]
HeadersDictOutput: TypeAlias = MutableMapping[bytes, list[bytes]]


@overload
def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ...


@overload
def headers_raw_to_dict(headers_raw: None) -> None: ...


[docs] def headers_raw_to_dict(headers_raw: bytes | None) -> HeadersDictOutput | None: r""" Convert raw headers (single multi-line bytestring) to a dictionary. For example: >>> import w3lib.http >>> w3lib.http.headers_raw_to_dict(b"Content-type: text/html\n\rAccept: gzip\n\n") # doctest: +SKIP {'Content-type': ['text/html'], 'Accept': ['gzip']} Incorrect input: >>> w3lib.http.headers_raw_to_dict(b"Content-typt gzip\n\n") {} >>> Argument is ``None`` (return ``None``): >>> w3lib.http.headers_raw_to_dict(None) >>> """ if headers_raw is None: return None if not headers_raw: return {} result_dict: HeadersDictOutput = {} for header in BytesIO(headers_raw): key, sep, value = header.partition(b":") if not sep: continue key, value = key.strip(), value.strip() if key in result_dict: result_dict[key].append(value) else: result_dict[key] = [value] return result_dict
@overload def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ... @overload def headers_dict_to_raw(headers_dict: None) -> None: ...
[docs] def headers_dict_to_raw(headers_dict: HeadersDictInput | None) -> bytes | None: r""" Returns a raw HTTP headers representation of headers For example: >>> import w3lib.http >>> w3lib.http.headers_dict_to_raw({b'Content-type': b'text/html', b'Accept': b'gzip'}) # doctest: +SKIP 'Content-type: text/html\\r\\nAccept: gzip' >>> Note that keys and values must be bytes. Argument is ``None`` (returns ``None``): >>> w3lib.http.headers_dict_to_raw(None) >>> """ if headers_dict is None: return None if not headers_dict: return b"" parts = bytearray() for key, value in headers_dict.items(): if isinstance(value, bytes): if parts: parts.extend(b"\r\n") parts.extend(key + b": " + value) elif isinstance(value, (list, tuple)): for v in value: if parts: parts.extend(b"\r\n") parts.extend(key + b": " + v) return bytes(parts)
[docs] def basic_auth_header( username: str | bytes, password: str | bytes, encoding: str = "ISO-8859-1" ) -> bytes: """ Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_ >>> import w3lib.http >>> w3lib.http.basic_auth_header('someuser', 'somepass') b'Basic c29tZXVzZXI6c29tZXBhc3M=' .. _HTTP Basic Access Authentication (RFC 2617): http://www.ietf.org/rfc/rfc2617.txt """ auth = f"{to_unicode(username)}:{to_unicode(password)}" # XXX: RFC 2617 doesn't define encoding, but ISO-8859-1 # seems to be the most widely used encoding here. See also: # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html return b"Basic " + b64encode(to_bytes(auth, encoding=encoding))