# Source code for django_registration.validators

"""
Error messages, data and custom validation code used in django-registration's
various user-registration form classes.

"""

# SPDX-License-Identifier: BSD-3-Clause

# pylint: disable=implicit-str-concat
import re
import unicodedata

from confusable_homoglyphs import confusables
from django.core.exceptions import ValidationError
from django.core.validators import EmailValidator, RegexValidator
from django.utils.deconstruct import deconstructible
from django.utils.translation import gettext_lazy as _

# Error messages, each marked for translation with gettext_lazy (imported as ``_``).
#
# CONFUSABLE, CONFUSABLE_EMAIL and RESERVED_NAME are raised by validators defined in
# this module. The remaining messages are not referenced by the validators in this
# module; presumably they are used by the registration form classes mentioned in the
# module docstring.
CONFUSABLE = _("This name cannot be registered. Please choose a different name.")
CONFUSABLE_EMAIL = _(
    "This email address cannot be registered. Please supply a different email address."
)
DUPLICATE_EMAIL = _(
    "This email address is already in use. Please supply a different email address."
)
DUPLICATE_USERNAME = _("A user with that username already exists.")
FREE_EMAIL = _(
    "Registration using free email addresses is prohibited. "
    "Please supply a different email address."
)
RESERVED_NAME = _("This name is reserved and cannot be registered.")
TOS_REQUIRED = _("You must agree to the terms to register")

# WHATWG HTML5 spec, section 4.10.5.1.5.
#
# Structure of the pattern (anchored at both ends):
#
# * local-part: one or more ASCII letters, digits, or any of .!#$%&'*+/=?^_`{|}~-
#
# * exactly one "@"
#
# * domain: one or more "."-separated labels. Each label begins and ends with an ASCII
#   letter or digit, may contain "-" in between, and is at most 63 characters long
#   (1 leading character + up to 61 middle characters + 1 trailing character).
HTML5_EMAIL_RE = (
    r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]"
    r"+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}"
    r"[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]"
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
)

# Below we construct a large but non-exhaustive list of names which users probably
# should not be able to register with, due to various risks:
#
# * For a site which creates email addresses from username, important common addresses
#   must be reserved.
#
# * For a site which creates subdomains from usernames, important common
#   hostnames/domain names must be reserved.
#
# * For a site which uses the username to generate a URL to the user's profile, common
#   well-known filenames must be reserved.
#
# etc., etc.
#
# Credit for basic idea and most of the list to Geoffrey Thomas's blog post about names
# to reserve: https://ldpreload.com/blog/names-to-reserve
SPECIAL_HOSTNAMES = [
    # Hostnames with special/reserved meaning. These matter most for a site which
    # creates subdomains from usernames.
    "autoconfig",  # Thunderbird autoconfig
    "autodiscover",  # MS Outlook/Exchange autoconfig
    "broadcasthost",  # Network broadcast hostname
    "isatap",  # IPv6 tunnel autodiscovery
    "localdomain",  # Loopback
    "localhost",  # Loopback
    "wpad",  # Proxy autodiscovery
]

PROTOCOL_HOSTNAMES = [
    # Common protocol hostnames, i.e. hostnames named after a protocol or service.
    # These matter most for a site which creates subdomains from usernames.
    "ftp",
    "imap",
    "mail",
    "news",
    "pop",
    "pop3",
    "smtp",
    "usenet",
    "uucp",
    "webmail",
    "www",
]

CA_ADDRESSES = [
    # Email addresses known to be used by certificate authorities during
    # verification. Only the mailbox name (the part before the "@") is listed, so
    # these matter most for a site which creates email addresses from usernames.
    "admin",
    "administrator",
    "hostmaster",
    "info",
    "is",
    "it",
    "mis",
    "postmaster",
    "root",
    "ssladmin",
    "ssladministrator",
    "sslwebmaster",
    "sysadmin",
    "webmaster",
]

RFC_2142 = [
    # RFC-2142-defined names not already covered (presumably: not already present in
    # the hostname and certificate-authority lists defined earlier in this module).
    "abuse",
    "marketing",
    "noc",
    "sales",
    "security",
    "support",
]

NOREPLY_ADDRESSES = [
    # Common no-reply email addresses. Only the mailbox name (the part before the
    # "@") is listed.
    "mailer-daemon",
    "nobody",
    "noreply",
    "no-reply",
]

SENSITIVE_FILENAMES = [
    # Sensitive filenames. These matter most for a site which uses the username to
    # generate a URL, where a username could otherwise shadow a well-known file.
    "clientaccesspolicy.xml",  # Silverlight cross-domain policy file.
    "crossdomain.xml",  # Flash cross-domain policy file.
    "favicon.ico",
    "humans.txt",
    "keybase.txt",  # Keybase ownership-verification URL.
    "robots.txt",
    ".htaccess",
    ".htpasswd",
]

OTHER_SENSITIVE_NAMES = [
    # Other names which could be problems depending on URL/subdomain
    # structure, e.g. names likely to collide with a site's own pages or sections
    # when usernames are used to build URLs or subdomains.
    "account",
    "accounts",
    "auth",
    "authorize",
    "blog",
    "buy",
    "cart",
    "clients",
    "contact",
    "contactus",
    "contact-us",
    "copyright",
    "dashboard",
    "doc",
    "docs",
    "download",
    "downloads",
    "enquiry",
    "faq",
    "help",
    "inquiry",
    "license",
    "login",
    "logout",
    "me",
    "myaccount",
    "oauth",
    "pay",
    "payment",
    "payments",
    "plans",
    "portfolio",
    "preferences",
    "pricing",
    "privacy",
    "profile",
    "register",
    "secure",
    "settings",
    "signin",
    "signup",
    "ssl",
    "status",
    "store",
    "subscribe",
    "terms",
    "tos",
    "user",
    "users",
    "weblog",
    "work",
    "xrpc",  # Used by Bluesky/AT protocol for domain verification.
]

# Every category of reserved name, flattened into a single new list. Category order is
# preserved and no de-duplication is performed.
DEFAULT_RESERVED_NAMES = [
    *SPECIAL_HOSTNAMES,
    *PROTOCOL_HOSTNAMES,
    *CA_ADDRESSES,
    *RFC_2142,
    *NOREPLY_ADDRESSES,
    *SENSITIVE_FILENAMES,
    *OTHER_SENSITIVE_NAMES,
]


@deconstructible
class ReservedNameValidator:
    """
    Validator which disallows many reserved names as form field values.

    :param reserved_names: Collection of names to reject; defaults to
        ``DEFAULT_RESERVED_NAMES``. The collection is stored by reference, not copied.

    """

    def __init__(self, reserved_names=DEFAULT_RESERVED_NAMES):
        self.reserved_names = reserved_names

    def __call__(self, value):
        """
        Raise ``ValidationError`` (code ``"invalid"``) if ``value`` is one of the
        reserved names, or begins with ``.well-known``.

        Non-string values are accepted without being checked.

        """
        # GH issue 82: this validator only makes sense when the username field is a
        # string type.
        if not isinstance(value, str):
            return
        if value in self.reserved_names or value.startswith(".well-known"):
            raise ValidationError(RESERVED_NAME, code="invalid")

    def __eq__(self, other):
        """
        Two instances are equal when their reserved-name collections are equal.

        """
        # Comparing against an unrelated object must not blow up with AttributeError;
        # returning NotImplemented lets Python fall back to its default handling, so
        # ``validator == something_else`` evaluates to False.
        if not isinstance(other, ReservedNameValidator):
            return NotImplemented
        return self.reserved_names == other.reserved_names
@deconstructible
class CaseInsensitiveUnique:
    """
    Validator which performs a case-insensitive uniqueness check.

    :param model: Model class whose default manager is queried.
    :param field_name: Name of the model field to compare against.
    :param error_message: Message used for the ``ValidationError`` raised when a
        matching row already exists.

    """

    def __init__(self, model, field_name, error_message):
        self.model = model
        self.field_name = field_name
        self.error_message = error_message

    def __call__(self, value):
        """
        Raise ``ValidationError`` (code ``"unique"``) if a row already exists whose
        ``field_name`` matches ``value`` case-insensitively.

        Non-string values are accepted without being checked.

        """
        # Only run if the username is a string.
        if not isinstance(value, str):
            return
        # Normalize (NFKC) and case-fold before querying, so the lookup is performed
        # on the normalized form of the submitted value.
        value = unicodedata.normalize("NFKC", value).casefold()
        if self.model._default_manager.filter(
            **{f"{self.field_name}__iexact": value}
        ).exists():
            raise ValidationError(self.error_message, code="unique")

    def __eq__(self, other):
        """
        Two instances are equal when model, field name and error message all match.

        """
        # Comparing against an unrelated object must not blow up with AttributeError;
        # returning NotImplemented lets Python fall back to its default handling, so
        # ``validator == something_else`` evaluates to False.
        if not isinstance(other, CaseInsensitiveUnique):
            return NotImplemented
        return (
            self.model == other.model
            and self.field_name == other.field_name
            and self.error_message == other.error_message
        )
@deconstructible
class HTML5EmailValidator(RegexValidator):
    """
    Regex-based validator which enforces HTML5's email address rules.

    """

    # pylint: disable=too-few-public-methods

    # The HTML5 pattern, compiled once when the class body is executed.
    regex = re.compile(HTML5_EMAIL_RE)
    # Same error message as Django's EmailValidator.
    message = EmailValidator.message
def validate_confusables(value):
    """
    Reject 'dangerous' usernames, i.e. those likely to represent homograph attacks.

    A username counts as 'dangerous' when it is mixed-script (as defined by the Unicode
    'Script' property) and contains one or more characters appearing in the Unicode
    Visually Confusable Characters file.

    Non-string values are accepted without being checked. A dangerous value raises
    ``ValidationError`` with code ``"invalid"``.

    """
    # The isinstance test comes first so non-string values never reach the
    # confusables check.
    if isinstance(value, str) and confusables.is_dangerous(value):
        raise ValidationError(CONFUSABLE, code="invalid")
def validate_confusables_email(value):
    """
    Validator which disallows 'dangerous' email addresses likely to represent homograph
    attacks.

    An email address is 'dangerous' if either the local-part or the domain, considered
    on their own, are mixed-script and contain one or more characters appearing in the
    Unicode Visually Confusable Characters file.

    Non-string values, and strings which do not contain exactly one '@', are accepted
    without being checked. A dangerous address raises ``ValidationError`` with code
    ``"invalid"``.

    """
    # Email addresses are extremely difficult.
    #
    # The current RFC governing syntax of email addresses is RFC 5322 which, as the
    # HTML5 specification succinctly states, "defines a syntax for e-mail addresses that
    # is simultaneously too strict ... too vague ... and too lax ... to be of
    # practical use".
    #
    # In order to be useful, this validator must consider only the addr-spec portion of
    # an email address, and must examine the local-part and the domain of that addr-spec
    # separately. Unfortunately, there are no good general-purpose Python libraries
    # currently available (that the author of django-registration is aware of),
    # supported on all versions of Python django-registration supports, which can
    # reliably provide an RFC-compliant parse of either a full address or an addr-spec
    # which allows the local-part and domain to be treated separately.
    #
    # To work around this shortcoming, RegistrationForm applies the HTML5 email
    # validation rule, which HTML5 admits (in section 4.10.5.1.5) is a "willful
    # violation" of RFC 5322, to the submitted email address. This will reject many
    # technically-valid but problematic email addresses, including those which make use
    # of comments, or which embed otherwise-illegal characters via quoted-string.
    #
    # That in turn allows this validator to take a much simpler approach: it considers
    # any value containing exactly one '@' (U+0040) to be an addr-spec, and considers
    # everything prior to the '@' to be the local-part and everything after to be the
    # domain, and performs validation on them. Any value not containing exactly one '@'
    # is assumed not to be an addr-spec, and is thus "accepted" by not being validated
    # at all.

    # As in validate_confusables, only string values can be checked. Without this
    # guard, a non-string value would crash on the str-only calls below instead of
    # being "accepted" like any other non-addr-spec value.
    if not isinstance(value, str):
        return
    if value.count("@") != 1:
        return
    local_part, domain = value.split("@")
    if confusables.is_dangerous(local_part) or confusables.is_dangerous(domain):
        raise ValidationError(CONFUSABLE_EMAIL, code="invalid")