# Source code for django_registration.validators

"""
Error messages, data and custom validation code used in
django-registration's various user-registration form classes.

"""

# pylint: disable=implicit-str-concat
import re
import unicodedata

from confusable_homoglyphs import confusables
from django.core.exceptions import ValidationError
from django.core.validators import EmailValidator, RegexValidator
from django.utils.deconstruct import deconstructible
from django.utils.translation import gettext_lazy as _

# User-facing error messages. Each one is wrapped in the lazy gettext
# alias ``_`` imported above, so translation happens when the message
# is rendered rather than when this module is imported.

# Homograph-attack rejections (usernames and email addresses).
CONFUSABLE = _("This name cannot be registered. Please choose a different name.")
CONFUSABLE_EMAIL = _(
    "This email address cannot be registered. Please supply a different email address."
)

# Uniqueness failures.
DUPLICATE_EMAIL = _(
    "This email address is already in use. Please supply a different email address."
)
DUPLICATE_USERNAME = _("A user with that username already exists.")

# Policy rejections.
FREE_EMAIL = _(
    "Registration using free email addresses is prohibited. "
    "Please supply a different email address."
)
RESERVED_NAME = _("This name is reserved and cannot be registered.")
TOS_REQUIRED = _("You must agree to the terms to register")

# WHATWG HTML5 spec, section 4.10.5.1.5.
#
# The spec states this pattern for JavaScript, where a trailing "$"
# matches only at the very end of the input. In Python, "$" also
# matches just before a trailing newline, which would let a value such
# as "user@example.com\n" through. The pattern therefore ends in "\Z",
# which matches only at the true end of the string.
HTML5_EMAIL_RE = (
    r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]"
    r"+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}"
    r"[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]"
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\Z"
)


# Below we construct a large but non-exhaustive list of names which
# users probably should not be able to register with, due to various
# risks:
#
# * For a site which creates email addresses from username, important
#   common addresses must be reserved.
#
# * For a site which creates subdomains from usernames, important
#   common hostnames/domain names must be reserved.
#
# * For a site which uses the username to generate a URL to the user's
#   profile, common well-known filenames must be reserved.
#
# etc., etc.
#
# Credit for basic idea and most of the list to Geoffrey Thomas's blog
# post about names to reserve:
# https://ldpreload.com/blog/names-to-reserve
# Hostnames with special/reserved meaning.
SPECIAL_HOSTNAMES = [
    "autoconfig",  # Thunderbird autoconfig
    "autodiscover",  # MS Outlook/Exchange autoconfig
    "broadcasthost",  # Network broadcast hostname
    "isatap",  # IPv6 tunnel autodiscovery
    "localdomain",  # Loopback
    "localhost",  # Loopback
    "wpad",  # Proxy autodiscovery
]


# Common protocol hostnames.
PROTOCOL_HOSTNAMES = [
    "ftp",
    "imap",
    "mail",
    "news",
    "pop",
    "pop3",
    "smtp",
    "usenet",
    "uucp",
    "webmail",
    "www",
]


# Email addresses known used by certificate authorities during
# verification.
CA_ADDRESSES = [
    "admin",
    "administrator",
    "hostmaster",
    "info",
    "is",
    "it",
    "mis",
    "postmaster",
    "root",
    "ssladmin",
    "ssladministrator",
    "sslwebmaster",
    "sysadmin",
    "webmaster",
]


# RFC-2142-defined names not already covered.
RFC_2142 = [
    "abuse",
    "marketing",
    "noc",
    "sales",
    "security",
    "support",
]


# Common no-reply email addresses.
NOREPLY_ADDRESSES = [
    "mailer-daemon",
    "nobody",
    "noreply",
    "no-reply",
]


# Sensitive filenames.
SENSITIVE_FILENAMES = [
    "clientaccesspolicy.xml",  # Silverlight cross-domain policy file.
    "crossdomain.xml",  # Flash cross-domain policy file.
    "favicon.ico",
    "humans.txt",
    "keybase.txt",  # Keybase ownership-verification URL.
    "robots.txt",
    ".htaccess",
    ".htpasswd",
]


# Other names which could be problems depending on URL/subdomain
# structure.
OTHER_SENSITIVE_NAMES = [
    "account",
    "accounts",
    "auth",
    "authorize",
    "blog",
    "buy",
    "cart",
    "clients",
    "contact",
    "contactus",
    "contact-us",
    "copyright",
    "dashboard",
    "doc",
    "docs",
    "download",
    "downloads",
    "enquiry",
    "faq",
    "help",
    "inquiry",
    "license",
    "login",
    "logout",
    "me",
    "myaccount",
    "oauth",
    "pay",
    "payment",
    "payments",
    "plans",
    "portfolio",
    "preferences",
    "pricing",
    "privacy",
    "profile",
    "register",
    "secure",
    "settings",
    "signin",
    "signup",
    "ssl",
    "status",
    "store",
    "subscribe",
    "terms",
    "tos",
    "user",
    "users",
    "weblog",
    "work",
    "xrpc",  # Used by Bluesky/AT protocol for domain verification.
]


# The full default list: every category above, concatenated in the
# order the categories are declared. It is built as a new list, so the
# per-category lists are left untouched.
DEFAULT_RESERVED_NAMES = [
    *SPECIAL_HOSTNAMES,
    *PROTOCOL_HOSTNAMES,
    *CA_ADDRESSES,
    *RFC_2142,
    *NOREPLY_ADDRESSES,
    *SENSITIVE_FILENAMES,
    *OTHER_SENSITIVE_NAMES,
]


@deconstructible
class ReservedNameValidator:
    """
    Validator which disallows many reserved names as form field values.

    ``reserved_names`` is the collection of names to reject; it defaults
    to ``DEFAULT_RESERVED_NAMES``. Membership is tested with ``in``.

    """

    def __init__(self, reserved_names=DEFAULT_RESERVED_NAMES):
        self.reserved_names = reserved_names

    def __call__(self, value):
        """
        Raise ``ValidationError`` (code ``"invalid"``) when ``value`` is
        one of the reserved names or starts with ``.well-known``.
        Non-string values are not validated.

        """
        # GH issue 82: this validator only makes sense when the
        # username field is a string type.
        if not isinstance(value, str):
            return
        if value in self.reserved_names or value.startswith(".well-known"):
            raise ValidationError(RESERVED_NAME, code="invalid")

    def __eq__(self, other):
        # Comparing against an unrelated object must not blow up with
        # AttributeError; returning NotImplemented lets Python try the
        # other operand and fall back to "not equal".
        if not isinstance(other, ReservedNameValidator):
            return NotImplemented
        return self.reserved_names == other.reserved_names
@deconstructible
class CaseInsensitiveUnique:
    """
    Validator which performs a case-insensitive uniqueness check.

    ``model`` is the model class to query, ``field_name`` the name of
    the field to compare against, and ``error_message`` the message
    used for the ``ValidationError`` raised when a match exists.

    """

    def __init__(self, model, field_name, error_message):
        self.model = model
        self.field_name = field_name
        self.error_message = error_message

    def __call__(self, value):
        """
        Raise ``ValidationError`` (code ``"unique"``) when a row already
        exists whose ``field_name`` matches ``value`` case-insensitively.
        Non-string values are not validated.

        """
        # Only run if the username is a string.
        if not isinstance(value, str):
            return
        # Normalize (NFKC) and case-fold before the lookup, then query
        # the model's default manager with an ``iexact`` match.
        value = unicodedata.normalize("NFKC", value).casefold()
        if self.model._default_manager.filter(
            **{f"{self.field_name}__iexact": value}
        ).exists():
            raise ValidationError(self.error_message, code="unique")

    def __eq__(self, other):
        # Comparing against an unrelated object must not blow up with
        # AttributeError; returning NotImplemented lets Python try the
        # other operand and fall back to "not equal".
        if not isinstance(other, CaseInsensitiveUnique):
            return NotImplemented
        return (
            self.model == other.model
            and self.field_name == other.field_name
            and self.error_message == other.error_message
        )
@deconstructible
class HTML5EmailValidator(RegexValidator):
    """
    Regex-based email validator implementing HTML5's email address
    rules: the pattern is compiled from ``HTML5_EMAIL_RE``, and the
    error message is the one ``EmailValidator`` uses.

    """

    # pylint: disable=too-few-public-methods

    regex = re.compile(HTML5_EMAIL_RE)
    message = EmailValidator.message
def validate_confusables(value):
    """
    Reject 'dangerous' usernames which are likely to be homograph
    attacks.

    'Dangerous' here means the username is mixed-script (as defined by
    the Unicode 'Script' property) and contains at least one character
    listed in the Unicode Visually Confusable Characters file.

    Values which are not strings are not checked. A dangerous value
    raises ``ValidationError`` with code ``"invalid"``.

    """
    if isinstance(value, str) and confusables.is_dangerous(value):
        raise ValidationError(CONFUSABLE, code="invalid")
def validate_confusables_email(value):
    """
    Validator which disallows 'dangerous' email addresses likely to
    represent homograph attacks.

    An email address is 'dangerous' if either the local-part or the
    domain, considered on their own, are mixed-script and contain one
    or more characters appearing in the Unicode Visually Confusable
    Characters file.

    Values which are not strings, or which do not contain exactly one
    '@', are not validated. A dangerous address raises
    ``ValidationError`` with code ``"invalid"``.

    """
    # Email addresses are extremely difficult.
    #
    # The current RFC governing syntax of email addresses is RFC 5322
    # which, as the HTML5 specification succinctly states, "defines a
    # syntax for e-mail addresses that is simultaneously too strict
    # ... too vague ... and too lax ... to be of practical use".
    #
    # In order to be useful, this validator must consider only the
    # addr-spec portion of an email address, and must examine the
    # local-part and the domain of that addr-spec
    # separately. Unfortunately, there are no good general-purpose
    # Python libraries currently available (that the author of
    # django-registration is aware of), supported on all versions of
    # Python django-registration supports, which can reliably provide
    # an RFC-compliant parse of either a full address or an addr-spec
    # which allows the local-part and domain to be treated separately.
    #
    # To work around this shortcoming, RegistrationForm applies the
    # HTML5 email validation rule, which HTML5 admits (in section
    # 4.10.5.1.5) is a "willful violation" of RFC 5322, to the
    # submitted email address. This will reject many technically-valid
    # but problematic email addresses, including those which make use
    # of comments, or which embed otherwise-illegal characters via
    # quoted-string.
    #
    # That in turn allows this validator to take a much simpler
    # approach: it considers any value containing exactly one '@'
    # (U+0040) to be an addr-spec, and considers everything prior to
    # the '@' to be the local-part and everything after to be the
    # domain, and performs validation on them. Any value not
    # containing exactly one '@' is assumed not to be an addr-spec,
    # and is thus "accepted" by not being validated at all.
    #
    # Non-string values get the same treatment, mirroring
    # validate_confusables(): there is nothing to split, so they are
    # skipped instead of failing on the str-only calls below.
    if not isinstance(value, str) or value.count("@") != 1:
        return
    local_part, domain = value.split("@")
    if confusables.is_dangerous(local_part) or confusables.is_dangerous(domain):
        raise ValidationError(CONFUSABLE_EMAIL, code="invalid")