Skip to content

Normalized email and domain columns in Email #17946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tests/common/db/accounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
User,
UserTermsOfServiceEngagement,
)
from warehouse.filters import get_email_domain, get_normalized_email

from .base import WarehouseFactory

Expand Down Expand Up @@ -116,6 +117,10 @@ class Meta:
# TODO: Replace when factory_boy supports `unique`.
# See https://github.com/FactoryBoy/factory_boy/pull/997
email = factory.Sequence(lambda _: fake.unique.safe_email())
normalized_email = factory.LazyAttribute(
lambda obj: get_normalized_email(obj.email)
)
domain = factory.LazyAttribute(lambda obj: get_email_domain(obj.email))

verified = True
primary = True
Expand Down
10 changes: 9 additions & 1 deletion tests/unit/accounts/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from warehouse.accounts.models import Email, RecoveryCode, User, UserFactory, WebAuthn
from warehouse.authnz import Permissions
from warehouse.filters import get_email_domain, get_normalized_email
from warehouse.utils.security_policy import principals_for

from ...common.db.accounts import (
Expand Down Expand Up @@ -61,7 +62,14 @@ def test_has_primary_verified_email(self, db_session, email, verified, allowed):
user = DBUserFactory.create()

if email:
e = Email(email=email, user=user, primary=True, verified=verified)
e = Email(
email=email,
user=user,
normalized_email=get_normalized_email(email),
domain=get_email_domain(email),
primary=True,
verified=verified,
)
db_session.add(e)
db_session.flush()

Expand Down
5 changes: 4 additions & 1 deletion warehouse/accounts/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
send_recovery_code_used_email,
)
from warehouse.events.tags import EventTag
from warehouse.filters import get_normalized_email
from warehouse.i18n import localize as _

# Common messages, set as constants to keep them from drifting.
Expand Down Expand Up @@ -346,7 +347,9 @@ def validate_email(self, field):
)

# Check if this email address is already in use
userid = self.user_service.find_userid_by_email(field.data)
userid = self.user_service.find_userid_by_email(
get_normalized_email(field.data)
)

if userid and userid == self.user_id:
self.request.metrics.increment(
Expand Down
6 changes: 2 additions & 4 deletions warehouse/accounts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,8 @@ class Email(db.ModelBase):
)
user: Mapped[User] = orm.relationship(back_populates="emails")
email: Mapped[str] = mapped_column(String(length=254))
normalized_email: Mapped[CITEXT] = mapped_column(CITEXT)
domain: Mapped[CITEXT] = mapped_column(CITEXT)
primary: Mapped[bool]
verified: Mapped[bool]
public: Mapped[bool_false]
Expand All @@ -433,10 +435,6 @@ class Email(db.ModelBase):
comment="Status strings returned by the domain validation service.",
)

@property
def domain(self):
return self.email.split("@")[-1].lower()


class ProhibitedEmailDomain(db.Model):
__tablename__ = "prohibited_email_domains"
Expand Down
5 changes: 5 additions & 0 deletions warehouse/accounts/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
WebAuthn,
)
from warehouse.events.tags import EventTag
from warehouse.filters import get_email_domain, get_normalized_email
from warehouse.metrics import IMetricsService
from warehouse.rate_limiting import DummyRateLimiter, IRateLimiter
from warehouse.utils.crypto import BadData, SignatureExpired, URLSafeTimedSerializer
Expand Down Expand Up @@ -300,8 +301,12 @@ def add_email(
if primary is None:
primary = True if user.primary_email is None else False

normalized_email = get_normalized_email(email_address)
domain = get_email_domain(email_address)
email = Email(
email=email_address,
normalized_email=normalized_email,
domain=domain,
user=user,
primary=primary,
verified=verified,
Expand Down
5 changes: 5 additions & 0 deletions warehouse/admin/views/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
send_account_recovery_initiated_email,
send_password_reset_by_admin_email,
)
from warehouse.filters import get_email_domain, get_normalized_email
from warehouse.observations.models import ObservationKind
from warehouse.packaging.models import JournalEntry, Project, Release, Role
from warehouse.utils.paginate import paginate_url_factory
Expand Down Expand Up @@ -101,6 +102,8 @@ class EmailForm(wtforms.Form):
unverify_reason = wtforms.fields.StringField(render_kw={"readonly": True})
domain_last_checked = wtforms.fields.DateTimeField(render_kw={"readonly": True})
domain_last_status = wtforms.fields.StringField(render_kw={"readonly": True})
normalized_email = wtforms.fields.StringField(render_kw={"readonly": True})
domain = wtforms.fields.StringField(render_kw={"readonly": True})


class EmailsForm(wtforms.Form):
Expand Down Expand Up @@ -288,6 +291,8 @@ def user_add_email(user, request):

email = Email(
email=form.email.data,
normalized_email=get_normalized_email(form.email.data),
domain=get_email_domain(form.email.data),
user=user,
primary=form.primary.data,
verified=form.verified.data,
Expand Down
17 changes: 17 additions & 0 deletions warehouse/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,23 @@ def format_email(metadata_email: str) -> tuple[str, str]:
return emails[0][0], emails[0][1]


def get_normalized_email(email: str) -> str:
    """
    Return the normalized form of an email address.

    The address is lowercased and then has any leading and trailing
    whitespace removed.
    """
    lowered = email.lower()
    return lowered.strip()


def get_email_domain(email: str) -> str:
    """
    Extract the lowercased domain from the email address.

    The domain is everything after the *last* "@": a quoted local part may
    itself contain "@", so splitting on the first one would return part of
    the local part instead of the domain. Surrounding whitespace is ignored.

    Returns an empty string when the address contains no "@".
    """
    _local_part, separator, domain = email.strip().rpartition("@")
    if not separator:
        # No "@" at all: rpartition puts the whole input in the last slot,
        # which is not a domain.
        return ""
    return domain.lower()


def remove_invalid_xml_unicode(value: str | None) -> str | None:
"""
Remove invalid unicode characters from a string.
Expand Down
48 changes: 24 additions & 24 deletions warehouse/locale/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -14,111 +14,111 @@ msgstr ""
msgid "Locale updated"
msgstr ""

#: warehouse/accounts/forms.py:52 warehouse/accounts/forms.py:290
#: warehouse/accounts/forms.py:53 warehouse/accounts/forms.py:291
msgid "The email address isn't valid. Try again."
msgstr ""

#: warehouse/accounts/forms.py:53
#: warehouse/accounts/forms.py:54
msgid "The password is invalid. Try again."
msgstr ""

#: warehouse/accounts/forms.py:55
#: warehouse/accounts/forms.py:56
msgid ""
"The username is invalid. Usernames must be composed of letters, numbers, "
"dots, hyphens and underscores. And must also start and finish with a "
"letter or number. Choose a different username."
msgstr ""

#: warehouse/accounts/forms.py:72
#: warehouse/accounts/forms.py:73
msgid "Null bytes are not allowed."
msgstr ""

#: warehouse/accounts/forms.py:86
#: warehouse/accounts/forms.py:87
msgid "No user found with that username"
msgstr ""

#: warehouse/accounts/forms.py:107
#: warehouse/accounts/forms.py:108
#, python-brace-format
msgid "TOTP code must be ${totp_length} digits."
msgstr ""

#: warehouse/accounts/forms.py:127
#: warehouse/accounts/forms.py:128
#, python-brace-format
msgid "Recovery Codes must be ${recovery_code_length} characters."
msgstr ""

#: warehouse/accounts/forms.py:141
#: warehouse/accounts/forms.py:142
msgid "Choose a username with 50 characters or less."
msgstr ""

#: warehouse/accounts/forms.py:159
#: warehouse/accounts/forms.py:160
msgid ""
"This username is already being used by another account. Choose a "
"different username."
msgstr ""

#: warehouse/accounts/forms.py:172 warehouse/accounts/forms.py:221
#: warehouse/accounts/forms.py:234
#: warehouse/accounts/forms.py:173 warehouse/accounts/forms.py:222
#: warehouse/accounts/forms.py:235
msgid "Password too long."
msgstr ""

#: warehouse/accounts/forms.py:204
#: warehouse/accounts/forms.py:205
#, python-brace-format
msgid ""
"There have been too many unsuccessful login attempts. You have been "
"locked out for ${time}. Please try again later."
msgstr ""

#: warehouse/accounts/forms.py:237
#: warehouse/accounts/forms.py:238
msgid "Your passwords don't match. Try again."
msgstr ""

#: warehouse/accounts/forms.py:271
#: warehouse/accounts/forms.py:272
msgid "The email address is too long. Try again."
msgstr ""

#: warehouse/accounts/forms.py:343
#: warehouse/accounts/forms.py:344
msgid "You can't use an email address from this domain. Use a different email."
msgstr ""

#: warehouse/accounts/forms.py:358
#: warehouse/accounts/forms.py:361
msgid ""
"This email address is already being used by this account. Use a different"
" email."
msgstr ""

#: warehouse/accounts/forms.py:369
#: warehouse/accounts/forms.py:372
msgid ""
"This email address is already being used by another account. Use a "
"different email."
msgstr ""

#: warehouse/accounts/forms.py:409 warehouse/manage/forms.py:141
#: warehouse/accounts/forms.py:412 warehouse/manage/forms.py:141
#: warehouse/manage/forms.py:783
msgid "The name is too long. Choose a name with 100 characters or less."
msgstr ""

#: warehouse/accounts/forms.py:415
#: warehouse/accounts/forms.py:418
msgid "URLs are not allowed in the name field."
msgstr ""

#: warehouse/accounts/forms.py:504
#: warehouse/accounts/forms.py:507
msgid "Invalid TOTP code."
msgstr ""

#: warehouse/accounts/forms.py:521
#: warehouse/accounts/forms.py:524
msgid "Invalid WebAuthn assertion: Bad payload"
msgstr ""

#: warehouse/accounts/forms.py:590
#: warehouse/accounts/forms.py:593
msgid "Invalid recovery code."
msgstr ""

#: warehouse/accounts/forms.py:599
#: warehouse/accounts/forms.py:602
msgid "Recovery code has been previously used."
msgstr ""

#: warehouse/accounts/forms.py:629
#: warehouse/accounts/forms.py:632
msgid "The username isn't valid. Try again."
msgstr ""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""New Columns in Email Model

Revision ID: c99e8775603d
Revises: 4f8982e60deb
Create Date: 2025-04-12 18:45:40.713109

"""
from collections.abc import Sequence

import sqlalchemy as sa

from alembic import op
from sqlalchemy.dialects.postgresql import CITEXT

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it
# revises (see "Revises" in the module docstring).
revision = "c99e8775603d"
down_revision = "4f8982e60deb"
# No branch labels or dependencies on other revisions are declared.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """
    Upgrade schema.

    Adds the ``normalized_email`` and ``domain`` columns to ``user_emails``,
    backfills them from the existing ``email`` column, and then marks both
    columns NOT NULL.
    """
    # Add the columns as nullable first so existing rows stay valid until
    # they have been backfilled.
    op.add_column("user_emails", sa.Column("normalized_email", CITEXT()))
    op.add_column("user_emails", sa.Column("domain", CITEXT()))

    # Populate data.
    # The domain is taken after the *last* "@" (a quoted local part may
    # contain "@" itself). SUBSTRING yields NULL when there is no "@", so
    # COALESCE falls back to '' to keep the NOT NULL step below from failing.
    # NOTE(review): get_normalized_email() also strips surrounding whitespace;
    # this backfill assumes stored addresses have none -- confirm.
    op.execute(
        """
        UPDATE user_emails
        SET normalized_email = LOWER(email),
            domain = COALESCE(LOWER(SUBSTRING(email FROM '@([^@]*)$')), '')
        """
    )

    # Add constraints now that every row has a value.
    op.alter_column("user_emails", "normalized_email", nullable=False)
    op.alter_column("user_emails", "domain", nullable=False)


def downgrade() -> None:
    """Downgrade schema by dropping the two columns from ``user_emails``."""
    # Drop columns: ``domain`` first, then ``normalized_email``.
    for column_name in ("domain", "normalized_email"):
        op.drop_column("user_emails", column_name)