Skip to content

Commit

Permalink
Merge pull request #745 from WesternFriend/handle-audio-files
Browse files Browse the repository at this point in the history
Handle audio files
  • Loading branch information
brylie committed Jun 28, 2023
2 parents 2f25a47 + bc2dbcf commit 69f50b3
Show file tree
Hide file tree
Showing 7 changed files with 236 additions and 70 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@ repos:
- id: check-toml
- id: check-merge-conflict
- repo: https://github.com/asottile/pyupgrade
rev: v3.6.0
rev: v3.7.0
hooks:
- id: pyupgrade
args: [--py311-plus]
- repo: https://github.com/adamchainz/django-upgrade
rev: 1.13.0
rev: 1.14.0
hooks:
- id: django-upgrade
args: [--target-version, "4.2"]
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: "v0.0.272"
rev: "v0.0.275"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand All @@ -38,7 +38,7 @@ repos:
hooks:
- id: curlylint
- repo: https://github.com/PyCQA/docformatter
rev: "v1.7.2"
rev: "v1.7.3"
hooks:
- id: docformatter
- repo: https://github.com/rtts/djhtml
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ compile-deps: .venv/bin/python
init: .venv/.install.stamp
pre-commit install

install: .venv/bin/python requirements.txt requirements-dev.txt
.venv/bin/python -m pip install -r requirements.txt -r requirements-dev.txt

test: .venv/.install.stamp
.venv/bin/python app/manage.py test app

.PHONY: update-deps compile-deps init test
.PHONY: update-deps compile-deps init install test
181 changes: 148 additions & 33 deletions content_migration/management/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from wagtail.images.models import Image
from wagtail.models import Page
from wagtail.rich_text import RichText
from wagtailmedia.models import Media

from contact.models import Meeting, Organization, Person

Expand All @@ -37,6 +38,26 @@
SITE_BASE_URL,
)

# Content types (MIME strings) of fetched files that are imported as
# audio media items; compared against the fetched file's content type
# when media URLs are parsed into blocks.
ALLOWED_AUDIO_CONTENT_TYPES = [
"audio/mpeg",
"audio/mp4",
"audio/ogg",
"audio/wav",
"audio/webm",
]

# Content types of fetched files that are imported as document links.
ALLOWED_DOCUMENT_CONTENT_TYPES = [
"application/pdf",
]

# Content types of fetched files that are imported as image blocks.
ALLOWED_IMAGE_CONTENT_TYPES = [
"image/jpeg",
"image/png",
"image/gif",
"image/svg+xml",
"image/webp",
]

EMPTY_ITEM_VALUES = [
"",
"~",
Expand Down Expand Up @@ -102,7 +123,7 @@ class GenericBlock:
block_content: str


def create_image_block(image_url: str) -> dict:
def create_image_block_from_url(image_url: str) -> dict:
"""Create a Wagtial image block from an image URL."""

try:
Expand All @@ -120,19 +141,12 @@ def create_image_block(image_url: str) -> dict:
file_bytes = BytesIO(response.content)

# create an ImageFile object
file_name = image_url.split("/")[-1]
image_file = ImageFile(
file_bytes,
name=file_name,
file_name = image_url.split(sep="/")[-1]
image = create_image_from_file_bytes(
file_name=file_name,
file_bytes=file_bytes,
)

# create and save a Wagtial image instance
image = Image(
title=file_name,
file=image_file,
)
image.save()

# Create an image block with dictionary properties
image_chooser_block = {
"image": image,
Expand All @@ -155,7 +169,7 @@ def create_block(generic_block: GenericBlock) -> tuple[str, str | dict]:
)
elif generic_block.block_type == "image":
try:
image_block = create_image_block(generic_block.block_content)
image_block = create_image_block_from_url(generic_block.block_content)
except requests.exceptions.MissingSchema:
raise BlockFactoryError("Invalid image URL: missing schema")
except requests.exceptions.InvalidSchema:
Expand Down Expand Up @@ -282,6 +296,27 @@ def adapt_html_to_generic_blocks(html_string: str) -> list[GenericBlock]:
return generic_blocks


def create_document_from_file_bytes(
    file_name: str,
    file_bytes: BytesIO,
) -> Document:
    """Save the given bytes as a new ``Document`` and return it.

    The bytes are wrapped in a ``File`` named ``file_name``; the same
    name is used as the document title.
    """

    new_document = Document(
        title=file_name,
        file=File(file_bytes, name=file_name),
    )
    new_document.save()

    return new_document


def create_document_link_block(
file_name: str,
file_bytes: BytesIO,
Expand All @@ -291,19 +326,33 @@ def create_document_link_block(
Returns a tuple of the form: ("document", document)
"""

document_file: File = File(
document = create_document_from_file_bytes(
file_name=file_name,
file_bytes=file_bytes,
)

return ("document", document)


def create_image_from_file_bytes(
file_name: str,
file_bytes: BytesIO,
) -> Image:
"""Create an image from a file name and bytes."""

image_file: ImageFile = ImageFile(
file_bytes,
name=file_name,
)

document: Document = Document(
image: Image = Image(
title=file_name,
file=document_file,
file=image_file,
)

document.save()
image.save()

return ("document", document)
return image


def create_media_embed_block(url: str) -> tuple[str, Embed]:
Expand All @@ -317,24 +366,19 @@ def create_media_embed_block(url: str) -> tuple[str, Embed]:
return embed_block


# TODO: refactor this function to make it
# less redundant with create_image_block
def create_image_block_from_file_bytes(
file_name: str,
file_bytes: BytesIO,
) -> tuple[str, dict]:
# create image
image_file: ImageFile = ImageFile(
file_bytes,
name=file_name,
)
"""Create an image block from a file name and bytes.
image = Image(
title=file_name,
file=image_file,
)
Returns a tuple of the form: ("image", image_block)
"""

image.save()
image = create_image_from_file_bytes(
file_name=file_name,
file_bytes=file_bytes,
)

# Create an image block with dictionary properties
# of FormattedImageChooserStructBlock
Expand All @@ -349,11 +393,67 @@ def create_image_block_from_file_bytes(
return media_item_block


def create_media_from_file_bytes(
    file_name: str,
    file_bytes: BytesIO,
    file_type: str,
) -> Media:
    """Save the given bytes as a new ``Media`` item and return it.

    The bytes are wrapped in a ``File`` named ``file_name``; the same
    name is used as the media title, and ``file_type`` is stored as the
    item's ``type``.
    """

    wrapped_file = File(file_bytes, name=file_name)

    media_item = Media(
        title=file_name,
        file=wrapped_file,
        type=file_type,
    )
    media_item.save()

    return media_item


def create_media_block_from_file_bytes(
    file_name: str,
    file_bytes: BytesIO,
    file_type: str,
) -> tuple[str, Media]:
    """Create and save a media item, then wrap it as a block tuple.

    Returns a tuple of the form: ("media", media), where ``media`` is
    the saved item produced by ``create_media_from_file_bytes``.
    """

    saved_media = create_media_from_file_bytes(
        file_name=file_name,
        file_bytes=file_bytes,
        file_type=file_type,
    )

    # The block is a (block type, value) pair.
    return ("media", saved_media)


def extract_pullquotes(item: str) -> list[str]:
"""Get a list of all pullquote strings found within the item, excluding the
pullquote spans.
The pullquote strings are wrapped in a span with class 'pullquote'.
Example:
<span class="pullquote">This is a pullquote</span>
Will return:
["This is a pullquote"]
Returns a list of pullquote strings.
"""

pullquotes = []
Expand Down Expand Up @@ -393,6 +493,15 @@ def parse_media_string_to_list(media_string: str) -> list[str]:


def ensure_absolute_url(url: str) -> str:
"""Ensure that the URL is absolute.
Example:
/media/images/image.jpg
Will be converted to:
https://<site_base_url>/media/images/image.jpg
"""

# Check if the URL starts with / and append the site base URL
# ensuring there are not double // characters
if url.startswith("/"):
Expand Down Expand Up @@ -433,16 +542,22 @@ def parse_media_blocks(media_urls: list[str]) -> list[tuple]:
except requests.exceptions.RequestException:
continue

if fetched_file.content_type == "application/pdf":
if fetched_file.content_type in ALLOWED_DOCUMENT_CONTENT_TYPES:
media_item_block: tuple = create_document_link_block(
file_name=fetched_file.file_name,
file_bytes=fetched_file.file_bytes,
)
elif fetched_file.content_type in ["image/jpeg", "image/png"]:
elif fetched_file.content_type in ALLOWED_IMAGE_CONTENT_TYPES:
media_item_block = create_image_block_from_file_bytes(
file_name=fetched_file.file_name,
file_bytes=fetched_file.file_bytes,
)
elif fetched_file.content_type in ALLOWED_AUDIO_CONTENT_TYPES:
media_item_block = create_media_block_from_file_bytes(
file_name=fetched_file.file_name,
file_bytes=fetched_file.file_bytes,
file_type="audio",
)
else:
logger.error(
f"Could not parse {fetched_file.content_type} media item: { url }"
Expand Down Expand Up @@ -482,7 +597,7 @@ def get_existing_magazine_author_from_db(
meeting = Meeting.objects.filter(Q(drupal_author_id=drupal_author_id))
organization = Organization.objects.filter(Q(drupal_author_id=drupal_author_id))

results = list(chain(person, meeting, organization))
results = list(chain(person, meeting, organization)) # type: ignore

if len(results) == 0:
error_message = f"Could not find matching author for magazine author ID: { int(drupal_author_id) }" # noqa: E501
Expand Down
Loading

0 comments on commit 69f50b3

Please sign in to comment.