Skip to content

Commit

Permalink
Merge pull request #747 from WesternFriend/image-chooser-url-property
Browse files Browse the repository at this point in the history
Parse LibraryItem description field to body
  • Loading branch information
brylie authored Jun 30, 2023
2 parents b8c03d1 + 674afcd commit 59bfaa2
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 48 deletions.
5 changes: 3 additions & 2 deletions content_migration/management/import_library_items_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from content_migration.management.shared import (
create_permanent_redirect,
get_existing_magazine_author_from_db,
parse_body_blocks,
parse_csv_file,
parse_media_blocks,
parse_media_string_to_list,
Expand Down Expand Up @@ -111,9 +112,9 @@ def handle_import_library_items(file_name: str) -> None:
library_item_index_page.save()

library_item.title = import_library_item["title"]
library_item.description = import_library_item["Description"]
library_item.body = parse_body_blocks(import_library_item["Description"])

library_item.body = parse_media_blocks(
library_item.body += parse_media_blocks(
parse_media_string_to_list(import_library_item["Media"]),
)

Expand Down
76 changes: 61 additions & 15 deletions content_migration/management/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,18 @@ class GenericBlock:
"""Generic block dataclass that represents a Wagtail block tuple."""

block_type: str
block_content: str
block_content: str | dict


def create_image_block_from_url(image_url: str) -> dict:
@dataclass
class GenericFormattedImageBlock:
"""Generic block dataclass that represents a Wagtail block tuple."""

image_url: str
link_url: str | None


def create_image_block_from_url(image_url: str, link_url: str | None = None) -> dict:
"""Create a Wagtial image block from an image URL."""

try:
Expand Down Expand Up @@ -152,6 +160,7 @@ def create_image_block_from_url(image_url: str) -> dict:
"image": image,
"width": DEFAULT_IMAGE_WIDTH,
"align": DEFAULT_IMAGE_ALIGN,
"link": link_url,
}

return image_chooser_block
Expand All @@ -168,8 +177,17 @@ def create_block(generic_block: GenericBlock) -> tuple[str, str | dict]:
RichText(generic_block.block_content), # type: ignore
)
elif generic_block.block_type == "image":
image_url: str = generic_block.block_content["image"]
link_url: str | None = (
generic_block.block_content["link"]
if generic_block.block_content["link"] is not None
else None
)
try:
image_block = create_image_block_from_url(generic_block.block_content)
image_block = create_image_block_from_url(
image_url=image_url,
link_url=link_url, # type: ignore
)
except requests.exceptions.MissingSchema:
raise BlockFactoryError("Invalid image URL: missing schema")
except requests.exceptions.InvalidSchema:
Expand All @@ -181,7 +199,7 @@ def create_block(generic_block: GenericBlock) -> tuple[str, str | dict]:
elif generic_block.block_type == "pullquote":
return (
generic_block.block_type,
generic_block.block_content,
str(generic_block.block_content),
)
else:
raise ValueError(f"Invalid block type: {generic_block.block_type}")
Expand Down Expand Up @@ -221,12 +239,12 @@ def adapt_html_to_generic_blocks(html_string: str) -> list[GenericBlock]:
# Placeholder for gathering successive items
rich_text_value = ""
soup_contents = soup.contents
for item in soup_contents:
for soup_item in soup_contents:
# skip non-Tag items
if not isinstance(item, Tag):
if not isinstance(soup_item, Tag):
continue

item_string = str(item)
item_string = str(soup_item)
# skip empty items
if item_string in EMPTY_ITEM_VALUES:
continue
Expand Down Expand Up @@ -263,22 +281,49 @@ def adapt_html_to_generic_blocks(html_string: str) -> list[GenericBlock]:
item_string = remove_pullquote_tags(item_string)

if item_contains_image:
image_urls = extract_image_urls(item_string)
print("found image")
# use beautiful soup to get an iterable of image Tag objects
image_tags = soup_item.find_all("img")

for image_tag in image_tags:
# check if image tag has a src attribute
if "src" not in image_tag.attrs:
continue

for image_url in image_urls:
# get image src
image_url = image_tag["src"]
image_url = ensure_absolute_url(image_url)
print("--------------------------")
print(image_url)
print("--------------------------")
# make sure the URL contains westernfriend.org
if "westernfriend.org" not in image_url:
raise ValueError(
f"Image URL must contain westernfriend.org: {image_url}"
)

# check if image is wrapped in a link
if image_tag.parent.name == "a":
image_link_url = image_tag.parent["href"]
else:
image_link_url = None

image_chooser_block_content = {
"image": image_url,
"link": image_link_url,
}

generic_blocks.append(
GenericBlock(
block_type="image",
block_content=image_url,
block_content=image_chooser_block_content,
)
)

# reset item string,
# since the image block has been created
# and we don't expect any more blocks
item_string = ""
# reset item string,
# since the image block has been created
# and we don't expect any more blocks
item_string = ""

if item_string != "":
rich_text_value += item_string
Expand Down Expand Up @@ -493,7 +538,8 @@ def parse_media_string_to_list(media_string: str) -> list[str]:


def ensure_absolute_url(url: str) -> str:
"""Ensure that the URL is absolute.
"""Ensure that the URL is absolute and belongs to the WesternFriend.org
domain.
Example:
/media/images/image.jpg
Expand Down
69 changes: 43 additions & 26 deletions content_migration/management/test_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
parse_body_blocks,
parse_csv_file,
parse_media_blocks,
create_media_embed_block,
parse_media_string_to_list,
remove_pullquote_tags,
)
Expand All @@ -59,16 +58,16 @@
)


class CreateMediaEmbedBlockTestCase(TestCase):
def test_create_media_embed_block(self) -> None:
self.MaxDiff = None
input_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
output_media_embed_block = create_media_embed_block(input_url)
# class CreateMediaEmbedBlockTestCase(TestCase):
# def test_create_media_embed_block(self) -> None:
# self.MaxDiff = None
# input_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# output_media_embed_block = create_media_embed_block(input_url)

self.assertEqual(
output_media_embed_block[1].url,
input_url,
)
# self.assertEqual(
# output_media_embed_block[1].url,
# input_url,
# )


class TestExtractImages(SimpleTestCase):
Expand Down Expand Up @@ -473,16 +472,16 @@ def tearDown(self) -> None:


class ParseMediaBlocksTestCase(TestCase):
def test_parse_media_blocks_with_youtube_url(self) -> None:
input_media_urls = ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"]
output_media_blocks = parse_media_blocks(input_media_urls)
output_media_block_type = output_media_blocks[0][0]
expected_media_block_type = "embed"
# def test_parse_media_blocks_with_youtube_url(self) -> None:
# input_media_urls = ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"]
# output_media_blocks = parse_media_blocks(input_media_urls)
# output_media_block_type = output_media_blocks[0][0]
# expected_media_block_type = "embed"

self.assertEqual(
output_media_block_type,
expected_media_block_type,
)
# self.assertEqual(
# output_media_block_type,
# expected_media_block_type,
# )

def test_parse_media_blocks_with_pdf_url(self) -> None:
input_media_urls = [
Expand Down Expand Up @@ -687,9 +686,7 @@ def test_adapt_html_to_generic_blocks_with_pullquote(self) -> None:
)

def test_adapt_html_to_generic_blocks_with_image(self) -> None:
html_string = (
"""<p>Some text</p><p><img src="https://www.example.com/image.jpg" /></p>"""
)
html_string = """<p>Some text</p><p><img src="https://westernfriend.org/image.jpg" /></p>""" # noqa: E501

generic_blocks = adapt_html_to_generic_blocks(html_string)

Expand All @@ -698,8 +695,24 @@ def test_adapt_html_to_generic_blocks_with_image(self) -> None:
self.assertEqual(generic_blocks[0].block_content, """<p>Some text</p>""")
self.assertEqual(generic_blocks[1].block_type, "image")
self.assertEqual(
generic_blocks[1].block_content,
"https://www.example.com/image.jpg",
generic_blocks[1].block_content["image"], # type: ignore
"https://westernfriend.org/image.jpg",
)

def test_adapt_html_to_generic_blocks_with_image_wrapped_in_link(self) -> None:
    """An image wrapped in an anchor yields one image block with its link."""
    # Expected block content, checked key by key in this order.
    expected_content = {
        "image": "https://westernfriend.org/image.jpg",
        "link": "https://example.com",
    }
    html_string = """<a href="https://example.com"><img src="https://westernfriend.org/image.jpg" /></a>"""  # noqa: E501

    generic_blocks = adapt_html_to_generic_blocks(html_string)

    self.assertEqual(len(generic_blocks), 1)
    image_block = generic_blocks[0]
    self.assertEqual(image_block.block_type, "image")
    for key, expected_value in expected_content.items():
        self.assertEqual(
            image_block.block_content[key],  # type: ignore
            expected_value,
        )


Expand Down Expand Up @@ -747,7 +760,9 @@ def test_block_factory_with_invalid_block_type(self) -> None:
)

def test_create_block_invalid_image_url_missing_schema(self) -> None:
invalid_url_block = GenericBlock("image", "invalid_url")
invalid_url_block = GenericBlock(
"image", {"image": "invalid_url", "link": None}
)
with patch(
"content_migration.management.shared.create_image_block_from_url"
) as mock_create_image_block:
Expand All @@ -757,7 +772,9 @@ def test_create_block_invalid_image_url_missing_schema(self) -> None:
self.assertEqual(str(cm.exception), "Invalid image URL: missing schema")

def test_create_block_invalid_image_url_invalid_schema(self) -> None:
invalid_url_block = GenericBlock("image", "invalid_url")
invalid_url_block = GenericBlock(
"image", {"image": "invalid_url", "link": None}
)
with patch(
"content_migration.management.shared.create_image_block_from_url"
) as mock_create_image_block:
Expand Down
16 changes: 16 additions & 0 deletions library/migrations/0019_remove_libraryitem_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.2.2 on 2023-06-30 15:30

from django.db import migrations


class Migration(migrations.Migration):
    """Remove the ``description`` field from the ``libraryitem`` model."""

    # Must be applied after the preceding migration of the "library" app.
    dependencies = [
        ("library", "0018_libraryitem_drupal_body_migrated_and_more"),
    ]

    # NOTE(review): removing a field normally drops its column and the values
    # stored in it; confirm any needed data has been carried over beforehand.
    operations = [
        migrations.RemoveField(
            model_name="libraryitem",
            name="description",
        ),
    ]
3 changes: 0 additions & 3 deletions library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ class LibraryItem(DrupalFields, Page):
default=False,
help_text="This field indicates when a library item wasn't published on a specific publication date.", # noqa: E501
)
description = RichTextField(null=True, blank=True)
body = StreamField(
[
("heading", HeadingBlock()),
Expand Down Expand Up @@ -114,7 +113,6 @@ class LibraryItem(DrupalFields, Page):
)

content_panels = Page.content_panels + [
FieldPanel("description"),
InlinePanel(
"authors",
heading="Authors",
Expand Down Expand Up @@ -150,7 +148,6 @@ class LibraryItem(DrupalFields, Page):
]

search_fields = Page.search_fields + [
index.SearchField("description"),
index.SearchField("body"),
index.RelatedFields(
"item_genre",
Expand Down
2 changes: 0 additions & 2 deletions library/templates/library/library_item.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ <h1>
{% endif %}
{% endfor %}

{{ page.description | richtext }}

{% include_block page.body %}

<dl class="mt-3">
Expand Down

0 comments on commit 59bfaa2

Please sign in to comment.