|
30 | 30 | LearningResourceFactory,
|
31 | 31 | LearningResourcePlatformFactory,
|
32 | 32 | LearningResourceRunFactory,
|
| 33 | + TutorProblemFileFactory, |
33 | 34 | )
|
34 | 35 | from learning_resources.models import LearningResource
|
35 | 36 | from learning_resources_search.constants import CONTENT_FILE_TYPE
|
36 |
| -from main.utils import now_in_utc |
| 37 | +from main.utils import checksum_for_content, now_in_utc |
37 | 38 |
|
38 | 39 | pytestmark = pytest.mark.django_db
|
39 | 40 |
|
@@ -471,8 +472,7 @@ def test_transform_canvas_problem_files_pdf_calls_pdf_to_markdown(
|
471 | 472 | return_value="markdown content from pdf",
|
472 | 473 | )
|
473 | 474 |
|
474 |
| - # Patch Path(olx_path) / Path(problem_file_data["source_path"]) to exist |
475 |
| - run = mocker.Mock() |
| 475 | + run = LearningResourceRunFactory.create() |
476 | 476 |
|
477 | 477 | results = list(transform_canvas_problem_files(zip_path, run, overwrite=True))
|
478 | 478 |
|
@@ -1534,3 +1534,52 @@ def test_get_published_items_for_unpublshed_but_embedded(mocker, tmp_path):
|
1534 | 1534 | }
|
1535 | 1535 | published = get_published_items(zip_path, url_config)
|
1536 | 1536 | assert Path("web_resources/file1.pdf").resolve() in published
|
| 1537 | + |
| 1538 | + |
| 1539 | +def test_transform_canvas_problem_files_skips_pdf_to_markdown_if_checksum_exists( |
| 1540 | + tmp_path, mocker, settings |
| 1541 | +): |
| 1542 | + """ |
| 1543 | + Test that transform_canvas_problem_files does not call _pdf_to_markdown if the checksum already exists. |
| 1544 | + """ |
| 1545 | + settings.CANVAS_TUTORBOT_FOLDER = "tutorbot/" |
| 1546 | + settings.CANVAS_PDF_TRANSCRIPTION_MODEL = "fake-model" |
| 1547 | + pdf_filename = "problemset3/problem.pdf" |
| 1548 | + pdf_content = b"%PDF-1.4 fake pdf content" |
| 1549 | + zip_path = make_canvas_zip( |
| 1550 | + tmp_path, files=[(f"tutorbot/{pdf_filename}", pdf_content)] |
| 1551 | + ) |
| 1552 | + |
| 1553 | + original_pdf_content = "original pdf content" |
| 1554 | + existing_checksum = checksum_for_content(original_pdf_content) |
| 1555 | + |
| 1556 | + mock_run = LearningResourceRunFactory.create() |
| 1557 | + TutorProblemFileFactory.create( |
| 1558 | + run=mock_run, |
| 1559 | + problem_title="Problem Set 1", |
| 1560 | + type="problem", |
| 1561 | + checksum=existing_checksum, |
| 1562 | + ) |
| 1563 | + |
| 1564 | + fake_file_data = { |
| 1565 | + "run": mock_run, |
| 1566 | + "content": original_pdf_content, |
| 1567 | + "archive_checksum": "checksum", |
| 1568 | + "source_path": f"tutorbot/{pdf_filename}", |
| 1569 | + "file_extension": ".pdf", |
| 1570 | + } |
| 1571 | + |
| 1572 | + mocker.patch( |
| 1573 | + "learning_resources.etl.canvas._process_olx_path", |
| 1574 | + return_value=iter([fake_file_data]), |
| 1575 | + ) |
| 1576 | + |
| 1577 | + pdf_to_md = mocker.patch("learning_resources.etl.canvas._pdf_to_markdown") |
| 1578 | + |
| 1579 | + results = list(transform_canvas_problem_files(zip_path, mock_run, overwrite=True)) |
| 1580 | + |
| 1581 | + pdf_to_md.assert_not_called() |
| 1582 | + |
| 1583 | + assert len(results) == 1 |
| 1584 | + assert results[0]["content"] == "original pdf content" |
| 1585 | + assert results[0]["source_path"] == f"tutorbot/{pdf_filename}" |
0 commit comments