|
6 | 6 |
|
7 | 7 | import yaml |
8 | 8 | from github import Auth, Github |
| 9 | +from github.Issue import Issue |
| 10 | +from github.PaginatedList import PaginatedList |
9 | 11 |
|
10 | | -TOKEN = os.getenv("GITHUB_TOKEN", "") |
11 | 12 | ROOT = Path(__file__).parent.parent |
12 | 13 | conferences_path = ROOT / "_data/conferences.yml" |
13 | 14 |
|
14 | | -auth = Auth.Token(TOKEN) |
15 | | -g = Github(auth=auth) |
16 | | - |
17 | | -repo = g.get_repo("BlackPythonDevs/blackpythondevs.github.io") |
18 | | -open_issues = repo.get_issues(state="open", labels=["conference"]) |
19 | | -conferences = [] |
20 | | -today = datetime.combine(datetime.now(), time()) |
21 | | - |
22 | | -for issue in open_issues: |
23 | | - if "conference" in [label.name for label in issue.labels]: |
24 | | - # Extract fields from issue body |
25 | | - name_match = re.search( |
26 | | - r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
27 | | - ) |
28 | | - url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body) |
29 | | - dates_match = re.search( |
30 | | - r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
31 | | - ) |
32 | | - type_match = re.search( |
33 | | - r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
34 | | - ) |
35 | | - location_match = re.search( |
36 | | - r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
37 | | - ) |
38 | | - summary_match = re.search( |
39 | | - r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", |
40 | | - issue.body, |
41 | | - re.DOTALL, |
42 | | - ) |
43 | | - speaking_match = re.search( |
44 | | - r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}", |
45 | | - issue.body, |
46 | | - re.DOTALL, |
47 | | - ) |
48 | | - |
49 | | - # Set a default value of None for when the url field isn't as expected |
50 | | - valid_url = None |
51 | | - |
52 | | - # Ensure the url field is not blank and the url matches the regex |
53 | | - if url_match is not None and url_match[1].strip() != "": |
54 | | - # Parse the url and see if a scheme (`https`) is included in it |
55 | | - # If not, then prepend `https` to the url from the issue body |
56 | | - # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ |
57 | | - parsed_url = urlparse(url_match[1]) |
58 | | - if "http" not in parsed_url.scheme.casefold(): |
59 | | - valid_url = f"https://{url_match[1]}" |
60 | | - |
61 | | - if dates_match: |
62 | | - conferenceDates = dates_match[1] |
63 | | - # Parse the end date of the conference |
64 | | - endDateStr = conferenceDates.split("-")[1].strip() |
65 | | - endDate = datetime.strptime(endDateStr, "%d %b %Y") |
66 | | - # Check if the conference end date is greater than today |
67 | | - if endDate >= today: |
68 | | - conference = { |
69 | | - "name": name_match[1], |
70 | | - "url": valid_url, |
71 | | - "dates": dates_match[1], |
72 | | - "type": type_match[1], |
73 | | - "location": location_match[1], |
74 | | - "summary": summary_match[1], |
75 | | - "speaking": speaking_match[1] if speaking_match else "", |
76 | | - } |
77 | | - conferences.append(conference) |
78 | | - |
79 | | -# Write the conferences to the _data/conferences.yml file |
80 | | -with conferences_path.open("w") as f: |
81 | | - yaml.dump(conferences, f) |
| 15 | + |
def create_github_client():
    """Build an authenticated PyGithub client from the GITHUB_TOKEN env var."""
    # An empty token still yields a client; unauthenticated calls are rate-limited.
    token = os.getenv("GITHUB_TOKEN", "")
    return Github(auth=Auth.Token(token))

def get_open_issues(gh: Github) -> PaginatedList[Issue]:
    """Fetch the open, "conference"-labeled issues from the BPD website repo."""
    repository = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io")
    return repository.get_issues(state="open", labels=["conference"])

def parse_conference_details(issue_body: str) -> dict | None:
    """Parse a conference-submission issue body into a conference dict.

    The GitHub issue form renders each field as a heading followed by a
    blank line and the value, so every field is extracted with the same
    "Heading\\n\\n<value>\\n\\n" pattern.

    Returns None when a required field is missing, the end date cannot
    be parsed, or the conference has already ended.
    """
    # Extract fields from the issue body.
    name_match = re.search(
        r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body)
    dates_match = re.search(
        r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    type_match = re.search(
        r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    location_match = re.search(
        r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    summary_match = re.search(
        r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )
    speaking_match = re.search(
        r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )

    # Skip malformed bodies (e.g. hand-written issues that don't follow the
    # form) instead of raising TypeError on a None match object.
    required = (name_match, dates_match, type_match, location_match, summary_match)
    if any(match is None for match in required):
        return None

    # None when the url field is blank or not as expected.
    valid_url = normalize_url(url_match[1]) if url_match else normalize_url()

    # Parse the end date of the conference. Taking the last "-"-separated
    # piece also accepts single-day events written without a date range
    # (the original `split("-")[1]` raised IndexError on those).
    end_date_str = dates_match[1].split("-")[-1].strip()
    try:
        end_date = datetime.strptime(end_date_str, "%d %b %Y")
    except ValueError:
        # Unparseable date -> drop this issue rather than crash the sync job.
        return None

    # Only keep conferences that have not ended yet (end date >= today).
    today = datetime.combine(datetime.now(), time())
    if end_date < today:
        return None

    return {
        "name": name_match[1],
        "url": valid_url,
        "dates": dates_match[1],
        "type": type_match[1],
        "location": location_match[1],
        "summary": summary_match[1],
        "speaking": speaking_match[1] if speaking_match else "",
    }

| 80 | +def normalize_url(url_match: str = None): |
| 81 | + valid_url = None |
| 82 | + # Ensure the url field is not blank and the url matches the regex |
| 83 | + if url_match is not None and url_match.strip() != "": |
| 84 | + # Parse the url and see if a scheme (`https`) is included in it |
| 85 | + # If not, then prepend `https` to the url from the issue body |
| 86 | + # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ |
| 87 | + parsed_url = urlparse(url_match) |
| 88 | + if "http" not in parsed_url.scheme.casefold(): |
| 89 | + valid_url = f"https://{url_match}" |
| 90 | + else: |
| 91 | + valid_url = url_match |
| 92 | + return valid_url |
| 93 | + |
| 94 | + |
def write_conferences_to_file(confs: list[dict]):
    """Serialize *confs* as YAML into the _data/conferences.yml data file."""
    with open(conferences_path, "w") as yaml_file:
        yaml.dump(confs, yaml_file)

if __name__ == "__main__":
    # Create the authenticated client and pull the candidate issues.
    gh_client = create_github_client()
    open_issues: PaginatedList[Issue] = get_open_issues(gh_client)

    # Parse every issue that carries the "conference" label; keep only the
    # ones that yield a valid, still-upcoming conference entry.
    conferences = []
    for issue in open_issues:
        label_names = {label.name for label in issue.labels}
        if "conference" in label_names:
            parsed_conf = parse_conference_details(issue_body=issue.body)
            if parsed_conf:
                conferences.append(parsed_conf)

    write_conferences_to_file(conferences)
0 commit comments