Add tests for Conferences script #404

Merged
merged 12 commits on Sep 1, 2024
8 changes: 8 additions & 0 deletions .github/workflows/playwright.yml
@@ -31,5 +31,13 @@ jobs:
bundler-cache: true
- name: Jekyll detached and pytest
run: |
# Start up local copy of site
bundle exec jekyll serve --detach

# Sleep for 5 secs to allow Jekyll to start
startup_wait=5
echo "Sleeping for $startup_wait seconds"
sleep $startup_wait

# Run tests
python -m pytest
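
The fixed five-second sleep is a simple way to give Jekyll time to start, but a slow boot can still race the tests. Below is a minimal readiness-poll sketch, not part of this pull request, assuming the site is served at http://127.0.0.1:4000 as the tests expect; the file and function names are illustrative.

# wait_for_jekyll.py -- illustrative sketch only, not part of this change.
# Polls the local Jekyll server until it answers, instead of sleeping a fixed time.
import time
import urllib.error
import urllib.request


def wait_for_server(url: str = "http://127.0.0.1:4000", timeout: float = 30.0) -> bool:
    """Return True once `url` responds to an HTTP request, or False after `timeout` seconds."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url) as response:
                if response.status < 500:
                    return True
        except urllib.error.HTTPError as exc:
            if exc.code < 500:
                return True  # Server is up, even if this path returns a client error.
        except (urllib.error.URLError, ConnectionError):
            pass  # Server not accepting connections yet; retry shortly.
        time.sleep(0.5)
    return False


if __name__ == "__main__":
    raise SystemExit(0 if wait_for_server() else 1)
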
174 changes: 105 additions & 69 deletions _conferences/__main__.py
@@ -6,76 +6,112 @@

import yaml
from github import Auth, Github
from github.Issue import Issue
from github.PaginatedList import PaginatedList

TOKEN = os.getenv("GITHUB_TOKEN", "")
ROOT = Path(__file__).parent.parent
conferences_path = ROOT / "_data/conferences.yml"

auth = Auth.Token(TOKEN)
g = Github(auth=auth)

repo = g.get_repo("BlackPythonDevs/blackpythondevs.github.io")
open_issues = repo.get_issues(state="open", labels=["conference"])
conferences = []
today = datetime.combine(datetime.now(), time())

for issue in open_issues:
if "conference" in [label.name for label in issue.labels]:
# Extract fields from issue body
name_match = re.search(
r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
)
url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body)
dates_match = re.search(
r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
)
type_match = re.search(
r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
)
location_match = re.search(
r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body
)
summary_match = re.search(
r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
issue.body,
re.DOTALL,
)
speaking_match = re.search(
r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
issue.body,
re.DOTALL,
)

# Set a default value of None for when the url field isn't as expected
valid_url = None

# Ensure the url field is not blank and the url matches the regex
if url_match is not None and url_match[1].strip() != "":
# Parse the url and see if a scheme (`https`) is included in it
# If not, then prepend `https` to the url from the issue body
# This guards against the website treating the passed-in URL as another page on https://blackpythondevs.com/
parsed_url = urlparse(url_match[1])
if "http" not in parsed_url.scheme.casefold():
valid_url = f"https://{url_match[1]}"

if dates_match:
conferenceDates = dates_match[1]
# Parse the end date of the conference
endDateStr = conferenceDates.split("-")[1].strip()
endDate = datetime.strptime(endDateStr, "%d %b %Y")
# Check if the conference end date is today or later
if endDate >= today:
conference = {
"name": name_match[1],
"url": valid_url,
"dates": dates_match[1],
"type": type_match[1],
"location": location_match[1],
"summary": summary_match[1],
"speaking": speaking_match[1] if speaking_match else "",
}
conferences.append(conference)

# Write the conferences to the _data/conferences.yml file
with conferences_path.open("w") as f:
yaml.dump(conferences, f)

def create_github_client():
gh_token = os.getenv("GITHUB_TOKEN", "")
auth = Auth.Token(gh_token)
client = Github(auth=auth)
return client


def get_open_issues(gh: Github) -> PaginatedList[Issue]:
repo = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io")
issues = repo.get_issues(state="open", labels=["conference"])
return issues


def parse_conference_details(issue_body: str) -> dict | None:
# Extract fields from issue body
name_match = re.search(
r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
)
url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body)
dates_match = re.search(
r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
)
type_match = re.search(
r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
)
location_match = re.search(
r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
)
summary_match = re.search(
r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
issue_body,
re.DOTALL,
)
speaking_match = re.search(
r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
issue_body,
re.DOTALL,
)

# Set a default value of None for when the url field isn't as expected
valid_url = normalize_url() if not url_match else normalize_url(url_match[1])

if dates_match:
conferenceDates = dates_match[1]
# Parse the end date of the conference
endDateStr = conferenceDates.split("-")[1].strip()
endDate = datetime.strptime(endDateStr, "%d %b %Y")
# Check if the conference end date is today or later
today = datetime.combine(datetime.now(), time())

if endDate >= today:
conference = {
"name": name_match[1],
"url": valid_url,
"dates": dates_match[1],
"type": type_match[1],
"location": location_match[1],
"summary": summary_match[1],
"speaking": speaking_match[1] if speaking_match else "",
}
return conference
return None


def normalize_url(url_match: str = None):
valid_url = None
# Ensure the url field is not blank and the url matches the regex
if url_match is not None and url_match.strip() != "":
# Parse the url and see if a scheme (`https`) is included in it
# If not, then prepend `https` to the url from the issue body
# This guards against the website treating the passed-in URL as another page on https://blackpythondevs.com/
parsed_url = urlparse(url_match)
if "http" not in parsed_url.scheme.casefold():
valid_url = f"https://{url_match}"
else:
valid_url = url_match
return valid_url


def write_conferences_to_file(confs: list[dict]):
# Write the conferences to the _data/conferences.yml file
with conferences_path.open("w") as f:
yaml.dump(confs, f)


if __name__ == "__main__":
conferences = []

# Create Github client object
gh_client = create_github_client()

# Get open issues from repo
open_issues: PaginatedList[Issue] = get_open_issues(gh_client)

# Parse each conference issue so long as it has the "conference" label
for issue in open_issues:
if "conference" in [label.name for label in issue.labels]:
parsed_conf = parse_conference_details(issue_body=issue.body)
if parsed_conf:
conferences.append(parsed_conf)

write_conferences_to_file(conferences)
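
For reference, a short sketch (not part of this diff) of how the new normalize_url helper behaves for the three cases it distinguishes; the example values are made up.

from _conferences.__main__ import normalize_url

# A URL that already carries a scheme is returned unchanged.
assert normalize_url("https://example.com") == "https://example.com"

# A URL without a scheme gets "https://" prepended so the site does not
# treat it as a relative link on blackpythondevs.com.
assert normalize_url("example.com") == "https://example.com"

# A blank or missing value yields None, the default used by parse_conference_details.
assert normalize_url("   ") is None
assert normalize_url() is None
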
121 changes: 121 additions & 0 deletions tests/test.py
@@ -1,6 +1,10 @@
import time

import pytest
from playwright.sync_api import Page, expect

from _conferences.__main__ import parse_conference_details

live_server_url = "http://127.0.0.1:4000"

routes = [
@@ -11,6 +15,13 @@
]


# Add a delay to each test to help with playwright race conditions
@pytest.fixture(autouse=True)
def slow_down_tests():
yield
time.sleep(1)


@pytest.mark.parametrize("url", routes)
def test_destination(
page: Page,
@@ -101,3 +112,113 @@ def test_mailto_bpdevs(page: Page) -> None:
page.goto(f"{live_server_url}")
mailto = page.get_by_role("link", name="email")
expect(mailto).to_have_attribute("href", "mailto:[email protected]")


def test_conference_parsing_valid_url():
example_conf_issue = """### Conference Name

Test Conference Title

### URL

https://microsoft.com

### Conference Dates

10 - 15 Sep 2050

### Conference Type

both

### Conference Location

Redmond, WA, USA

### Summary

Test Conference Summary

### Speaking

* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
expected_name = "Test Conference Title"
expected_url = "https://microsoft.com"
parsed_conf = parse_conference_details(issue_body=example_conf_issue)

assert parsed_conf["name"] == expected_name
assert parsed_conf["url"] == expected_url


def test_conference_parsing_logic_no_url_scheme():
example_conf_issue = """### Conference Name

Test Conference Title

### URL

microsoft.com

### Conference Dates

10 - 15 Sep 2050

### Conference Type

both

### Conference Location

Redmond, WA, USA

### Summary

Test Conference Summary

### Speaking

* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
expected_name = "Test Conference Title"
expected_url = "https://microsoft.com"
parsed_conf = parse_conference_details(issue_body=example_conf_issue)

assert parsed_conf["name"] == expected_name
assert parsed_conf["url"] == expected_url


def test_conference_parsing_logic_no_url():
example_conf_issue = """### Conference Name

Test Conference Title

### URL


### Conference Dates

10 - 15 Sep 2050

### Conference Type

both

### Conference Location

Redmond, WA, USA

### Summary

Test Conference Summary

### Speaking

* [Satya Nadella](https://www.linkedin.com/in/satyanadella/)
"""
expected_name = "Test Conference Title"
expected_url = None
parsed_conf = parse_conference_details(issue_body=example_conf_issue)

assert parsed_conf["name"] == expected_name
assert parsed_conf["url"] == expected_url
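
One further case follows from the date check in parse_conference_details: a conference whose end date has already passed should be filtered out, so the parser returns None. A hedged sketch of such a test, not part of this diff, with a hypothetical test name and made-up issue body:

def test_conference_parsing_past_conference_returns_none():
    example_conf_issue = """### Conference Name

Past Conference Title

### URL

https://example.com

### Conference Dates

10 - 15 Sep 2020

### Conference Type

both

### Conference Location

Redmond, WA, USA

### Summary

Test Conference Summary

### Speaking

* [Speaker Name](https://example.com/speaker)
"""
    # The end date (15 Sep 2020) is in the past, so the issue is dropped.
    parsed_conf = parse_conference_details(issue_body=example_conf_issue)

    assert parsed_conf is None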