-import os
-import re
-from datetime import datetime, time
-from pathlib import Path
+import datetime
+import pathlib
 from urllib.parse import urlparse
+from typing import Iterator
 
-import yaml
-from github import Auth, Github
-from github.Issue import Issue
-from github.PaginatedList import PaginatedList
+import json
+import gh_issues
 
-ROOT = Path(__file__).parent.parent
-conferences_path = ROOT / "_data/conferences.yml"
 
+QUERY = "repo:blackpythondevs/blackpythondevs.github.io type:issue label:conference"
 
-def create_github_client():
-    gh_token = os.getenv("GITHUB_TOKEN", "")
-    auth = Auth.Token(gh_token)
-    client = Github(auth=auth)
-    return client
 
-
-def get_open_issues(gh: Github) -> PaginatedList[Issue]:
-    repo = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io")
-    issues = repo.get_issues(state="open", labels=["conference"])
+def get_conference_issues(
+    query: str = QUERY,
+) -> Iterator[gh_issues.Issue]:  # pragma no cover
+    issues = gh_issues.issues_by_query(query)
     return issues
 
 
-def parse_conference_details(issue_body: str) -> dict | None:
-    # Extract fields from issue body
-    name_match = re.search(
-        r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
-    )
-    url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body)
-    dates_match = re.search(
-        r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
-    )
-    type_match = re.search(
-        r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
-    )
-    location_match = re.search(
-        r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
-    )
-    summary_match = re.search(
-        r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
-        issue_body,
-        re.DOTALL,
-    )
-    speaking_match = re.search(
-        r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
-        issue_body,
-        re.DOTALL,
-    )
+def normalize_url(url_match: str | None) -> str | None:
+    """
+    Parse the url and see if a scheme (`https`) is included in it.
+    If not, then prepend `https` to the url from the issue body
 
-    # Set a default value of None for when the url field isn't as expected
-    valid_url = normalize_url() if not url_match else normalize_url(url_match[1])
-
-    if dates_match:
-        conferenceDates = dates_match[1]
-        # Parse the end date of the conference
-        endDateStr = conferenceDates.split("-")[1].strip()
-        endDate = datetime.strptime(endDateStr, "%d %b %Y")
-        # Check if the conference end date is greater than today
-        today = datetime.combine(datetime.now(), time())
-
-        if endDate >= today:
-            conference = {
-                "name": name_match[1],
-                "url": valid_url,
-                "dates": dates_match[1],
-                "type": type_match[1],
-                "location": location_match[1],
-                "summary": summary_match[1],
-                "speaking": speaking_match[1] if speaking_match else "",
-            }
-            return conference
-    return None
-
-
-def normalize_url(url_match: str = None):
-    valid_url = None
-    # Ensure the url field is not blank and the url matches the regex
-    if url_match is not None and url_match.strip() != "":
-        # Parse the url and see if a scheme (`https`) is included in it
-        # If not, then prepend `https` to the url from the issue body
-        # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/
+    This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/
+    """
+    if url_match:
         parsed_url = urlparse(url_match)
+
         if "http" not in parsed_url.scheme.casefold():
-            valid_url = f"https://{url_match}"
+            return f"https://{url_match}"
         else:
-            valid_url = url_match
-    return valid_url
+            return url_match
 
 
 def write_conferences_to_file(confs: list[dict]):
     # Write the conferences to the _data/conferences.yml file
-    with conferences_path.open("w") as f:
-        yaml.dump(confs, f)
-
-
-if __name__ == "__main__":
-    conferences = []
-
-    # Create Github client object
-    gh_client = create_github_client()
-
-    # Get open issues from repo
-    open_issues: PaginatedList[Issue] = get_open_issues(gh_client)
-
-    # Parse each conference issue so long as it has the "conference" label
-    for issue in open_issues:
-        if "conference" in [label.name for label in issue.labels]:
-            parsed_conf = parse_conference_details(issue_body=issue.body)
-            if parsed_conf:
-                conferences.append(parsed_conf)
-
-    write_conferences_to_file(conferences)
+    conferences_path.write_text(json.dumps(confs))
+
+
+def __to_conference_date(conference_date: str) -> datetime.date:
+    return datetime.date.fromisoformat(conference_date)
+
+
+def parse_conference(issue: gh_issues.Issue) -> dict[str, str | None]:
+    """convert an issue to a dictionary of parsed data"""
+
+    KEYS = [
+        "conference_name",
+        "url",
+        "conference_start_date",
+        "conference_end_date",
+        "conference_type",
+        "conference_location",
+        "summary",
+        "speaking",
+    ]
+
+    _issue = {k: getattr(issue, k, None) for k in KEYS}
+    _issue["url"] = normalize_url(_issue.get("url", None))
+    return _issue
+
+
+def _validate_issue(issue: gh_issues.Issue, date_to_check: str) -> bool:
+    """Validate an issue based on its `date_to_check`"""
+    if not (valid_date := getattr(issue, date_to_check, False)):
+        return False
+    else:
+        return __to_conference_date(valid_date) >= datetime.date.today()
+
+
+def build_conferences() -> list[dict[str, str | None]]:  # pragma: no cover
+    return [
+        parse_conference(issue)
+        for issue in get_conference_issues()
+        if _validate_issue(issue, "conference_end_date")
+    ]
+
+
+if __name__ == "__main__":  # pragma: no cover
+    ROOT = pathlib.Path(__file__).parent.parent
+    conferences_path = ROOT.joinpath("_data/conferences.json")
+    conferences = build_conferences()
+    conferences_path.write_text(
+        json.dumps(
+            list(
+                sorted(
+                    conferences,
+                    key=lambda x: __to_conference_date(x["conference_start_date"]),
+                )
+            ),
+            indent=2,
+        )
+    )
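
For context, here is a minimal, self-contained sketch of the two pieces of pure-Python logic the refactored script leans on: URL normalization and end-date filtering. It mirrors normalize_url() and _validate_issue() from the diff above; FakeIssue-style SimpleNamespace and the helper name is_upcoming are only hypothetical stand-ins, since real issues come from gh_issues.issues_by_query() and are assumed to expose the parsed issue-form fields (conference_name, url, conference_end_date, ...) as attributes.

import datetime
from types import SimpleNamespace
from urllib.parse import urlparse


def normalize_url(url_match: str | None) -> str | None:
    # Prepend https:// when the issue body left out the scheme, so the site
    # does not treat the value as a relative page on blackpythondevs.com.
    if url_match:
        parsed_url = urlparse(url_match)
        if "http" not in parsed_url.scheme.casefold():
            return f"https://{url_match}"
        return url_match
    return None


def is_upcoming(issue, date_attr: str = "conference_end_date") -> bool:
    # Keep only conferences whose end date (an ISO "YYYY-MM-DD" string)
    # is today or later; a missing date fails validation.
    value = getattr(issue, date_attr, None)
    if not value:
        return False
    return datetime.date.fromisoformat(value) >= datetime.date.today()


# Hypothetical stand-in for a parsed gh_issues.Issue.
fake_issue = SimpleNamespace(
    conference_name="Example Conf",
    url="example.com/conf",
    conference_start_date="2099-06-01",
    conference_end_date="2099-06-03",
)

print(normalize_url(fake_issue.url))  # https://example.com/conf
print(is_upcoming(fake_issue))        # True (until 2099-06-03 has passed)

For issues that pass this check, build_conferences() returns a list of dicts keyed by the names in KEYS, which the __main__ block sorts by conference_start_date and writes as indented JSON to _data/conferences.json.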