|
6 | 6 |
|
7 | 7 | import yaml |
8 | 8 | from github import Auth, Github |
| 9 | +from github.Issue import Issue |
| 10 | +from github.PaginatedList import PaginatedList |
9 | 11 |
|
10 | | -TOKEN = os.getenv("GITHUB_TOKEN", "") |
11 | 12 | ROOT = Path(__file__).parent.parent |
12 | 13 | conferences_path = ROOT / "_data/conferences.yml" |
13 | 14 |
|
14 | | -auth = Auth.Token(TOKEN) |
15 | | -g = Github(auth=auth) |
16 | | - |
17 | | -repo = g.get_repo("BlackPythonDevs/blackpythondevs.github.io") |
18 | | -open_issues = repo.get_issues(state="open", labels=["conference"]) |
19 | | -conferences = [] |
20 | | -today = datetime.combine(datetime.now(), time()) |
21 | | - |
22 | | -for issue in open_issues: |
23 | | - if "conference" in [label.name for label in issue.labels]: |
24 | | - # Extract fields from issue body |
25 | | - name_match = re.search( |
26 | | - r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
27 | | - ) |
28 | | - url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body) |
29 | | - dates_match = re.search( |
30 | | - r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
31 | | - ) |
32 | | - type_match = re.search( |
33 | | - r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
34 | | - ) |
35 | | - location_match = re.search( |
36 | | - r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue.body |
37 | | - ) |
38 | | - summary_match = re.search( |
39 | | - r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", |
40 | | - issue.body, |
41 | | - re.DOTALL, |
42 | | - ) |
43 | | - speaking_match = re.search( |
44 | | - r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}", |
45 | | - issue.body, |
46 | | - re.DOTALL, |
47 | | - ) |
48 | | - |
49 | | - # Set a default value of None for when the url field isn't as expected |
50 | | - valid_url = None |
51 | | - |
52 | | - # Ensure the url field is not blank and the url matches the regex |
53 | | - if url_match is not None and url_match[1].strip() != "": |
54 | | - # Parse the url and see if a scheme (`https`) is included in it |
55 | | - # If not, then prepend `https` to the url from the issue body |
56 | | - # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ |
57 | | - parsed_url = urlparse(url_match[1]) |
58 | | - if "http" not in parsed_url.scheme.casefold(): |
59 | | - valid_url = f"https://{url_match[1]}" |
60 | | - |
61 | | - if dates_match: |
62 | | - conferenceDates = dates_match[1] |
63 | | - # Parse the end date of the conference |
64 | | - endDateStr = conferenceDates.split("-")[1].strip() |
65 | | - endDate = datetime.strptime(endDateStr, "%d %b %Y") |
66 | | - # Check if the conference end date is greater than today |
67 | | - if endDate >= today: |
68 | | - conference = { |
69 | | - "name": name_match[1], |
70 | | - "url": valid_url, |
71 | | - "dates": dates_match[1], |
72 | | - "type": type_match[1], |
73 | | - "location": location_match[1], |
74 | | - "summary": summary_match[1], |
75 | | - "speaking": speaking_match[1] if speaking_match else "", |
76 | | - } |
77 | | - conferences.append(conference) |
78 | | - |
79 | | -# Write the conferences to the _data/conferences.yml file |
80 | | -with conferences_path.open("w") as f: |
81 | | - yaml.dump(conferences, f) |
| 15 | + |
def create_github_client():
    """Build an authenticated PyGithub client from the GITHUB_TOKEN env var."""
    # An empty token still yields a client; unauthenticated calls are rate-limited.
    token = os.getenv("GITHUB_TOKEN", "")
    return Github(auth=Auth.Token(token))

def get_open_issues(gh: Github) -> PaginatedList[Issue]:
    """Fetch the open, "conference"-labeled issues from the BPD website repo."""
    repository = gh.get_repo("BlackPythonDevs/blackpythondevs.github.io")
    return repository.get_issues(state="open", labels=["conference"])

def parse_conference_details(issue_body: str) -> dict | None:
    """Parse a conference-submission issue body into a conference dict.

    The GitHub issue form renders each field as a heading followed by a
    blank line and the value, so every field is extracted with the same
    "Heading\\n\\n<value>\\n\\n" pattern.

    Returns None when a required field is missing, the end date cannot
    be parsed, or the conference has already ended.
    """
    # Extract fields from the issue body.
    name_match = re.search(
        r"Conference Name(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    url_match = re.search(r"URL(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body)
    dates_match = re.search(
        r"Conference Dates(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    type_match = re.search(
        r"Conference Type(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    location_match = re.search(
        r"Conference Location(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}", issue_body
    )
    summary_match = re.search(
        r"Summary(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )
    speaking_match = re.search(
        r"Speaking(?:\r\n|\n){2}(.*?)(?:\r\n|\n){2}### Code of Conduct(?:\r\n|\n){2}",
        issue_body,
        re.DOTALL,
    )

    # Skip malformed bodies (e.g. hand-written issues that don't follow the
    # form) instead of raising TypeError on a None match object.
    required = (name_match, dates_match, type_match, location_match, summary_match)
    if any(match is None for match in required):
        return None

    # None when the url field is blank or not as expected.
    valid_url = normalize_url(url_match[1]) if url_match else normalize_url()

    # Parse the end date of the conference. Taking the last "-"-separated
    # piece also accepts single-day events written without a date range
    # (the original `split("-")[1]` raised IndexError on those).
    end_date_str = dates_match[1].split("-")[-1].strip()
    try:
        end_date = datetime.strptime(end_date_str, "%d %b %Y")
    except ValueError:
        # Unparseable date -> drop this issue rather than crash the sync job.
        return None

    # Only keep conferences that have not ended yet (end date >= today).
    today = datetime.combine(datetime.now(), time())
    if end_date < today:
        return None

    return {
        "name": name_match[1],
        "url": valid_url,
        "dates": dates_match[1],
        "type": type_match[1],
        "location": location_match[1],
        "summary": summary_match[1],
        "speaking": speaking_match[1] if speaking_match else "",
    }

| 80 | +def normalize_url(url_match: str = None): |
| 81 | + valid_url = None |
| 82 | + # Ensure the url field is not blank and the url matches the regex |
| 83 | + if url_match is not None and url_match.strip() != "": |
| 84 | + # Parse the url and see if a scheme (`https`) is included in it |
| 85 | + # If not, then prepend `https` to the url from the issue body |
| 86 | + # This guards against the website thinking the passed in url is another page on https://blackpythondevs.com/ |
| 87 | + parsed_url = urlparse(url_match) |
| 88 | + if "http" not in parsed_url.scheme.casefold(): |
| 89 | + valid_url = f"https://{url_match}" |
| 90 | + else: |
| 91 | + valid_url = url_match |
| 92 | + return valid_url |
| 93 | + |
| 94 | + |
def write_conferences_to_file(confs: list[dict]):
    """Serialize *confs* as YAML into the _data/conferences.yml data file."""
    with open(conferences_path, "w") as yaml_file:
        yaml.dump(confs, yaml_file)

if __name__ == "__main__":
    # Create the authenticated client and pull the candidate issues.
    gh_client = create_github_client()
    open_issues: PaginatedList[Issue] = get_open_issues(gh_client)

    # Parse every issue that carries the "conference" label; keep only the
    # ones that yield a valid, still-upcoming conference entry.
    conferences = []
    for issue in open_issues:
        label_names = {label.name for label in issue.labels}
        if "conference" in label_names:
            parsed_conf = parse_conference_details(issue_body=issue.body)
            if parsed_conf:
                conferences.append(parsed_conf)

    write_conferences_to_file(conferences)
0 commit comments