From f7202f331a0e7b57d225cbc19aad143cbe000d37 Mon Sep 17 00:00:00 2001 From: Alex Oladele Date: Fri, 16 Aug 2024 15:33:42 -0400 Subject: [PATCH 1/2] Ensure the conference URL always has a scheme --- _conferences/__main__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/_conferences/__main__.py b/_conferences/__main__.py index 362513f..f57bb4d 100644 --- a/_conferences/__main__.py +++ b/_conferences/__main__.py @@ -2,6 +2,7 @@ import re from datetime import datetime, time from pathlib import Path +from urllib.parse import urlparse import yaml from github import Auth, Github @@ -45,6 +46,13 @@ re.DOTALL, ) + # Check if there is a scheme (`https`) already in the parsed url + valid_url = None + if url_match is not None and url_match[1].strip() != "": + parsed_url = urlparse(url_match[1]) + if "http" not in parsed_url.scheme.casefold(): + valid_url = f"https://{url_match[1]}" + if dates_match: conferenceDates = dates_match[1] # Parse the end date of the conference @@ -54,7 +62,7 @@ if endDate >= today: conference = { "name": name_match[1], - "url": url_match[1], + "url": valid_url, "dates": dates_match[1], "type": type_match[1], "location": location_match[1], From 3baa048c095504abd431ec6efe3bcd09d474d4ca Mon Sep 17 00:00:00 2001 From: Alex Oladele Date: Fri, 16 Aug 2024 15:58:02 -0400 Subject: [PATCH 2/2] Clarify why the extra parsing logic is necessary --- _conferences/__main__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/_conferences/__main__.py b/_conferences/__main__.py index f57bb4d..4ff4d38 100644 --- a/_conferences/__main__.py +++ b/_conferences/__main__.py @@ -46,9 +46,14 @@ re.DOTALL, ) - # Check if there is a scheme (`https`) already in the parsed url + # Set a default value of None for when the url field isn't as expected valid_url = None + + # Ensure the url field is not blank and the url matches the regex if url_match is not None and url_match[1].strip() != "": + # Parse the url and see if a scheme 
(`http` or `https`) is included in it + # If not, then prepend `https://` to the url from the issue body (NOTE(review): a url that already has a scheme leaves valid_url as None - confirm this is intended) + # This guards against the website treating the passed-in url as another page on https://blackpythondevs.com/ parsed_url = urlparse(url_match[1]) if "http" not in parsed_url.scheme.casefold(): valid_url = f"https://{url_match[1]}"