Skip to content

Commit 684cccf

Browse files
committed
Use a function to assign target path, simplify yt_dlp download
1 parent 101a142 commit 684cccf

File tree

1 file changed

+9
-19
lines changed

1 file changed

+9
-19
lines changed

dg_projects/learning_resources/learning_resources/assets/youtube_shorts.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ class YouTubeShortsConfig(Config):
4141
cached_metadata: str | None = None # JSON string of video metadata from sensor
4242

4343

44+
def _assign_s3_path(video_id: str, extension: str) -> str:
45+
"""Assign S3 path for a YouTube video based on its ID and file extension."""
46+
return f"{video_id}/{video_id}.{extension}"
47+
48+
4449
def _load_metadata_from_config(
4550
config: YouTubeShortsConfig,
4651
) -> dict[str, Any] | None:
@@ -147,7 +152,7 @@ def youtube_video_metadata(
147152
with metadata_file.open("w", encoding="utf-8") as f:
148153
json.dump(video_data, f, indent=2, ensure_ascii=False)
149154

150-
target_path = f"youtube_shorts/{video_id}/{video_id}.json"
155+
target_path = _assign_s3_path(video_id, "json")
151156

152157
return Output(
153158
value=(metadata_file, target_path),
@@ -203,22 +208,10 @@ def youtube_video_content(
203208
output_file = Path(f"{video_id}.mp4")
204209

205210
ydl_opts = {
206-
"format": "best[height<=720]/best",
211+
"format": "best",
207212
"outtmpl": str(output_file),
208213
"noplaylist": True,
209214
"extract_flat": False,
210-
# Anti-bot measures
211-
"http_headers": {
212-
"User-Agent": (
213-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
214-
),
215-
"Accept": (
216-
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
217-
),
218-
"Accept-Language": "en-us,en;q=0.5",
219-
"Accept-Encoding": "gzip, deflate",
220-
"Connection": "keep-alive",
221-
},
222215
}
223216

224217
try:
@@ -235,8 +228,7 @@ def youtube_video_content(
235228
)
236229

237230
# Return tuple (local_path, s3_target_path) for s3file_io_manager to upload
238-
# Similar to canvas asset pattern
239-
target_path = f"youtube_shorts/{video_id}/{video_id}.mp4"
231+
target_path = _assign_s3_path(video_id, "mp4")
240232

241233
return Output(
242234
value=(output_file, target_path),
@@ -339,9 +331,7 @@ def youtube_video_thumbnail(
339331
content_length,
340332
)
341333

342-
# Return tuple (local_path, s3_target_path) for s3file_io_manager to upload
343-
# Similar to canvas asset pattern
344-
target_path = f"youtube_shorts/{video_id}/{video_id}.jpg"
334+
target_path = _assign_s3_path(video_id, "jpg")
345335

346336
return Output(
347337
value=(thumbnail_file, target_path),

0 commit comments

Comments
 (0)