Skip to content

Commit 101a142

Browse files
committed
More tweaks
1 parent 607a17d commit 101a142

File tree

2 files changed

+12
-20
lines changed

2 files changed

+12
-20
lines changed

dg_projects/learning_resources/learning_resources/assets/youtube_shorts.py

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def _load_metadata_from_config(
5353
return json.loads(config.cached_metadata)
5454
except (json.JSONDecodeError, TypeError):
5555
log.warning(
56-
"Failed to parse metadata from run_config, will fetch from API",
56+
"Failed to parse metadata from run_config",
5757
exc_info=True,
5858
)
5959
return None
@@ -84,24 +84,11 @@ def fetch_video_metadata(youtube_client, video_id):
8484
return request.execute()
8585

8686

87-
def upload_to_s3(s3_client, bucket, key, body, content_type):
88-
s3_client.put_object(
89-
Bucket=bucket,
90-
Key=key,
91-
Body=body,
92-
ContentType=content_type,
93-
)
94-
95-
96-
def send_webhook(learn_api: ApiClientFactory, video_id: str) -> dict[str, Any]:
97-
return learn_api.client.notify_shorts_processed(video_id)
98-
99-
10087
@asset(
10188
key=AssetKey(["youtube_shorts", "video_metadata"]),
10289
group_name="youtube_shorts",
10390
description="Extract YouTube video metadata from API and upload to S3",
104-
code_version="youtube_video_metadata_v3",
91+
code_version="youtube_video_metadata_v1",
10592
partitions_def=youtube_video_ids,
10693
automation_condition=upstream_or_code_changes(),
10794
io_manager_key="s3file_io_manager",
@@ -176,14 +163,13 @@ def youtube_video_metadata(
176163
except Exception:
177164
log.exception("Failed to extract metadata for video: %s", video_id)
178165
raise
179-
raise
180166

181167

182168
@asset(
183169
key=AssetKey(["youtube_shorts", "video_content"]),
184170
group_name="youtube_shorts",
185171
description="Download and upload YouTube video content to S3",
186-
code_version="youtube_video_content_v3",
172+
code_version="youtube_video_content_v1",
187173
partitions_def=youtube_video_ids,
188174
automation_condition=upstream_or_code_changes(),
189175
io_manager_key="s3file_io_manager",
@@ -205,10 +191,13 @@ def youtube_video_content(
205191
video_id = context.partition_key
206192
video_url = f"https://www.youtube.com/watch?v={video_id}"
207193

208-
log.info("Processing video content for: %s", video_id)
194+
log.info("Processing video file for: %s", video_id)
195+
196+
# Load metadata from config if available for better version tracking
197+
video_data = _load_metadata_from_config(config)
209198

210199
# Generate version for change tracking
211-
version = _generate_video_version({"id": video_id})
200+
version = _generate_video_version(video_data or {"id": video_id})
212201

213202
# Download video using yt-dlp to local file
214203
output_file = Path(f"{video_id}.mp4")
@@ -399,7 +388,7 @@ def youtube_webhook(
399388
video_id = context.partition_key
400389

401390
try:
402-
webhook_response = send_webhook(learn_api, video_id)
391+
webhook_response = learn_api.client.notify_shorts_processed(video_id)
403392
log.info(
404393
"Successfully sent webhook notification for video %s: %s",
405394
video_id,

dg_projects/learning_resources/learning_resources/sensors/youtube_shorts.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,9 @@ def create_run_requests(videos_to_process, metadata_by_video_id):
241241
tags={"video_id": video_id},
242242
run_config={
243243
"ops": {
244+
"youtube_shorts__video_content": {
245+
"config": {"cached_metadata": metadata_json}
246+
},
244247
"youtube_shorts__video_thumbnail": {
245248
"config": {"cached_metadata": metadata_json}
246249
},

0 commit comments

Comments
 (0)