Skip to content

Commit 0bc82ff

Browse files
committed
Tweaks after testing
1 parent ff52bd3 commit 0bc82ff

File tree

4 files changed

+979
-1457
lines changed

4 files changed

+979
-1457
lines changed

dg_projects/edxorg/edxorg/assets/instructor_onboarding.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,14 @@
88
from datetime import UTC, datetime
99

1010
import polars as pl
11-
from dagster import AssetExecutionContext, AssetIn, AssetKey, Output, asset
11+
from dagster import (
12+
AssetExecutionContext,
13+
AssetIn,
14+
AssetKey,
15+
MaterializeResult,
16+
Output,
17+
asset,
18+
)
1219
from github.GithubException import UnknownObjectException
1320
from ol_orchestrate.resources.github import GithubApiClientFactory
1421

@@ -81,29 +88,31 @@ def generate_instructor_onboarding_user_list(
8188

8289
@asset(
8390
name="update_access_forge_repo",
84-
group_name="instructor_onboarding",
85-
ins={"instructor_onboarding_user_list": AssetIn()},
86-
required_resource_keys={"github_api"},
91+
group_name="edxorg_instructor_onboarding",
92+
ins={
93+
"instructor_onboarding_user_list": AssetIn(
94+
key=AssetKey(["instructor_onboarding_user_list"])
95+
)
96+
},
8797
description="Updates the access-forge repository with the generated user list",
8898
)
89-
def update_access_forge_repository(
99+
def update_access_forge_repo(
90100
context: AssetExecutionContext,
91-
instructor_onboarding_user_list: str,
92101
github_api: GithubApiClientFactory,
93-
) -> Output[dict]:
94-
"""Push the generated CSV content to the access-forge GitHub repository.
102+
instructor_onboarding_user_list: str,
103+
) -> MaterializeResult:
104+
"""Update access-forge repository with instructor user list.
95105
96106
This asset updates or creates a CSV file in the private mitodl/access-forge
97107
repository with the user list generated from the dbt model.
98108
99109
Args:
100110
context: Dagster execution context
101-
instructor_onboarding_user_list: CSV string content to upload with columns:
102-
email, role, sent_invite
103111
github_api: GitHub API client factory resource for authentication
112+
instructor_onboarding_user_list: CSV string content from the upstream asset
104113
105114
Returns:
106-
Output containing metadata about the commit (repo, file path, action, SHA)
115+
MaterializeResult containing metadata about the commit
107116
108117
Raises:
109118
Exception: If the GitHub API call fails or authentication issues occur
@@ -119,8 +128,8 @@ def update_access_forge_repository(
119128
commit_message = "dagster-pipeline - update user list from ol-data-platform"
120129

121130
try:
122-
gh_client = github_api.get_client()
123-
repo = gh_client.get_repo(repo_name)
131+
github_client = github_api.get_client()
132+
repo = github_client.get_repo(repo_name)
124133

125134
# Get the base branch reference
126135
base_ref = repo.get_git_ref(f"heads/{base_branch}")

dg_projects/edxorg/edxorg/definitions.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import Any, Literal
1212

1313
from dagster import (
14+
AssetExecutionContext,
1415
AssetSelection,
1516
DefaultSensorStatus,
1617
Definitions,
@@ -20,11 +21,15 @@
2021
job,
2122
)
2223
from dagster_aws.s3 import S3Resource
24+
from dagster_dbt import DbtCliResource, dbt_assets
2325
from ol_orchestrate.io_managers.filepath import (
2426
FileObjectIOManager,
2527
S3FileObjectIOManager,
2628
)
2729
from ol_orchestrate.lib.constants import DAGSTER_ENV, VAULT_ADDRESS
30+
31+
# Import dbt project
32+
from ol_orchestrate.lib.dbt_project import dbt_project
2833
from ol_orchestrate.lib.utils import authenticate_vault
2934
from ol_orchestrate.resources.gcp_gcs import GCSConnection
3035
from ol_orchestrate.resources.github import GithubApiClientFactory
@@ -49,7 +54,7 @@
4954
)
5055
from edxorg.assets.instructor_onboarding import (
5156
generate_instructor_onboarding_user_list,
52-
update_access_forge_repository,
57+
update_access_forge_repo,
5358
)
5459
from edxorg.assets.openedx_course_archives import (
5560
dummy_edxorg_course_xml,
@@ -175,6 +180,15 @@ def sync_edxorg_program_reports():
175180
upload_files_to_s3(download_files_from_s3())
176181

177182

183+
@dbt_assets(
184+
manifest=dbt_project.manifest_path,
185+
project=dbt_project,
186+
)
187+
def edxorg_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
188+
"""Dbt models for edxorg data transformations."""
189+
yield from dbt.cli(["build"], context=context).stream()
190+
191+
178192
# Sensors
179193
try:
180194
from edxorg.sensors.object_storage import (
@@ -228,7 +242,7 @@ def sync_edxorg_program_reports():
228242
edxorg_program_metadata,
229243
edxorg_mitx_course_metadata,
230244
generate_instructor_onboarding_user_list,
231-
update_access_forge_repository,
245+
update_access_forge_repo,
232246
),
233247
),
234248
cron_schedule="0 5 * * *",
@@ -265,6 +279,7 @@ def sync_edxorg_program_reports():
265279
"s3_download": S3Resource(profile_name="edxorg"),
266280
"s3_upload": S3Resource(),
267281
"results_dir": SimpleResultsDir.configure_at_launch(),
282+
"dbt": DbtCliResource(project_dir=dbt_project),
268283
},
269284
sensors=sensor_list,
270285
jobs=[
@@ -273,6 +288,7 @@ def sync_edxorg_program_reports():
273288
sync_edxorg_program_reports,
274289
],
275290
assets=[
291+
edxorg_dbt_assets,
276292
edxorg_raw_data_archive.to_source_asset(),
277293
edxorg_raw_tracking_logs.to_source_asset(),
278294
normalize_edxorg_tracking_log,
@@ -283,7 +299,7 @@ def sync_edxorg_program_reports():
283299
edxorg_program_metadata,
284300
edxorg_mitx_course_metadata,
285301
generate_instructor_onboarding_user_list,
286-
update_access_forge_repository,
302+
update_access_forge_repo,
287303
],
288304
schedules=[edxorg_api_daily_schedule],
289305
)

dg_projects/edxorg/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ version = "0.1.0"
55
dependencies = [
66
"dagster>=1.11.13",
77
"dagster-aws ~=0.27.0",
8+
"dagster-dbt>=0.27.0",
89
"dagster-duckdb ~=0.27.0",
910
"dagster-postgres>=0.27.13",
1011
"flatten-dict ~= 0.4.2",
1112
"jsonlines ~= 4.0.0",
1213
"ol-orchestrate-lib",
1314
"polars ~= 1.19",
1415
"universal-pathlib ~= 0.3.1",
16+
"pygithub>=2.8.1",
1517
]
1618

1719
[project.entry-points]

0 commit comments

Comments
 (0)