Skip to content

Commit a3e7bda

Browse files
Fix(dbt): Respect the seed settings provided in the config of a dbt project (#5291)
1 parent be40445 commit a3e7bda

File tree

5 files changed

+133
-2
lines changed

5 files changed

+133
-2
lines changed

sqlmesh/dbt/seed.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,11 @@ def to_sqlmesh(
7979
kwargs["columns"] = new_columns
8080

8181
# dbt treats single whitespace as a null value
82-
csv_settings = CsvSettings(na_values=[" "], keep_default_na=True)
82+
csv_settings = CsvSettings(
83+
delimiter=self.delimiter,
84+
na_values=[" "],
85+
keep_default_na=True,
86+
)
8387

8488
return create_seed_model(
8589
self.canonical_name(context),

tests/dbt/test_config.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def test_source_config(sushi_test_project: Project):
444444
@pytest.mark.slow
445445
def test_seed_config(sushi_test_project: Project, mocker: MockerFixture):
446446
seed_configs = sushi_test_project.packages["sushi"].seeds
447-
assert set(seed_configs) == {"waiter_names"}
447+
assert set(seed_configs) == {"waiter_names", "waiter_revenue_semicolon"}
448448
raw_items_seed = seed_configs["waiter_names"]
449449

450450
expected_config = {
@@ -465,6 +465,25 @@ def test_seed_config(sushi_test_project: Project, mocker: MockerFixture):
465465
== '"MEMORY"."SUSHI"."WAITER_NAMES"'
466466
)
467467

468+
waiter_revenue_semicolon_seed = seed_configs["waiter_revenue_semicolon"]
469+
470+
expected_config_semicolon = {
471+
"path": Path(sushi_test_project.context.project_root, "seeds/waiter_revenue_semicolon.csv"),
472+
"schema_": "sushi",
473+
"delimiter": ";",
474+
}
475+
actual_config_semicolon = {
476+
k: getattr(waiter_revenue_semicolon_seed, k) for k, v in expected_config_semicolon.items()
477+
}
478+
assert actual_config_semicolon == expected_config_semicolon
479+
480+
assert waiter_revenue_semicolon_seed.canonical_name(context) == "sushi.waiter_revenue_semicolon"
481+
assert (
482+
waiter_revenue_semicolon_seed.to_sqlmesh(context).name == "sushi.waiter_revenue_semicolon"
483+
)
484+
assert waiter_revenue_semicolon_seed.delimiter == ";"
485+
assert set(waiter_revenue_semicolon_seed.columns.keys()) == {"waiter_id", "revenue", "quarter"}
486+
468487

469488
def test_quoting():
470489
model = ModelConfig(alias="bar", schema="foo")

tests/dbt/test_transformation.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,58 @@ def test_seed_partial_column_inference(tmp_path):
806806
assert list(seed_df.columns) == list(sqlmesh_seed.columns_to_types.keys())
807807

808808

809+
def test_seed_delimiter(tmp_path):
810+
seed_csv = tmp_path / "seed_with_delimiter.csv"
811+
812+
with open(seed_csv, "w", encoding="utf-8") as fd:
813+
fd.writelines("\n".join(["id|name|city", "0|Ayrton|SP", "1|Max|MC", "2|Niki|VIE"]))
814+
815+
seed = SeedConfig(
816+
name="test_model_pipe",
817+
package="package",
818+
path=Path(seed_csv),
819+
delimiter="|",
820+
)
821+
822+
context = DbtContext()
823+
context.project_name = "TestProject"
824+
context.target = DuckDbConfig(name="target", schema="test")
825+
sqlmesh_seed = seed.to_sqlmesh(context)
826+
827+
# Verify columns are correct with the custom pipe (|) delimiter
828+
expected_columns = {"id", "name", "city"}
829+
assert set(sqlmesh_seed.columns_to_types.keys()) == expected_columns
830+
831+
seed_df = next(sqlmesh_seed.render_seed())
832+
assert list(seed_df.columns) == list(sqlmesh_seed.columns_to_types.keys())
833+
assert len(seed_df) == 3
834+
835+
assert seed_df.iloc[0]["name"] == "Ayrton"
836+
assert seed_df.iloc[0]["city"] == "SP"
837+
assert seed_df.iloc[1]["name"] == "Max"
838+
assert seed_df.iloc[1]["city"] == "MC"
839+
840+
# test with semicolon delimiter
841+
seed_csv_semicolon = tmp_path / "seed_with_semicolon.csv"
842+
with open(seed_csv_semicolon, "w", encoding="utf-8") as fd:
843+
fd.writelines("\n".join(["id;value;status", "1;100;active", "2;200;inactive"]))
844+
845+
seed_semicolon = SeedConfig(
846+
name="test_model_semicolon",
847+
package="package",
848+
path=Path(seed_csv_semicolon),
849+
delimiter=";",
850+
)
851+
852+
sqlmesh_seed_semicolon = seed_semicolon.to_sqlmesh(context)
853+
expected_columns_semicolon = {"id", "value", "status"}
854+
assert set(sqlmesh_seed_semicolon.columns_to_types.keys()) == expected_columns_semicolon
855+
856+
seed_df_semicolon = next(sqlmesh_seed_semicolon.render_seed())
857+
assert seed_df_semicolon.iloc[0]["value"] == 100
858+
assert seed_df_semicolon.iloc[0]["status"] == "active"
859+
860+
809861
def test_seed_column_order(tmp_path):
810862
seed_csv = tmp_path / "seed.csv"
811863

@@ -910,6 +962,45 @@ def test_hooks(sushi_test_dbt_context: Context, model_fqn: str):
910962
assert "post-hook" in mock_logger.call_args[0][0]
911963

912964

965+
@pytest.mark.xdist_group("dbt_manifest")
966+
def test_seed_delimiter_integration(sushi_test_dbt_context: Context):
967+
seed_fqn = '"memory"."sushi"."waiter_revenue_semicolon"'
968+
assert seed_fqn in sushi_test_dbt_context.models
969+
970+
seed_model = sushi_test_dbt_context.models[seed_fqn]
971+
assert seed_model.columns_to_types is not None
972+
973+
# this should be loaded with the semicolon delimiter; otherwise it would result in a one-column table
974+
assert set(seed_model.columns_to_types.keys()) == {"waiter_id", "revenue", "quarter"}
975+
976+
# the columns_to_types values should have the correct types as well
977+
assert seed_model.columns_to_types == {
978+
"waiter_id": exp.DataType.build("int"),
979+
"revenue": exp.DataType.build("double"),
980+
"quarter": exp.DataType.build("text"),
981+
}
982+
983+
df = sushi_test_dbt_context.fetchdf(f"SELECT * FROM {seed_fqn}")
984+
985+
assert len(df) == 6
986+
waiter_ids = set(df["waiter_id"].tolist())
987+
quarters = set(df["quarter"].tolist())
988+
assert waiter_ids == {1, 2, 3}
989+
assert quarters == {"Q1", "Q2"}
990+
991+
q1_w1_rows = df[(df["waiter_id"] == 1) & (df["quarter"] == "Q1")]
992+
assert len(q1_w1_rows) == 1
993+
assert float(q1_w1_rows.iloc[0]["revenue"]) == 100.50
994+
995+
q2_w2_rows = df[(df["waiter_id"] == 2) & (df["quarter"] == "Q2")]
996+
assert len(q2_w2_rows) == 1
997+
assert float(q2_w2_rows.iloc[0]["revenue"]) == 225.50
998+
999+
q2_w3_rows = df[(df["waiter_id"] == 3) & (df["quarter"] == "Q2")]
1000+
assert len(q2_w3_rows) == 1
1001+
assert float(q2_w3_rows.iloc[0]["revenue"]) == 175.75
1002+
1003+
9131004
@pytest.mark.xdist_group("dbt_manifest")
9141005
def test_target_jinja(sushi_test_project: Project):
9151006
context = sushi_test_project.context

tests/fixtures/dbt/sushi_test/seeds/properties.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,13 @@ version: 2
22

33
seeds:
44
- name: waiter_names
5+
- name: waiter_revenue_semicolon
6+
config:
7+
delimiter: ";"
8+
columns:
9+
- name: waiter_id
10+
data_type: int
11+
- name: revenue
12+
data_type: decimal
13+
- name: quarter
14+
data_type: text

tests/fixtures/dbt/sushi_test/seeds/waiter_revenue_semicolon.csv

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
waiter_id;revenue;quarter
2+
1;100.50;Q1
3+
2;200.75;Q1
4+
3;150.25;Q1
5+
1;125.00;Q2
6+
2;225.50;Q2
7+
3;175.75;Q2

0 commit comments

Comments
 (0)