Skip to content

Commit 19270e1

Browse files
authored
Fix: Use csv header as the primary source for column names when converting dbt seeds (#5173)
1 parent 5e59d18 commit 19270e1

File tree

2 files changed

+33
-39
lines changed

2 files changed

+33
-39
lines changed

sqlmesh/dbt/seed.py

Lines changed: 21 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -58,29 +58,27 @@ def to_sqlmesh(
5858
kwargs = self.sqlmesh_model_kwargs(context)
5959

6060
columns = kwargs.get("columns") or {}
61-
descriptions = kwargs.get("column_descriptions") or {}
62-
missing_types = (set(descriptions) | set(self.columns)) - set(columns)
63-
if not columns or missing_types:
64-
agate_table = (
65-
agate_helper.from_csv(seed_path, [], delimiter=self.delimiter)
66-
if SUPPORTS_DELIMITER
67-
else agate_helper.from_csv(seed_path, [])
68-
)
69-
inferred_types = {
70-
name: AGATE_TYPE_MAPPING[tpe.__class__]
71-
for name, tpe in zip(agate_table.column_names, agate_table.column_types)
72-
}
73-
74-
# The columns list built from the mixture of supplied and inferred types needs to
75-
# be in the same order as the data for assumptions elsewhere in the codebase to hold true
76-
new_columns = {}
77-
for column_name in agate_table.column_names:
78-
if (column_name in missing_types) or (column_name not in columns):
79-
new_columns[column_name] = inferred_types[column_name]
80-
else:
81-
new_columns[column_name] = columns[column_name]
82-
83-
kwargs["columns"] = new_columns
61+
62+
agate_table = (
63+
agate_helper.from_csv(seed_path, [], delimiter=self.delimiter)
64+
if SUPPORTS_DELIMITER
65+
else agate_helper.from_csv(seed_path, [])
66+
)
67+
inferred_types = {
68+
name: AGATE_TYPE_MAPPING[tpe.__class__]
69+
for name, tpe in zip(agate_table.column_names, agate_table.column_types)
70+
}
71+
72+
# The columns list built from the mixture of supplied and inferred types needs to
73+
# be in the same order as the data for assumptions elsewhere in the codebase to hold true
74+
new_columns = {}
75+
for column_name in agate_table.column_names:
76+
if column_name not in columns:
77+
new_columns[column_name] = inferred_types[column_name]
78+
else:
79+
new_columns[column_name] = columns[column_name]
80+
81+
kwargs["columns"] = new_columns
8482

8583
return create_seed_model(
8684
self.canonical_name(context),

tests/dbt/test_transformation.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -471,22 +471,18 @@ def test_seed_columns():
471471
package="package",
472472
path=Path("examples/sushi_dbt/seeds/waiter_names.csv"),
473473
columns={
474-
"address": ColumnConfig(
475-
name="address", data_type="text", description="Business address"
476-
),
477-
"zipcode": ColumnConfig(
478-
name="zipcode", data_type="text", description="Business zipcode"
479-
),
474+
"id": ColumnConfig(name="id", data_type="text", description="The ID"),
475+
"name": ColumnConfig(name="name", data_type="text", description="The name"),
480476
},
481477
)
482478

483479
expected_column_types = {
484-
"address": exp.DataType.build("text"),
485-
"zipcode": exp.DataType.build("text"),
480+
"id": exp.DataType.build("text"),
481+
"name": exp.DataType.build("text"),
486482
}
487483
expected_column_descriptions = {
488-
"address": "Business address",
489-
"zipcode": "Business zipcode",
484+
"id": "The ID",
485+
"name": "The name",
490486
}
491487

492488
context = DbtContext()
@@ -503,21 +499,21 @@ def test_seed_column_types():
503499
package="package",
504500
path=Path("examples/sushi_dbt/seeds/waiter_names.csv"),
505501
column_types={
506-
"address": "text",
507-
"zipcode": "text",
502+
"id": "text",
503+
"name": "text",
508504
},
509505
columns={
510-
"zipcode": ColumnConfig(name="zipcode", description="Business zipcode"),
506+
"name": ColumnConfig(name="name", description="The name"),
511507
},
512508
quote_columns=True,
513509
)
514510

515511
expected_column_types = {
516-
"address": exp.DataType.build("text"),
517-
"zipcode": exp.DataType.build("text"),
512+
"id": exp.DataType.build("text"),
513+
"name": exp.DataType.build("text"),
518514
}
519515
expected_column_descriptions = {
520-
"zipcode": "Business zipcode",
516+
"name": "The name",
521517
}
522518

523519
context = DbtContext()

0 commit comments

Comments
 (0)