Skip to content

Commit f1f8d9a

Browse files
committed
Update error messages
1 parent 24d209e commit f1f8d9a

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

src/together/utils/files.py

Lines changed: 20 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -114,13 +114,15 @@ def validate_messages(
114114
"""Validate the messages column."""
115115
if not isinstance(messages, list):
116116
raise InvalidFileFormatError(
117-
message="The dataset is malformed, the `messages` column must be a list.",
117+
message=f"Invalid format on line {idx + 1} of the input file. "
118+
f"Expected a list of messages. Found {type(messages)}",
118119
line_number=idx + 1,
119120
error_source="key_value",
120121
)
121122
if not messages:
122123
raise InvalidFileFormatError(
123-
message="The dataset is malformed, the `messages` column must not be empty.",
124+
message=f"Invalid format on line {idx + 1} of the input file. "
125+
f"Expected a non-empty list of messages. Found empty list",
124126
line_number=idx + 1,
125127
error_source="key_value",
126128
)
@@ -132,21 +134,29 @@ def validate_messages(
132134

133135
previous_role = None
134136
for message in messages:
135-
if any(column not in message for column in REQUIRED_COLUMNS_MESSAGE):
137+
if not isinstance(message, dict):
136138
raise InvalidFileFormatError(
137-
message="The dataset is malformed. "
138-
"Each message in the messages column must have "
139-
f"{REQUIRED_COLUMNS_MESSAGE} columns.",
139+
message=f"Invalid format on line {idx + 1} of the input file. "
140+
f"Expected a dictionary in the messages list. Found {type(message)}",
140141
line_number=idx + 1,
141142
error_source="key_value",
142143
)
143144
for column in REQUIRED_COLUMNS_MESSAGE:
144-
if not isinstance(message[column], str):
145+
if column not in message:
145146
raise InvalidFileFormatError(
146-
message=f"The dataset is malformed, the column `{column}` must be of the string type.",
147+
message=f"Field `{column}` is missing for a turn `{message}` on line {idx + 1} "
148+
"of the the input file.",
147149
line_number=idx + 1,
148150
error_source="key_value",
149151
)
152+
else:
153+
if not isinstance(message[column], str):
154+
raise InvalidFileFormatError(
155+
message=f"Invalid format on line {idx + 1} in the column {column} for turn `{message}` "
156+
f"of the input file. Expected string. Found {type(message[column])}",
157+
line_number=idx + 1,
158+
error_source="text_field",
159+
)
150160

151161
if has_weights and "weight" in message:
152162
weight = message["weight"]
@@ -164,8 +174,8 @@ def validate_messages(
164174
)
165175
if message["role"] not in POSSIBLE_ROLES_CONVERSATION:
166176
raise InvalidFileFormatError(
167-
message=f"Invalid role {message['role']} in conversation, possible roles: "
168-
f"{', '.join(POSSIBLE_ROLES_CONVERSATION)}",
177+
message=f"Found invalid role `{message['role']}` in the messages on the line {idx + 1}. "
178+
f"Possible roles in the conversation are: {POSSIBLE_ROLES_CONVERSATION}",
169179
line_number=idx + 1,
170180
error_source="key_value",
171181
)

0 commit comments

Comments
 (0)