|
31 | 31 | get_chat_chart_data, list_generate_sql_logs, list_generate_chart_logs, start_log, end_log, \ |
32 | 32 | get_last_execute_sql_error |
33 | 33 | from apps.chat.models.chat_model import ChatQuestion, ChatRecord, Chat, RenameChat, ChatLog, OperationEnum, \ |
34 | | - ChatFinishStep |
| 34 | + ChatFinishStep, AxisObj |
35 | 35 | from apps.data_training.curd.data_training import get_training_template |
36 | 36 | from apps.datasource.crud.datasource import get_table_schema |
37 | 37 | from apps.datasource.crud.permission import get_row_permission_filters, is_normal_user |
@@ -414,7 +414,7 @@ def select_datasource(self, _session: Session): |
414 | 414 | if settings.TABLE_EMBEDDING_ENABLED and ( |
415 | 415 | not self.current_assistant or (self.current_assistant and self.current_assistant.type != 1)): |
416 | 416 | _ds_list = get_ds_embedding(_session, self.current_user, _ds_list, self.out_ds_instance, |
417 | | - self.chat_question.question, self.current_assistant) |
| 417 | + self.chat_question.question, self.current_assistant) |
418 | 418 | # yield {'content': '{"id":' + str(ds.get('id')) + '}'} |
419 | 419 |
|
420 | 420 | _ds_list_dict = [] |
@@ -1056,23 +1056,18 @@ def run_task(self, in_chat: bool = True, stream: bool = True, |
1056 | 1056 | if in_chat: |
1057 | 1057 | yield 'data:' + orjson.dumps({'type': 'finish'}).decode() + '\n\n' |
1058 | 1058 | else: |
1059 | | - data = [] |
1060 | | - _fields_list = [] |
1061 | | - _fields_skip = False |
1062 | | - for _data in result.get('data'): |
1063 | | - _row = [] |
1064 | | - for field in result.get('fields'): |
1065 | | - _row.append(_data.get(field)) |
1066 | | - if not _fields_skip: |
1067 | | - _fields_list.append(field) |
1068 | | - data.append(_row) |
1069 | | - _fields_skip = True |
| 1059 | + _column_list = [] |
| 1060 | + for field in result.get('fields'): |
| 1061 | + _column_list.append(AxisObj(name=field, value=field)) |
| 1062 | + |
| 1063 | + data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data')) |
1070 | 1064 |
|
1071 | 1065 | if not data or not _fields_list: |
1072 | 1066 | yield 'The SQL execution result is empty.\n\n' |
1073 | 1067 | else: |
1074 | 1068 | df = pd.DataFrame(data, columns=_fields_list) |
1075 | | - markdown_table = df.to_markdown(index=False) |
| 1069 | + df_safe = self.safe_convert_to_string(df) |
| 1070 | + markdown_table = df_safe.to_markdown(index=False) |
1076 | 1071 | yield markdown_table + '\n\n' |
1077 | 1072 | else: |
1078 | 1073 | yield json_result |
@@ -1117,22 +1112,19 @@ def run_task(self, in_chat: bool = True, stream: bool = True, |
1117 | 1112 | if chart.get('axis').get('series'): |
1118 | 1113 | _fields[chart.get('axis').get('series').get('value')] = chart.get('axis').get('series').get( |
1119 | 1114 | 'name') |
1120 | | - _fields_list = [] |
1121 | | - _fields_skip = False |
1122 | | - for _data in result.get('data'): |
1123 | | - _row = [] |
1124 | | - for field in result.get('fields'): |
1125 | | - _row.append(_data.get(field)) |
1126 | | - if not _fields_skip: |
1127 | | - _fields_list.append(field if not _fields.get(field) else _fields.get(field)) |
1128 | | - data.append(_row) |
1129 | | - _fields_skip = True |
| 1115 | + _column_list = [] |
| 1116 | + for field in result.get('fields'): |
| 1117 | + _column_list.append( |
| 1118 | + AxisObj(name=field if not _fields.get(field) else _fields.get(field), value=field)) |
| 1119 | + |
| 1120 | + data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data')) |
1130 | 1121 |
|
1131 | 1122 | if not data or not _fields_list: |
1132 | 1123 | yield 'The SQL execution result is empty.\n\n' |
1133 | 1124 | else: |
1134 | 1125 | df = pd.DataFrame(data, columns=_fields_list) |
1135 | | - markdown_table = df.to_markdown(index=False) |
| 1126 | + df_safe = self.safe_convert_to_string(df) |
| 1127 | + markdown_table = df_safe.to_markdown(index=False) |
1136 | 1128 | yield markdown_table + '\n\n' |
1137 | 1129 |
|
1138 | 1130 | if in_chat: |
@@ -1179,6 +1171,64 @@ def run_task(self, in_chat: bool = True, stream: bool = True, |
1179 | 1171 | self.finish(_session) |
1180 | 1172 | session_maker.remove() |
1181 | 1173 |
|
@staticmethod
def safe_convert_to_string(df):
    """Return a copy of *df* whose numeric columns are rendered as strings.

    The goal is to keep numbers readable when the frame is later rendered as
    text, i.e. to avoid scientific notation. Non-numeric columns are left
    untouched and the input frame is never modified.

    Float columns need an extra step: ``astype(str)`` uses the shortest
    round-trip representation, which is exponent notation for very small or
    very large magnitudes (``1e-07``, ``1e+16``). Those strings are expanded
    to plain decimal digits.

    NOTE(review): ``DataFrame.to_markdown`` hands the frame to ``tabulate``,
    which by default re-parses number-like strings; callers may additionally
    need ``disable_numparse=True`` for this conversion to survive rendering
    -- confirm against the installed tabulate version.

    :param df: source ``pandas.DataFrame``.
    :return: a new ``pandas.DataFrame`` with numeric columns stringified.
    """
    # Local imports keep this helper self-contained.
    import logging
    from decimal import Decimal, InvalidOperation

    def _expand_exponent(text):
        # Only strings carrying an exponent marker need rewriting; anything
        # else (including 'nan' / 'inf' or missing values) passes through.
        if isinstance(text, str) and ('e' in text or 'E' in text):
            try:
                return format(Decimal(text), 'f')
            except InvalidOperation:
                return text
        return text

    df_copy = df.copy()

    for col in df_copy.columns:
        # Only numeric columns are converted.
        if pd.api.types.is_numeric_dtype(df_copy[col]):
            try:
                as_text = df_copy[col].astype(str)
                if pd.api.types.is_float_dtype(df_copy[col]):
                    as_text = as_text.map(_expand_exponent)
                df_copy[col] = as_text
            except Exception as e:
                # Best effort: if the conversion fails, keep the column as-is.
                logging.getLogger(__name__).warning("列 %s 转换失败: %s", col, e)
                continue

    return df_copy
| 1192 | + |
@staticmethod
def format_pd_data(column_list: list, data_list: list, col_formats: dict = None):
    """Flatten row dicts into row lists and classify each column's format.

    Each column ends up with one of three format tags in ``col_formats``
    (keyed by column index): ``'text'``, ``'number'`` or ``'default'``.

    * An ``int`` with more than 15 digits is replaced by its string form and
      the column is tagged ``'text'``.
    * A ``float`` whose fixed-point rendering is longer than 15 characters
      is replaced by a string and the column is tagged ``'text'``. The
      string is always plain decimal digits, never exponent notation.
    * Any other ``int`` tags the column ``'number'`` unless the column is
      already ``'text'``.
    * Everything else (short floats, strings, ``None``, ...) is passed
      through and leaves the tag unchanged.

    :param column_list: column descriptors; each must expose ``name`` (the
        header to emit) and ``value`` (the key to read from every row).
    :param data_list: rows, each supporting ``.get(key)``.
    :param col_formats: optional dict to fill. Every index in
        ``column_list`` is reset to ``'default'`` before classification,
        and the same dict object is returned.
    :return: ``(data, fields, col_formats)`` -- the row lists, the header
        names in column order, and the per-column format tags.
    """
    # Local import keeps this helper self-contained.
    from decimal import Decimal

    if col_formats is None:
        col_formats = {}

    _fields_list = []
    for field_idx, field in enumerate(column_list):
        _fields_list.append(field.name)
        col_formats[field_idx] = 'default'  # no special handling by default

    data = []
    for _data in data_list:
        _row = []
        for field_idx, field in enumerate(column_list):
            value = _data.get(field.value)
            if isinstance(value, int):
                if len(str(abs(value))) > 15:
                    # Integer with more than 15 digits: keep it exact as text.
                    value = str(value)
                    col_formats[field_idx] = 'text'
                elif col_formats[field_idx] != 'text':
                    # Ordinary integer: numeric column, but never downgrade
                    # a column that already needed text.
                    col_formats[field_idx] = 'number'
            elif isinstance(value, float):
                decimal_str = format(value, '.16f').rstrip('0').rstrip('.')
                if len(decimal_str) > 15:
                    # str() gives the shortest round-trip digits, but uses
                    # exponent notation for extreme magnitudes; expand
                    # those so the text cell shows plain digits.
                    text = str(value)
                    if 'e' in text or 'E' in text:
                        text = format(Decimal(text), 'f')
                    value = text
                    col_formats[field_idx] = 'text'
            _row.append(value)
        data.append(_row)

    return data, _fields_list, col_formats
| 1231 | + |
1182 | 1232 | def run_recommend_questions_task_async(self): |
1183 | 1233 | self.future = executor.submit(self.run_recommend_questions_task_cache) |
1184 | 1234 |
|
|
0 commit comments