Skip to content

Commit 3987133

Browse files
committed
fix: prevent bigint data from displaying in scientific notation in MCP chat
1 parent 97400e3 commit 3987133

File tree

2 files changed

+77
-55
lines changed

2 files changed

+77
-55
lines changed

backend/apps/chat/api/chat.py

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -206,42 +206,14 @@ def _err(_e: Exception):
206206
@router.post("/excel/export")
207207
async def export_excel(excel_data: ExcelData, trans: Trans):
208208
def inner():
209-
_fields_list = []
210-
data = []
209+
211210
if not excel_data.data:
212211
raise HTTPException(
213212
status_code=500,
214213
detail=trans("i18n_excel_export.data_is_empty")
215214
)
216215

217-
# 预处理数据并记录每列的格式类型
218-
col_formats = {} # 格式类型:'text'(文本)、'number'(数字)、'default'(默认)
219-
for field_idx, field in enumerate(excel_data.axis):
220-
_fields_list.append(field.name)
221-
col_formats[field_idx] = 'default' # 默认不特殊处理
222-
223-
for _data in excel_data.data:
224-
_row = []
225-
for field_idx, field in enumerate(excel_data.axis):
226-
value = _data.get(field.value)
227-
if value is not None:
228-
# 检查是否为数字且需要特殊处理
229-
if isinstance(value, (int, float)):
230-
# 整数且超过15位 → 转字符串并标记为文本列
231-
if isinstance(value, int) and len(str(abs(value))) > 15:
232-
value = str(value)
233-
col_formats[field_idx] = 'text'
234-
# 小数且超过15位有效数字 → 转字符串并标记为文本列
235-
elif isinstance(value, float):
236-
decimal_str = format(value, '.16f').rstrip('0').rstrip('.')
237-
if len(decimal_str) > 15:
238-
value = str(value)
239-
col_formats[field_idx] = 'text'
240-
# 其他数字列标记为数字格式(避免科学记数法)
241-
elif col_formats[field_idx] != 'text':
242-
col_formats[field_idx] = 'number'
243-
_row.append(value)
244-
data.append(_row)
216+
data, _fields_list, col_formats = LLMService.format_pd_data(excel_data.axis, excel_data.data)
245217

246218
df = pd.DataFrame(data, columns=_fields_list)
247219

backend/apps/chat/task/llm.py

Lines changed: 75 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
get_chat_chart_data, list_generate_sql_logs, list_generate_chart_logs, start_log, end_log, \
3232
get_last_execute_sql_error
3333
from apps.chat.models.chat_model import ChatQuestion, ChatRecord, Chat, RenameChat, ChatLog, OperationEnum, \
34-
ChatFinishStep
34+
ChatFinishStep, AxisObj
3535
from apps.data_training.curd.data_training import get_training_template
3636
from apps.datasource.crud.datasource import get_table_schema
3737
from apps.datasource.crud.permission import get_row_permission_filters, is_normal_user
@@ -414,7 +414,7 @@ def select_datasource(self, _session: Session):
414414
if settings.TABLE_EMBEDDING_ENABLED and (
415415
not self.current_assistant or (self.current_assistant and self.current_assistant.type != 1)):
416416
_ds_list = get_ds_embedding(_session, self.current_user, _ds_list, self.out_ds_instance,
417-
self.chat_question.question, self.current_assistant)
417+
self.chat_question.question, self.current_assistant)
418418
# yield {'content': '{"id":' + str(ds.get('id')) + '}'}
419419

420420
_ds_list_dict = []
@@ -1056,23 +1056,18 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
10561056
if in_chat:
10571057
yield 'data:' + orjson.dumps({'type': 'finish'}).decode() + '\n\n'
10581058
else:
1059-
data = []
1060-
_fields_list = []
1061-
_fields_skip = False
1062-
for _data in result.get('data'):
1063-
_row = []
1064-
for field in result.get('fields'):
1065-
_row.append(_data.get(field))
1066-
if not _fields_skip:
1067-
_fields_list.append(field)
1068-
data.append(_row)
1069-
_fields_skip = True
1059+
_column_list = []
1060+
for field in result.get('fields'):
1061+
_column_list.append(AxisObj(name=field, value=field))
1062+
1063+
data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
10701064

10711065
if not data or not _fields_list:
10721066
yield 'The SQL execution result is empty.\n\n'
10731067
else:
10741068
df = pd.DataFrame(data, columns=_fields_list)
1075-
markdown_table = df.to_markdown(index=False)
1069+
df_safe = self.safe_convert_to_string(df)
1070+
markdown_table = df_safe.to_markdown(index=False)
10761071
yield markdown_table + '\n\n'
10771072
else:
10781073
yield json_result
@@ -1117,22 +1112,19 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
11171112
if chart.get('axis').get('series'):
11181113
_fields[chart.get('axis').get('series').get('value')] = chart.get('axis').get('series').get(
11191114
'name')
1120-
_fields_list = []
1121-
_fields_skip = False
1122-
for _data in result.get('data'):
1123-
_row = []
1124-
for field in result.get('fields'):
1125-
_row.append(_data.get(field))
1126-
if not _fields_skip:
1127-
_fields_list.append(field if not _fields.get(field) else _fields.get(field))
1128-
data.append(_row)
1129-
_fields_skip = True
1115+
_column_list = []
1116+
for field in result.get('fields'):
1117+
_column_list.append(
1118+
AxisObj(name=field if not _fields.get(field) else _fields.get(field), value=field))
1119+
1120+
data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data'))
11301121

11311122
if not data or not _fields_list:
11321123
yield 'The SQL execution result is empty.\n\n'
11331124
else:
11341125
df = pd.DataFrame(data, columns=_fields_list)
1135-
markdown_table = df.to_markdown(index=False)
1126+
df_safe = self.safe_convert_to_string(df)
1127+
markdown_table = df_safe.to_markdown(index=False)
11361128
yield markdown_table + '\n\n'
11371129

11381130
if in_chat:
@@ -1179,6 +1171,64 @@ def run_task(self, in_chat: bool = True, stream: bool = True,
11791171
self.finish(_session)
11801172
session_maker.remove()
11811173

1174+
@staticmethod
def safe_convert_to_string(df):
    """
    Return a copy of ``df`` whose numeric columns are rendered as strings.

    The copy is meant for display (e.g. a markdown table), where numbers
    must appear in plain decimal form rather than scientific notation.
    Non-numeric columns are left untouched and ``df`` itself is never
    modified.

    :param df: the DataFrame to convert.
    :return: a new DataFrame; every numeric column holds strings.
    """
    import logging
    from decimal import Decimal, InvalidOperation

    def _expand_exponent(text):
        # astype(str) yields exponent form for large/small floats
        # ('1e+20', '1e-07'); rewrite only those, so every other value
        # (including 'nan' / 'inf') keeps its usual string form.
        if not isinstance(text, str) or 'e' not in text.lower():
            return text
        try:
            return format(Decimal(text), 'f')
        except InvalidOperation:
            return text

    df_copy = df.copy()

    for col in df_copy.columns:
        # Only numeric columns are converted
        if not pd.api.types.is_numeric_dtype(df_copy[col]):
            continue
        try:
            as_text = df_copy[col].astype(str)
            # Integer columns never stringify with an exponent; floats can
            if pd.api.types.is_float_dtype(df_copy[col]):
                as_text = as_text.map(_expand_exponent)
            df_copy[col] = as_text
        except Exception as e:
            # Best effort: if a column cannot be converted, keep it as is
            logging.getLogger(__name__).warning("列 %s 转换失败: %s", col, e)
            continue

    return df_copy
1192+
1193+
@staticmethod
def format_pd_data(column_list: list, data_list: list, col_formats: dict = None):
    """
    Turn row dicts into ordered rows for ``pd.DataFrame`` and classify columns.

    Numbers with more than 15 digits are replaced by their full decimal
    string so that later rendering cannot fall back to scientific notation
    or lose precision.

    :param column_list: column descriptors; each item exposes ``name``
        (the header to show) and ``value`` (the key to read from a row).
    :param data_list: rows as dicts; ``None`` is treated as no rows.
    :param col_formats: optional dict to fill in place; the entry for every
        column index is reset to ``'default'`` before the rows are scanned.
    :return: ``(data, fields, col_formats)`` where ``data`` is a list of
        row lists, ``fields`` the header names, and ``col_formats`` maps a
        column index to ``'text'``, ``'number'`` or ``'default'``.
    """
    import math
    from decimal import Decimal

    # Beyond this many digits a number is kept as text
    max_digits = 15

    if col_formats is None:
        col_formats = {}

    _fields_list = []
    for field_idx, field in enumerate(column_list):
        _fields_list.append(field.name)
        col_formats[field_idx] = 'default'  # no special handling by default

    data = []

    for _data in data_list or []:
        _row = []
        for field_idx, field in enumerate(column_list):
            value = _data.get(field.value)
            if isinstance(value, bool):
                # bool is a subclass of int but is not numeric data
                pass
            elif isinstance(value, int):
                if len(str(abs(value))) > max_digits:
                    # Too long for a numeric cell: keep the exact digits as text
                    value = str(value)
                    col_formats[field_idx] = 'text'
                elif col_formats[field_idx] != 'text':
                    # Ordinary integers mark the column as numeric,
                    # unless an earlier row already made it a text column
                    col_formats[field_idx] = 'number'
            elif isinstance(value, float) and math.isfinite(value):
                # Count digits on the shortest round-trip form. A fixed
                # '.16f' rendering exposes binary rounding noise
                # (1.1 -> '1.1000000000000001') and would misclassify
                # ordinary values as over-long.
                plain = format(Decimal(repr(float(value))), 'f')
                trimmed = plain.rstrip('0').rstrip('.') if '.' in plain else plain
                # Sign, decimal point and leading zeros are not digits of the value
                digit_count = sum(ch.isdigit() for ch in trimmed.lstrip('-0.'))
                if digit_count > max_digits:
                    # `plain` equals str(value) except that an exponent
                    # ('1e+20') is written out in full
                    value = plain
                    col_formats[field_idx] = 'text'
                # NOTE(review): floats that stay numeric do not mark the
                # column 'number' (unchanged from before) - confirm against
                # how the Excel export applies the 'number' format.
            _row.append(value)
        data.append(_row)

    return data, _fields_list, col_formats
1231+
11821232
def run_recommend_questions_task_async(self):
11831233
self.future = executor.submit(self.run_recommend_questions_task_cache)
11841234

0 commit comments

Comments
 (0)