-
Notifications
You must be signed in to change notification settings - Fork 307
Description
Bug Description
代码:
import logging
import sys
import os
import glob
# Configure logging: INFO and above go to both stdout and add_resources.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # Explicit UTF-8: this script logs Chinese messages and file names,
        # which the locale's default encoding may not be able to represent.
        logging.FileHandler('add_resources.log', encoding='utf-8'),
    ],
)
def add_file_to_openviking(client, file_path, target="viking://resources/contract"):
    """Add a single file to OpenViking and return its root URI.

    Args:
        client: Object exposing ``add_resource(path=..., target=...)``.
        file_path: Path of the file to add.
        target: Destination URI forwarded to ``add_resource``. Defaults to
            the ``viking://resources/contract`` namespace.

    Returns:
        The ``root_uri`` value from the response on success. ``None`` when
        the response reports ``status == 'error'``, has no ``root_uri``, is
        not a dict, or when any exception is raised while adding the file.
    """
    file_name = os.path.basename(file_path)
    try:
        logging.info(f"Adding file: {file_path}")
        print(f"正在添加: {file_name}")
        res = client.add_resource(path=file_path, target=target)
        logging.info(f"add_resource result: {res}")

        # Check for errors in the response
        if isinstance(res, dict):
            if res.get('status') == 'error':
                errors = res.get('errors') or []
                if isinstance(errors, str):
                    # A bare string would otherwise be joined character by character.
                    errors = [errors]
                # str() each entry so non-string error objects cannot break the join.
                error_msg = '; '.join(str(err) for err in errors) if errors else '未知错误'
                print(f"❌ 解析失败: {file_name} - {error_msg}")
                logging.error(f"Parse error for {file_path}: {error_msg}")
                return None
            if 'root_uri' in res:
                print(f"✅ 成功添加: {file_name} -> {res['root_uri']}")
                return res['root_uri']

        # Fallback for unexpected response format
        print(f"⚠️ 添加完成: {file_name} (无root_uri)")
        return None
    except Exception as e:
        # Deliberately broad: one failing file must not abort a batch run.
        # logging.exception keeps the traceback in the log for diagnosis.
        logging.exception(f"Failed to add {file_path}: {e}")
        print(f"❌ 添加失败: {file_name} - {e}")
        return None
def add_directory_to_openviking(client, dir_path):
    """Add every regular file found under a directory to OpenViking.

    The directory is searched recursively with ``glob``. Files are added one
    at a time through ``add_file_to_openviking``; files for which it returns
    a falsy value are skipped.

    Args:
        client: OpenViking client forwarded to ``add_file_to_openviking``.
        dir_path: Directory to scan.

    Returns:
        List of URIs returned for the files that were added. Empty when
        ``dir_path`` is not a directory or nothing was added.
    """
    if not os.path.isdir(dir_path):
        print(f"❌ 目录不存在: {dir_path}")
        return []

    print(f"扫描目录: {dir_path}")

    # Escape the directory part so glob metacharacters in its name
    # ("[", "*", "?") are matched literally instead of as a pattern.
    file_pattern = os.path.join(glob.escape(dir_path), "**")
    # glob returns entries in arbitrary order; sort for reproducible runs.
    files_only = sorted(
        f for f in glob.glob(file_pattern, recursive=True) if os.path.isfile(f)
    )
    print(f"找到 {len(files_only)} 个文件")

    added_uris = []
    for file_path in files_only:
        uri = add_file_to_openviking(client, file_path)
        if uri:
            added_uris.append(uri)
    return added_uris
def main():
    """Command-line entry point: add every path in ``sys.argv[1:]`` to OpenViking.

    With no arguments the usage text is printed and the function returns.
    Otherwise an ``openviking.OpenViking`` client rooted at ``./data`` is
    created and initialized, each file or directory argument is added, the
    collected URIs are printed, ``client.wait_processed()`` is awaited, and
    the client is closed.

    Any ``Exception`` escaping the session is logged, printed together with
    its traceback, and swallowed.
    """
    if len(sys.argv) < 2:
        _print_usage()
        return

    try:
        logging.info("Starting resource addition session...")

        # Imported after the usage check, so the help text is still shown
        # when the openviking package is not installed.
        import openviking as ov
        logging.info("OpenViking imported")

        # Initialize OpenViking
        print("初始化 OpenViking...")
        client = ov.OpenViking(path="./data")
        client.initialize()
        logging.info("OpenViking initialized")
        print("✅ OpenViking 初始化完成")

        try:
            # Process all arguments (file/directory paths)
            all_uris = []
            for path in sys.argv[1:]:
                if os.path.isdir(path):
                    print(f"\n📁 处理目录: {path}")
                    all_uris.extend(add_directory_to_openviking(client, path))
                elif os.path.isfile(path):
                    print(f"\n📄 处理文件: {path}")
                    uri = add_file_to_openviking(client, path)
                    if uri:
                        all_uris.append(uri)
                else:
                    print(f"❌ 路径不存在: {path}")

            print("\n🎉 处理完成!")
            print(f"成功添加了 {len(all_uris)} 个资源")
            if all_uris:
                print("\n添加的资源URI:")
                for uri in all_uris:
                    print(f" - {uri}")

            # Wait for processing
            print("\n⏳ 等待异步处理完成...")
            try:
                client.wait_processed()
                logging.info("Async processing completed")
                print("✅ 异步处理完成")
            except Exception as e:
                # Best effort: a failed wait is reported but does not abort cleanup.
                logging.warning(f"等待处理时出错: {e}")
                print(f"⚠️ 处理可能仍在后台进行: {e}")
        finally:
            # Cleanup: close the initialized client even when processing raised.
            client.close()

        logging.info("Session ended")
        print("✅ 资源添加会话结束")
    except Exception as e:
        logging.error(f"Fatal error: {e}")
        print(f"💥 发生严重错误: {e}")
        import traceback
        traceback.print_exc()


def _print_usage():
    """Print the command-line usage, notes and examples to stdout."""
    print("用法:")
    print(" python add_resources.py <文件路径>")
    print(" python add_resources.py <目录路径>")
    print(" python add_resources.py <文件1> <文件2> <文件3>...")
    print("")
    print("说明:")
    print(" 所有文件将添加到固定命名空间: viking://resources/contract")
    print(" 使用 OpenViking 的 'target' 参数指定目标URI")
    print("")
    print("示例:")
    print(" python add_resources.py ./docs/contract.pdf")
    print(" python add_resources.py ./docs/")
    print(" python add_resources.py ./docs/file1.pdf ./docs/file2.docx")
# Run main() only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
错误如下:
python add_resources.py ./docs/MinerU_markdown_Contract.md
2026-02-26 10:00:45,505 - INFO - Starting resource addition session...
2026-02-26 10:00:46,436 - INFO - OpenViking imported
初始化 OpenViking...
2026-02-26 10:00:46,554 - INFO - Scheduler started
2026-02-26 10:00:46,559 - INFO - Added job "PersistCollection._register_index_manage_job" to job store "default"
2026-02-26 10:00:46,720 - INFO - OpenViking initialized
✅ OpenViking 初始化完成
📄 处理文件: ./docs/MinerU_markdown_Contract.md
2026-02-26 10:00:46,721 - INFO - Adding file: ./docs/MinerU_markdown_Contract.md
正在添加: MinerU_markdown_Contract.md
2026-02-26 10:00:46,835 - openviking.parse.parsers.markdown - ERROR - [MarkdownParser] Parse failed: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long
Traceback (most recent call last):
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 179, in write
response.raise_for_status()
File "/root/financial-rag/venv/lib/python3.12/site-packages/requests/models.py", line 1026, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Internal Server Error for url: http://localhost:1833/api/v1/files?path=%2Flocal%2Ftemp%2F02261000_9ca7d4%2FMinerU_markdown_Contract%2F5%E5%88%9D%E5%A7%8B%E9%94%80%E5%94%AE%E6%9C%9F%E9%97%B4%E6%8A%95%E8%B5%84%E8%80%85%E8%B5%84%E9%87%91%E7%9A%84%E7%AE%A1%E7%90%86%E5%8F%8A%E5%88%A9%E6%81%AF%E5%A4%84%E7%90%86%E6%96%B9%E5%BC%8F_%E5%9B%9B%E5%9F%BA%E9%87%91%E4%BB%BD%E9%A2%9D%E8%AE%A4%E8%B4%AD%E9%87%91%E9%A2%9D%E5%8F%8A%E4%BB%98%E6%AC%BE%E6%9C%9F%E9%99%90_%E4%BA%94%E6%8A%95%E8%B5%84%E5%86%B7%E9%9D%99%E6%9C%9F_%E5%85%AD%E5%9B%9E%E8%AE%BF%E5%88%B6%E5%BA%A6_%E5%85%AD%E5%9F%BA%E9%87%91%E7%9A%84%E6%88%90%E7%AB%8B%E4%B8%8E%E5%A4%87%E6%A1%88_%E4%B8%80%E6%9C%AC%E5%90%88%E5%90%8C%E7%AD%BE%E7%BD%B2%E7%9A%84%E6%96%B9%E5%BC%8F_%E4%BA%8C%E5%9F%BA%E9%87%91%E6%88%90%E7%AB%8B%E7%9A%84%E6%9D%A1%E4%BB%B6_%E4%B8%89%E5%8B%9F%E9%9B%86%E5%A4%B1%E8%B4%A5%E7%9A%84%E5%A4%84%E7%90%86%E6%96%B9%E5%BC%8F_%E5%9B%9B%E5%9F%BA%E9%87%91%E7%9A%84%E5%A4%87%E6%A1%88.md
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 188, in parse_content
await self._parse_and_create_structure(content, headings, root_dir, source_path)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 426, in _parse_and_create_structure
await self._process_sections_with_merge(
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 458, in _process_sections_with_merge
pending = await self._try_add_to_pending(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 487, in _try_add_to_pending
await self._save_merged(viking_fs, parent_dir, pending)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/parse/parsers/markdown.py", line 573, in _save_merged
await viking_fs.write_file(f"{parent_dir}/{name}.md", content)
File "/root/financial-rag/venv/lib/python3.12/site-packages/openviking/storage/viking_fs.py", line 872, in write_file
self.agfs.write(path, content)
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 227, in write
self._handle_request_error(e)
File "/root/financial-rag/venv/lib/python3.12/site-packages/pyagfs/client.py", line 54, in _handle_request_error
raise AGFSClientError(error_msg)
pyagfs.exceptions.AGFSClientError: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long
2026-02-26 10:00:46,837 - INFO - add_resource result: {'status': 'error', 'errors': ['Parse error: failed to open file: open /root/financial-rag/data/viking/temp/02261000_9ca7d4/MinerU_markdown_Contract/5初始销售期间投资者资金的管理及利息处理方式_四基金份额认购金额及付款期限_五投资冷静期_六回访制度_六基金的成立与备案_一本合同签署的方式_二基金成立的条件_三募集失败的处理方式_四基金的备案.md: file name too long'], 'source_path': None}
🎉 处理完成!
成功添加了 0 个资源
⏳ 等待异步处理完成...
2026-02-26 10:00:46,841 - INFO - Async processing completed
✅ 异步处理完成
2026-02-26 10:00:46,841 - INFO - Removed job 1772071246559451622__index_manage
2026-02-26 10:00:46,842 - INFO - Scheduler has been shut down
2026-02-26 10:00:46,844 - INFO - Session ended
✅ 资源添加会话结束
Steps to Reproduce
import logging
import sys
import os
import glob
# Configure logging: INFO and above go to both stdout and add_resources.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        # Explicit UTF-8: this script logs Chinese messages and file names,
        # which the locale's default encoding may not be able to represent.
        logging.FileHandler('add_resources.log', encoding='utf-8'),
    ],
)
def add_file_to_openviking(client, file_path, target="viking://resources/contract"):
    """Add a single file to OpenViking and return its root URI.

    Args:
        client: Object exposing ``add_resource(path=..., target=...)``.
        file_path: Path of the file to add.
        target: Destination URI forwarded to ``add_resource``. Defaults to
            the ``viking://resources/contract`` namespace.

    Returns:
        The ``root_uri`` value from the response on success. ``None`` when
        the response reports ``status == 'error'``, has no ``root_uri``, is
        not a dict, or when any exception is raised while adding the file.
    """
    file_name = os.path.basename(file_path)
    try:
        logging.info(f"Adding file: {file_path}")
        print(f"正在添加: {file_name}")
        res = client.add_resource(path=file_path, target=target)
        logging.info(f"add_resource result: {res}")

        # Check for errors in the response
        if isinstance(res, dict):
            if res.get('status') == 'error':
                errors = res.get('errors') or []
                if isinstance(errors, str):
                    # A bare string would otherwise be joined character by character.
                    errors = [errors]
                # str() each entry so non-string error objects cannot break the join.
                error_msg = '; '.join(str(err) for err in errors) if errors else '未知错误'
                print(f"❌ 解析失败: {file_name} - {error_msg}")
                logging.error(f"Parse error for {file_path}: {error_msg}")
                return None
            if 'root_uri' in res:
                print(f"✅ 成功添加: {file_name} -> {res['root_uri']}")
                return res['root_uri']

        # Fallback for unexpected response format
        print(f"⚠️ 添加完成: {file_name} (无root_uri)")
        return None
    except Exception as e:
        # Deliberately broad: one failing file must not abort a batch run.
        # logging.exception keeps the traceback in the log for diagnosis.
        logging.exception(f"Failed to add {file_path}: {e}")
        print(f"❌ 添加失败: {file_name} - {e}")
        return None
def add_directory_to_openviking(client, dir_path):
    """Add every regular file found under a directory to OpenViking.

    The directory is searched recursively with ``glob``. Files are added one
    at a time through ``add_file_to_openviking``; files for which it returns
    a falsy value are skipped.

    Args:
        client: OpenViking client forwarded to ``add_file_to_openviking``.
        dir_path: Directory to scan.

    Returns:
        List of URIs returned for the files that were added. Empty when
        ``dir_path`` is not a directory or nothing was added.
    """
    if not os.path.isdir(dir_path):
        print(f"❌ 目录不存在: {dir_path}")
        return []

    print(f"扫描目录: {dir_path}")

    # Escape the directory part so glob metacharacters in its name
    # ("[", "*", "?") are matched literally instead of as a pattern.
    file_pattern = os.path.join(glob.escape(dir_path), "**")
    # glob returns entries in arbitrary order; sort for reproducible runs.
    files_only = sorted(
        f for f in glob.glob(file_pattern, recursive=True) if os.path.isfile(f)
    )
    print(f"找到 {len(files_only)} 个文件")

    added_uris = []
    for file_path in files_only:
        uri = add_file_to_openviking(client, file_path)
        if uri:
            added_uris.append(uri)
    return added_uris
def main():
    """Command-line entry point: add every path in ``sys.argv[1:]`` to OpenViking.

    With no arguments the usage text is printed and the function returns.
    Otherwise an ``openviking.OpenViking`` client rooted at ``./data`` is
    created and initialized, each file or directory argument is added, the
    collected URIs are printed, ``client.wait_processed()`` is awaited, and
    the client is closed.

    Any ``Exception`` escaping the session is logged, printed together with
    its traceback, and swallowed.
    """
    if len(sys.argv) < 2:
        _print_usage()
        return

    try:
        logging.info("Starting resource addition session...")

        # Imported after the usage check, so the help text is still shown
        # when the openviking package is not installed.
        import openviking as ov
        logging.info("OpenViking imported")

        # Initialize OpenViking
        print("初始化 OpenViking...")
        client = ov.OpenViking(path="./data")
        client.initialize()
        logging.info("OpenViking initialized")
        print("✅ OpenViking 初始化完成")

        try:
            # Process all arguments (file/directory paths)
            all_uris = []
            for path in sys.argv[1:]:
                if os.path.isdir(path):
                    print(f"\n📁 处理目录: {path}")
                    all_uris.extend(add_directory_to_openviking(client, path))
                elif os.path.isfile(path):
                    print(f"\n📄 处理文件: {path}")
                    uri = add_file_to_openviking(client, path)
                    if uri:
                        all_uris.append(uri)
                else:
                    print(f"❌ 路径不存在: {path}")

            print("\n🎉 处理完成!")
            print(f"成功添加了 {len(all_uris)} 个资源")
            if all_uris:
                print("\n添加的资源URI:")
                for uri in all_uris:
                    print(f" - {uri}")

            # Wait for processing
            print("\n⏳ 等待异步处理完成...")
            try:
                client.wait_processed()
                logging.info("Async processing completed")
                print("✅ 异步处理完成")
            except Exception as e:
                # Best effort: a failed wait is reported but does not abort cleanup.
                logging.warning(f"等待处理时出错: {e}")
                print(f"⚠️ 处理可能仍在后台进行: {e}")
        finally:
            # Cleanup: close the initialized client even when processing raised.
            client.close()

        logging.info("Session ended")
        print("✅ 资源添加会话结束")
    except Exception as e:
        logging.error(f"Fatal error: {e}")
        print(f"💥 发生严重错误: {e}")
        import traceback
        traceback.print_exc()


def _print_usage():
    """Print the command-line usage, notes and examples to stdout."""
    print("用法:")
    print(" python add_resources.py <文件路径>")
    print(" python add_resources.py <目录路径>")
    print(" python add_resources.py <文件1> <文件2> <文件3>...")
    print("")
    print("说明:")
    print(" 所有文件将添加到固定命名空间: viking://resources/contract")
    print(" 使用 OpenViking 的 'target' 参数指定目标URI")
    print("")
    print("示例:")
    print(" python add_resources.py ./docs/contract.pdf")
    print(" python add_resources.py ./docs/")
    print(" python add_resources.py ./docs/file1.pdf ./docs/file2.docx")
# Run main() only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Expected Behavior
能正常添加内容
Actual Behavior
如bug描述
Minimal Reproducible Example
Error Logs
OpenViking Version
openviking-0.1.17.dist-info openviking_cli
Python Version
3.12.0
Operating System
Linux
Model Backend
None
Additional Context
No response
Metadata
Metadata
Assignees
Labels
Type
Projects
Status